From a6e4adf80e07a9450da1fb6309f00176cf777842 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Thu, 6 Sep 2018 16:21:02 -0400 Subject: [PATCH 1/8] Commit this file before deleting it so I have record --- numba_xnd/shared/mem_info_type.py | 88 +++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 numba_xnd/shared/mem_info_type.py diff --git a/numba_xnd/shared/mem_info_type.py b/numba_xnd/shared/mem_info_type.py new file mode 100644 index 000000000..8f6b201a7 --- /dev/null +++ b/numba_xnd/shared/mem_info_type.py @@ -0,0 +1,88 @@ +import typing + +import numba + +from .c_struct_type import CStructType +from .llvm import i64, index + + +class SimpleMemInfoModel(numba.datamodel.models.OpaqueModel): + """ + Like numba.datamodel.models.MemInfoModel, but just returns False for contains_nrt_meminfo instead of traversing types + """ + + def contains_nrt_meminfo(self): + return False + + def has_nrt_meminfo(self): + return True + + def get_nrt_meminfo(self, builder, value): + return value + + +class MemInfoType(numba.types.Type): + def __init__(self): + super().__init__(name=str(self.inner_type)) + + def __init_subclass__(cls, inner_type: typing.Type[CStructType], **kwargs): + super().__init_subclass__(**kwargs) + cls.inner_type = inner_type + + numba.extending.register_model(cls)(SimpleMemInfoModel) + + @numba.extending.infer_getattr + class Template(numba.typing.templates.AttributeTemplate): + key = cls + + def resolve_data(self, val): + return inner_type() + + def resolve_size(self, val): + return numba.types.int64 + + def resolve_refct(self, val): + return numba.types.int64 + + @numba.extending.lower_getattr(cls, "data") + def get_data_impl(context, builder, ty, val): + # context.nrt.incref(builder, cls(), val) + + return context.nrt.meminfo_data(builder, val) + + @numba.extending.lower_getattr(cls, "size") + def get_size_impl(context, builder, ty, val): + # copied from _define_nrt_meminfo_data + struct_ptr = builder.bitcast( + val, 
numba.runtime.nrtdynmod._meminfo_struct_type.as_pointer() + ) + return builder.load(builder.gep(struct_ptr, [index(0), index(4)], True)) + + @numba.extending.lower_getattr(cls, "refct") + def get_refct_impl(context, builder, ty, val): + # copied from _define_nrt_meminfo_data + struct_ptr = builder.bitcast( + val, numba.runtime.nrtdynmod._meminfo_struct_type.as_pointer() + ) + return builder.load(builder.gep(struct_ptr, [index(0), index(0)], True)) + + n_bytes = i64(inner_type.n_bytes) + + @numba.extending.intrinsic + def alloc(typingctx, n_t=numba.types.Const(1)): + n = None + if isinstance(n_t, numba.types.Const): + n = i64(n_t.value) + elif not isinstance(n_t, numba.types.Integer): + return + + sig = cls()(n_t) + + def codegen(context, builder, sig, args, n=n): + if n is None: + n = args[0] + return context.nrt.meminfo_alloc(builder, size=builder.mul(n, n_bytes)) + + return sig, codegen + + cls.alloc = alloc From 3b998cafc208f936e58b7507714d1e84312519d2 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Fri, 7 Sep 2018 09:00:10 -0400 Subject: [PATCH 2/8] Commmit --- notebooks/12. Numba Tuples.ipynb | 189 ++++++++++++++ numba_xnd/__init__.py | 7 +- numba_xnd/gumath.py | 63 +---- numba_xnd/libndtypes.py | 106 ++++---- numba_xnd/libxnd.py | 130 +++++++--- numba_xnd/pyndtypes.py | 23 +- numba_xnd/pyxnd.py | 29 ++- numba_xnd/shared/__init__.py | 4 +- numba_xnd/shared/c_function_intrinsic.py | 76 ++++++ numba_xnd/shared/c_struct_type.py | 156 ++++++++++++ numba_xnd/shared/extending.py | 305 +---------------------- numba_xnd/shared/llvm.py | 1 + numba_xnd/shared/mem_info_type.py | 88 ------- numba_xnd/shared/numba.py | 11 +- numba_xnd/shared/wrapper_type.py | 80 ++++++ tests/test_libndtypes.py | 58 ++--- tests/test_libxnd.py | 162 ++++++++---- tests/test_pyndtypes.py | 14 +- 18 files changed, 867 insertions(+), 635 deletions(-) create mode 100644 notebooks/12. 
Numba Tuples.ipynb create mode 100644 numba_xnd/shared/c_function_intrinsic.py create mode 100644 numba_xnd/shared/c_struct_type.py delete mode 100644 numba_xnd/shared/mem_info_type.py create mode 100644 numba_xnd/shared/wrapper_type.py diff --git a/notebooks/12. Numba Tuples.ipynb b/notebooks/12. Numba Tuples.ipynb new file mode 100644 index 000000000..da57447e5 --- /dev/null +++ b/notebooks/12. Numba Tuples.ipynb @@ -0,0 +1,189 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's try to get tuples to work with range https://github.com/numba/numba/issues/2771\n", + "\n", + "We can do this with recursion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I wanna be able to construct a tuple from this signature:\n", + "\n", + "`create_tuple(lambda i: ..., n)`\n", + "\n", + "So that:\n", + "\n", + "```python\n", + "create_tuple(lambda i: i**2, 3) == (0, 1, 4)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from numba import njit" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "ename": "TypingError", + "evalue": "Failed at nopython (nopython frontend)\nFailed at nopython (nopython frontend)\nFailed at nopython (nopython frontend)\nCan't unify return type from the following types: tuple(int64 x 1), tuple(int64 x 2)\nReturn of: IR name '$12.5', type 'tuple(int64 x 1)', location: \nFile \"\", line 4:\ndef create_tuple(f, n, i=0):\n \n if i == n - 1:\n return (f(i),)\n ^\nReturn of: IR name '$22.13', type 'tuple(int64 x 2)', location: \nFile \"\", line 5:\ndef create_tuple(f, n, i=0):\n \n return (f(i),)\n return (f(i),) + create_tuple(f, n, i + 1)\n ^\n[1] During: resolving callee type: recursive(type(CPUDispatcher()))\n[2] During: typing of call at (5)\n\n\nFile \"\", line 5:\ndef create_tuple(f, n, i=0):\n \n return (f(i),)\n return (f(i),) + create_tuple(f, n, i + 1)\n ^\n\n[1] 
During: resolving callee type: recursive(type(CPUDispatcher()))\n[2] During: typing of call at (5)\n\n\nFile \"\", line 5:\ndef create_tuple(f, n, i=0):\n \n return (f(i),)\n return (f(i),) + create_tuple(f, n, i + 1)\n ^\n\n[1] During: resolving callee type: type(CPUDispatcher())\n[2] During: typing of call at (14)\n\n\nFile \"\", line 14:\ndef test():\n return create_tuple(f, 3)\n ^\n\nThis is not usually a problem with Numba itself but instead often caused by\nthe use of unsupported features or an issue in resolving types.\n\nTo see Python/NumPy features supported by the latest release of Numba visit:\nhttp://numba.pydata.org/numba-doc/dev/reference/pysupported.html\nand\nhttp://numba.pydata.org/numba-doc/dev/reference/numpysupported.html\n\nFor more information about typing errors and how to debug them visit:\nhttp://numba.pydata.org/numba-doc/latest/user/troubleshoot.html#my-code-doesn-t-compile\n\nIf you think your code should work with Numba, please report the error message\nand traceback, along with a minimal reproducer at:\nhttps://github.com/numba/numba/issues/new\n", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypingError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcreate_tuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mtest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/p/numba-xnd/numba/numba/dispatcher.py\u001b[0m in \u001b[0;36m_compile_for_args\u001b[0;34m(self, *args, **kws)\u001b[0m\n\u001b[1;32m 347\u001b[0m 
\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpatch_message\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 348\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 349\u001b[0;31m \u001b[0merror_rewrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'typing'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 350\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mUnsupportedError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 351\u001b[0m \u001b[0;31m# Something unsupported is present in the user code, add help info\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/p/numba-xnd/numba/numba/dispatcher.py\u001b[0m in \u001b[0;36merror_rewrite\u001b[0;34m(e, issue_type)\u001b[0m\n\u001b[1;32m 314\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 315\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 316\u001b[0;31m \u001b[0mreraise\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 317\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 318\u001b[0m \u001b[0margtypes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/p/numba-xnd/numba/numba/six.py\u001b[0m in \u001b[0;36mreraise\u001b[0;34m(tp, value, tb)\u001b[0m\n\u001b[1;32m 656\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 657\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__traceback__\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mtb\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 658\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 659\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 660\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mTypingError\u001b[0m: Failed at nopython (nopython frontend)\nFailed at nopython (nopython frontend)\nFailed at nopython (nopython frontend)\nCan't unify return type from the following types: tuple(int64 x 1), tuple(int64 x 2)\nReturn of: IR name '$12.5', type 'tuple(int64 x 1)', location: \nFile \"\", line 4:\ndef create_tuple(f, n, i=0):\n \n if i == n - 1:\n return (f(i),)\n ^\nReturn of: IR name '$22.13', type 'tuple(int64 x 2)', location: \nFile \"\", line 5:\ndef create_tuple(f, n, i=0):\n \n return (f(i),)\n return (f(i),) + create_tuple(f, n, i + 1)\n ^\n[1] During: resolving callee type: recursive(type(CPUDispatcher()))\n[2] During: typing of call at (5)\n\n\nFile \"\", line 5:\ndef create_tuple(f, n, i=0):\n \n return (f(i),)\n return (f(i),) + create_tuple(f, n, i + 1)\n ^\n\n[1] During: resolving callee type: recursive(type(CPUDispatcher()))\n[2] During: typing of call at (5)\n\n\nFile \"\", line 5:\ndef create_tuple(f, n, i=0):\n \n return (f(i),)\n return (f(i),) + create_tuple(f, n, i + 1)\n ^\n\n[1] During: resolving callee type: type(CPUDispatcher())\n[2] During: typing of call at (14)\n\n\nFile \"\", line 14:\ndef test():\n return create_tuple(f, 3)\n ^\n\nThis is not usually a problem with 
Numba itself but instead often caused by\nthe use of unsupported features or an issue in resolving types.\n\nTo see Python/NumPy features supported by the latest release of Numba visit:\nhttp://numba.pydata.org/numba-doc/dev/reference/pysupported.html\nand\nhttp://numba.pydata.org/numba-doc/dev/reference/numpysupported.html\n\nFor more information about typing errors and how to debug them visit:\nhttp://numba.pydata.org/numba-doc/latest/user/troubleshoot.html#my-code-doesn-t-compile\n\nIf you think your code should work with Numba, please report the error message\nand traceback, along with a minimal reproducer at:\nhttps://github.com/numba/numba/issues/new\n" + ] + } + ], + "source": [ + "@njit\n", + "def create_tuple(f, n, i=0):\n", + " if i == n - 1:\n", + " return (f(i),)\n", + " return (f(i),) + create_tuple(f, n, i + 1)\n", + "\n", + "\n", + "@njit\n", + "def f(i):\n", + " return i**2\n", + "\n", + "@njit\n", + "def test():\n", + " return create_tuple(f, 3)\n", + "\n", + "test()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "OK that doesn't work... Instead we create $n$ dispatchers, one for each tuple size." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((0, 1, 8, 27), (0, 1, 4, 9))" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def create_tuple_creator(f, n):\n", + " assert n > 0\n", + " f = njit(f)\n", + " @njit\n", + " def creator(args):\n", + " return (f(0, *args),)\n", + " for i in range(1, n):\n", + " # need to pass in creator and i to lambda to capture in scope\n", + " @njit\n", + " def creator(args, creator=creator, i=i):\n", + " return creator(args) + (f(i, *args),)\n", + " return njit(lambda *args: creator(args))\n", + "\n", + "creator = create_tuple_creator(lambda i, j: i**j, 4)\n", + "\n", + "@njit\n", + "def test():\n", + " return creator(3), creator(2)\n", + "\n", + "test()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's re-create some of the examples in the numba issue: https://github.com/numba/numba/issues/2771#issuecomment-368620310" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)\n", + "(0, 2, 4, 6, 8, 10, 12, 14, 16, 18)\n", + "((True, False), (True, False), (True, False), (True, False), (True, False), (True, False), (True, False), (True, False), (True, False), (True, False))\n" + ] + } + ], + "source": [ + "range_10 = create_tuple_creator(lambda i: i, 10)\n", + "range_10_x = create_tuple_creator(lambda i, x: i*x, 10)\n", + "true_10 = create_tuple_creator(lambda _: True, 10)\n", + "false_10 = create_tuple_creator(lambda _: False, 10)\n", + "zip_10 = create_tuple_creator(lambda i, l, r: (l[i], r[i]), 10)\n", + "\n", + "@njit\n", + "def foo(x):\n", + " print(range_10()) # tuple([i for i in range(10)])\n", + " print(range_10_x(x)) # tuple([i*x for i in range(10)])\n", + " print(zip_10(true_10(), false_10())), # 
tuple(zip((True,)*10, (False,)*10))\n", + "\n", + " \n", + "foo(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/numba_xnd/__init__.py b/numba_xnd/__init__.py index e0130bf69..d7a947a06 100644 --- a/numba_xnd/__init__.py +++ b/numba_xnd/__init__.py @@ -3,9 +3,10 @@ import xnd_structinfo from . import libndtypes as _ # NOQA -from . import libxnd as _ # NOQA from . import pyndtypes as _ # NOQA -from . import pyxnd as _ # NOQA -from .gumath import register_kernel # NOQA + +# from . import libxnd as _ # NOQA +# from . import pyxnd as _ # NOQA +# from .gumath import register_kernel # NOQA llvmlite.binding.load_library_permanently(xnd_structinfo.__file__) diff --git a/numba_xnd/gumath.py b/numba_xnd/gumath.py index 2aea8454b..ca076494f 100644 --- a/numba_xnd/gumath.py +++ b/numba_xnd/gumath.py @@ -48,11 +48,6 @@ def inner(fn): return inner -@numba.njit -def create_xnd_view(x): - return libxnd.xnd_view_from_xnd(shared.null_char_ptr(), x) - - def wrap_kernel_dispatcher(n_args): """ Returns a new dispatcher that is suitable to be registered with `register_kernel_direct`. 
@@ -68,54 +63,22 @@ def something(a, b, ret): ret[()] = a + b[0] """ - # TODO: Catch exceptions and return -1 - # TODO: Switch from hard coded # args, try to avoid eval and recursive calls (both confusing) - def inner(dispatcher): - if n_args == 0: - - def fn(stack, ctx): - dispatcher() - return 0 - - elif n_args == 1: - - def fn(stack, ctx): - dispatcher(create_xnd_view(stack[0])) - return 0 - - elif n_args == 2: - - def fn(stack, ctx): - dispatcher(create_xnd_view(stack[0]), create_xnd_view(stack[1])) - return 0 + get_args = shared.create_tuple_creator(lambda i, v: v[i], n_args) - elif n_args == 3: - - def fn(stack, ctx): - dispatcher( - create_xnd_view(stack[0]), - create_xnd_view(stack[1]), - create_xnd_view(stack[2]), - ) - return 0 - - elif n_args == 4: - - def fn(stack, ctx): - dispatcher( - create_xnd_view(stack[0]), - create_xnd_view(stack[1]), - create_xnd_view(stack[2]), - create_xnd_view(stack[3]), + # TODO: use global context and see if we hit error + def inner(dispatcher): + @numba.njit + def fn(stack, ctx): + # allocate all views at once here, otherwise allocation won't work + views = libxnd.create_xnd_view(n_args) + for i in range(n_args): + libxnd.xnd_view_from_xnd_no_alloc( + views[i], shared.null_char_ptr(), stack[i] ) - return 0 - - else: - raise NotImplementedError( - "Gumath kernel generation is limited at four args currently." 
- ) + dispatcher(*get_args(views)) + return 0 - return numba.njit(fn) + return fn return inner diff --git a/numba_xnd/libndtypes.py b/numba_xnd/libndtypes.py index 6c8404357..c80a76e13 100644 --- a/numba_xnd/libndtypes.py +++ b/numba_xnd/libndtypes.py @@ -16,90 +16,110 @@ NDT_MAX_DIM = 128 -ndt_t = shared.WrappedCStruct( - "ndt_t", {"ndim": numba.types.int32, "tag": numba.types.int64}, create_wrapper=True -) -wrap_ndt, unwrap_ndt = ndt_t.wrap, ndt_t.unwrap +class NdtType( + shared.CStructType, + c_name="ndt_t", + attrs={"ndim": numba.types.int32, "tag": numba.types.int64}, +): + pass + + +class NdtWrapperType(shared.WrapperType, inner_type=NdtType): + pass + +wrap_ndt, unwrap_ndt = NdtWrapperType.wrap, NdtWrapperType.unwrap -ndt_ndarray_t = shared.WrappedCStruct( - "ndt_ndarray_t", - { + +class NdtNdarrayType( + shared.CStructType, + c_name="ndt_ndarray_t", + attrs={ "ndim": numba.types.int32, "shape": numba.types.UniTuple(numba.types.int64, NDT_MAX_DIM), }, -) -create_ndt_ndarray = ndt_ndarray_t.create -ndt_slice_t = shared.WrappedCStruct( - "ndt_slice_t", - {"start": numba.types.int64, "stop": numba.types.int64, "step": numba.types.int64}, -) -ndt_context_t = shared.WrappedCStruct( - "ndt_context_t", - { +): + pass + + +alloc_ndt_ndarray = NdtNdarrayType.alloc + + +class NdtSliceType( + shared.CStructType, + c_name="ndt_slice_t", + attrs={ + "start": numba.types.int64, + "stop": numba.types.int64, + "step": numba.types.int64, + }, +): + pass + + +class NdtContextType( + shared.CStructType, + c_name="ndt_context_t", + attrs={ "flags": numba.types.uint32, "err": numba.types.int32, "msg": numba.types.int32, "ConstMsg": shared.c_string_type, "DynamicMsg": shared.c_string_type, }, -) -create_ndt_context = ndt_context_t.create +): + pass + -# for gumath kernel -@numba.extending.unbox(ndt_context_t.NumbaType) -def unbox_ndt_context(typ, val, c): - return numba.extending.NativeValue( - c.builder.bitcast(val, ndt_context_t.llvm_ptr_type) - ) +alloc_ndt_context = 
NdtContextType.alloc -ndt_as_ndarray = shared.WrappedCFunction( - "ndt_as_ndarray", - numba.types.int32, - (ndt_ndarray_t.numba_type, ndt_t.numba_type, ndt_context_t.numba_type), +ndt_as_ndarray = shared.CFunctionIntrinsic( + "ndt_as_ndarray", numba.types.int32, (NdtNdarrayType, NdtType, NdtContextType) ) -ndt_is_concrete = shared.WrappedCFunction( - "ndt_is_concrete", numba.types.int32, (ndt_t.numba_type,) +ndt_is_concrete = shared.CFunctionIntrinsic( + "ndt_is_concrete", numba.types.int32, (NdtType,) ) -ndt_err_occurred = shared.WrappedCFunction( - "ndt_err_occurred", numba.types.boolean, (ndt_context_t.numba_type,) +ndt_err_occurred = shared.CFunctionIntrinsic( + "ndt_err_occurred", numba.types.boolean, (NdtContextType,) ) -ndt_context_msg = shared.WrappedCFunction( - "ndt_context_msg", shared.c_string_type, (ndt_context_t.numba_type,) +ndt_context_msg = shared.CFunctionIntrinsic( + "ndt_context_msg", shared.c_string_type, (NdtContextType,) ) @numba.njit def ndt_static_context(): - ctx = create_ndt_context() - ctx.flags = 0 - ctx.err = 0 - ctx.msg = 0 - ctx.ConstMsg = shared.c_string_const("Success") + # pylint: disable=E + ctx = alloc_ndt_context(1) + ctx.flags(0, 0) + ctx.err(0, 0) + ctx.msg(0, 0) + ctx.ConstMsg(0, shared.c_string_const("Success")) return ctx -@numba.extending.overload_attribute(ndt_t.WrapperNumbaType, "shape") +@numba.extending.overload_attribute(NdtWrapperType, "shape") def ndt_wrapper_shape(t): ndim = shared.get_ndim(t.ndt_value) def get(t): - a = create_ndt_ndarray() + # pylint: disable=E + a = alloc_ndt_ndarray(1) ctx = ndt_static_context() ndt_as_ndarray(a, unwrap_ndt(t), ctx) if ndt_err_occurred(ctx): shared.print_c_string(ndt_context_msg(ctx)) raise RuntimeError("ndt_as_ndarray failed.") - return a.shape[:ndim] + return a.shape(0)[:ndim] return get -@numba.extending.overload_attribute(ndt_t.WrapperNumbaType, "ndim") +@numba.extending.overload_attribute(NdtWrapperType, "ndim") def ndt_wrapper_ndim(t): ndim = shared.get_ndim(t.ndt_value) 
diff --git a/numba_xnd/libxnd.py b/numba_xnd/libxnd.py index fcacdc47d..690d9d7a8 100644 --- a/numba_xnd/libxnd.py +++ b/numba_xnd/libxnd.py @@ -1,3 +1,5 @@ +import types + import llvmlite import ndtypes import xnd @@ -24,12 +26,6 @@ "view": xnd_t.numba_type, }, embedded={"view"}, - create_wrapper=True, -) -create_xnd_view, wrap_xnd_view, unwrap_xnd_view = ( - xnd_view_t.create, - xnd_view_t.wrap, - xnd_view_t.unwrap, ) @@ -90,22 +86,21 @@ ) xnd_view_from_xnd = shared.WrappedCFunction( "xnd_view_from_xnd", - xnd_view_t.numba_type, - (shared.c_string_type, xnd_t.numba_type), - accepts_return=True, + numba.types.void, + (xnd_view_t.numba_type, shared.c_string_type, xnd_t.numba_type), ) xnd_view_subscript = shared.WrappedCFunction( "xnd_view_subscript", - xnd_view_t.numba_type, + numba.types.void, ( + xnd_view_t.numba_type, xnd_view_t.numba_type, xnd_index_t.numba_type, numba.types.intc, libndtypes.ndt_context_t.numba_type, ), - accepts_return=True, ) xnd_equal = shared.WrappedCFunction( "xnd_equal", @@ -120,22 +115,81 @@ ) -# for gumath kernel -@numba.extending.unbox(xnd_t.NumbaType) -def unbox_xnd(typ, val, c): - return numba.extending.NativeValue(c.builder.bitcast(val, xnd_t.llvm_ptr_type)) +class XndViewMemInfo(numba.types.MemInfoPointer): + def __init__(self): + super().__init__(xnd_view_t.numba_type) + + +xnd_view_mem_info = XndViewMemInfo() +numba.extending.register_model(XndViewMemInfo)(numba.datamodel.models.MemInfoModel) + + +@numba.extending.lower_getattr(XndViewMemInfo, "data") +def xnd_view_mem_info_get_data_impl(context, builder, ty, val): + return context.nrt.meminfo_data(builder, val) + + +@numba.extending.lower_getattr(XndViewMemInfo, "size") +def xnd_view_mem_info_get_size_impl(context, builder, ty, val): + # copied from _define_nrt_meminfo_data + struct_ptr = builder.bitcast( + val, numba.runtime.nrtdynmod._meminfo_struct_type.as_pointer() + ) + return builder.load( + builder.gep(struct_ptr, [shared.index(0), shared.index(4)], True) + ) + 
+@numba.extending.lower_getattr(XndViewMemInfo, "refct") +def xnd_view_mem_info_get_size_impl(context, builder, ty, val): + # copied from _define_nrt_meminfo_data + struct_ptr = builder.bitcast( + val, numba.runtime.nrtdynmod._meminfo_struct_type.as_pointer() + ) + return builder.load( + builder.gep(struct_ptr, [shared.index(0), shared.index(0)], True) + ) -@numba.extending.overload_attribute(xnd_view_t.WrapperNumbaType, "type") + +def create_xnd_view_mem_info_codegen(context, builder, sig, args): + # copied from jitclass imp_dtor + llvoidptr = context.get_value_type(numba.types.voidptr) + llsize = context.get_value_type(numba.types.uintp) + dtor_ftype = llvmlite.ir.FunctionType( + llvmlite.ir.VoidType(), [llvoidptr, llsize, llvoidptr] + ) + + dtor = builder.module.get_or_insert_function(dtor_ftype, name="xnd_view_clear") + return context.nrt.meminfo_alloc_dtor( + builder, context.get_constant(numba.types.uintp, xnd_view_t.n_bytes), dtor + ) + + +@numba.extending.intrinsic +def create_xnd_view_mem_info(typingctx): + return xnd_view_mem_info(), create_xnd_view_mem_info_codegen + + +xnd_view_mem_info_wrapper = shared.WrapperType( + xnd_view_mem_info, "XndViewMemInfo", numba.datamodel.models.MemInfoModel +) +xnd_view_mem_info_wrapper.WrapperNumbaType.dtype = xnd_view_t.numba_type +wrap_xnd_view_mem_info, unwrap_xnd_view_mem_info = ( + xnd_view_mem_info_wrapper.wrap, + xnd_view_mem_info_wrapper.unwrap, +) + + +@numba.extending.overload_attribute(xnd_view_mem_info_wrapper.WrapperNumbaType, "type") def xnd_wrapper_type(x_wrapper_t): def get(x_wrapper): - x_v = unwrap_xnd_view(x_wrapper) - return libndtypes.wrap_ndt(x_v.view.type, x_wrapper) + x_v = unwrap_xnd_view_mem_info(x_wrapper) + return libndtypes.wrap_ndt(x_v.data.view.type, x_wrapper) return get -@numba.extending.overload_attribute(xnd_view_t.WrapperNumbaType, "ndim") +@numba.extending.overload_attribute(xnd_view_mem_info_wrapper.WrapperNumbaType, "ndim") def xnd_wrapper_ndim(x_wrapped): def get(x_wrapped): 
return x_wrapped.type.ndim @@ -143,14 +197,14 @@ def get(x_wrapped): return get -@numba.extending.overload_attribute(xnd_view_t.WrapperNumbaType, "value") +@numba.extending.overload_attribute(xnd_view_mem_info_wrapper.WrapperNumbaType, "value") def xnd_wrapper_value(x_wrapper): n = x_wrapper.ndt_value if n == ndtypes.ndt("int64"): def get(x_wrapper): - x = unwrap_xnd_view(x_wrapper).view + x = unwrap_xnd_view_mem_info(x_wrapper).data.view return shared.ptr_load_type(numba.types.int64, x.ptr) return get @@ -158,7 +212,7 @@ def get(x_wrapper): if n == ndtypes.ndt("float64"): def get(x_wrapper): - x = unwrap_xnd_view(x_wrapper).view + x = unwrap_xnd_view_mem_info(x_wrapper).data.view return shared.ptr_load_type(numba.types.float64, x.ptr) return get @@ -174,22 +228,23 @@ def ndtypes_index(t): @shared.overload_any("getitem") def xnd_wrapper_getitem(x_wrapper, index): - if not isinstance(x_wrapper, xnd_view_t.WrapperNumbaType): + if not isinstance(x_wrapper, xnd_view_mem_info_wrapper.WrapperNumbaType): return if isinstance(index, numba.types.Integer): resulting_type = str(ndtypes_index(x_wrapper.ndt_value)) def getitem(x_wrapper, index): - x_v = unwrap_xnd_view(x_wrapper) + x_v = unwrap_xnd_view_mem_info(x_wrapper).data x_index = create_xnd_index() x_index.tag = XND_KEY_INDEX x_index.Index = index ctx = libndtypes.ndt_static_context() - ret_x_v = xnd_view_subscript(x_v, x_index, shared.i64_to_i32(1), ctx) - assert not shared.ptr_is_none(ret_x_v.view.ptr) + ret_x_v_mi = create_xnd_view_mem_info() + xnd_view_subscript(ret_x_v_mi.data, x_v, x_index, shared.i64_to_i32(1), ctx) + assert not shared.ptr_is_none(ret_x_v_mi.data.view.ptr) assert not libndtypes.ndt_err_occurred(ctx) - return wrap_xnd_view(ret_x_v, resulting_type) + return wrap_xnd_view_mem_info(ret_x_v_mi, resulting_type) return getitem elif isinstance(index, numba.types.BaseTuple): @@ -205,30 +260,35 @@ def getitem(x_wrapper, index): return lambda x_wrapper, index: x_wrapper def getitem(x_wrapper, index): - x_v 
= unwrap_xnd_view(x_wrapper) + x_v = unwrap_xnd_view_mem_info(x_wrapper).data x_index = create_xnd_index(n_items) for i in range(n_items): x_index_cur = x_index[i] x_index_cur.tag = XND_KEY_INDEX x_index_cur.Index = index[i] ctx = libndtypes.ndt_static_context() - ret_x_v = xnd_view_subscript(x_v, x_index, shared.i64_to_i32(n_items), ctx) - assert not shared.ptr_is_none(ret_x_v.view.ptr) + ret_x_v_mi = create_xnd_view_mem_info() + xnd_view_subscript( + ret_x_v_mi.data, x_v, x_index, shared.i64_to_i32(n_items), ctx + ) + assert not shared.ptr_is_none(ret_x_v_mi.data.view.ptr) assert not libndtypes.ndt_err_occurred(ctx) - return wrap_xnd_view(ret_x_v, resulting_type) + return wrap_xnd_view_mem_info(ret_x_v_mi, resulting_type) return getitem @shared.overload_any("setitem") def xnd_wrapper_setitem(x_wrapper, index, value): - if not isinstance(x_wrapper, xnd_view_t.WrapperNumbaType): + if not isinstance(x_wrapper, xnd_view_mem_info_wrapper.WrapperNumbaType): return if value == numba.types.int64: def setitem(x_wrapper, index, value): shared.ptr_store_type( - numba.types.int64, unwrap_xnd_view(x_wrapper[index]).view.ptr, value + numba.types.int64, + unwrap_xnd_view_mem_info(x_wrapper[index]).data.view.ptr, + value, ) return setitem @@ -237,7 +297,9 @@ def setitem(x_wrapper, index, value): def setitem(x_wrapper, index, value): shared.ptr_store_type( - numba.types.float64, unwrap_xnd_view(x_wrapper[index]).view.ptr, value + numba.types.float64, + unwrap_xnd_view_mem_info(x_wrapper[index]).data.view.ptr, + value, ) return setitem diff --git a/numba_xnd/pyndtypes.py b/numba_xnd/pyndtypes.py index 91a6d259c..809546c59 100644 --- a/numba_xnd/pyndtypes.py +++ b/numba_xnd/pyndtypes.py @@ -4,23 +4,30 @@ from . 
import libndtypes, shared -ndt_object = shared.WrappedCStruct("NdtObject", {"ndt": libndtypes.ndt_t.numba_type}) +class NdtObjectType( + shared.CStructType, + c_name="NdtObject", + attrs={"ndt": libndtypes.NdtType(nrt_allocated=False)}, +): + pass -ndt_from_type = shared.WrappedCFunction( - "ndt_from_type", ndt_object.numba_type, (libndtypes.ndt_t.numba_type,) -) + +# ndt_from_type = shared.CFunctionIntrinsic( +# "ndt_from_type", NdtObjectType(), (libndtypes.NdtType(),) +# ) @numba.extending.typeof_impl.register(ndtypes.ndt) def typeof_ndt(val, c): - return libndtypes.ndt_t.WrapperNumbaType(val) + return libndtypes.NdtWrapperType(False, val) -@numba.extending.unbox(libndtypes.ndt_t.WrapperNumbaType) +@numba.extending.unbox(libndtypes.NdtWrapperType) def unbox_ndt_wrapper(typ, o, c): - n_o = c.builder.bitcast(o, ndt_object.llvm_ptr_type) - n = ndt_object.getattr_impl(None, c.builder, None, n_o, "ndt") + n = NdtObjectType.getattr_impl( + builder=c.builder, attr="ndt", struct=o, i=shared.index(0) + ) return numba.extending.NativeValue(n) diff --git a/numba_xnd/pyxnd.py b/numba_xnd/pyxnd.py index acb00f58e..3b3965caf 100644 --- a/numba_xnd/pyxnd.py +++ b/numba_xnd/pyxnd.py @@ -23,26 +23,33 @@ @numba.extending.typeof_impl.register(xnd.xnd) def typeof_xnd(val, c): - return libxnd.xnd_view_t.WrapperNumbaType(val.type) + return libxnd.xnd_view_mem_info_wrapper.WrapperNumbaType(val.type) -@numba.extending.unbox(libxnd.xnd_view_t.WrapperNumbaType) +@numba.extending.unbox(libxnd.xnd_view_mem_info_wrapper.WrapperNumbaType) def unbox_xnd_wrapper(typ, o, c): - x_o = c.builder.bitcast(o, xnd_object.llvm_ptr_type) - x = xnd_object.getattr_impl(None, c.builder, None, x_o, "xnd") - x_v = libxnd.xnd_view_from_xnd.codegen(c.builder, (o, x)) - return numba.extending.NativeValue(x_v) + c.pyapi.incref(o) + x_v_m_i = libxnd.create_xnd_view_mem_info_codegen(c.context, c.builder, None, None) + x_v = libxnd.xnd_view_mem_info_get_data_impl(c.context, c.builder, None, x_v_m_i) + + x = 
xnd_object.getattr_impl(None, c.builder, None, o, "xnd") + libxnd.xnd_view_from_xnd.codegen(c.builder, (x_v, o, x)) + c.context.nrt.incref(c.builder, typ, x_v_m_i) + return numba.extending.NativeValue(x_v_m_i) -@numba.extending.box(libxnd.xnd_view_t.WrapperNumbaType) -def box_xnd_wrapper(typ, x, c): +@numba.extending.box(libxnd.xnd_view_mem_info_wrapper.WrapperNumbaType) +def box_xnd_wrapper(typ, x_v_m_i, c): builder = c.builder + x_v = libxnd.xnd_view_mem_info_get_data_impl(c.context, builder, None, x_v_m_i) + # This means the view is from `xnd_view_from_xnd` and we have access to # the python object. We can just return the existing python object - flags = libxnd.xnd_view_t.getattr_impl(None, builder, None, x, "flags") + # TOOD: This could get moved to `xnd` to handle this set of flags + flags = libxnd.xnd_view_t.getattr_impl(None, builder, None, x_v, "flags") flags_is_0 = builder.icmp_unsigned("==", flags, flags.type(0)) - o = libxnd.xnd_view_t.getattr_impl(None, builder, None, x, "obj") + o = libxnd.xnd_view_t.getattr_impl(None, builder, None, x_v, "obj") o_is_not_null = builder.icmp_unsigned("!=", o, o.type(None)) has_original_object = builder.and_(flags_is_0, o_is_not_null) @@ -52,7 +59,7 @@ def box_xnd_wrapper(typ, x, c): with then: builder.store(o, o_ptr) with otherwise: - x_o = xnd_from_xnd_view.codegen(builder, (x,)) + x_o = xnd_from_xnd_view.codegen(builder, (x_v,)) o = builder.bitcast(x_o, shared.ptr(shared.char)) builder.store(o, o_ptr) o = builder.load(o_ptr) diff --git a/numba_xnd/shared/__init__.py b/numba_xnd/shared/__init__.py index d61b3993c..973f9351a 100644 --- a/numba_xnd/shared/__init__.py +++ b/numba_xnd/shared/__init__.py @@ -1,5 +1,7 @@ -# expose +from .c_function_intrinsic import * # NOQA +from .c_struct_type import * # NOQA from .extending import * # NOQA from .helpers import * # NOQA from .llvm import * # NOQA from .numba import * # NOQA +from .wrapper_type import * # NOQA diff --git a/numba_xnd/shared/c_function_intrinsic.py 
b/numba_xnd/shared/c_function_intrinsic.py new file mode 100644 index 000000000..1a45e208d --- /dev/null +++ b/numba_xnd/shared/c_function_intrinsic.py @@ -0,0 +1,76 @@ +import inspect + +import llvmlite + +import numba + +from .c_struct_type import CStructType +from .extending import llvm_type_from_numba_type +from .llvm import char, ptr + + +class CFunctionIntrinsic(numba.extending._Intrinsic): + """ + Creates an intrinsic for a C function. Also exposes the underlying codegen, if you want + to use that from a low level. + """ + + def __init__(self, func_name, numba_ret_type, numba_arg_types): + assert isinstance(numba_arg_types, tuple) + assert isinstance(numba_ret_type, numba.types.Type) + self.arg_types = [] + for t in numba_arg_types: + # args should be either numba type instances or subclasses of CStructType + if isinstance(t, numba.types.Type): + self.arg_types.append(llvm_type_from_numba_type(t)) + elif issubclass(t, CStructType): + self.arg_types.append(ptr(char)) + else: + raise TypeError("Should either be numba type or CStruct type", t) + + self.func_name = func_name + self.numba_ret_type = numba_ret_type + self.numba_arg_types = numba_arg_types + + self.ret_type = llvm_type_from_numba_type(self.numba_ret_type) + + super().__init__(func_name, self.create_impl()) + self._register() + + def __str__(self): + return f"{self.func_name}" + + def codegen(self, builder, args): + return builder.call( + builder.module.get_or_insert_function( + llvmlite.ir.FunctionType(self.ret_type, self.arg_types), + name=self.func_name, + ), + args, + ) + + def create_impl(self): + def impl(typingctx, *numba_arg_types): + for actual_type, sig_type in zip(numba_arg_types, self.numba_arg_types): + if not (actual_type == sig_type or isinstance(actual_type, sig_type)): + return + + return ( + self.ret_type(*numba_arg_types), + lambda context, builder, sig, args: self.codegen(builder, args), + ) + + impl.__name__ = self.func_name + # change the function signature to take positional 
instead of variadic arguments + # so that numba type inference will work on it properly + # This should be like if you defined the intrinsic function explicitly with all the arguments + impl.__signature__ = inspect.signature(impl).replace( + parameters=[ + inspect.Parameter( + f"_p{i}", # arg name doesn't matter + inspect.Parameter.POSITIONAL_OR_KEYWORD, + ) + for i in range(len(self.numba_arg_types) + 1) + ] + ) + return impl diff --git a/numba_xnd/shared/c_struct_type.py b/numba_xnd/shared/c_struct_type.py new file mode 100644 index 000000000..478746aa4 --- /dev/null +++ b/numba_xnd/shared/c_struct_type.py @@ -0,0 +1,156 @@ +import types + +import llvmlite.ir + +import numba.extending +import numba.types +import numba.typing.templates +import xnd_structinfo + +from .extending import llvm_type_from_numba_type +from .llvm import char_ptr, i64, index, ptr + +SIZEOF_MEMINFO = 20 # in bytes + + +class CStructModel(numba.datamodel.models.OpaqueModel): + def contains_nrt_meminfo(self): + return False + + def has_nrt_meminfo(self): + return self.fe_type.nrt_allocated + + def get_nrt_meminfo(self, builder: llvmlite.ir.IRBuilder, value): + """ + nrt meminfo pointer begins before the allocated data pointer. So we subtract size of meminfo to get to this pointer + """ + # move back `SIZEOF_MEMINFO` bytes (since this is ptr(char)) + return builder.gep(value, [index(-SIZEOF_MEMINFO)]) + + +class CStructType(numba.types.Type): + """ + Creates a Numba type for the C struct called `c_name` + + It registers typing and lowering for it's attributes. + `attrs` should be a dictionary mapping attribute names to the numba type of that attribute. + + `embedded` is a set of attribute names that actually are embedded in the struct instead of referenced. + So if `hi` is an attribute that has a numba type with a data model of `some_other_thing*`, then if `hi` + is in `embedded`, this struct has `some_other_thing` embedded in it, instead of a pointer to it. 
+ + Supports `t.(i)` for getting values and `t.(i, val)` for setting values. + """ + + muatable = True + + def __init__(self, nrt_allocated): + self.nrt_allocated = nrt_allocated + super().__init__(name=f"CStruct({self.c_name}, {nrt_allocated})") + + def __init_subclass__(cls, c_name, attrs, embedded=tuple(), **kwargs): + super().__init_subclass__(**kwargs) + + cls.c_name, cls.attrs, cls.embedded = c_name, attrs, embedded + cls.n_bytes = getattr(xnd_structinfo, f"sizeof_{c_name}")() + + numba.extending.register_model(cls)(CStructModel) + + resolvers = { + field: cls._type_and_lower_field(field, numba_type) + for field, numba_type in attrs.items() + } + + @numba.extending.infer_getattr + class CStructTemplate(numba.typing.templates.AttributeTemplate): + key = cls + + def resolve(self, value, attr): + if attr in resolvers: + return resolvers[attr](self, value) + + cls.alloc = numba.extending.intrinsic(cls._alloc) + + @classmethod + def _type_and_lower_field(cls, field: str, numba_type: numba.types.Type): + # Validate input type + if not isinstance(numba_type, numba.types.Type): + raise TypeError( + f"{cls.c_name}.{field}: {numba_type} should be an instance of a numba type" + ) + + # Type function + fn_key = f"{cls.c_name}.{field}" + + @numba.typing.templates.bound_function(fn_key) + def resolve(self, ty, args, kws): + print("Trying", fn_key, ty, args, kws) + if kws or not args or not isinstance(args[0], numba.types.Integer): + return + print("Resolved!") + # getting value + if len(args) == 1: + return numba.typing.templates.signature(numba_type, *args) + # setting value + if len(args) == 2: # and args[1] == numba_type: + print(args) + return numba.typing.templates.signature( + numba.types.none, args[0], numba_type + ) + print("no resolved", len(args), args[1], numba_type) + + # Lower function + @numba.targets.imputils.lower_builtin(fn_key, cls, numba.types.Integer) + def lower_get(context, builder, sig, args): + return cls.getattr_impl(builder, field, *args) + + 
@numba.targets.imputils.lower_builtin( + fn_key, cls, numba.types.Integer, type(numba_type) + ) + def lower_set(context, builder, sig, args): + return cls.setattr_impl(builder, field, *args) + + return resolve + + @classmethod + def _call_get_function(cls, builder, attr, struct, i, is_embedded): + attr_llvm_type = llvm_type_from_numba_type(cls.attrs[attr]) + ret_type = attr_llvm_type if is_embedded else ptr(attr_llvm_type) + return builder.call( + builder.module.get_or_insert_function( + llvmlite.ir.FunctionType(ret_type, [char_ptr]), + name=f"get_{cls.c_name}_{attr}", + ), + [builder.gep(struct, [i])], + ) + + @classmethod + def _alloc(cls, typingctx, n_t): + if not isinstance(n_t, numba.types.Integer): + return + + sig = cls(nrt_allocated=True)(n_t) + + def codegen(context, builder, sig, args): + n, = args + mi = context.nrt.meminfo_alloc( + builder, size=builder.mul(n, i64(cls.n_bytes)) + ) + # move forward to data which is allocated after meminfo + return builder.gep(mi, [index(SIZEOF_MEMINFO)]) + + return sig, codegen + + @classmethod + def getattr_impl(cls, builder, attr, struct, i): + is_embedded = attr in cls.embedded + ret = cls._call_get_function(builder, attr, struct, i, is_embedded) + return ret if is_embedded else builder.load(ret) + + @classmethod + def setattr_impl(cls, builder, attr, struct, i, value): + is_embedded = attr in cls.embedded + builder.store( + value=builder.load(value) if is_embedded else value, + ptr=cls._call_get_function(builder, attr, struct, i, is_embedded), + ) diff --git a/numba_xnd/shared/extending.py b/numba_xnd/shared/extending.py index fa8b54733..6d37793f6 100644 --- a/numba_xnd/shared/extending.py +++ b/numba_xnd/shared/extending.py @@ -1,238 +1,15 @@ +import functools import inspect import llvmlite.ir -import ndtypes import numba.extending import numba.types import numba.typing.templates -import xnd_structinfo - -from .llvm import char, ptr - - -def create_numba_type(name, llvm_type): - """ - Creates an empty type class 
with a name and returns and instance of it. - """ - - class InnerType(numba.types.Type): - def __init__(self): - super().__init__(name=name) - - @numba.extending.register_model(InnerType) - class InnerModel(numba.extending.models.PrimitiveModel): - def __init__(self, dmm, fe_type): - super().__init__(dmm, fe_type, llvm_type) - - return InnerType() - - -# TODO: Make this a subclass of numba type, however each instance of it should have -# be a different singleton type -class WrappedCStruct: - def __init__(self, name, attrs, embedded=tuple(), create_wrapper=False): - """ - Creates a Numba type and model for the c struct `name` - - It also registers typing and lowering for it's attributes. - `attrs` should be a dictionary mapping attribute names to the numba type of that attribute. - - `embedded` is a set of attribute names that actually are embedded in the struct instead of referenced. - So if `hi` is an attribute that has a numba type with a data model of `some_other_thing*`, then if `hi` - is in `embedded`, this struct has `some_other_thing` embedded in it, instead of a pointer to it. - - If `create_wrapper` is true, then this also creates a wrapper type that has same datamodel, but requires a - `ndt_type` attribute that holds a ndtypes.ndt instance. 
- """ - for t in attrs.values(): - assert isinstance(t, numba.types.Type) - - self.name, self.attrs, self.embedded = name, attrs, embedded - - self.n_bytes = getattr(xnd_structinfo, f"sizeof_{name}")() - self.llvm_type = llvmlite.ir.ArrayType(char, self.n_bytes) - - self.llvm_ptr_type = ptr(self.llvm_type) - - self.NumbaType = self._create_numba_type() - self.numba_type = self.NumbaType() - - self.NumbaModel = numba.extending.register_model(self.NumbaType)( - self._create_numba_model() - ) - - numba.extending.infer_getattr(self._create_getattr_template()) - - numba.extending.lower_getattr_generic(self.NumbaType)(self.getattr_impl) - numba.extending.lower_setattr_generic(self.NumbaType)(self.settattr_impl) - self.create = numba.extending.intrinsic(support_literals=True)(self.create_impl) - - numba.extending.type_callable("getitem")(self.type_getitem) - numba.targets.imputils.lower_builtin( - "getitem", self.NumbaType, numba.types.Integer - )(self.lower_getitem) - - if not create_wrapper: - return - - self.WrapperNumbaType = self.create_wrapper_numba_type() - numba.extending.register_model(self.WrapperNumbaType)(self.NumbaModel) - numba.extending.lower_cast(self.NumbaType, self.WrapperNumbaType)( - lambda context, builder, fromty, toty, val: val - ) - - self.wrap = numba.extending.intrinsic(support_literals=True)(self.wrap_impl) - self.unwrap = numba.extending.intrinsic(self.unwrap_impl) - - def __str__(self): - return f"{self.name}({self.llvm_type})" - - def _create_numba_type(self): - name = self.name - - class NumbaType(numba.types.Type): - def __init__(self): - super().__init__(name=name) - - return NumbaType - - def _create_numba_model(self): - be_type = self.llvm_ptr_type - llvm_type = self.llvm_type - - class NumbaModel(numba.extending.models.PrimitiveModel): - def __init__(self, dmm, fe_type): - super().__init__(dmm, fe_type, be_type) - - def get_return_type(self): - return llvm_type - - def get_data_type(self): - return llvm_type - - def as_return(self, 
builder, value): - return builder.load(value) - - def from_return(self, builder, value): - return numba.cgutils.alloca_once_value(builder, value) - - def as_data(self, builder, value): - return builder.load(value) - - def from_data(self, builder, value): - return numba.cgutils.alloca_once_value(builder, value) - - return NumbaModel - - def _create_getattr_template(self): - attrs = self.attrs - - class GetattrTemplate(numba.typing.templates.AttributeTemplate): - key = self.NumbaType - - def generic_resolve(self, val, attr): - if attr in attrs: - return attrs[attr] - - return GetattrTemplate - - def _call_get_function(self, builder, value, attr, is_embedded): - attr_llvm_type = llvm_type_from_numba_type(self.attrs[attr]) - ret_type = attr_llvm_type if is_embedded else ptr(attr_llvm_type) - fn = builder.module.get_or_insert_function( - llvmlite.ir.FunctionType(ret_type, [self.llvm_ptr_type]), - name=f"get_{self.name}_{attr}", - ) - return_value = builder.call(fn, [value]) - return return_value - - def getattr_impl(self, context, builder, typ, value, attr): - is_embedded = attr in self.embedded - ret = self._call_get_function(builder, value, attr, is_embedded) - return ret if is_embedded else builder.load(ret) - - def settattr_impl(self, context, builder, sig, args, attr): - target, value = args - is_embedded = attr in self.embedded - builder.store( - value=builder.load(value) if is_embedded else value, - ptr=self._call_get_function(builder, target, attr, is_embedded), - ) - - def create_impl(self, typingctx, n_t=numba.types.Const(1)): - if not isinstance(n_t, numba.types.Const): - return - - def codegen(context, builder, sig, args): - return numba.cgutils.alloca_once(builder, self.llvm_type, n_t.value) - - return self.numba_type(numba.types.int64), codegen - - def type_getitem(self, context): - def typer(val_t, i_t): - if val_t == self.numba_type and isinstance(i_t, numba.types.Integer): - return self.numba_type - - return typer - - def lower_getitem(self, context, 
builder, sig, args): - x, i = args - return builder.gep(x, [i]) - - def create_wrapper_numba_type(self): - name = self.name - numba_type = self.numba_type - - class WrapperNumbaType(numba.types.Type): - def __init__(self, n): - assert isinstance(n, ndtypes.ndt) - self.ndt_value = n - super().__init__(f"{name}Wrapper({n})") - - def can_convert_from(self, typingctx, other): - """ - Support conversions from unwrapped to wrapped types implicitly. - """ - if other == numba_type: - return numba.typeconv.Conversion.promote - - return WrapperNumbaType - - def wrap_impl(self, typingctx, inner_t, ndt_type_t): - if inner_t != self.numba_type: - return - # supports passing in strings as ndt's - if isinstance(ndt_type_t, numba.types.Const): - n = ndtypes.ndt(ndt_type_t.value) - arg_type = numba.types.string - elif hasattr(ndt_type_t, "ndt_value"): - n = ndt_type_t.ndt_value - arg_type = ndt_type_t - else: - return - - sig = self.WrapperNumbaType(n)(self.numba_type, arg_type) - - def codegen(context, builder, sig, args): - return args[0] - - return sig, codegen - - def unwrap_impl(self, typingctx, wrapper_t): - if not isinstance(wrapper_t, self.WrapperNumbaType): - return - - sig = self.numba_type(wrapper_t) - - def codegen(context, builder, sig, args): - return args[0] - - return sig, codegen def llvm_type_from_numba_type(numba_type): - datamodel = numba.datamodel.registry.default_manager.lookup(numba_type) + datamodel = numba.datamodel.registry.default_manager[numba_type] return datamodel.get_value_type() @@ -284,75 +61,17 @@ def typer(*args, dispatcher=dispatcher): return inner -class WrappedCFunction(numba.extending._Intrinsic): +def create_tuple_creator(f, n): """ - Creates an intrinsic for a C function. Also exposes the underlying codegen, if you want - to use that from a low level. 
+ To work around https://github.com/numba/numba/issues/2771 """ + assert n > 0 + f = numba.njit(f) - def __init__( - self, func_name, numba_ret_type, numba_arg_types, accepts_return=False - ): - assert isinstance(numba_arg_types, tuple) - for t in (numba_ret_type, *numba_arg_types): - assert isinstance(t, numba.types.Type) - - self.func_name = func_name - self.numba_ret_type = numba_ret_type - self.numba_arg_types = numba_arg_types - self.accepts_return = accepts_return - self.sig = self.numba_ret_type(*self.numba_arg_types) + creator = functools.reduce( + lambda creator, i: numba.njit(lambda args: creator(args) + (f(i, *args),)), + range(1, n), + numba.njit(lambda args: (f(0, *args),)), + ) - self.ret_type = llvm_type_from_numba_type(self.numba_ret_type) - self.arg_types = [llvm_type_from_numba_type(t) for t in self.numba_arg_types] - - # c functions that return struct values sometimes actually take in a pointer to that struct as the first argument - if accepts_return: - self.arg_types = (self.ret_type, *self.arg_types) - self.ret_type = llvmlite.ir.VoidType() - - super().__init__(func_name, self.create_impl()) - self._register() - - def __str__(self): - return f"{self.func_name}" - - def codegen(self, builder, args): - if self.accepts_return: - ret_ptr = builder.alloca(self.arg_types[0].pointee) - args = (ret_ptr, *args) - res = builder.call( - builder.module.get_or_insert_function( - llvmlite.ir.FunctionType(self.ret_type, self.arg_types), - name=self.func_name, - ), - args, - ) - if self.accepts_return: - return ret_ptr - return res - - def create_impl(self): - def impl(typingctx, *numba_arg_types_): - if numba_arg_types_ != self.numba_arg_types: - return - - return ( - self.sig, - lambda context, builder, sig, args: self.codegen(builder, args), - ) - - impl.__name__ = self.func_name - # change the function signature to take positional instead of variadic arguments - # so that numba type inference will work on it properly - # This should be like if you defined 
the intrinsic function explicitly with all the arguments - impl.__signature__ = inspect.signature(impl).replace( - parameters=[ - inspect.Parameter( - f"_p{i}", # arg name doesn't matter - inspect.Parameter.POSITIONAL_OR_KEYWORD, - ) - for i in range(len(self.numba_arg_types) + 1) - ] - ) - return impl + return numba.njit(lambda *args: creator(args)) diff --git a/numba_xnd/shared/llvm.py b/numba_xnd/shared/llvm.py index aae56defc..54e1fb396 100644 --- a/numba_xnd/shared/llvm.py +++ b/numba_xnd/shared/llvm.py @@ -4,6 +4,7 @@ i8, i16, i32, i64 = map(ir.IntType, [8, 16, 32, 64]) int_ = i32 char = i8 +char_ptr = ptr(char) def index(i): diff --git a/numba_xnd/shared/mem_info_type.py b/numba_xnd/shared/mem_info_type.py deleted file mode 100644 index 8f6b201a7..000000000 --- a/numba_xnd/shared/mem_info_type.py +++ /dev/null @@ -1,88 +0,0 @@ -import typing - -import numba - -from .c_struct_type import CStructType -from .llvm import i64, index - - -class SimpleMemInfoModel(numba.datamodel.models.OpaqueModel): - """ - Like numba.datamodel.models.MemInfoModel, but just returns False for contains_nrt_meminfo instead of traversing types - """ - - def contains_nrt_meminfo(self): - return False - - def has_nrt_meminfo(self): - return True - - def get_nrt_meminfo(self, builder, value): - return value - - -class MemInfoType(numba.types.Type): - def __init__(self): - super().__init__(name=str(self.inner_type)) - - def __init_subclass__(cls, inner_type: typing.Type[CStructType], **kwargs): - super().__init_subclass__(**kwargs) - cls.inner_type = inner_type - - numba.extending.register_model(cls)(SimpleMemInfoModel) - - @numba.extending.infer_getattr - class Template(numba.typing.templates.AttributeTemplate): - key = cls - - def resolve_data(self, val): - return inner_type() - - def resolve_size(self, val): - return numba.types.int64 - - def resolve_refct(self, val): - return numba.types.int64 - - @numba.extending.lower_getattr(cls, "data") - def get_data_impl(context, builder, ty, 
val): - # context.nrt.incref(builder, cls(), val) - - return context.nrt.meminfo_data(builder, val) - - @numba.extending.lower_getattr(cls, "size") - def get_size_impl(context, builder, ty, val): - # copied from _define_nrt_meminfo_data - struct_ptr = builder.bitcast( - val, numba.runtime.nrtdynmod._meminfo_struct_type.as_pointer() - ) - return builder.load(builder.gep(struct_ptr, [index(0), index(4)], True)) - - @numba.extending.lower_getattr(cls, "refct") - def get_refct_impl(context, builder, ty, val): - # copied from _define_nrt_meminfo_data - struct_ptr = builder.bitcast( - val, numba.runtime.nrtdynmod._meminfo_struct_type.as_pointer() - ) - return builder.load(builder.gep(struct_ptr, [index(0), index(0)], True)) - - n_bytes = i64(inner_type.n_bytes) - - @numba.extending.intrinsic - def alloc(typingctx, n_t=numba.types.Const(1)): - n = None - if isinstance(n_t, numba.types.Const): - n = i64(n_t.value) - elif not isinstance(n_t, numba.types.Integer): - return - - sig = cls()(n_t) - - def codegen(context, builder, sig, args, n=n): - if n is None: - n = args[0] - return context.nrt.meminfo_alloc(builder, size=builder.mul(n, n_bytes)) - - return sig, codegen - - cls.alloc = alloc diff --git a/numba_xnd/shared/numba.py b/numba_xnd/shared/numba.py index 5d64e79aa..f368bd335 100644 --- a/numba_xnd/shared/numba.py +++ b/numba_xnd/shared/numba.py @@ -5,8 +5,8 @@ import numba.targets.listobj import numba.types -from .extending import create_numba_type, llvm_type_from_numba_type -from .llvm import char, i32, i64, ptr +from .extending import llvm_type_from_numba_type +from .llvm import char, char_ptr, i32, i64, ptr @numba.extending.intrinsic @@ -22,8 +22,7 @@ def codegen(context, builder, sig, args): return sig, codegen -c_string = ptr(char) -c_string_type = create_numba_type("CString", c_string) +c_string_type = numba.types.Opaque("c_string") @numba.extending.intrinsic(support_literals=True) @@ -46,7 +45,7 @@ def print_c_string(typingctx, c_str_t): def codegen(context, 
builder, sig, args): return builder.call( builder.module.get_or_insert_function( - llvmlite.ir.FunctionType(i32, [c_string]), name="puts" + llvmlite.ir.FunctionType(i32, [char_ptr]), name="puts" ), args, ) @@ -175,6 +174,6 @@ def null_char_ptr(typingctx): sig = c_string_type() def codegen(context, builder, sig, args): - return llvmlite.ir.Constant(c_string, None) + return llvmlite.ir.Constant(char_ptr, None) return sig, codegen diff --git a/numba_xnd/shared/wrapper_type.py b/numba_xnd/shared/wrapper_type.py new file mode 100644 index 000000000..833e6b156 --- /dev/null +++ b/numba_xnd/shared/wrapper_type.py @@ -0,0 +1,80 @@ +import typing + +import ndtypes + +import numba.extending +import numba.types +import numba.typing.templates + +from .c_struct_type import CStructModel, CStructType + + +class WrapperType(numba.types.Type): + def __init__(self, nrt_allocated: bool, ndt_value: ndtypes.ndt): + assert isinstance(ndt_value, ndtypes.ndt) + self.ndt_value = ndt_value + self.nrt_allocated = nrt_allocated + super().__init__( + name=f"Wrapper({self.inner_type.c_name}, {nrt_allocated}, {ndt_value})" + ) + + @property + def key(self): + return self.ndt_value + + def can_convert_from(self, typingctx, other): + """ + Support conversions from unwrapped to wrapped types implicitly. 
+ """ + if ( + isinstance(other, self.inner_type) + and other.nrt_allocated == self.nrt_allocated + ): + return numba.typeconv.Conversion.promote + + def __init_subclass__(cls, inner_type: typing.Type[CStructType], **kwargs): + super().__init_subclass__(**kwargs) + + cls.inner_type = inner_type + numba.extending.register_model(cls)(CStructModel) + + # allow casting from unwrapped to wrapped value + numba.extending.lower_cast(inner_type, cls)( + lambda context, builder, fromty, toty, val: val + ) + + cls.wrap = numba.extending.intrinsic(support_literals=True)(cls.wrap_impl) + cls.unwrap = numba.extending.intrinsic(cls.unwrap_impl) + + @classmethod + def wrap_impl(cls, typingctx, inner_t, ndt_type_t): + if not isinstance(inner_t, cls.inner_type): + return + # supports passing in strings as ndt's + if isinstance(ndt_type_t, numba.types.Const): + n = ndtypes.ndt(ndt_type_t.value) + arg_type = numba.types.string + elif hasattr(ndt_type_t, "ndt_value"): + n = ndt_type_t.ndt_value + arg_type = ndt_type_t + else: + return + + sig = cls(inner_t.nrt_allocated, n)(inner_t, arg_type) + + def codegen(context, builder, sig, args): + return args[0] + + return sig, codegen + + @classmethod + def unwrap_impl(cls, typingctx, wrapper_t): + if not isinstance(wrapper_t, cls): + return + + sig = cls.inner_type(wrapper_t.nrt_allocated)(wrapper_t) + + def codegen(context, builder, sig, args): + return args[0] + + return sig, codegen diff --git a/tests/test_libndtypes.py b/tests/test_libndtypes.py index ecc81ac33..df845536d 100644 --- a/tests/test_libndtypes.py +++ b/tests/test_libndtypes.py @@ -5,62 +5,40 @@ import numba_xnd from numba import njit -n = ndt("10 * 4 * 4 * int64") - @njit -def get_ndim(x): - return numba_xnd.libndtypes.unwrap_ndt(x).ndim +def is_concrete(x): + # pylint: disable=E + return numba_xnd.libndtypes.ndt_is_concrete(numba_xnd.libndtypes.unwrap_ndt(x)) @njit -def get_shape(x): - a = numba_xnd.libndtypes.create_ndt_ndarray() - numba_xnd.libndtypes.ndt_as_ndarray( - a, 
numba_xnd.libndtypes.unwrap_ndt(x), numba_xnd.libndtypes.create_ndt_context() - ) - return (a.shape[0], a.shape[1], a.shape[2]) +def static_context_err(): + # pylint: disable=E + ctx = numba_xnd.libndtypes.ndt_static_context() + return ctx.err @njit -def is_concrete(x): - return numba_xnd.libndtypes.ndt_is_concrete(numba_xnd.libndtypes.unwrap_ndt(x)) - +def get_ndim(n): + # pylint: disable=E + return n.ndim -class TestNdt(unittest.TestCase): - def test_ndim(self): - self.assertEqual(get_ndim(n), 3) - def test_shape(self): - self.assertEqual(get_shape(n), (10, 4, 4)) +@njit +def get_shape(n): + # pylint: disable=E + return n.shape - def test_is_concrete(self): - self.assertEqual(is_concrete(n), 1) +class TestNdt(unittest.TestCase): def test_static_context(self): - @njit - def static_context(): - ctx = numba_xnd.libndtypes.ndt_static_context() - assert not numba_xnd.libndtypes.ndt_err_occurred(ctx) - - static_context() + self.assertFalse(static_context_err()) class TestNdtWrapper(unittest.TestCase): def test_ndim(self): - @njit - def get_ndim(t_object_wrapper): - t = numba_xnd.libndtypes.unwrap_ndt(t_object_wrapper) - t_wrapper = numba_xnd.libndtypes.wrap_ndt(t, t_object_wrapper) - return t_wrapper.ndim - - self.assertEqual(get_ndim(n), 3) + self.assertEqual(get_ndim(ndt("10 * 4 * 4 * int64")), 3) def test_shape(self): - @njit - def get_shape(t_object_wrapper): - t = numba_xnd.libndtypes.unwrap_ndt(t_object_wrapper) - t_wrapper = numba_xnd.libndtypes.wrap_ndt(t, t_object_wrapper) - return t_wrapper.shape - - self.assertEqual(get_shape(n), (10, 4, 4)) + self.assertEqual(get_shape(ndt("10 * 4 * 4 * int64")), (10, 4, 4)) diff --git a/tests/test_libxnd.py b/tests/test_libxnd.py index 303e3fc89..b407ab750 100644 --- a/tests/test_libxnd.py +++ b/tests/test_libxnd.py @@ -11,22 +11,23 @@ @njit def subscript_single_index(x_wrapped, i): - x = numba_xnd.libxnd.unwrap_xnd_view(x_wrapped) + x = numba_xnd.libxnd.unwrap_xnd_view_mem_info(x_wrapped).data index = 
numba_xnd.libxnd.create_xnd_index() index.tag = numba_xnd.libxnd.XND_KEY_INDEX index.Index = i ctx = numba_xnd.libndtypes.ndt_static_context() - ret_xnd = numba_xnd.libxnd.xnd_view_subscript( - x, index, numba_xnd.shared.i64_to_i32(1), ctx + ret_x_v_mi = numba_xnd.libxnd.create_xnd_view_mem_info() + numba_xnd.libxnd.xnd_view_subscript( + ret_x_v_mi, x, index, numba_xnd.shared.i64_to_i32(1), ctx ) - assert not numba_xnd.shared.ptr_is_none(ret_xnd.view.ptr) + assert not numba_xnd.shared.ptr_is_none(ret_x_v_mi.data.view.ptr) assert not numba_xnd.libndtypes.ndt_err_occurred(ctx) - return numba_xnd.libxnd.wrap_xnd_view(ret_xnd, x_wrapped) + return numba_xnd.libxnd.wrap_xnd_view_mem_info_mem_info(ret_x_v_mi, x_wrapped) @njit def subscript_two_ints(x_wrapped, i, j): - x = numba_xnd.libxnd.unwrap_xnd_view(x_wrapped) + x = numba_xnd.libxnd.unwrap_xnd_view_mem_info(x_wrapped).data index = numba_xnd.libxnd.create_xnd_index(2) index.tag = numba_xnd.libxnd.XND_KEY_INDEX index.Index = i @@ -34,46 +35,49 @@ def subscript_two_ints(x_wrapped, i, j): second_index.tag = numba_xnd.libxnd.XND_KEY_INDEX second_index.Index = j ctx = numba_xnd.libndtypes.ndt_static_context() - ret_xnd = numba_xnd.libxnd.xnd_view_subscript( - x, index, numba_xnd.shared.i64_to_i32(2), ctx + ret_x_v_mi = numba_xnd.libxnd.create_xnd_view_mem_info() + numba_xnd.libxnd.xnd_view_subscript( + ret_x_v_mi, x, index, numba_xnd.shared.i64_to_i32(2), ctx ) - assert not numba_xnd.shared.ptr_is_none(ret_xnd.view.ptr) + assert not numba_xnd.shared.ptr_is_none(ret_x_v_mi.data.view.ptr) assert not numba_xnd.libndtypes.ndt_err_occurred(ctx) - - return numba_xnd.libxnd.wrap_xnd_view(ret_xnd, x_wrapped) + return numba_xnd.libxnd.wrap_xnd_view_mem_info_mem_info(ret_x_v_mi, x_wrapped) @njit def subscript_field(x_wrapped): - x = numba_xnd.libxnd.unwrap_xnd_view(x_wrapped) + x = numba_xnd.libxnd.unwrap_xnd_view_mem_info(x_wrapped).data index = numba_xnd.libxnd.create_xnd_index() index.tag = numba_xnd.libxnd.XND_KEY_FIELD_NAME 
index.FieldName = numba_xnd.shared.c_string_const("there") ctx = numba_xnd.libndtypes.ndt_static_context() - ret_xnd = numba_xnd.libxnd.xnd_view_subscript( - x, index, numba_xnd.shared.i64_to_i32(1), ctx + ret_x_v_mi = numba_xnd.libxnd.create_xnd_view_mem_info() + + numba_xnd.libxnd.xnd_view_subscript( + ret_x_v_mi, x, index, numba_xnd.shared.i64_to_i32(1), ctx ) - assert not numba_xnd.shared.ptr_is_none(ret_xnd.view.ptr), "ptr is not null" + assert not numba_xnd.shared.ptr_is_none(ret_x_v_mi.data.view.ptr), "ptr is not null" assert not numba_xnd.libndtypes.ndt_err_occurred(ctx), "ndt error" - return numba_xnd.libxnd.wrap_xnd_view(ret_xnd, x_wrapped) + return numba_xnd.libxnd.wrap_xnd_view_mem_info(ret_x_v_mi, x_wrapped) @njit def subscript_slice(x_wrapped, start, stop, step): - x = numba_xnd.libxnd.unwrap_xnd_view(x_wrapped) + x = numba_xnd.libxnd.unwrap_xnd_view_mem_info(x_wrapped).data index = numba_xnd.libxnd.create_xnd_index() index.tag = numba_xnd.libxnd.XND_KEY_SLICE index.Slice.start = start index.Slice.stop = stop index.Slice.step = step ctx = numba_xnd.libndtypes.ndt_static_context() - ret_xnd = numba_xnd.libxnd.xnd_view_subscript( - x, index, numba_xnd.shared.i64_to_i32(1), ctx + ret_x_v_mi = numba_xnd.libxnd.create_xnd_view_mem_info() + numba_xnd.libxnd.xnd_view_subscript( + ret_x_v_mi, x, index, numba_xnd.shared.i64_to_i32(1), ctx ) - assert not numba_xnd.shared.ptr_is_none(ret_xnd.view.ptr) + assert not numba_xnd.shared.ptr_is_none(ret_x_v_mi.data.view.ptr) assert not numba_xnd.libndtypes.ndt_err_occurred(ctx) - return numba_xnd.libxnd.wrap_xnd_view(ret_xnd, x_wrapped) + return numba_xnd.libxnd.wrap_xnd_view_mem_info(ret_x_v_mi, x_wrapped) class TestViewSubscript(unittest.TestCase): @@ -81,7 +85,6 @@ def test_single_int(self): self.assertEqual(subscript_single_index(x, 1), x[1]) self.assertEqual(x, xnd([[1, 2, 3], [4, 5, 6]])) - @unittest.skip def test_two_ints(self): self.assertEqual(subscript_two_ints(x, 1, 1), xnd(5)) self.assertEqual(x, xnd([[1, 
2, 3], [4, 5, 6]])) @@ -103,16 +106,44 @@ def test_slice(self): @njit def is_equal(x, y): - return numba_xnd.libxnd.xnd_equal( - numba_xnd.libxnd.unwrap_xnd_view(x).view, - numba_xnd.libxnd.unwrap_xnd_view(y).view, - numba_xnd.libndtypes.create_ndt_context(), - ) + print("STARTING") + x_xnd_view_mem_info = numba_xnd.libxnd.unwrap_xnd_view_mem_info(x) + y_xnd_view_mem_info = numba_xnd.libxnd.unwrap_xnd_view_mem_info(y) + print("X_SIZE", x_xnd_view_mem_info.size, x_xnd_view_mem_info.refct) + print("Y_SIZE", y_xnd_view_mem_info.size) + print("flags", x_xnd_view_mem_info.data.flags) + # print("obj", numba_xnd.shared.ptr_to_int(x_xnd_view_mem_info.data.obj)) + x_ = x_xnd_view_mem_info.data.view + y_ = y_xnd_view_mem_info.data.view + # print("x_ptr", numba_xnd.shared.ptr_to_int(x_)) + # print("ndim", x_.type.ndim) + # print(numba_xnd.shared.ptr_to_int(x_), numba_xnd.shared.ptr_to_int(y_)) + # print(numba_xnd.shared.ptr_to_int(x_.ptr), numba_xnd.shared.ptr_to_int(y_.ptr)) + ctx = numba_xnd.libndtypes.ndt_static_context() + print("STRATING EQUAL") + ret = numba_xnd.libxnd.xnd_equal(x_, y_, ctx) + print("done EQUAL") + assert not numba_xnd.libndtypes.ndt_err_occurred(ctx) + print("NO ERROR") + return ret class TestEqual(unittest.TestCase): def test_arrays_equal(self): - self.assertTrue(is_equal(xnd([1, 2, 3]), xnd([1, 2, 3]))) + f = xnd([1, 2, 3]) + print("id", id(f)) + + # print( + # "XND", + # xnd_structinfo.value_int64( + # xnd_structinfo.get_XndObject_xnd( + # xnd_structinfo.capsulate_XndObject( + # _xnd.Xnd(value=f.value, type=f.type) + # ) + # ) + # ), + # ) + self.assertTrue(is_equal(f, xnd([1, 2, 3]))) def test_arrays_not_equal(self): self.assertFalse(is_equal(xnd([1, 2, 3]), xnd([1, 2, 4]))) @@ -123,7 +154,8 @@ def test_bool(self): @njit def get_bool(x): return numba_xnd.shared.ptr_load_type( - numba.types.boolean, numba_xnd.libxnd.unwrap_xnd_view(x).view.ptr + numba.types.boolean, + numba_xnd.libxnd.unwrap_xnd_view_mem_info(x).data.view.ptr, ) 
self.assertTrue(get_bool(xnd(True))) @@ -133,7 +165,8 @@ def test_int8(self): @njit def get_int8(x): return numba_xnd.shared.ptr_load_type( - numba.types.int8, numba_xnd.libxnd.unwrap_xnd_view(x).view.ptr + numba.types.int8, + numba_xnd.libxnd.unwrap_xnd_view_mem_info(x).data.view.ptr, ) self.assertEqual(get_int8(xnd(10, type="int8")), 10) @@ -142,7 +175,8 @@ def test_int16(self): @njit def get_int16(x): return numba_xnd.shared.ptr_load_type( - numba.types.int16, numba_xnd.libxnd.unwrap_xnd_view(x).view.ptr + numba.types.int16, + numba_xnd.libxnd.unwrap_xnd_view_mem_info(x).data.view.ptr, ) self.assertEqual(get_int16(xnd(10, type="int16")), 10) @@ -151,7 +185,8 @@ def test_int32(self): @njit def get_int32(x): return numba_xnd.shared.ptr_load_type( - numba.types.int32, numba_xnd.libxnd.unwrap_xnd_view(x).view.ptr + numba.types.int32, + numba_xnd.libxnd.unwrap_xnd_view_mem_info(x).data.view.ptr, ) self.assertEqual(get_int32(xnd(10, type="int32")), 10) @@ -160,7 +195,8 @@ def test_int64(self): @njit def get_int64(x): return numba_xnd.shared.ptr_load_type( - numba.types.int64, numba_xnd.libxnd.unwrap_xnd_view(x).view.ptr + numba.types.int64, + numba_xnd.libxnd.unwrap_xnd_view_mem_info(x).data.view.ptr, ) self.assertEqual(get_int64(xnd(10, type="int64")), 10) @@ -169,7 +205,8 @@ def test_uint8(self): @njit def get_uint8(x): return numba_xnd.shared.ptr_load_type( - numba.types.uint8, numba_xnd.libxnd.unwrap_xnd_view(x).view.ptr + numba.types.uint8, + numba_xnd.libxnd.unwrap_xnd_view_mem_info(x).data.view.ptr, ) self.assertEqual(get_uint8(xnd(10, type="uint8")), 10) @@ -178,7 +215,8 @@ def test_float32(self): @njit def get_float32(x): return numba_xnd.shared.ptr_load_type( - numba.types.float32, numba_xnd.libxnd.unwrap_xnd_view(x).view.ptr + numba.types.float32, + numba_xnd.libxnd.unwrap_xnd_view_mem_info(x).data.view.ptr, ) self.assertEqual(get_float32(xnd(10.0, type="float32")), 10.0) @@ -189,7 +227,9 @@ def test_bool(self): @njit def set_bool(x, y): 
numba_xnd.shared.ptr_store_type( - numba.types.boolean, numba_xnd.libxnd.unwrap_xnd_view(x).view.ptr, y + numba.types.boolean, + numba_xnd.libxnd.unwrap_xnd_view_mem_info(x).data.view.ptr, + y, ) x = xnd(True) @@ -202,7 +242,9 @@ def test_int8(self): @njit def set_int8(x, y): numba_xnd.shared.ptr_store_type( - numba.types.int8, numba_xnd.libxnd.unwrap_xnd_view(x).view.ptr, y + numba.types.int8, + numba_xnd.libxnd.unwrap_xnd_view_mem_info(x).data.view.ptr, + y, ) x = xnd(123, type="int8") @@ -213,7 +255,9 @@ def test_int16(self): @njit def set_int16(x, y): numba_xnd.shared.ptr_store_type( - numba.types.int16, numba_xnd.libxnd.unwrap_xnd_view(x).view.ptr, y + numba.types.int16, + numba_xnd.libxnd.unwrap_xnd_view_mem_info(x).data.view.ptr, + y, ) x = xnd(123, type="int16") @@ -224,7 +268,9 @@ def test_int32(self): @njit def set_int32(x, y): numba_xnd.shared.ptr_store_type( - numba.types.int32, numba_xnd.libxnd.unwrap_xnd_view(x).view.ptr, y + numba.types.int32, + numba_xnd.libxnd.unwrap_xnd_view_mem_info(x).data.view.ptr, + y, ) x = xnd(123, type="int32") @@ -235,7 +281,9 @@ def test_uint8(self): @njit def set_uint8(x, y): numba_xnd.shared.ptr_store_type( - numba.types.uint8, numba_xnd.libxnd.unwrap_xnd_view(x).view.ptr, y + numba.types.uint8, + numba_xnd.libxnd.unwrap_xnd_view_mem_info(x).data.view.ptr, + y, ) x = xnd(123, type="uint8") @@ -246,7 +294,9 @@ def test_float32(self): @njit def set_float32(x, y): numba_xnd.shared.ptr_store_type( - numba.types.float32, numba_xnd.libxnd.unwrap_xnd_view(x).view.ptr, y + numba.types.float32, + numba_xnd.libxnd.unwrap_xnd_view_mem_info(x).data.view.ptr, + y, ) x = xnd(123.123, type="float32") @@ -275,24 +325,28 @@ def get_value(x_wrapped): self.assertEqual(x, xnd(123)) def test_index_int(self): - @numba_xnd.register_kernel("A * int64, int64 -> int64") - def index_thing(a, i, res): - res[()] = a[i.value].value + @njit + def index_thing(a, i): + return a[i] - self.assertEqual(index_thing(xnd([10, 1]), xnd(1)), xnd(1)) - 
self.assertEqual(index_thing(xnd([10, 1]), xnd(0)), xnd(10)) + self.assertEqual(index_thing(xnd([10, 1]), 1), xnd(1)) + self.assertEqual(index_thing(xnd([10, 1]), 0), xnd(10)) - @unittest.skip def test_index_tuple(self): - @numba_xnd.register_kernel("A * B * int64 -> int64") - def index_tuple(a, res): - res[()] = a[1, 0].value + @njit + def index_tuple(a, i, j): + return a[i, j].value - self.assertEqual(index_tuple(xnd([[1, 2], [3, 4]])), xnd(3)) + x = xnd([[1, 2], [3, 4]]) + results = [((0, 0), 1), ((0, 1), 2), ((1, 0), 3), ((1, 1), 4)] + for args, res in results: + i, j = args + with self.subTest(i=i, j=j): + self.assertEqual(index_tuple(x, i, j), res) def test_index_tuple_empty(self): - @numba_xnd.register_kernel("int64 -> int64") - def index_tuple_empty(a, res): - res[()] = a[()].value + @njit + def index_tuple_empty(a): + return a[()] self.assertEqual(index_tuple_empty(xnd(20)), xnd(20)) diff --git a/tests/test_pyndtypes.py b/tests/test_pyndtypes.py index ec0d1ea45..5e2d82127 100644 --- a/tests/test_pyndtypes.py +++ b/tests/test_pyndtypes.py @@ -2,12 +2,18 @@ from ndtypes import ndt -import numba_xnd # NOQA +import numba_xnd # NOQA pylint: disable=W0611 from numba import njit n = ndt("10 * 4 * 4 * 5 * 10 * int64") -# class TestBoxingUnboxing(unittest.TestCase): -# def test_boxes_unboxes(self): -# self.assertEqual(njit(lambda x: x)(n), n) +@unittest.skip("Boxing not working currently") +class TestBoxingUnboxing(unittest.TestCase): + def test_boxes_unboxes(self): + self.assertEqual(njit(lambda x: x)(n), n) + + +class TestUnboxingWorks(unittest.TestCase): + def test_unbox(self): + self.assertEqual(njit(lambda x: None)(n), None) From 7ac9376b97a38bb6c558fe8cca8efb1e2a2863f5 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 11 Sep 2018 11:38:56 -0400 Subject: [PATCH 3/8] Make numba debug mode --- .numba_config.yaml | 1 + environment.yml | 1 + 2 files changed, 2 insertions(+) create mode 100644 .numba_config.yaml diff --git a/.numba_config.yaml 
b/.numba_config.yaml new file mode 100644 index 000000000..d1057db7d --- /dev/null +++ b/.numba_config.yaml @@ -0,0 +1 @@ +developer_mode: 1 diff --git a/environment.yml b/environment.yml index 9c8fd06bb..ee6abe059 100644 --- a/environment.yml +++ b/environment.yml @@ -11,3 +11,4 @@ dependencies: - xnd - coverage - codecov + - pyyaml # for numba config parsing From b3c19b222e05d6b4899237ba19b69faefff5e79e Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 11 Sep 2018 11:39:05 -0400 Subject: [PATCH 4/8] Fix getting attr --- numba_xnd/shared/c_struct_type.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/numba_xnd/shared/c_struct_type.py b/numba_xnd/shared/c_struct_type.py index 478746aa4..d8e6946db 100644 --- a/numba_xnd/shared/c_struct_type.py +++ b/numba_xnd/shared/c_struct_type.py @@ -84,20 +84,16 @@ def _type_and_lower_field(cls, field: str, numba_type: numba.types.Type): @numba.typing.templates.bound_function(fn_key) def resolve(self, ty, args, kws): - print("Trying", fn_key, ty, args, kws) if kws or not args or not isinstance(args[0], numba.types.Integer): return - print("Resolved!") # getting value if len(args) == 1: return numba.typing.templates.signature(numba_type, *args) # setting value if len(args) == 2: # and args[1] == numba_type: - print(args) return numba.typing.templates.signature( numba.types.none, args[0], numba_type ) - print("no resolved", len(args), args[1], numba_type) # Lower function @numba.targets.imputils.lower_builtin(fn_key, cls, numba.types.Integer) @@ -108,7 +104,8 @@ def lower_get(context, builder, sig, args): fn_key, cls, numba.types.Integer, type(numba_type) ) def lower_set(context, builder, sig, args): - return cls.setattr_impl(builder, field, *args) + cls.setattr_impl(builder, field, *args) + return context.get_dummy_value() return resolve From 3e5647956893f4d512f693c5f2fd9119c34c3172 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 11 Sep 2018 12:11:18 -0400 Subject: [PATCH 5/8] Generate 
sizeof meminfo --- numba_xnd/shared/c_struct_type.py | 2 +- structinfo_config.py | 4 +- xnd_structinfo.c | 187 ++++++++++++++++++++++++++++++ 3 files changed, 190 insertions(+), 3 deletions(-) diff --git a/numba_xnd/shared/c_struct_type.py b/numba_xnd/shared/c_struct_type.py index d8e6946db..ecf9ca0f4 100644 --- a/numba_xnd/shared/c_struct_type.py +++ b/numba_xnd/shared/c_struct_type.py @@ -10,7 +10,7 @@ from .extending import llvm_type_from_numba_type from .llvm import char_ptr, i64, index, ptr -SIZEOF_MEMINFO = 20 # in bytes +SIZEOF_MEMINFO = xnd_structinfo.sizeof_NRT_MemInfo() class CStructModel(numba.datamodel.models.OpaqueModel): diff --git a/structinfo_config.py b/structinfo_config.py index 174312250..58fd9918e 100644 --- a/structinfo_config.py +++ b/structinfo_config.py @@ -11,9 +11,9 @@ output_filename = "xnd_structinfo.c" modulename = "xnd_structinfo" -include_dirs = lib_dirs + [include_dir] +include_dirs = lib_dirs + [include_dir, "./numba/numba/runtime"] library_dirs = [site_packages[: site_packages.find("/python")]] -includes = ["pyndtypes.h", "pyxnd.h", "pygumath.h"] +includes = ["pyndtypes.h", "pyxnd.h", "pygumath.h", "nrt.c"] libraries = [] # Additional C code to be added to output diff --git a/xnd_structinfo.c b/xnd_structinfo.c index 616093d94..b924c5e8f 100644 --- a/xnd_structinfo.c +++ b/xnd_structinfo.c @@ -2,6 +2,7 @@ #include "pyndtypes.h" #include "pyxnd.h" #include "pygumath.h" +#include "nrt.c" extern PyObject* ndt_from_type(ndt_t* val) { return Ndt_FromType(val); } extern PyObject* xnd_from_type_xnd(PyTypeObject* t, xnd_t* val) { return Xnd_FromXnd(t, val); }; @@ -374,6 +375,32 @@ extern /* pointer to `const gm_tbl_t*` */ void * get_GufuncObject_tbl(void* ptr) extern size_t offsetof_GufuncObject_tbl(void){ return offsetof(GufuncObject, tbl); } extern /* pointer to `char*` */ void * get_GufuncObject_name(void* ptr){ return &(((GufuncObject*)ptr)->name); } extern size_t offsetof_GufuncObject_name(void){ return offsetof(GufuncObject, name); } 
+extern size_t sizeof_NRT_MemInfo(void){ return sizeof(NRT_MemInfo); } +extern /* pointer to `size_t` */ void * get_NRT_MemInfo_refct(void* ptr){ return &(((NRT_MemInfo*)ptr)->refct); } +extern size_t offsetof_NRT_MemInfo_refct(void){ return offsetof(NRT_MemInfo, refct); } +extern /* pointer to `NRT_dtor_function` */ void * get_NRT_MemInfo_dtor(void* ptr){ return &(((NRT_MemInfo*)ptr)->dtor); } +extern size_t offsetof_NRT_MemInfo_dtor(void){ return offsetof(NRT_MemInfo, dtor); } +extern /* pointer to `void*` */ void * get_NRT_MemInfo_dtor_info(void* ptr){ return &(((NRT_MemInfo*)ptr)->dtor_info); } +extern size_t offsetof_NRT_MemInfo_dtor_info(void){ return offsetof(NRT_MemInfo, dtor_info); } +extern /* pointer to `void*` */ void * get_NRT_MemInfo_data(void* ptr){ return &(((NRT_MemInfo*)ptr)->data); } +extern size_t offsetof_NRT_MemInfo_data(void){ return offsetof(NRT_MemInfo, data); } +extern /* pointer to `size_t` */ void * get_NRT_MemInfo_size(void* ptr){ return &(((NRT_MemInfo*)ptr)->size); } +extern size_t offsetof_NRT_MemInfo_size(void){ return offsetof(NRT_MemInfo, size); } +extern size_t sizeof_NRT_MemSys(void){ return sizeof(NRT_MemSys); } +extern /* pointer to `NRT_atomic_inc_dec_func atomic_inc,` */ void * get_NRT_MemSys_atomic_dec(void* ptr){ return &(((NRT_MemSys*)ptr)->atomic_dec); } +extern size_t offsetof_NRT_MemSys_atomic_dec(void){ return offsetof(NRT_MemSys, atomic_dec); } +extern /* pointer to `atomic_meminfo_cas_func` */ void * get_NRT_MemSys_atomic_cas(void* ptr){ return &(((NRT_MemSys*)ptr)->atomic_cas); } +extern size_t offsetof_NRT_MemSys_atomic_cas(void){ return offsetof(NRT_MemSys, atomic_cas); } +extern /* pointer to `int` */ void * get_NRT_MemSys_shutting(void* ptr){ return &(((NRT_MemSys*)ptr)->shutting); } +extern size_t offsetof_NRT_MemSys_shutting(void){ return offsetof(NRT_MemSys, shutting); } +extern /* pointer to `size_t stats_alloc,stats_free,stats_mi_alloc,` */ void * get_NRT_MemSys_stats_mi_free(void* ptr){ return 
&(((NRT_MemSys*)ptr)->stats_mi_free); } +extern size_t offsetof_NRT_MemSys_stats_mi_free(void){ return offsetof(NRT_MemSys, stats_mi_free); } +extern /* pointer to `NRT_malloc_func` */ void * get_NRT_MemSys_allocator_malloc(void* ptr){ return &(((NRT_MemSys*)ptr)->allocator.malloc); } +extern size_t offsetof_NRT_MemSys_allocator_malloc(void){ return offsetof(NRT_MemSys, allocator.malloc); } +extern /* pointer to `NRT_realloc_func` */ void * get_NRT_MemSys_allocator_realloc(void* ptr){ return &(((NRT_MemSys*)ptr)->allocator.realloc); } +extern size_t offsetof_NRT_MemSys_allocator_realloc(void){ return offsetof(NRT_MemSys, allocator.realloc); } +extern /* pointer to `NRT_free_func` */ void * get_NRT_MemSys_allocator_free(void* ptr){ return &(((NRT_MemSys*)ptr)->allocator.free); } +extern size_t offsetof_NRT_MemSys_allocator_free(void){ return offsetof(NRT_MemSys, allocator.free); } #ifdef PYTHON_MODULE #include "Python.h" @@ -2167,6 +2194,140 @@ static PyObject *pyc_get_GufuncObject_name(PyObject *self, PyObject *args) { return NULL; } static PyObject *pyc_offsetof_GufuncObject_name(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_GufuncObject_name())); } +static PyObject *pyc_sizeof_NRT_MemInfo(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(sizeof_NRT_MemInfo())); } +static PyObject *pyc_get_NRT_MemInfo_refct(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + if (!PyArg_UnpackTuple(args, "NRT_MemInfo", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_NRT_MemInfo_refct(PyCapsule_GetPointer(ptr, "NRT_MemInfo*")), "size_t", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted NRT_MemInfo"); + return NULL; +} +static PyObject *pyc_offsetof_NRT_MemInfo_refct(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_NRT_MemInfo_refct())); } +static PyObject *pyc_get_NRT_MemInfo_dtor(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + if 
(!PyArg_UnpackTuple(args, "NRT_MemInfo", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_NRT_MemInfo_dtor(PyCapsule_GetPointer(ptr, "NRT_MemInfo*")), "NRT_dtor_function", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted NRT_MemInfo"); + return NULL; +} +static PyObject *pyc_offsetof_NRT_MemInfo_dtor(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_NRT_MemInfo_dtor())); } +static PyObject *pyc_get_NRT_MemInfo_dtor_info(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + if (!PyArg_UnpackTuple(args, "NRT_MemInfo", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_NRT_MemInfo_dtor_info(PyCapsule_GetPointer(ptr, "NRT_MemInfo*")), "void*", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted NRT_MemInfo"); + return NULL; +} +static PyObject *pyc_offsetof_NRT_MemInfo_dtor_info(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_NRT_MemInfo_dtor_info())); } +static PyObject *pyc_get_NRT_MemInfo_data(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + if (!PyArg_UnpackTuple(args, "NRT_MemInfo", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_NRT_MemInfo_data(PyCapsule_GetPointer(ptr, "NRT_MemInfo*")), "void*", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted NRT_MemInfo"); + return NULL; +} +static PyObject *pyc_offsetof_NRT_MemInfo_data(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_NRT_MemInfo_data())); } +static PyObject *pyc_get_NRT_MemInfo_size(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + if (!PyArg_UnpackTuple(args, "NRT_MemInfo", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_NRT_MemInfo_size(PyCapsule_GetPointer(ptr, "NRT_MemInfo*")), "size_t", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted NRT_MemInfo"); + return NULL; +} +static 
PyObject *pyc_offsetof_NRT_MemInfo_size(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_NRT_MemInfo_size())); } +static PyObject *pyc_sizeof_NRT_MemSys(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(sizeof_NRT_MemSys())); } +static PyObject *pyc_get_NRT_MemSys_atomic_dec(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + if (!PyArg_UnpackTuple(args, "NRT_MemSys", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_NRT_MemSys_atomic_dec(PyCapsule_GetPointer(ptr, "NRT_MemSys*")), "NRT_atomic_inc_dec_func atomic_inc,", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted NRT_MemSys"); + return NULL; +} +static PyObject *pyc_offsetof_NRT_MemSys_atomic_dec(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_NRT_MemSys_atomic_dec())); } +static PyObject *pyc_get_NRT_MemSys_atomic_cas(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + if (!PyArg_UnpackTuple(args, "NRT_MemSys", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_NRT_MemSys_atomic_cas(PyCapsule_GetPointer(ptr, "NRT_MemSys*")), "atomic_meminfo_cas_func", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted NRT_MemSys"); + return NULL; +} +static PyObject *pyc_offsetof_NRT_MemSys_atomic_cas(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_NRT_MemSys_atomic_cas())); } +static PyObject *pyc_get_NRT_MemSys_shutting(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + if (!PyArg_UnpackTuple(args, "NRT_MemSys", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_NRT_MemSys_shutting(PyCapsule_GetPointer(ptr, "NRT_MemSys*")), "int", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted NRT_MemSys"); + return NULL; +} +static PyObject *pyc_offsetof_NRT_MemSys_shutting(PyObject *self, PyObject *args) { return 
PyLong_FromLong((long)(offsetof_NRT_MemSys_shutting())); } +static PyObject *pyc_get_NRT_MemSys_stats_mi_free(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + if (!PyArg_UnpackTuple(args, "NRT_MemSys", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_NRT_MemSys_stats_mi_free(PyCapsule_GetPointer(ptr, "NRT_MemSys*")), "size_t stats_alloc,stats_free,stats_mi_alloc,", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted NRT_MemSys"); + return NULL; +} +static PyObject *pyc_offsetof_NRT_MemSys_stats_mi_free(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_NRT_MemSys_stats_mi_free())); } +static PyObject *pyc_get_NRT_MemSys_allocator_malloc(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + if (!PyArg_UnpackTuple(args, "NRT_MemSys", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_NRT_MemSys_allocator_malloc(PyCapsule_GetPointer(ptr, "NRT_MemSys*")), "NRT_malloc_func", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted NRT_MemSys"); + return NULL; +} +static PyObject *pyc_offsetof_NRT_MemSys_allocator_malloc(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_NRT_MemSys_allocator_malloc())); } +static PyObject *pyc_get_NRT_MemSys_allocator_realloc(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + if (!PyArg_UnpackTuple(args, "NRT_MemSys", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_NRT_MemSys_allocator_realloc(PyCapsule_GetPointer(ptr, "NRT_MemSys*")), "NRT_realloc_func", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted NRT_MemSys"); + return NULL; +} +static PyObject *pyc_offsetof_NRT_MemSys_allocator_realloc(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_NRT_MemSys_allocator_realloc())); } +static PyObject *pyc_get_NRT_MemSys_allocator_free(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + 
if (!PyArg_UnpackTuple(args, "NRT_MemSys", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_NRT_MemSys_allocator_free(PyCapsule_GetPointer(ptr, "NRT_MemSys*")), "NRT_free_func", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted NRT_MemSys"); + return NULL; +} +static PyObject *pyc_offsetof_NRT_MemSys_allocator_free(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_NRT_MemSys_allocator_free())); } static PyObject *pyc_capsule_to_int32(PyObject *self, PyObject *args) { PyObject* ptr=NULL; @@ -2563,6 +2724,32 @@ static PyMethodDef xnd_structinfo_methods[] = { {"offsetof_GufuncObject_tbl", (PyCFunction)pyc_offsetof_GufuncObject_tbl, METH_VARARGS, "offsetof_GufuncObject_tbl() -> int"}, {"get_GufuncObject_name", (PyCFunction)pyc_get_GufuncObject_name, METH_VARARGS, "get_GufuncObject_name(< capsule(GufuncObject) >) -> < capsule( &GufuncObject->name ) >"}, {"offsetof_GufuncObject_name", (PyCFunction)pyc_offsetof_GufuncObject_name, METH_VARARGS, "offsetof_GufuncObject_name() -> int"}, + {"sizeof_NRT_MemInfo", (PyCFunction)pyc_sizeof_NRT_MemInfo, METH_VARARGS, "sizeof_NRT_MemInfo() -> int"}, + {"get_NRT_MemInfo_refct", (PyCFunction)pyc_get_NRT_MemInfo_refct, METH_VARARGS, "get_NRT_MemInfo_refct(< capsule(NRT_MemInfo) >) -> < capsule( &NRT_MemInfo->refct ) >"}, + {"offsetof_NRT_MemInfo_refct", (PyCFunction)pyc_offsetof_NRT_MemInfo_refct, METH_VARARGS, "offsetof_NRT_MemInfo_refct() -> int"}, + {"get_NRT_MemInfo_dtor", (PyCFunction)pyc_get_NRT_MemInfo_dtor, METH_VARARGS, "get_NRT_MemInfo_dtor(< capsule(NRT_MemInfo) >) -> < capsule( &NRT_MemInfo->dtor ) >"}, + {"offsetof_NRT_MemInfo_dtor", (PyCFunction)pyc_offsetof_NRT_MemInfo_dtor, METH_VARARGS, "offsetof_NRT_MemInfo_dtor() -> int"}, + {"get_NRT_MemInfo_dtor_info", (PyCFunction)pyc_get_NRT_MemInfo_dtor_info, METH_VARARGS, "get_NRT_MemInfo_dtor_info(< capsule(NRT_MemInfo) >) -> < capsule( &NRT_MemInfo->dtor_info ) >"}, + 
{"offsetof_NRT_MemInfo_dtor_info", (PyCFunction)pyc_offsetof_NRT_MemInfo_dtor_info, METH_VARARGS, "offsetof_NRT_MemInfo_dtor_info() -> int"}, + {"get_NRT_MemInfo_data", (PyCFunction)pyc_get_NRT_MemInfo_data, METH_VARARGS, "get_NRT_MemInfo_data(< capsule(NRT_MemInfo) >) -> < capsule( &NRT_MemInfo->data ) >"}, + {"offsetof_NRT_MemInfo_data", (PyCFunction)pyc_offsetof_NRT_MemInfo_data, METH_VARARGS, "offsetof_NRT_MemInfo_data() -> int"}, + {"get_NRT_MemInfo_size", (PyCFunction)pyc_get_NRT_MemInfo_size, METH_VARARGS, "get_NRT_MemInfo_size(< capsule(NRT_MemInfo) >) -> < capsule( &NRT_MemInfo->size ) >"}, + {"offsetof_NRT_MemInfo_size", (PyCFunction)pyc_offsetof_NRT_MemInfo_size, METH_VARARGS, "offsetof_NRT_MemInfo_size() -> int"}, + {"sizeof_NRT_MemSys", (PyCFunction)pyc_sizeof_NRT_MemSys, METH_VARARGS, "sizeof_NRT_MemSys() -> int"}, + {"get_NRT_MemSys_atomic_dec", (PyCFunction)pyc_get_NRT_MemSys_atomic_dec, METH_VARARGS, "get_NRT_MemSys_atomic_dec(< capsule(NRT_MemSys) >) -> < capsule( &NRT_MemSys->atomic_dec ) >"}, + {"offsetof_NRT_MemSys_atomic_dec", (PyCFunction)pyc_offsetof_NRT_MemSys_atomic_dec, METH_VARARGS, "offsetof_NRT_MemSys_atomic_dec() -> int"}, + {"get_NRT_MemSys_atomic_cas", (PyCFunction)pyc_get_NRT_MemSys_atomic_cas, METH_VARARGS, "get_NRT_MemSys_atomic_cas(< capsule(NRT_MemSys) >) -> < capsule( &NRT_MemSys->atomic_cas ) >"}, + {"offsetof_NRT_MemSys_atomic_cas", (PyCFunction)pyc_offsetof_NRT_MemSys_atomic_cas, METH_VARARGS, "offsetof_NRT_MemSys_atomic_cas() -> int"}, + {"get_NRT_MemSys_shutting", (PyCFunction)pyc_get_NRT_MemSys_shutting, METH_VARARGS, "get_NRT_MemSys_shutting(< capsule(NRT_MemSys) >) -> < capsule( &NRT_MemSys->shutting ) >"}, + {"offsetof_NRT_MemSys_shutting", (PyCFunction)pyc_offsetof_NRT_MemSys_shutting, METH_VARARGS, "offsetof_NRT_MemSys_shutting() -> int"}, + {"get_NRT_MemSys_stats_mi_free", (PyCFunction)pyc_get_NRT_MemSys_stats_mi_free, METH_VARARGS, "get_NRT_MemSys_stats_mi_free(< capsule(NRT_MemSys) >) -> < capsule( 
&NRT_MemSys->stats_mi_free ) >"}, + {"offsetof_NRT_MemSys_stats_mi_free", (PyCFunction)pyc_offsetof_NRT_MemSys_stats_mi_free, METH_VARARGS, "offsetof_NRT_MemSys_stats_mi_free() -> int"}, + {"get_NRT_MemSys_allocator_malloc", (PyCFunction)pyc_get_NRT_MemSys_allocator_malloc, METH_VARARGS, "get_NRT_MemSys_allocator_malloc(< capsule(NRT_MemSys) >) -> < capsule( &NRT_MemSys->allocator.malloc ) >"}, + {"offsetof_NRT_MemSys_allocator_malloc", (PyCFunction)pyc_offsetof_NRT_MemSys_allocator_malloc, METH_VARARGS, "offsetof_NRT_MemSys_allocator_malloc() -> int"}, + {"get_NRT_MemSys_allocator_realloc", (PyCFunction)pyc_get_NRT_MemSys_allocator_realloc, METH_VARARGS, "get_NRT_MemSys_allocator_realloc(< capsule(NRT_MemSys) >) -> < capsule( &NRT_MemSys->allocator.realloc ) >"}, + {"offsetof_NRT_MemSys_allocator_realloc", (PyCFunction)pyc_offsetof_NRT_MemSys_allocator_realloc, METH_VARARGS, "offsetof_NRT_MemSys_allocator_realloc() -> int"}, + {"get_NRT_MemSys_allocator_free", (PyCFunction)pyc_get_NRT_MemSys_allocator_free, METH_VARARGS, "get_NRT_MemSys_allocator_free(< capsule(NRT_MemSys) >) -> < capsule( &NRT_MemSys->allocator.free ) >"}, + {"offsetof_NRT_MemSys_allocator_free", (PyCFunction)pyc_offsetof_NRT_MemSys_allocator_free, METH_VARARGS, "offsetof_NRT_MemSys_allocator_free() -> int"}, {"value_int32", (PyCFunction)pyc_capsule_to_int32, METH_VARARGS, "(capsule) -> "}, {"value_int64", (PyCFunction)pyc_capsule_to_int64, METH_VARARGS, "(capsule) -> "}, {"value_bytes", (PyCFunction)pyc_capsule_to_bytes, METH_VARARGS, "(capsule[, size]) -> "}, From 5772fefb1e5057c203e3444498d9276e5c59d547 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 11 Sep 2018 12:49:07 -0400 Subject: [PATCH 6/8] Print data pointers --- numba_xnd/shared/c_struct_type.py | 12 ++++++-- numba_xnd/shared/llvm.py | 11 +++++++ structinfo_config.py | 1 + xnd_structinfo.c | 48 ++++++++++++++++++++++++++++++- 4 files changed, 68 insertions(+), 4 deletions(-) diff --git a/numba_xnd/shared/c_struct_type.py 
b/numba_xnd/shared/c_struct_type.py index ecf9ca0f4..7674c0f86 100644 --- a/numba_xnd/shared/c_struct_type.py +++ b/numba_xnd/shared/c_struct_type.py @@ -8,7 +8,7 @@ import xnd_structinfo from .extending import llvm_type_from_numba_type -from .llvm import char_ptr, i64, index, ptr +from .llvm import char_ptr, i64, index, print_pointer, ptr SIZEOF_MEMINFO = xnd_structinfo.sizeof_NRT_MemInfo() @@ -25,7 +25,9 @@ def get_nrt_meminfo(self, builder: llvmlite.ir.IRBuilder, value): nrt meminfo pointer begins before the allocated data pointer. So we subtract size of meminfo to get to this pointer """ # move back `SIZEOF_MEMINFO` bytes (since this is ptr(char)) - return builder.gep(value, [index(-SIZEOF_MEMINFO)]) + # print() + return builder.gep(value, [index(-SIZEOF_MEMINFO)], inbounds=True) + # return value class CStructType(numba.types.Type): @@ -133,8 +135,12 @@ def codegen(context, builder, sig, args): mi = context.nrt.meminfo_alloc( builder, size=builder.mul(n, i64(cls.n_bytes)) ) + print_pointer(builder, mi) + print_pointer(builder, context.nrt.meminfo_data(builder, mi)) # move forward to data which is allocated after meminfo - return builder.gep(mi, [index(SIZEOF_MEMINFO)]) + calculated_data = builder.gep(mi, [index(SIZEOF_MEMINFO)]) + print_pointer(builder, calculated_data) + return calculated_data return sig, codegen diff --git a/numba_xnd/shared/llvm.py b/numba_xnd/shared/llvm.py index 54e1fb396..0784a66d8 100644 --- a/numba_xnd/shared/llvm.py +++ b/numba_xnd/shared/llvm.py @@ -1,3 +1,4 @@ +import llvmlite from llvmlite import ir from llvmlite.ir import PointerType as ptr @@ -29,3 +30,13 @@ def pycapsule_import(c, path, i: int, fntype, name=None): builder.bitcast(builder.gep(xnd_api, [index(i * 8)], True), ptr(ptr(fntype))), name=name, ) + + +def print_pointer(builder, ptr_): + builder.call( + builder.module.get_or_insert_function( + llvmlite.ir.FunctionType(llvmlite.ir.VoidType(), [ptr(char)]), + name="print_pointer", + ), + [ptr_], + ) diff --git 
a/structinfo_config.py b/structinfo_config.py index 58fd9918e..913c6ac5b 100644 --- a/structinfo_config.py +++ b/structinfo_config.py @@ -22,6 +22,7 @@ extern PyObject* xnd_from_type_xnd(PyTypeObject* t, xnd_t* val) { return Xnd_FromXnd(t, val); }; extern PyObject* xnd_view_move_ndt(const PyObject *v, ndt_t *t) { return Xnd_ViewMoveNdt(v, t); }; extern PyObject* xnd_from_xnd_view(xnd_view_t *v) {return Xnd_FromXndView(v); }; +extern void print_pointer(void* ptr){printf("%p\\n", ptr);} //extern PyObject* xnd_from_xndonly(xnd_t *val) { return Xnd_FromXndOnly(val); }; extern void print_bytes(const void *object, size_t size) { diff --git a/xnd_structinfo.c b/xnd_structinfo.c index b924c5e8f..36532a8eb 100644 --- a/xnd_structinfo.c +++ b/xnd_structinfo.c @@ -8,6 +8,7 @@ extern PyObject* ndt_from_type(ndt_t* val) { return Ndt_FromType(val); } extern PyObject* xnd_from_type_xnd(PyTypeObject* t, xnd_t* val) { return Xnd_FromXnd(t, val); }; extern PyObject* xnd_view_move_ndt(const PyObject *v, ndt_t *t) { return Xnd_ViewMoveNdt(v, t); }; extern PyObject* xnd_from_xnd_view(xnd_view_t *v) {return Xnd_FromXndView(v); }; +extern void print_pointer(void* ptr){printf("%p\n", ptr);} //extern PyObject* xnd_from_xndonly(xnd_t *val) { return Xnd_FromXndOnly(val); }; extern void print_bytes(const void *object, size_t size) { @@ -128,16 +129,22 @@ extern /* pointer to `int64_t` */ void * get_ndt_t_Function_nargs(void* ptr){ re extern size_t offsetof_ndt_t_Function_nargs(void){ return offsetof(ndt_t, Function.nargs); } extern /* pointer to `ndt_t**` */ void * get_ndt_t_Function_types(void* ptr){ return &(((ndt_t*)ptr)->Function.types); } extern size_t offsetof_ndt_t_Function_types(void){ return offsetof(ndt_t, Function.types); } +extern /* pointer to `enum ndt_contig` */ void * get_ndt_t_FixedDim_tag(void* ptr){ return &(((ndt_t*)ptr)->FixedDim.tag); } +extern size_t offsetof_ndt_t_FixedDim_tag(void){ return offsetof(ndt_t, FixedDim.tag); } extern /* pointer to `int64_t` */ void * 
get_ndt_t_FixedDim_shape(void* ptr){ return &(((ndt_t*)ptr)->FixedDim.shape); } extern size_t offsetof_ndt_t_FixedDim_shape(void){ return offsetof(ndt_t, FixedDim.shape); } extern /* pointer to `ndt_t*` */ void * get_ndt_t_FixedDim_type(void* ptr){ return &(((ndt_t*)ptr)->FixedDim.type); } extern size_t offsetof_ndt_t_FixedDim_type(void){ return offsetof(ndt_t, FixedDim.type); } extern /* pointer to `ndt_t*` */ void * get_ndt_t_VarDim_type(void* ptr){ return &(((ndt_t*)ptr)->VarDim.type); } extern size_t offsetof_ndt_t_VarDim_type(void){ return offsetof(ndt_t, VarDim.type); } +extern /* pointer to `enum ndt_contig` */ void * get_ndt_t_SymbolicDim_tag(void* ptr){ return &(((ndt_t*)ptr)->SymbolicDim.tag); } +extern size_t offsetof_ndt_t_SymbolicDim_tag(void){ return offsetof(ndt_t, SymbolicDim.tag); } extern /* pointer to `char*` */ void * get_ndt_t_SymbolicDim_name(void* ptr){ return &(((ndt_t*)ptr)->SymbolicDim.name); } extern size_t offsetof_ndt_t_SymbolicDim_name(void){ return offsetof(ndt_t, SymbolicDim.name); } extern /* pointer to `ndt_t*` */ void * get_ndt_t_SymbolicDim_type(void* ptr){ return &(((ndt_t*)ptr)->SymbolicDim.type); } extern size_t offsetof_ndt_t_SymbolicDim_type(void){ return offsetof(ndt_t, SymbolicDim.type); } +extern /* pointer to `enum ndt_contig` */ void * get_ndt_t_EllipsisDim_tag(void* ptr){ return &(((ndt_t*)ptr)->EllipsisDim.tag); } +extern size_t offsetof_ndt_t_EllipsisDim_tag(void){ return offsetof(ndt_t, EllipsisDim.tag); } extern /* pointer to `char*` */ void * get_ndt_t_EllipsisDim_name(void* ptr){ return &(((ndt_t*)ptr)->EllipsisDim.name); } extern size_t offsetof_ndt_t_EllipsisDim_name(void){ return offsetof(ndt_t, EllipsisDim.name); } extern /* pointer to `ndt_t*` */ void * get_ndt_t_EllipsisDim_type(void* ptr){ return &(((ndt_t*)ptr)->EllipsisDim.type); } @@ -899,6 +906,17 @@ static PyObject *pyc_get_ndt_t_Function_types(PyObject *self, PyObject *args) { return NULL; } static PyObject *pyc_offsetof_ndt_t_Function_types(PyObject 
*self, PyObject *args) { return PyLong_FromLong((long)(offsetof_ndt_t_Function_types())); } +static PyObject *pyc_get_ndt_t_FixedDim_tag(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + if (!PyArg_UnpackTuple(args, "ndt_t", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_ndt_t_FixedDim_tag(PyCapsule_GetPointer(ptr, "ndt_t*")), "enum ndt_contig", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted ndt_t"); + return NULL; +} +static PyObject *pyc_offsetof_ndt_t_FixedDim_tag(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_ndt_t_FixedDim_tag())); } static PyObject *pyc_get_ndt_t_FixedDim_shape(PyObject *self, PyObject *args) { PyObject* ptr=NULL; if (!PyArg_UnpackTuple(args, "ndt_t", 0, 1, &ptr)) @@ -932,6 +950,17 @@ static PyObject *pyc_get_ndt_t_VarDim_type(PyObject *self, PyObject *args) { return NULL; } static PyObject *pyc_offsetof_ndt_t_VarDim_type(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_ndt_t_VarDim_type())); } +static PyObject *pyc_get_ndt_t_SymbolicDim_tag(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + if (!PyArg_UnpackTuple(args, "ndt_t", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_ndt_t_SymbolicDim_tag(PyCapsule_GetPointer(ptr, "ndt_t*")), "enum ndt_contig", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted ndt_t"); + return NULL; +} +static PyObject *pyc_offsetof_ndt_t_SymbolicDim_tag(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_ndt_t_SymbolicDim_tag())); } static PyObject *pyc_get_ndt_t_SymbolicDim_name(PyObject *self, PyObject *args) { PyObject* ptr=NULL; if (!PyArg_UnpackTuple(args, "ndt_t", 0, 1, &ptr)) @@ -954,6 +983,17 @@ static PyObject *pyc_get_ndt_t_SymbolicDim_type(PyObject *self, PyObject *args) return NULL; } static PyObject *pyc_offsetof_ndt_t_SymbolicDim_type(PyObject *self, PyObject *args) { return 
PyLong_FromLong((long)(offsetof_ndt_t_SymbolicDim_type())); } +static PyObject *pyc_get_ndt_t_EllipsisDim_tag(PyObject *self, PyObject *args) { + PyObject* ptr=NULL; + if (!PyArg_UnpackTuple(args, "ndt_t", 0, 1, &ptr)) + return NULL; + if (PyCapsule_CheckExact(ptr)) { + return PyCapsule_New( get_ndt_t_EllipsisDim_tag(PyCapsule_GetPointer(ptr, "ndt_t*")), "enum ndt_contig", NULL); + } + PyErr_SetString(PyExc_TypeError, "expected capsuleted ndt_t"); + return NULL; +} +static PyObject *pyc_offsetof_ndt_t_EllipsisDim_tag(PyObject *self, PyObject *args) { return PyLong_FromLong((long)(offsetof_ndt_t_EllipsisDim_tag())); } static PyObject *pyc_get_ndt_t_EllipsisDim_name(PyObject *self, PyObject *args) { PyObject* ptr=NULL; if (!PyArg_UnpackTuple(args, "ndt_t", 0, 1, &ptr)) @@ -2475,16 +2515,22 @@ static PyMethodDef xnd_structinfo_methods[] = { {"offsetof_ndt_t_Function_nargs", (PyCFunction)pyc_offsetof_ndt_t_Function_nargs, METH_VARARGS, "offsetof_ndt_t_Function_nargs() -> int"}, {"get_ndt_t_Function_types", (PyCFunction)pyc_get_ndt_t_Function_types, METH_VARARGS, "get_ndt_t_Function_types(< capsule(ndt_t) >) -> < capsule( &ndt_t->Function.types ) >"}, {"offsetof_ndt_t_Function_types", (PyCFunction)pyc_offsetof_ndt_t_Function_types, METH_VARARGS, "offsetof_ndt_t_Function_types() -> int"}, + {"get_ndt_t_FixedDim_tag", (PyCFunction)pyc_get_ndt_t_FixedDim_tag, METH_VARARGS, "get_ndt_t_FixedDim_tag(< capsule(ndt_t) >) -> < capsule( &ndt_t->FixedDim.tag ) >"}, + {"offsetof_ndt_t_FixedDim_tag", (PyCFunction)pyc_offsetof_ndt_t_FixedDim_tag, METH_VARARGS, "offsetof_ndt_t_FixedDim_tag() -> int"}, {"get_ndt_t_FixedDim_shape", (PyCFunction)pyc_get_ndt_t_FixedDim_shape, METH_VARARGS, "get_ndt_t_FixedDim_shape(< capsule(ndt_t) >) -> < capsule( &ndt_t->FixedDim.shape ) >"}, {"offsetof_ndt_t_FixedDim_shape", (PyCFunction)pyc_offsetof_ndt_t_FixedDim_shape, METH_VARARGS, "offsetof_ndt_t_FixedDim_shape() -> int"}, {"get_ndt_t_FixedDim_type", (PyCFunction)pyc_get_ndt_t_FixedDim_type, 
METH_VARARGS, "get_ndt_t_FixedDim_type(< capsule(ndt_t) >) -> < capsule( &ndt_t->FixedDim.type ) >"}, {"offsetof_ndt_t_FixedDim_type", (PyCFunction)pyc_offsetof_ndt_t_FixedDim_type, METH_VARARGS, "offsetof_ndt_t_FixedDim_type() -> int"}, {"get_ndt_t_VarDim_type", (PyCFunction)pyc_get_ndt_t_VarDim_type, METH_VARARGS, "get_ndt_t_VarDim_type(< capsule(ndt_t) >) -> < capsule( &ndt_t->VarDim.type ) >"}, {"offsetof_ndt_t_VarDim_type", (PyCFunction)pyc_offsetof_ndt_t_VarDim_type, METH_VARARGS, "offsetof_ndt_t_VarDim_type() -> int"}, + {"get_ndt_t_SymbolicDim_tag", (PyCFunction)pyc_get_ndt_t_SymbolicDim_tag, METH_VARARGS, "get_ndt_t_SymbolicDim_tag(< capsule(ndt_t) >) -> < capsule( &ndt_t->SymbolicDim.tag ) >"}, + {"offsetof_ndt_t_SymbolicDim_tag", (PyCFunction)pyc_offsetof_ndt_t_SymbolicDim_tag, METH_VARARGS, "offsetof_ndt_t_SymbolicDim_tag() -> int"}, {"get_ndt_t_SymbolicDim_name", (PyCFunction)pyc_get_ndt_t_SymbolicDim_name, METH_VARARGS, "get_ndt_t_SymbolicDim_name(< capsule(ndt_t) >) -> < capsule( &ndt_t->SymbolicDim.name ) >"}, {"offsetof_ndt_t_SymbolicDim_name", (PyCFunction)pyc_offsetof_ndt_t_SymbolicDim_name, METH_VARARGS, "offsetof_ndt_t_SymbolicDim_name() -> int"}, {"get_ndt_t_SymbolicDim_type", (PyCFunction)pyc_get_ndt_t_SymbolicDim_type, METH_VARARGS, "get_ndt_t_SymbolicDim_type(< capsule(ndt_t) >) -> < capsule( &ndt_t->SymbolicDim.type ) >"}, {"offsetof_ndt_t_SymbolicDim_type", (PyCFunction)pyc_offsetof_ndt_t_SymbolicDim_type, METH_VARARGS, "offsetof_ndt_t_SymbolicDim_type() -> int"}, + {"get_ndt_t_EllipsisDim_tag", (PyCFunction)pyc_get_ndt_t_EllipsisDim_tag, METH_VARARGS, "get_ndt_t_EllipsisDim_tag(< capsule(ndt_t) >) -> < capsule( &ndt_t->EllipsisDim.tag ) >"}, + {"offsetof_ndt_t_EllipsisDim_tag", (PyCFunction)pyc_offsetof_ndt_t_EllipsisDim_tag, METH_VARARGS, "offsetof_ndt_t_EllipsisDim_tag() -> int"}, {"get_ndt_t_EllipsisDim_name", (PyCFunction)pyc_get_ndt_t_EllipsisDim_name, METH_VARARGS, "get_ndt_t_EllipsisDim_name(< capsule(ndt_t) >) -> < capsule( 
&ndt_t->EllipsisDim.name ) >"}, {"offsetof_ndt_t_EllipsisDim_name", (PyCFunction)pyc_offsetof_ndt_t_EllipsisDim_name, METH_VARARGS, "offsetof_ndt_t_EllipsisDim_name() -> int"}, {"get_ndt_t_EllipsisDim_type", (PyCFunction)pyc_get_ndt_t_EllipsisDim_type, METH_VARARGS, "get_ndt_t_EllipsisDim_type(< capsule(ndt_t) >) -> < capsule( &ndt_t->EllipsisDim.type ) >"}, @@ -2771,6 +2817,6 @@ PyInit_xnd_structinfo(void) { import_xnd(); import_gumath(); - return PyModule_Create(&xnd_structinfomodule); + return PyModule_Create(&xnd_structinfomodule); } #endif From 863a984139f1b855cab9d0e55378f65298fa2cc4 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 16 Oct 2018 19:01:22 -0400 Subject: [PATCH 7/8] Remove vendored numba --- numba/.binstar.yml | 79 - numba/.coveragerc | 22 - numba/.gitattributes | 1 - numba/.github/ISSUE_TEMPLATE.md | 47 - numba/.github/PULL_REQUEST_TEMPLATE.md | 35 - numba/.gitignore | 26 - numba/.travis.yml | 53 - numba/AUTHORS | 60 - numba/CHANGE_LOG | 1963 ------- numba/CONTRIBUTING.md | 20 - numba/LICENSE | 24 - numba/LICENSES.third-party | 78 - numba/MANIFEST.in | 10 - numba/README.rst | 143 - numba/appveyor.yml | 44 - numba/benchmarks/README | 20 - numba/benchmarks/bm_euler.py | 63 - numba/benchmarks/bm_laplace2d.py | 65 - numba/benchmarks/runall.py | 90 - numba/bin/numba | 8 - numba/buildscripts/appveyor/run_with_env.cmd | 90 - numba/buildscripts/condarecipe.local/bld.bat | 4 - numba/buildscripts/condarecipe.local/build.sh | 5 - .../condarecipe.local/license.txt | 24 - .../buildscripts/condarecipe.local/mandel.py | 43 - .../buildscripts/condarecipe.local/meta.yaml | 56 - .../condarecipe.local/run_test.bat | 18 - .../condarecipe.local/run_test.sh | 37 - .../condarecipe_clone_icc_rt/bld.bat | 3 - .../condarecipe_clone_icc_rt/build.sh | 5 - .../condarecipe_clone_icc_rt/meta.yaml | 34 - .../condarecipe_clone_icc_rt/scripts/build.py | 33 - .../buildscripts/incremental/after_success.sh | 11 - numba/buildscripts/incremental/build.cmd | 10 - 
numba/buildscripts/incremental/build.sh | 14 - .../incremental/install_miniconda.sh | 15 - .../incremental/setup_conda_environment.cmd | 30 - .../incremental/setup_conda_environment.sh | 66 - numba/buildscripts/incremental/test.cmd | 32 - numba/buildscripts/incremental/test.sh | 51 - numba/buildscripts/remove_unwanted_files.py | 36 - numba/codecov.yml | 22 - numba/condatestall.py | 51 - numba/docs/Makefile | 177 - numba/docs/_static/numba_blue_icon_rgb.png | Bin 8527 -> 0 bytes numba/docs/_templates/EMPTY | 0 numba/docs/gh-pages.py | 150 - numba/docs/make.bat | 242 - numba/docs/source/conf.py | 300 -- numba/docs/source/cuda-reference/host.rst | 152 - numba/docs/source/cuda-reference/index.rst | 8 - numba/docs/source/cuda-reference/kernel.rst | 305 -- numba/docs/source/cuda-reference/memory.rst | 21 - .../docs/source/cuda/cuda_array_interface.rst | 76 - numba/docs/source/cuda/cudapysupported.rst | 207 - numba/docs/source/cuda/device-functions.rst | 15 - numba/docs/source/cuda/device-management.rst | 76 - numba/docs/source/cuda/examples.rst | 86 - numba/docs/source/cuda/faq.rst | 20 - numba/docs/source/cuda/index.rst | 21 - numba/docs/source/cuda/intrinsics.rst | 60 - numba/docs/source/cuda/ipc.rst | 35 - numba/docs/source/cuda/kernels.rst | 229 - numba/docs/source/cuda/memory.rst | 212 - numba/docs/source/cuda/overview.rst | 59 - numba/docs/source/cuda/random.rst | 66 - numba/docs/source/cuda/reduction.rst | 37 - numba/docs/source/cuda/simulator.rst | 85 - numba/docs/source/cuda/ufunc.rst | 154 - numba/docs/source/developer/architecture.rst | 944 ---- numba/docs/source/developer/contributing.rst | 298 -- .../docs/source/developer/custom_pipeline.rst | 33 - numba/docs/source/developer/dispatching.rst | 267 - numba/docs/source/developer/environment.rst | 58 - numba/docs/source/developer/generators.rst | 307 -- numba/docs/source/developer/index.rst | 20 - numba/docs/source/developer/listings.rst | 11 - .../developer/live_variable_analysis.rst | 86 - 
numba/docs/source/developer/numba-runtime.rst | 130 - numba/docs/source/developer/rewrites.rst | 397 -- numba/docs/source/developer/stencil.rst | 170 - numba/docs/source/extending/high-level.rst | 111 - numba/docs/source/extending/index.rst | 29 - .../source/extending/interval-example.rst | 326 -- numba/docs/source/extending/low-level.rst | 179 - numba/docs/source/glossary.rst | 96 - numba/docs/source/hsa/device-functions.rst | 15 - numba/docs/source/hsa/device-management.rst | 5 - numba/docs/source/hsa/examples.rst | 115 - numba/docs/source/hsa/index.rst | 13 - numba/docs/source/hsa/intrinsics.rst | 40 - numba/docs/source/hsa/kernels.rst | 175 - numba/docs/source/hsa/memory.rst | 64 - numba/docs/source/hsa/overview.rst | 51 - numba/docs/source/index.rst | 26 - numba/docs/source/proposals/cfunc.rst | 147 - .../source/proposals/extension-points.rst | 414 -- numba/docs/source/proposals/index.rst | 35 - .../docs/source/proposals/integer-typing.rst | 186 - numba/docs/source/proposals/jit-classes.rst | 231 - .../source/proposals/np-where-override.py | 51 - .../source/proposals/recursion_callstack.svg | 4 - .../docs/source/proposals/type-inference.rst | 124 - .../source/proposals/typing_recursion.rst | 129 - .../docs/source/reference/aot-compilation.rst | 74 - numba/docs/source/reference/envvars.rst | 266 - numba/docs/source/reference/fpsemantics.rst | 85 - numba/docs/source/reference/index.rst | 16 - .../docs/source/reference/jit-compilation.rst | 478 -- .../docs/source/reference/numpysupported.rst | 600 --- numba/docs/source/reference/pysemantics.rst | 50 - numba/docs/source/reference/pysupported.rst | 578 -- numba/docs/source/reference/python27-eol.rst | 20 - numba/docs/source/reference/types.rst | 200 - numba/docs/source/reference/utils.rst | 34 - numba/docs/source/release-notes.rst | 5 - numba/docs/source/user/cfunc.rst | 133 - numba/docs/source/user/examples.rst | 36 - numba/docs/source/user/faq.rst | 251 - numba/docs/source/user/generated-jit.rst | 71 - 
numba/docs/source/user/index.rst | 20 - numba/docs/source/user/installing.rst | 72 - numba/docs/source/user/jit.rst | 199 - numba/docs/source/user/jitclass.rst | 101 - numba/docs/source/user/overview.rst | 34 - numba/docs/source/user/parallel.rst | 141 - numba/docs/source/user/performance-tips.rst | 230 - numba/docs/source/user/pycc.rst | 137 - numba/docs/source/user/stencil.rst | 244 - numba/docs/source/user/troubleshoot.rst | 490 -- numba/docs/source/user/vectorize.rst | 297 -- numba/examples/.coveragerc | 8 - numba/examples/__init__.py | 0 numba/examples/binarytree.py | 181 - numba/examples/blackscholes/blackscholes.ods | Bin 38825 -> 0 bytes numba/examples/blackscholes/blackscholes.py | 78 - .../blackscholes/blackscholes_cuda.py | 130 - .../blackscholes/blackscholes_numba.py | 95 - .../examples/blackscholes/blackscholes_pa.py | 59 - .../examples/blackscholes/blackscholes_vec.py | 56 - numba/examples/blur_image.py | 68 - numba/examples/bubblesort.py | 64 - numba/examples/cffi_example.py | 23 - numba/examples/compile_with_pycc.py | 9 - numba/examples/ctypes_example.py | 40 - numba/examples/cuda_dask.py | 106 - numba/examples/cuda_ipc.py | 60 - numba/examples/cuda_mpi.py | 95 - numba/examples/cudajit/matmul.ods | Bin 42474 -> 0 bytes numba/examples/cudajit/matmul.py | 74 - numba/examples/cudajit/matmul_benchmark.txt | 13 - numba/examples/cudajit/matmul_smem.py | 86 - numba/examples/cudajit/sum.py | 30 - numba/examples/fbcorr.py | 41 - .../gaussian-blur/gaussian-blur-pa.py | 95 - numba/examples/gaussian-blur/gaussian-blur.py | 121 - numba/examples/harris/harris.py | 73 - numba/examples/harris/harris_numba.py | 88 - numba/examples/harris/harris_pa.py | 86 - numba/examples/harris/harris_py.py | 84 - numba/examples/jitclass.py | 40 - numba/examples/juliaset/juliaset.py | 77 - numba/examples/juliaset/juliaset_numba.py | 79 - numba/examples/juliaset/juliaset_pa.py | 79 - numba/examples/k-means/k-means.py | 50 - numba/examples/k-means/k-means_numba.py | 51 - 
numba/examples/k-means/k-means_pa.py | 51 - .../kernel_density_estimation.py | 44 - .../kernel_density_estimation_numba.py | 45 - .../kernel_density_estimation_pa.py | 45 - .../laplace2d/laplace2d-benchmark.txt | 29 - .../laplace2d/laplace2d-numba-cuda-improve.py | 127 - .../laplace2d/laplace2d-numba-cuda-smem.py | 113 - .../laplace2d/laplace2d-numba-cuda.py | 90 - numba/examples/laplace2d/laplace2d-numba.py | 64 - numba/examples/laplace2d/laplace2d-pa.py | 65 - numba/examples/laplace2d/laplace2d.ods | Bin 33896 -> 0 bytes numba/examples/laplace2d/laplace2d.py | 71 - .../linear_regression/linear_regression.py | 40 - .../linear_regression_numba.py | 40 - .../linear_regression/linear_regression_pa.py | 42 - numba/examples/linkedlist.py | 72 - .../logistic_regression.py | 52 - numba/examples/mandel/mandel_jit.py | 53 - numba/examples/mandel/mandel_vectorize.py | 55 - numba/examples/mergesort.py | 131 - numba/examples/movemean.py | 25 - numba/examples/nbody/nbody.py | 130 - .../nbody/nbody_modified_by_MarkHarris.py | 129 - numba/examples/nogil.py | 85 - numba/examples/notebooks/LinearRegr.ipynb | 200 - numba/examples/notebooks/LinearRegr.py | 126 - numba/examples/notebooks/Using Numba.ipynb | 220 - numba/examples/notebooks/j0 in Numba.ipynb | 321 -- numba/examples/notebooks/numba.ipynb | 804 --- numba/examples/objects.py | 16 - numba/examples/pi.py | 33 - numba/examples/ra24.py | 71 - numba/examples/stack.py | 114 - numba/examples/structures.py | 22 - numba/examples/sum.py | 34 - numba/examples/tests/__init__.py | 107 - numba/examples/tests/matplotlibrc | 1 - numba/examples/tests/sitecustomize.py | 8 - numba/examples/ufuncs.py | 141 - numba/examples/vectorize/cuda_polynomial.py | 86 - numba/examples/vectorize/perfstat.ods | Bin 18964 -> 0 bytes numba/examples/vectorize/perfstatevents.txt | 1 - numba/examples/vectorize/polynomial.py | 69 - numba/examples/vectorize/sum.py | 67 - numba/examples/wave-2d/wave_2d.py | 122 - numba/examples/wave-2d/wave_2d_numba.py | 124 - 
numba/examples/wave-2d/wave_2d_pa.py | 123 - numba/examples/wave-2d/wave_2d_pa_demo.py | 115 - numba/numba/__init__.py | 169 - numba/numba/_arraystruct.h | 21 - numba/numba/_dispatcher.c | 678 --- numba/numba/_dispatcher.h | 33 - numba/numba/_dispatcherimpl.cpp | 104 - numba/numba/_dynfunc.c | 457 -- numba/numba/_dynfuncmod.c | 93 - numba/numba/_hashtable.c | 530 -- numba/numba/_hashtable.h | 132 - numba/numba/_helperlib.c | 969 ---- numba/numba/_helpermod.c | 243 - numba/numba/_lapack.c | 1946 ------- numba/numba/_math_c99.c | 781 --- numba/numba/_math_c99.h | 90 - numba/numba/_npymath_exports.c | 46 - numba/numba/_numba_common.h | 20 - numba/numba/_pymodule.h | 56 - numba/numba/_random.c | 478 -- numba/numba/_typeof.c | 906 ---- numba/numba/_typeof.h | 10 - numba/numba/_version.py | 239 - numba/numba/analysis.py | 233 - numba/numba/annotations/__init__.py | 0 numba/numba/annotations/template.html | 144 - numba/numba/annotations/type_annotations.py | 283 - numba/numba/appdirs.py | 557 -- numba/numba/array_analysis.py | 1895 ------- numba/numba/bytecode.py | 330 -- numba/numba/caching.py | 732 --- numba/numba/callwrapper.py | 211 - numba/numba/capsulethunk.h | 108 - numba/numba/ccallback.py | 178 - numba/numba/cffi_support.py | 6 - numba/numba/cgutils.py | 1037 ---- numba/numba/compiler.py | 1041 ---- numba/numba/config.py | 327 -- numba/numba/consts.py | 95 - numba/numba/controlflow.py | 646 --- numba/numba/ctypes_support.py | 14 - numba/numba/cuda/__init__.py | 17 - numba/numba/cuda/api.py | 376 -- numba/numba/cuda/args.py | 80 - numba/numba/cuda/codegen.py | 71 - numba/numba/cuda/compiler.py | 822 --- numba/numba/cuda/cudadecl.py | 524 -- numba/numba/cuda/cudadrv/__init__.py | 9 - numba/numba/cuda/cudadrv/_extras.c | 45 - numba/numba/cuda/cudadrv/autotune.py | 321 -- numba/numba/cuda/cudadrv/devicearray.py | 651 --- numba/numba/cuda/cudadrv/devices.py | 228 - numba/numba/cuda/cudadrv/driver.py | 1840 ------- numba/numba/cuda/cudadrv/drvapi.py | 348 -- 
numba/numba/cuda/cudadrv/enums.py | 432 -- numba/numba/cuda/cudadrv/error.py | 18 - numba/numba/cuda/cudadrv/libs.py | 96 - numba/numba/cuda/cudadrv/ndarray.py | 22 - numba/numba/cuda/cudadrv/nvvm.py | 662 --- numba/numba/cuda/cudaimpl.py | 601 --- numba/numba/cuda/cudamath.py | 97 - numba/numba/cuda/decorators.py | 127 - numba/numba/cuda/descriptor.py | 14 - numba/numba/cuda/device_init.py | 51 - numba/numba/cuda/dispatcher.py | 231 - numba/numba/cuda/errors.py | 41 - numba/numba/cuda/initialize.py | 11 - numba/numba/cuda/intrinsic_wrapper.py | 78 - numba/numba/cuda/kernels/__init__.py | 0 numba/numba/cuda/kernels/reduction.py | 263 - numba/numba/cuda/kernels/transpose.py | 65 - numba/numba/cuda/libdevice.py | 125 - numba/numba/cuda/nvvmutils.py | 160 - numba/numba/cuda/printimpl.py | 78 - numba/numba/cuda/random.py | 282 - numba/numba/cuda/simulator/__init__.py | 27 - numba/numba/cuda/simulator/api.py | 95 - numba/numba/cuda/simulator/compiler.py | 6 - .../numba/cuda/simulator/cudadrv/__init__.py | 1 - .../cuda/simulator/cudadrv/devicearray.py | 230 - numba/numba/cuda/simulator/cudadrv/devices.py | 86 - numba/numba/cuda/simulator/cudadrv/driver.py | 15 - numba/numba/cuda/simulator/cudadrv/drvapi.py | 4 - numba/numba/cuda/simulator/cudadrv/nvvm.py | 23 - numba/numba/cuda/simulator/kernel.py | 265 - numba/numba/cuda/simulator/kernelapi.py | 280 - numba/numba/cuda/simulator/reduction.py | 13 - numba/numba/cuda/simulator_init.py | 19 - numba/numba/cuda/stubs.py | 472 -- numba/numba/cuda/target.py | 255 - numba/numba/cuda/testing.py | 81 - numba/numba/cuda/tests/__init__.py | 22 - numba/numba/cuda/tests/cudadrv/__init__.py | 6 - .../numba/cuda/tests/cudadrv/data/__init__.py | 0 .../numba/cuda/tests/cudadrv/data/jitlink.cu | 13 - .../numba/cuda/tests/cudadrv/data/jitlink.ptx | 30 - .../cuda/tests/cudadrv/test_array_attr.py | 81 - .../cuda/tests/cudadrv/test_context_stack.py | 42 - .../tests/cudadrv/test_cuda_array_slicing.py | 277 - 
.../tests/cudadrv/test_cuda_auto_context.py | 21 - .../tests/cudadrv/test_cuda_devicerecord.py | 92 - .../cuda/tests/cudadrv/test_cuda_driver.py | 131 - .../cuda/tests/cudadrv/test_cuda_memory.py | 154 - .../cuda/tests/cudadrv/test_cuda_ndarray.py | 244 - .../cuda/tests/cudadrv/test_deallocations.py | 177 - numba/numba/cuda/tests/cudadrv/test_detect.py | 17 - numba/numba/cuda/tests/cudadrv/test_events.py | 38 - .../cuda/tests/cudadrv/test_host_alloc.py | 52 - .../cuda/tests/cudadrv/test_inline_ptx.py | 38 - .../numba/cuda/tests/cudadrv/test_ir_patch.py | 28 - numba/numba/cuda/tests/cudadrv/test_linker.py | 42 - .../cuda/tests/cudadrv/test_nvvm_driver.py | 177 - numba/numba/cuda/tests/cudadrv/test_pinned.py | 37 - .../numba/cuda/tests/cudadrv/test_profiler.py | 22 - .../cuda/tests/cudadrv/test_reset_device.py | 41 - .../cuda/tests/cudadrv/test_select_device.py | 46 - numba/numba/cuda/tests/cudapy/__init__.py | 6 - .../numba/cuda/tests/cudapy/test_alignment.py | 41 - numba/numba/cuda/tests/cudapy/test_array.py | 65 - .../cuda/tests/cudapy/test_array_args.py | 29 - .../cuda/tests/cudapy/test_array_methods.py | 37 - numba/numba/cuda/tests/cudapy/test_atomics.py | 428 -- numba/numba/cuda/tests/cudapy/test_autojit.py | 26 - .../cuda/tests/cudapy/test_blackscholes.py | 128 - numba/numba/cuda/tests/cudapy/test_boolean.py | 25 - numba/numba/cuda/tests/cudapy/test_casting.py | 74 - numba/numba/cuda/tests/cudapy/test_complex.py | 255 - .../cuda/tests/cudapy/test_complex_kernel.py | 24 - .../cuda/tests/cudapy/test_const_string.py | 55 - .../numba/cuda/tests/cudapy/test_constmem.py | 60 - .../tests/cudapy/test_cuda_array_interface.py | 111 - .../cuda/tests/cudapy/test_cuda_autojit.py | 80 - numba/numba/cuda/tests/cudapy/test_debug.py | 94 - .../numba/cuda/tests/cudapy/test_debuginfo.py | 56 - .../cuda/tests/cudapy/test_device_func.py | 114 - numba/numba/cuda/tests/cudapy/test_errors.py | 46 - .../numba/cuda/tests/cudapy/test_exception.py | 107 - 
.../numba/cuda/tests/cudapy/test_fastmath.py | 43 - numba/numba/cuda/tests/cudapy/test_forall.py | 39 - numba/numba/cuda/tests/cudapy/test_freevar.py | 30 - numba/numba/cuda/tests/cudapy/test_globals.py | 61 - numba/numba/cuda/tests/cudapy/test_gufunc.py | 303 -- .../cuda/tests/cudapy/test_gufunc_scalar.py | 161 - .../tests/cudapy/test_gufunc_scheduling.py | 96 - numba/numba/cuda/tests/cudapy/test_idiv.py | 39 - numba/numba/cuda/tests/cudapy/test_inspect.py | 72 - .../cuda/tests/cudapy/test_intrinsics.py | 365 -- numba/numba/cuda/tests/cudapy/test_ipc.py | 277 - numba/numba/cuda/tests/cudapy/test_lang.py | 63 - numba/numba/cuda/tests/cudapy/test_laplace.py | 126 - .../numba/cuda/tests/cudapy/test_localmem.py | 65 - numba/numba/cuda/tests/cudapy/test_macro.py | 96 - numba/numba/cuda/tests/cudapy/test_mandel.py | 34 - numba/numba/cuda/tests/cudapy/test_math.py | 523 -- numba/numba/cuda/tests/cudapy/test_matmul.py | 75 - .../cuda/tests/cudapy/test_montecarlo.py | 24 - .../numba/cuda/tests/cudapy/test_multigpu.py | 121 - .../cuda/tests/cudapy/test_multiprocessing.py | 46 - .../cuda/tests/cudapy/test_multithreads.py | 98 - numba/numba/cuda/tests/cudapy/test_nondet.py | 51 - .../numba/cuda/tests/cudapy/test_operator.py | 43 - numba/numba/cuda/tests/cudapy/test_powi.py | 52 - numba/numba/cuda/tests/cudapy/test_print.py | 68 - .../cuda/tests/cudapy/test_py2_div_issue.py | 33 - numba/numba/cuda/tests/cudapy/test_random.py | 101 - .../cuda/tests/cudapy/test_record_dtype.py | 289 - .../numba/cuda/tests/cudapy/test_reduction.py | 77 - .../test_retrieve_autoconverted_arrays.py | 84 - .../numba/cuda/tests/cudapy/test_serialize.py | 84 - numba/numba/cuda/tests/cudapy/test_slicing.py | 30 - numba/numba/cuda/tests/cudapy/test_sm.py | 73 - .../cuda/tests/cudapy/test_smart_array.py | 33 - numba/numba/cuda/tests/cudapy/test_sync.py | 169 - .../numba/cuda/tests/cudapy/test_transpose.py | 29 - numba/numba/cuda/tests/cudapy/test_userexc.py | 31 - 
.../numba/cuda/tests/cudapy/test_vectorize.py | 193 - .../tests/cudapy/test_vectorize_complex.py | 21 - .../cuda/tests/cudapy/test_vectorize_decor.py | 65 - .../tests/cudapy/test_vectorize_device.py | 37 - .../tests/cudapy/test_vectorize_scalar_arg.py | 44 - .../numba/cuda/tests/cudapy/test_warp_ops.py | 245 - numba/numba/cuda/tests/cudasim/__init__.py | 6 - numba/numba/cuda/tests/cudasim/support.py | 6 - .../cuda/tests/cudasim/test_cudasim_issues.py | 71 - numba/numba/cuda/tests/nocuda/__init__.py | 6 - numba/numba/cuda/tests/nocuda/test_nvvm.py | 30 - numba/numba/cuda/vectorizers.py | 66 - numba/numba/dataflow.py | 872 --- numba/numba/datamodel/__init__.py | 4 - numba/numba/datamodel/manager.py | 49 - numba/numba/datamodel/models.py | 1311 ----- numba/numba/datamodel/packer.py | 215 - numba/numba/datamodel/registry.py | 20 - numba/numba/datamodel/testing.py | 152 - numba/numba/debuginfo.py | 378 -- numba/numba/decorators.py | 260 - numba/numba/dispatcher.py | 747 --- numba/numba/dummyarray.py | 402 -- numba/numba/errors.py | 608 --- numba/numba/extending.py | 371 -- numba/numba/findlib.py | 38 - numba/numba/funcdesc.py | 199 - numba/numba/generators.py | 350 -- numba/numba/inline_closurecall.py | 1059 ---- numba/numba/interpreter.py | 1009 ---- numba/numba/io_support.py | 7 - numba/numba/ir.py | 1015 ---- numba/numba/ir_utils.py | 1724 ------ numba/numba/itanium_mangler.py | 248 - numba/numba/jitclass/__init__.py | 2 - numba/numba/jitclass/_box.c | 184 - numba/numba/jitclass/base.py | 468 -- numba/numba/jitclass/boxing.py | 183 - numba/numba/jitclass/decorators.py | 29 - numba/numba/lowering.py | 1034 ---- numba/numba/macro.py | 9 - numba/numba/mathnames.h | 78 - numba/numba/mviewbuf.c | 461 -- numba/numba/npdatetime.py | 200 - numba/numba/npyufunc/__init__.py | 26 - numba/numba/npyufunc/_internal.c | 715 --- numba/numba/npyufunc/_internal.h | 27 - numba/numba/npyufunc/_ufunc.c | 218 - numba/numba/npyufunc/array_exprs.py | 405 -- 
numba/numba/npyufunc/decorators.py | 182 - numba/numba/npyufunc/deviceufunc.py | 839 --- numba/numba/npyufunc/dufunc.py | 286 - numba/numba/npyufunc/gufunc_scheduler.cpp | 362 -- numba/numba/npyufunc/gufunc_scheduler.h | 48 - numba/numba/npyufunc/parallel.py | 437 -- numba/numba/npyufunc/parfor.py | 990 ---- numba/numba/npyufunc/sigparse.py | 65 - numba/numba/npyufunc/tbbpool.cpp | 119 - numba/numba/npyufunc/ufuncbuilder.py | 335 -- numba/numba/npyufunc/workqueue.c | 316 -- numba/numba/npyufunc/workqueue.h | 38 - numba/numba/npyufunc/wrappers.py | 698 --- numba/numba/numba_entry.py | 320 -- numba/numba/numpy_support.py | 532 -- numba/numba/objmode.py | 597 --- numba/numba/parfor.py | 3064 ----------- numba/numba/postproc.py | 213 - numba/numba/pretty_annotate.py | 282 - numba/numba/pycc/__init__.py | 101 - numba/numba/pycc/cc.py | 296 - numba/numba/pycc/compiler.py | 523 -- numba/numba/pycc/decorators.py | 75 - numba/numba/pycc/llvm_types.py | 36 - numba/numba/pycc/modulemixin.c | 200 - numba/numba/pycc/platform.py | 258 - numba/numba/pycc/pycc | 3 - numba/numba/pythonapi.py | 1476 ----- numba/numba/rewrites/__init__.py | 8 - numba/numba/rewrites/ir_print.py | 82 - numba/numba/rewrites/macros.py | 131 - numba/numba/rewrites/registry.py | 90 - numba/numba/rewrites/static_binop.py | 35 - numba/numba/rewrites/static_getitem.py | 85 - numba/numba/rewrites/static_raise.py | 62 - numba/numba/roc/README.md | 36 - numba/numba/roc/__init__.py | 40 - numba/numba/roc/api.py | 196 - numba/numba/roc/codegen.py | 50 - numba/numba/roc/compiler.py | 464 -- numba/numba/roc/decorators.py | 59 - numba/numba/roc/descriptor.py | 14 - numba/numba/roc/dispatch.py | 150 - numba/numba/roc/enums.py | 4 - numba/numba/roc/gcn_occupancy.py | 90 - numba/numba/roc/hlc/__init__.py | 19 - numba/numba/roc/hlc/common.py | 161 - numba/numba/roc/hlc/config.py | 8 - numba/numba/roc/hlc/hlc.py | 306 -- numba/numba/roc/hlc/libhlc.py | 247 - numba/numba/roc/hsadecl.py | 191 - 
numba/numba/roc/hsadrv/__init__.py | 4 - numba/numba/roc/hsadrv/devicearray.py | 365 -- numba/numba/roc/hsadrv/devices.py | 157 - numba/numba/roc/hsadrv/driver.py | 1548 ------ numba/numba/roc/hsadrv/drvapi.py | 1738 ------ numba/numba/roc/hsadrv/enums.py | 482 -- numba/numba/roc/hsadrv/enums_ext.py | 254 - numba/numba/roc/hsadrv/error.py | 33 - numba/numba/roc/hsaimpl.py | 301 -- numba/numba/roc/initialize.py | 27 - numba/numba/roc/mathdecl.py | 346 -- numba/numba/roc/mathimpl.py | 107 - numba/numba/roc/stubs.py | 168 - numba/numba/roc/target.py | 330 -- numba/numba/roc/tests/__init__.py | 17 - numba/numba/roc/tests/hsadrv/__init__.py | 6 - numba/numba/roc/tests/hsadrv/test_async.py | 55 - numba/numba/roc/tests/hsadrv/test_driver.py | 638 --- numba/numba/roc/tests/hsapy/__init__.py | 6 - numba/numba/roc/tests/hsapy/run_far_branch.py | 46 - .../roc/tests/hsapy/test_async_kernel.py | 68 - numba/numba/roc/tests/hsapy/test_atomics.py | 71 - numba/numba/roc/tests/hsapy/test_autojit.py | 45 - numba/numba/roc/tests/hsapy/test_barrier.py | 75 - numba/numba/roc/tests/hsapy/test_compiler.py | 128 - numba/numba/roc/tests/hsapy/test_decorator.py | 53 - .../roc/tests/hsapy/test_gufuncbuilding.py | 167 - .../numba/roc/tests/hsapy/test_large_code.py | 36 - numba/numba/roc/tests/hsapy/test_linkage.py | 30 - numba/numba/roc/tests/hsapy/test_math.py | 182 - numba/numba/roc/tests/hsapy/test_matmul.py | 118 - numba/numba/roc/tests/hsapy/test_memory.py | 92 - numba/numba/roc/tests/hsapy/test_occupancy.py | 110 - .../numba/roc/tests/hsapy/test_positioning.py | 43 - numba/numba/roc/tests/hsapy/test_reduction.py | 78 - numba/numba/roc/tests/hsapy/test_scan.py | 449 -- numba/numba/roc/tests/hsapy/test_simple.py | 143 - .../roc/tests/hsapy/test_ufuncbuilding.py | 110 - numba/numba/roc/vectorizers.py | 149 - numba/numba/runtests.py | 116 - numba/numba/runtime/__init__.py | 3 - numba/numba/runtime/_nrt_python.c | 416 -- numba/numba/runtime/_nrt_pythonmod.c | 198 - 
numba/numba/runtime/context.py | 222 - numba/numba/runtime/nrt.c | 445 -- numba/numba/runtime/nrt.h | 226 - numba/numba/runtime/nrt.py | 121 - numba/numba/runtime/nrtdynmod.py | 213 - numba/numba/runtime/nrtopt.py | 171 - numba/numba/scripts/__init__.py | 0 numba/numba/scripts/generate_lower_listing.py | 169 - numba/numba/serialize.py | 112 - numba/numba/servicelib/__init__.py | 3 - numba/numba/servicelib/service.py | 89 - numba/numba/servicelib/threadlocal.py | 47 - numba/numba/sigutils.py | 47 - numba/numba/six.py | 838 --- numba/numba/smartarray.py | 228 - numba/numba/special.py | 19 - numba/numba/stencil.py | 744 --- numba/numba/stencilparfor.py | 676 --- numba/numba/targets/__init__.py | 1 - numba/numba/targets/arraymath.py | 1779 ------ numba/numba/targets/arrayobj.py | 4751 ----------------- numba/numba/targets/base.py | 1083 ---- numba/numba/targets/boxing.py | 1018 ---- numba/numba/targets/builtins.py | 433 -- numba/numba/targets/callconv.py | 497 -- numba/numba/targets/cffiimpl.py | 23 - numba/numba/targets/cmathimpl.py | 522 -- numba/numba/targets/codegen.py | 845 --- numba/numba/targets/cpu.py | 282 - numba/numba/targets/descriptors.py | 9 - numba/numba/targets/enumimpl.py | 79 - numba/numba/targets/externals.py | 203 - numba/numba/targets/fastmathpass.py | 36 - numba/numba/targets/imputils.py | 427 -- numba/numba/targets/intrinsics.py | 103 - numba/numba/targets/iterators.py | 141 - numba/numba/targets/linalg.py | 2795 ---------- numba/numba/targets/listobj.py | 1091 ---- numba/numba/targets/mathimpl.py | 405 -- numba/numba/targets/mergesort.py | 126 - numba/numba/targets/npdatetime.py | 628 --- numba/numba/targets/npyfuncs.py | 1794 ------- numba/numba/targets/npyimpl.py | 542 -- numba/numba/targets/numbers.py | 1389 ----- numba/numba/targets/operatorimpl.py | 45 - numba/numba/targets/optional.py | 120 - numba/numba/targets/options.py | 80 - numba/numba/targets/polynomial.py | 59 - numba/numba/targets/printimpl.py | 84 - 
numba/numba/targets/quicksort.py | 242 - numba/numba/targets/randomimpl.py | 1493 ------ numba/numba/targets/rangeobj.py | 212 - numba/numba/targets/registry.py | 107 - numba/numba/targets/removerefctpass.py | 109 - numba/numba/targets/setobj.py | 1418 ----- numba/numba/targets/slicing.py | 209 - numba/numba/targets/smartarray.py | 41 - numba/numba/targets/tupleobj.py | 295 - numba/numba/targets/ufunc_db.py | 994 ---- numba/numba/testing/__init__.py | 71 - numba/numba/testing/__main__.py | 4 - numba/numba/testing/ddt.py | 241 - numba/numba/testing/loader.py | 27 - numba/numba/testing/main.py | 639 --- numba/numba/testing/notebook.py | 172 - numba/numba/tests/__init__.py | 39 - numba/numba/tests/annotation_usecases.py | 16 - numba/numba/tests/cache_usecases.py | 158 - numba/numba/tests/cffi_usecases.py | 199 - numba/numba/tests/cfunc_cache_usecases.py | 75 - numba/numba/tests/compile_with_pycc.py | 101 - numba/numba/tests/complex_usecases.py | 95 - numba/numba/tests/ctypes_usecases.py | 116 - numba/numba/tests/dummy_module.py | 4 - numba/numba/tests/enum_usecases.py | 48 - numba/numba/tests/matmul_usecase.py | 57 - numba/numba/tests/npyufunc/__init__.py | 11 - numba/numba/tests/npyufunc/cache_usecases.py | 76 - numba/numba/tests/npyufunc/test_caching.py | 229 - numba/numba/tests/npyufunc/test_dufunc.py | 88 - numba/numba/tests/npyufunc/test_errors.py | 199 - numba/numba/tests/npyufunc/test_gufunc.py | 174 - .../npyufunc/test_parallel_env_variable.py | 32 - .../tests/npyufunc/test_parallel_low_work.py | 43 - .../npyufunc/test_parallel_ufunc_issues.py | 130 - numba/numba/tests/npyufunc/test_ufunc.py | 105 - .../tests/npyufunc/test_ufuncbuilding.py | 377 -- .../tests/npyufunc/test_vectorize_decor.py | 105 - numba/numba/tests/pdlike_usecase.py | 306 -- .../pycc_distutils_usecase/setup_distutils.py | 15 - .../setup_setuptools.py | 15 - .../pycc_distutils_usecase/source_module.py | 18 - numba/numba/tests/recursion_usecases.py | 217 - 
numba/numba/tests/serialize_usecases.py | 114 - numba/numba/tests/support.py | 670 --- numba/numba/tests/test_alignment.py | 40 - numba/numba/tests/test_annotations.py | 183 - numba/numba/tests/test_api.py | 36 - numba/numba/tests/test_array_analysis.py | 854 --- numba/numba/tests/test_array_attr.py | 376 -- numba/numba/tests/test_array_constants.py | 178 - numba/numba/tests/test_array_exprs.py | 490 -- numba/numba/tests/test_array_iterators.py | 487 -- numba/numba/tests/test_array_manipulation.py | 590 -- numba/numba/tests/test_array_methods.py | 923 ---- numba/numba/tests/test_array_reductions.py | 677 --- numba/numba/tests/test_array_return.py | 43 - numba/numba/tests/test_auto_constants.py | 35 - numba/numba/tests/test_blackscholes.py | 205 - numba/numba/tests/test_buffer_protocol.py | 316 -- numba/numba/tests/test_builtins.py | 962 ---- numba/numba/tests/test_casting.py | 102 - numba/numba/tests/test_cffi.py | 199 - numba/numba/tests/test_cfunc.py | 370 -- numba/numba/tests/test_cgutils.py | 123 - numba/numba/tests/test_chained_assign.py | 153 - numba/numba/tests/test_closure.py | 455 -- numba/numba/tests/test_codegen.py | 207 - numba/numba/tests/test_compile_cache.py | 133 - numba/numba/tests/test_complex.py | 317 -- numba/numba/tests/test_comprehension.py | 451 -- numba/numba/tests/test_config.py | 107 - numba/numba/tests/test_conversion.py | 221 - numba/numba/tests/test_copy_propagate.py | 108 - numba/numba/tests/test_ctypes.py | 236 - numba/numba/tests/test_dataflow.py | 207 - numba/numba/tests/test_datamodel.py | 205 - numba/numba/tests/test_debug.py | 314 -- numba/numba/tests/test_debuginfo.py | 55 - numba/numba/tests/test_del.py | 39 - numba/numba/tests/test_deprecations.py | 25 - numba/numba/tests/test_dicts.py | 26 - numba/numba/tests/test_dispatcher.py | 1487 ------ numba/numba/tests/test_dummyarray.py | 201 - numba/numba/tests/test_dyn_array.py | 1495 ------ numba/numba/tests/test_dyn_func.py | 46 - numba/numba/tests/test_enums.py | 146 - 
numba/numba/tests/test_errorhandling.py | 103 - numba/numba/tests/test_errormodels.py | 29 - numba/numba/tests/test_exceptions.py | 203 - numba/numba/tests/test_extended_arg.py | 48 - numba/numba/tests/test_extending.py | 750 --- numba/numba/tests/test_extending_types.py | 92 - numba/numba/tests/test_fancy_indexing.py | 231 - numba/numba/tests/test_fastmath.py | 69 - numba/numba/tests/test_flow_control.py | 854 --- numba/numba/tests/test_func_interface.py | 45 - numba/numba/tests/test_func_lifetime.py | 168 - numba/numba/tests/test_generators.py | 643 --- numba/numba/tests/test_gil.py | 185 - numba/numba/tests/test_globals.py | 171 - numba/numba/tests/test_hashing.py | 178 - numba/numba/tests/test_import.py | 52 - numba/numba/tests/test_indexing.py | 1147 ---- numba/numba/tests/test_inlining.py | 78 - numba/numba/tests/test_interproc.py | 49 - numba/numba/tests/test_intwidth.py | 93 - numba/numba/tests/test_ir.py | 183 - numba/numba/tests/test_itanium_mangler.py | 84 - numba/numba/tests/test_iteration.py | 198 - numba/numba/tests/test_jitclasses.py | 644 --- numba/numba/tests/test_jitmethod.py | 71 - numba/numba/tests/test_linalg.py | 2415 --------- numba/numba/tests/test_lists.py | 1429 ----- numba/numba/tests/test_llvm_version_check.py | 44 - numba/numba/tests/test_locals.py | 19 - numba/numba/tests/test_looplifting.py | 498 -- numba/numba/tests/test_mandelbrot.py | 37 - numba/numba/tests/test_mangling.py | 41 - numba/numba/tests/test_mathlib.py | 670 --- numba/numba/tests/test_maxmin.py | 42 - numba/numba/tests/test_multi3.py | 44 - numba/numba/tests/test_nan.py | 41 - numba/numba/tests/test_nested_calls.py | 151 - numba/numba/tests/test_np_functions.py | 595 --- numba/numba/tests/test_npdatetime.py | 762 --- numba/numba/tests/test_nrt.py | 508 -- numba/numba/tests/test_nrt_refct.py | 115 - numba/numba/tests/test_numberctor.py | 256 - numba/numba/tests/test_numconv.py | 39 - numba/numba/tests/test_numpy_support.py | 431 -- numba/numba/tests/test_numpyadapt.py | 
44 - numba/numba/tests/test_obj_lifetime.py | 387 -- numba/numba/tests/test_object_mode.py | 163 - numba/numba/tests/test_objects.py | 72 - numba/numba/tests/test_operators.py | 1558 ------ numba/numba/tests/test_optional.py | 245 - numba/numba/tests/test_overlap.py | 135 - numba/numba/tests/test_parfors.py | 2342 -------- numba/numba/tests/test_pipeline.py | 47 - numba/numba/tests/test_polynomial.py | 119 - numba/numba/tests/test_print.py | 184 - numba/numba/tests/test_profiler.py | 79 - numba/numba/tests/test_pycc.py | 335 -- numba/numba/tests/test_python_int.py | 56 - numba/numba/tests/test_random.py | 1521 ------ numba/numba/tests/test_range.py | 157 - numba/numba/tests/test_recarray_usecases.py | 150 - numba/numba/tests/test_record_dtype.py | 865 --- numba/numba/tests/test_recursion.py | 120 - numba/numba/tests/test_remove_dead.py | 190 - numba/numba/tests/test_return_values.py | 80 - numba/numba/tests/test_runtests.py | 110 - numba/numba/tests/test_serialize.py | 177 - numba/numba/tests/test_sets.py | 816 --- numba/numba/tests/test_slices.py | 83 - numba/numba/tests/test_smart_array.py | 85 - numba/numba/tests/test_sort.py | 927 ---- numba/numba/tests/test_stencils.py | 2401 --------- numba/numba/tests/test_storeslice.py | 73 - numba/numba/tests/test_support.py | 349 -- numba/numba/tests/test_svml.py | 404 -- .../numba/tests/test_sys_stdin_assignment.py | 69 - .../tests/test_target_overloadselector.py | 149 - numba/numba/tests/test_threadsafety.py | 97 - numba/numba/tests/test_tracing.py | 182 - numba/numba/tests/test_tuples.py | 504 -- numba/numba/tests/test_typeconv.py | 259 - numba/numba/tests/test_typeinfer.py | 702 --- numba/numba/tests/test_typenames.py | 19 - numba/numba/tests/test_typeof.py | 569 -- numba/numba/tests/test_types.py | 564 -- numba/numba/tests/test_typingerror.py | 207 - numba/numba/tests/test_ufuncs.py | 1854 ------- numba/numba/tests/test_unicode_literals.py | 32 - numba/numba/tests/test_unicode_names.py | 67 - 
numba/numba/tests/test_unpack_sequence.py | 244 - numba/numba/tests/test_unsafe_intrinsics.py | 85 - numba/numba/tests/test_usecases.py | 232 - numba/numba/tests/test_utils.py | 85 - .../test_vectorization_type_inference.py | 44 - numba/numba/tests/test_warnings.py | 123 - numba/numba/tests/test_wrapper.py | 107 - numba/numba/tests/timsort.py | 944 ---- numba/numba/tests/true_div_usecase.py | 12 - numba/numba/tests/usecases.py | 89 - numba/numba/tracing.py | 218 - numba/numba/transforms.py | 301 -- numba/numba/typeconv/__init__.py | 1 - numba/numba/typeconv/_typeconv.cpp | 203 - numba/numba/typeconv/castgraph.py | 136 - numba/numba/typeconv/rules.py | 59 - numba/numba/typeconv/test.cpp | 39 - numba/numba/typeconv/typeconv.cpp | 211 - numba/numba/typeconv/typeconv.hpp | 94 - numba/numba/typeconv/typeconv.py | 115 - numba/numba/typeinfer.py | 1325 ----- numba/numba/types/__init__.py | 167 - numba/numba/types/abstract.py | 382 -- numba/numba/types/common.py | 99 - numba/numba/types/containers.py | 430 -- numba/numba/types/functions.py | 435 -- numba/numba/types/iterators.py | 106 - numba/numba/types/misc.py | 420 -- numba/numba/types/npytypes.py | 454 -- numba/numba/types/scalars.py | 218 - numba/numba/typing/__init__.py | 4 - numba/numba/typing/arraydecl.py | 708 --- numba/numba/typing/bufproto.py | 74 - numba/numba/typing/builtins.py | 971 ---- numba/numba/typing/cffi_utils.py | 181 - numba/numba/typing/cmathdecl.py | 71 - numba/numba/typing/collections.py | 125 - numba/numba/typing/context.py | 625 --- numba/numba/typing/ctypes_utils.py | 125 - numba/numba/typing/enumdecl.py | 63 - numba/numba/typing/listdecl.py | 226 - numba/numba/typing/mathdecl.py | 132 - numba/numba/typing/npdatetime.py | 261 - numba/numba/typing/npydecl.py | 1272 ----- numba/numba/typing/operatordecl.py | 58 - numba/numba/typing/randomdecl.py | 288 - numba/numba/typing/setdecl.py | 190 - numba/numba/typing/templates.py | 715 --- numba/numba/typing/typeof.py | 215 - 
numba/numba/unittest_support.py | 10 - numba/numba/unsafe/__init__.py | 7 - numba/numba/unsafe/ndarray.py | 81 - numba/numba/unsafe/tuple.py | 32 - numba/numba/utils.py | 660 --- numba/requirements.txt | 5 - numba/run_coverage.py | 43 - numba/runtests.py | 11 - numba/setup.py | 263 - numba/tutorials/Numba First Steps.ipynb | 685 --- numba/tutorials/Numba types.ipynb | 1192 ----- numba/tutorials/Numpy and numba.ipynb | 1330 ----- numba/versioneer.py | 1046 ---- 818 files changed, 200817 deletions(-) delete mode 100644 numba/.binstar.yml delete mode 100644 numba/.coveragerc delete mode 100644 numba/.gitattributes delete mode 100644 numba/.github/ISSUE_TEMPLATE.md delete mode 100644 numba/.github/PULL_REQUEST_TEMPLATE.md delete mode 100644 numba/.gitignore delete mode 100644 numba/.travis.yml delete mode 100644 numba/AUTHORS delete mode 100644 numba/CHANGE_LOG delete mode 100644 numba/CONTRIBUTING.md delete mode 100644 numba/LICENSE delete mode 100644 numba/LICENSES.third-party delete mode 100644 numba/MANIFEST.in delete mode 100644 numba/README.rst delete mode 100644 numba/appveyor.yml delete mode 100644 numba/benchmarks/README delete mode 100644 numba/benchmarks/bm_euler.py delete mode 100644 numba/benchmarks/bm_laplace2d.py delete mode 100644 numba/benchmarks/runall.py delete mode 100755 numba/bin/numba delete mode 100644 numba/buildscripts/appveyor/run_with_env.cmd delete mode 100644 numba/buildscripts/condarecipe.local/bld.bat delete mode 100644 numba/buildscripts/condarecipe.local/build.sh delete mode 100644 numba/buildscripts/condarecipe.local/license.txt delete mode 100644 numba/buildscripts/condarecipe.local/mandel.py delete mode 100644 numba/buildscripts/condarecipe.local/meta.yaml delete mode 100644 numba/buildscripts/condarecipe.local/run_test.bat delete mode 100644 numba/buildscripts/condarecipe.local/run_test.sh delete mode 100644 numba/buildscripts/condarecipe_clone_icc_rt/bld.bat delete mode 100644 numba/buildscripts/condarecipe_clone_icc_rt/build.sh 
delete mode 100644 numba/buildscripts/condarecipe_clone_icc_rt/meta.yaml delete mode 100644 numba/buildscripts/condarecipe_clone_icc_rt/scripts/build.py delete mode 100755 numba/buildscripts/incremental/after_success.sh delete mode 100644 numba/buildscripts/incremental/build.cmd delete mode 100755 numba/buildscripts/incremental/build.sh delete mode 100755 numba/buildscripts/incremental/install_miniconda.sh delete mode 100644 numba/buildscripts/incremental/setup_conda_environment.cmd delete mode 100755 numba/buildscripts/incremental/setup_conda_environment.sh delete mode 100644 numba/buildscripts/incremental/test.cmd delete mode 100755 numba/buildscripts/incremental/test.sh delete mode 100644 numba/buildscripts/remove_unwanted_files.py delete mode 100644 numba/codecov.yml delete mode 100644 numba/condatestall.py delete mode 100644 numba/docs/Makefile delete mode 100644 numba/docs/_static/numba_blue_icon_rgb.png delete mode 100644 numba/docs/_templates/EMPTY delete mode 100644 numba/docs/gh-pages.py delete mode 100644 numba/docs/make.bat delete mode 100644 numba/docs/source/conf.py delete mode 100644 numba/docs/source/cuda-reference/host.rst delete mode 100644 numba/docs/source/cuda-reference/index.rst delete mode 100644 numba/docs/source/cuda-reference/kernel.rst delete mode 100644 numba/docs/source/cuda-reference/memory.rst delete mode 100644 numba/docs/source/cuda/cuda_array_interface.rst delete mode 100644 numba/docs/source/cuda/cudapysupported.rst delete mode 100644 numba/docs/source/cuda/device-functions.rst delete mode 100644 numba/docs/source/cuda/device-management.rst delete mode 100644 numba/docs/source/cuda/examples.rst delete mode 100644 numba/docs/source/cuda/faq.rst delete mode 100644 numba/docs/source/cuda/index.rst delete mode 100644 numba/docs/source/cuda/intrinsics.rst delete mode 100644 numba/docs/source/cuda/ipc.rst delete mode 100644 numba/docs/source/cuda/kernels.rst delete mode 100644 numba/docs/source/cuda/memory.rst delete mode 100644 
numba/docs/source/cuda/overview.rst delete mode 100644 numba/docs/source/cuda/random.rst delete mode 100644 numba/docs/source/cuda/reduction.rst delete mode 100644 numba/docs/source/cuda/simulator.rst delete mode 100644 numba/docs/source/cuda/ufunc.rst delete mode 100644 numba/docs/source/developer/architecture.rst delete mode 100644 numba/docs/source/developer/contributing.rst delete mode 100644 numba/docs/source/developer/custom_pipeline.rst delete mode 100644 numba/docs/source/developer/dispatching.rst delete mode 100644 numba/docs/source/developer/environment.rst delete mode 100644 numba/docs/source/developer/generators.rst delete mode 100644 numba/docs/source/developer/index.rst delete mode 100644 numba/docs/source/developer/listings.rst delete mode 100644 numba/docs/source/developer/live_variable_analysis.rst delete mode 100644 numba/docs/source/developer/numba-runtime.rst delete mode 100644 numba/docs/source/developer/rewrites.rst delete mode 100644 numba/docs/source/developer/stencil.rst delete mode 100644 numba/docs/source/extending/high-level.rst delete mode 100644 numba/docs/source/extending/index.rst delete mode 100644 numba/docs/source/extending/interval-example.rst delete mode 100644 numba/docs/source/extending/low-level.rst delete mode 100644 numba/docs/source/glossary.rst delete mode 100644 numba/docs/source/hsa/device-functions.rst delete mode 100644 numba/docs/source/hsa/device-management.rst delete mode 100644 numba/docs/source/hsa/examples.rst delete mode 100644 numba/docs/source/hsa/index.rst delete mode 100644 numba/docs/source/hsa/intrinsics.rst delete mode 100644 numba/docs/source/hsa/kernels.rst delete mode 100644 numba/docs/source/hsa/memory.rst delete mode 100644 numba/docs/source/hsa/overview.rst delete mode 100644 numba/docs/source/index.rst delete mode 100644 numba/docs/source/proposals/cfunc.rst delete mode 100644 numba/docs/source/proposals/extension-points.rst delete mode 100644 numba/docs/source/proposals/index.rst delete mode 
100644 numba/docs/source/proposals/integer-typing.rst delete mode 100644 numba/docs/source/proposals/jit-classes.rst delete mode 100644 numba/docs/source/proposals/np-where-override.py delete mode 100644 numba/docs/source/proposals/recursion_callstack.svg delete mode 100644 numba/docs/source/proposals/type-inference.rst delete mode 100644 numba/docs/source/proposals/typing_recursion.rst delete mode 100644 numba/docs/source/reference/aot-compilation.rst delete mode 100644 numba/docs/source/reference/envvars.rst delete mode 100644 numba/docs/source/reference/fpsemantics.rst delete mode 100644 numba/docs/source/reference/index.rst delete mode 100644 numba/docs/source/reference/jit-compilation.rst delete mode 100644 numba/docs/source/reference/numpysupported.rst delete mode 100644 numba/docs/source/reference/pysemantics.rst delete mode 100644 numba/docs/source/reference/pysupported.rst delete mode 100644 numba/docs/source/reference/python27-eol.rst delete mode 100644 numba/docs/source/reference/types.rst delete mode 100644 numba/docs/source/reference/utils.rst delete mode 100644 numba/docs/source/release-notes.rst delete mode 100644 numba/docs/source/user/cfunc.rst delete mode 100644 numba/docs/source/user/examples.rst delete mode 100644 numba/docs/source/user/faq.rst delete mode 100644 numba/docs/source/user/generated-jit.rst delete mode 100644 numba/docs/source/user/index.rst delete mode 100644 numba/docs/source/user/installing.rst delete mode 100644 numba/docs/source/user/jit.rst delete mode 100644 numba/docs/source/user/jitclass.rst delete mode 100644 numba/docs/source/user/overview.rst delete mode 100644 numba/docs/source/user/parallel.rst delete mode 100644 numba/docs/source/user/performance-tips.rst delete mode 100644 numba/docs/source/user/pycc.rst delete mode 100644 numba/docs/source/user/stencil.rst delete mode 100644 numba/docs/source/user/troubleshoot.rst delete mode 100644 numba/docs/source/user/vectorize.rst delete mode 100644 numba/examples/.coveragerc 
delete mode 100644 numba/examples/__init__.py delete mode 100755 numba/examples/binarytree.py delete mode 100644 numba/examples/blackscholes/blackscholes.ods delete mode 100755 numba/examples/blackscholes/blackscholes.py delete mode 100755 numba/examples/blackscholes/blackscholes_cuda.py delete mode 100755 numba/examples/blackscholes/blackscholes_numba.py delete mode 100644 numba/examples/blackscholes/blackscholes_pa.py delete mode 100644 numba/examples/blackscholes/blackscholes_vec.py delete mode 100755 numba/examples/blur_image.py delete mode 100755 numba/examples/bubblesort.py delete mode 100755 numba/examples/cffi_example.py delete mode 100755 numba/examples/compile_with_pycc.py delete mode 100755 numba/examples/ctypes_example.py delete mode 100644 numba/examples/cuda_dask.py delete mode 100644 numba/examples/cuda_ipc.py delete mode 100755 numba/examples/cuda_mpi.py delete mode 100644 numba/examples/cudajit/matmul.ods delete mode 100755 numba/examples/cudajit/matmul.py delete mode 100644 numba/examples/cudajit/matmul_benchmark.txt delete mode 100755 numba/examples/cudajit/matmul_smem.py delete mode 100755 numba/examples/cudajit/sum.py delete mode 100755 numba/examples/fbcorr.py delete mode 100644 numba/examples/gaussian-blur/gaussian-blur-pa.py delete mode 100644 numba/examples/gaussian-blur/gaussian-blur.py delete mode 100644 numba/examples/harris/harris.py delete mode 100644 numba/examples/harris/harris_numba.py delete mode 100644 numba/examples/harris/harris_pa.py delete mode 100644 numba/examples/harris/harris_py.py delete mode 100755 numba/examples/jitclass.py delete mode 100644 numba/examples/juliaset/juliaset.py delete mode 100644 numba/examples/juliaset/juliaset_numba.py delete mode 100644 numba/examples/juliaset/juliaset_pa.py delete mode 100644 numba/examples/k-means/k-means.py delete mode 100644 numba/examples/k-means/k-means_numba.py delete mode 100644 numba/examples/k-means/k-means_pa.py delete mode 100644 
numba/examples/kernel-density-estimation/kernel_density_estimation.py delete mode 100644 numba/examples/kernel-density-estimation/kernel_density_estimation_numba.py delete mode 100644 numba/examples/kernel-density-estimation/kernel_density_estimation_pa.py delete mode 100644 numba/examples/laplace2d/laplace2d-benchmark.txt delete mode 100755 numba/examples/laplace2d/laplace2d-numba-cuda-improve.py delete mode 100755 numba/examples/laplace2d/laplace2d-numba-cuda-smem.py delete mode 100755 numba/examples/laplace2d/laplace2d-numba-cuda.py delete mode 100755 numba/examples/laplace2d/laplace2d-numba.py delete mode 100755 numba/examples/laplace2d/laplace2d-pa.py delete mode 100644 numba/examples/laplace2d/laplace2d.ods delete mode 100755 numba/examples/laplace2d/laplace2d.py delete mode 100644 numba/examples/linear_regression/linear_regression.py delete mode 100644 numba/examples/linear_regression/linear_regression_numba.py delete mode 100644 numba/examples/linear_regression/linear_regression_pa.py delete mode 100755 numba/examples/linkedlist.py delete mode 100644 numba/examples/logistic-regression/logistic_regression.py delete mode 100755 numba/examples/mandel/mandel_jit.py delete mode 100755 numba/examples/mandel/mandel_vectorize.py delete mode 100755 numba/examples/mergesort.py delete mode 100755 numba/examples/movemean.py delete mode 100755 numba/examples/nbody/nbody.py delete mode 100755 numba/examples/nbody/nbody_modified_by_MarkHarris.py delete mode 100755 numba/examples/nogil.py delete mode 100644 numba/examples/notebooks/LinearRegr.ipynb delete mode 100644 numba/examples/notebooks/LinearRegr.py delete mode 100644 numba/examples/notebooks/Using Numba.ipynb delete mode 100644 numba/examples/notebooks/j0 in Numba.ipynb delete mode 100644 numba/examples/notebooks/numba.ipynb delete mode 100755 numba/examples/objects.py delete mode 100644 numba/examples/pi.py delete mode 100755 numba/examples/ra24.py delete mode 100755 numba/examples/stack.py delete mode 100755 
numba/examples/structures.py delete mode 100755 numba/examples/sum.py delete mode 100644 numba/examples/tests/__init__.py delete mode 100644 numba/examples/tests/matplotlibrc delete mode 100644 numba/examples/tests/sitecustomize.py delete mode 100755 numba/examples/ufuncs.py delete mode 100755 numba/examples/vectorize/cuda_polynomial.py delete mode 100644 numba/examples/vectorize/perfstat.ods delete mode 100644 numba/examples/vectorize/perfstatevents.txt delete mode 100755 numba/examples/vectorize/polynomial.py delete mode 100755 numba/examples/vectorize/sum.py delete mode 100644 numba/examples/wave-2d/wave_2d.py delete mode 100644 numba/examples/wave-2d/wave_2d_numba.py delete mode 100644 numba/examples/wave-2d/wave_2d_pa.py delete mode 100644 numba/examples/wave-2d/wave_2d_pa_demo.py delete mode 100644 numba/numba/__init__.py delete mode 100644 numba/numba/_arraystruct.h delete mode 100644 numba/numba/_dispatcher.c delete mode 100644 numba/numba/_dispatcher.h delete mode 100644 numba/numba/_dispatcherimpl.cpp delete mode 100644 numba/numba/_dynfunc.c delete mode 100644 numba/numba/_dynfuncmod.c delete mode 100644 numba/numba/_hashtable.c delete mode 100644 numba/numba/_hashtable.h delete mode 100644 numba/numba/_helperlib.c delete mode 100644 numba/numba/_helpermod.c delete mode 100644 numba/numba/_lapack.c delete mode 100644 numba/numba/_math_c99.c delete mode 100644 numba/numba/_math_c99.h delete mode 100644 numba/numba/_npymath_exports.c delete mode 100644 numba/numba/_numba_common.h delete mode 100644 numba/numba/_pymodule.h delete mode 100644 numba/numba/_random.c delete mode 100644 numba/numba/_typeof.c delete mode 100644 numba/numba/_typeof.h delete mode 100644 numba/numba/_version.py delete mode 100644 numba/numba/analysis.py delete mode 100644 numba/numba/annotations/__init__.py delete mode 100644 numba/numba/annotations/template.html delete mode 100644 numba/numba/annotations/type_annotations.py delete mode 100644 numba/numba/appdirs.py delete mode 
100644 numba/numba/array_analysis.py delete mode 100644 numba/numba/bytecode.py delete mode 100644 numba/numba/caching.py delete mode 100644 numba/numba/callwrapper.py delete mode 100644 numba/numba/capsulethunk.h delete mode 100644 numba/numba/ccallback.py delete mode 100644 numba/numba/cffi_support.py delete mode 100644 numba/numba/cgutils.py delete mode 100644 numba/numba/compiler.py delete mode 100644 numba/numba/config.py delete mode 100644 numba/numba/consts.py delete mode 100644 numba/numba/controlflow.py delete mode 100644 numba/numba/ctypes_support.py delete mode 100644 numba/numba/cuda/__init__.py delete mode 100644 numba/numba/cuda/api.py delete mode 100644 numba/numba/cuda/args.py delete mode 100644 numba/numba/cuda/codegen.py delete mode 100644 numba/numba/cuda/compiler.py delete mode 100644 numba/numba/cuda/cudadecl.py delete mode 100644 numba/numba/cuda/cudadrv/__init__.py delete mode 100644 numba/numba/cuda/cudadrv/_extras.c delete mode 100644 numba/numba/cuda/cudadrv/autotune.py delete mode 100644 numba/numba/cuda/cudadrv/devicearray.py delete mode 100644 numba/numba/cuda/cudadrv/devices.py delete mode 100644 numba/numba/cuda/cudadrv/driver.py delete mode 100644 numba/numba/cuda/cudadrv/drvapi.py delete mode 100644 numba/numba/cuda/cudadrv/enums.py delete mode 100644 numba/numba/cuda/cudadrv/error.py delete mode 100644 numba/numba/cuda/cudadrv/libs.py delete mode 100644 numba/numba/cuda/cudadrv/ndarray.py delete mode 100644 numba/numba/cuda/cudadrv/nvvm.py delete mode 100644 numba/numba/cuda/cudaimpl.py delete mode 100644 numba/numba/cuda/cudamath.py delete mode 100644 numba/numba/cuda/decorators.py delete mode 100644 numba/numba/cuda/descriptor.py delete mode 100644 numba/numba/cuda/device_init.py delete mode 100644 numba/numba/cuda/dispatcher.py delete mode 100644 numba/numba/cuda/errors.py delete mode 100644 numba/numba/cuda/initialize.py delete mode 100644 numba/numba/cuda/intrinsic_wrapper.py delete mode 100644 
numba/numba/cuda/kernels/__init__.py delete mode 100644 numba/numba/cuda/kernels/reduction.py delete mode 100644 numba/numba/cuda/kernels/transpose.py delete mode 100644 numba/numba/cuda/libdevice.py delete mode 100644 numba/numba/cuda/nvvmutils.py delete mode 100644 numba/numba/cuda/printimpl.py delete mode 100644 numba/numba/cuda/random.py delete mode 100644 numba/numba/cuda/simulator/__init__.py delete mode 100644 numba/numba/cuda/simulator/api.py delete mode 100644 numba/numba/cuda/simulator/compiler.py delete mode 100644 numba/numba/cuda/simulator/cudadrv/__init__.py delete mode 100644 numba/numba/cuda/simulator/cudadrv/devicearray.py delete mode 100644 numba/numba/cuda/simulator/cudadrv/devices.py delete mode 100644 numba/numba/cuda/simulator/cudadrv/driver.py delete mode 100644 numba/numba/cuda/simulator/cudadrv/drvapi.py delete mode 100644 numba/numba/cuda/simulator/cudadrv/nvvm.py delete mode 100644 numba/numba/cuda/simulator/kernel.py delete mode 100644 numba/numba/cuda/simulator/kernelapi.py delete mode 100644 numba/numba/cuda/simulator/reduction.py delete mode 100644 numba/numba/cuda/simulator_init.py delete mode 100644 numba/numba/cuda/stubs.py delete mode 100644 numba/numba/cuda/target.py delete mode 100644 numba/numba/cuda/testing.py delete mode 100644 numba/numba/cuda/tests/__init__.py delete mode 100644 numba/numba/cuda/tests/cudadrv/__init__.py delete mode 100644 numba/numba/cuda/tests/cudadrv/data/__init__.py delete mode 100644 numba/numba/cuda/tests/cudadrv/data/jitlink.cu delete mode 100644 numba/numba/cuda/tests/cudadrv/data/jitlink.ptx delete mode 100644 numba/numba/cuda/tests/cudadrv/test_array_attr.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_context_stack.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_cuda_auto_context.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py delete mode 100644 
numba/numba/cuda/tests/cudadrv/test_cuda_driver.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_cuda_memory.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_cuda_ndarray.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_deallocations.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_detect.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_events.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_host_alloc.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_inline_ptx.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_ir_patch.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_linker.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_nvvm_driver.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_pinned.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_profiler.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_reset_device.py delete mode 100644 numba/numba/cuda/tests/cudadrv/test_select_device.py delete mode 100644 numba/numba/cuda/tests/cudapy/__init__.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_alignment.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_array.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_array_args.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_array_methods.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_atomics.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_autojit.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_blackscholes.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_boolean.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_casting.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_complex.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_complex_kernel.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_const_string.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_constmem.py delete mode 100644 
numba/numba/cuda/tests/cudapy/test_cuda_array_interface.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_cuda_autojit.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_debug.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_debuginfo.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_device_func.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_errors.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_exception.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_fastmath.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_forall.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_freevar.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_globals.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_gufunc.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_gufunc_scalar.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_gufunc_scheduling.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_idiv.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_inspect.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_intrinsics.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_ipc.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_lang.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_laplace.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_localmem.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_macro.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_mandel.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_math.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_matmul.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_montecarlo.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_multigpu.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_multiprocessing.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_multithreads.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_nondet.py delete mode 100644 
numba/numba/cuda/tests/cudapy/test_operator.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_powi.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_print.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_py2_div_issue.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_random.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_record_dtype.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_reduction.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_serialize.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_slicing.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_sm.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_smart_array.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_sync.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_transpose.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_userexc.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_vectorize.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_vectorize_complex.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_vectorize_decor.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_vectorize_device.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py delete mode 100644 numba/numba/cuda/tests/cudapy/test_warp_ops.py delete mode 100644 numba/numba/cuda/tests/cudasim/__init__.py delete mode 100644 numba/numba/cuda/tests/cudasim/support.py delete mode 100644 numba/numba/cuda/tests/cudasim/test_cudasim_issues.py delete mode 100644 numba/numba/cuda/tests/nocuda/__init__.py delete mode 100644 numba/numba/cuda/tests/nocuda/test_nvvm.py delete mode 100644 numba/numba/cuda/vectorizers.py delete mode 100644 numba/numba/dataflow.py delete mode 100644 numba/numba/datamodel/__init__.py delete mode 100644 numba/numba/datamodel/manager.py delete mode 100644 numba/numba/datamodel/models.py delete mode 
100644 numba/numba/datamodel/packer.py delete mode 100644 numba/numba/datamodel/registry.py delete mode 100644 numba/numba/datamodel/testing.py delete mode 100644 numba/numba/debuginfo.py delete mode 100644 numba/numba/decorators.py delete mode 100644 numba/numba/dispatcher.py delete mode 100644 numba/numba/dummyarray.py delete mode 100644 numba/numba/errors.py delete mode 100644 numba/numba/extending.py delete mode 100644 numba/numba/findlib.py delete mode 100644 numba/numba/funcdesc.py delete mode 100644 numba/numba/generators.py delete mode 100644 numba/numba/inline_closurecall.py delete mode 100644 numba/numba/interpreter.py delete mode 100644 numba/numba/io_support.py delete mode 100644 numba/numba/ir.py delete mode 100644 numba/numba/ir_utils.py delete mode 100644 numba/numba/itanium_mangler.py delete mode 100644 numba/numba/jitclass/__init__.py delete mode 100644 numba/numba/jitclass/_box.c delete mode 100644 numba/numba/jitclass/base.py delete mode 100644 numba/numba/jitclass/boxing.py delete mode 100644 numba/numba/jitclass/decorators.py delete mode 100644 numba/numba/lowering.py delete mode 100644 numba/numba/macro.py delete mode 100644 numba/numba/mathnames.h delete mode 100644 numba/numba/mviewbuf.c delete mode 100644 numba/numba/npdatetime.py delete mode 100644 numba/numba/npyufunc/__init__.py delete mode 100644 numba/numba/npyufunc/_internal.c delete mode 100644 numba/numba/npyufunc/_internal.h delete mode 100644 numba/numba/npyufunc/_ufunc.c delete mode 100644 numba/numba/npyufunc/array_exprs.py delete mode 100644 numba/numba/npyufunc/decorators.py delete mode 100644 numba/numba/npyufunc/deviceufunc.py delete mode 100644 numba/numba/npyufunc/dufunc.py delete mode 100644 numba/numba/npyufunc/gufunc_scheduler.cpp delete mode 100644 numba/numba/npyufunc/gufunc_scheduler.h delete mode 100644 numba/numba/npyufunc/parallel.py delete mode 100644 numba/numba/npyufunc/parfor.py delete mode 100644 numba/numba/npyufunc/sigparse.py delete mode 100644 
numba/numba/npyufunc/tbbpool.cpp delete mode 100644 numba/numba/npyufunc/ufuncbuilder.py delete mode 100644 numba/numba/npyufunc/workqueue.c delete mode 100644 numba/numba/npyufunc/workqueue.h delete mode 100644 numba/numba/npyufunc/wrappers.py delete mode 100644 numba/numba/numba_entry.py delete mode 100644 numba/numba/numpy_support.py delete mode 100644 numba/numba/objmode.py delete mode 100644 numba/numba/parfor.py delete mode 100644 numba/numba/postproc.py delete mode 100644 numba/numba/pretty_annotate.py delete mode 100644 numba/numba/pycc/__init__.py delete mode 100644 numba/numba/pycc/cc.py delete mode 100644 numba/numba/pycc/compiler.py delete mode 100644 numba/numba/pycc/decorators.py delete mode 100644 numba/numba/pycc/llvm_types.py delete mode 100644 numba/numba/pycc/modulemixin.c delete mode 100644 numba/numba/pycc/platform.py delete mode 100644 numba/numba/pycc/pycc delete mode 100644 numba/numba/pythonapi.py delete mode 100644 numba/numba/rewrites/__init__.py delete mode 100644 numba/numba/rewrites/ir_print.py delete mode 100644 numba/numba/rewrites/macros.py delete mode 100644 numba/numba/rewrites/registry.py delete mode 100644 numba/numba/rewrites/static_binop.py delete mode 100644 numba/numba/rewrites/static_getitem.py delete mode 100644 numba/numba/rewrites/static_raise.py delete mode 100644 numba/numba/roc/README.md delete mode 100644 numba/numba/roc/__init__.py delete mode 100644 numba/numba/roc/api.py delete mode 100644 numba/numba/roc/codegen.py delete mode 100644 numba/numba/roc/compiler.py delete mode 100644 numba/numba/roc/decorators.py delete mode 100644 numba/numba/roc/descriptor.py delete mode 100644 numba/numba/roc/dispatch.py delete mode 100644 numba/numba/roc/enums.py delete mode 100644 numba/numba/roc/gcn_occupancy.py delete mode 100644 numba/numba/roc/hlc/__init__.py delete mode 100644 numba/numba/roc/hlc/common.py delete mode 100644 numba/numba/roc/hlc/config.py delete mode 100644 numba/numba/roc/hlc/hlc.py delete mode 100644 
numba/numba/roc/hlc/libhlc.py delete mode 100644 numba/numba/roc/hsadecl.py delete mode 100644 numba/numba/roc/hsadrv/__init__.py delete mode 100644 numba/numba/roc/hsadrv/devicearray.py delete mode 100644 numba/numba/roc/hsadrv/devices.py delete mode 100644 numba/numba/roc/hsadrv/driver.py delete mode 100644 numba/numba/roc/hsadrv/drvapi.py delete mode 100644 numba/numba/roc/hsadrv/enums.py delete mode 100644 numba/numba/roc/hsadrv/enums_ext.py delete mode 100644 numba/numba/roc/hsadrv/error.py delete mode 100644 numba/numba/roc/hsaimpl.py delete mode 100644 numba/numba/roc/initialize.py delete mode 100644 numba/numba/roc/mathdecl.py delete mode 100644 numba/numba/roc/mathimpl.py delete mode 100644 numba/numba/roc/stubs.py delete mode 100644 numba/numba/roc/target.py delete mode 100644 numba/numba/roc/tests/__init__.py delete mode 100644 numba/numba/roc/tests/hsadrv/__init__.py delete mode 100644 numba/numba/roc/tests/hsadrv/test_async.py delete mode 100644 numba/numba/roc/tests/hsadrv/test_driver.py delete mode 100644 numba/numba/roc/tests/hsapy/__init__.py delete mode 100644 numba/numba/roc/tests/hsapy/run_far_branch.py delete mode 100644 numba/numba/roc/tests/hsapy/test_async_kernel.py delete mode 100644 numba/numba/roc/tests/hsapy/test_atomics.py delete mode 100644 numba/numba/roc/tests/hsapy/test_autojit.py delete mode 100644 numba/numba/roc/tests/hsapy/test_barrier.py delete mode 100644 numba/numba/roc/tests/hsapy/test_compiler.py delete mode 100644 numba/numba/roc/tests/hsapy/test_decorator.py delete mode 100644 numba/numba/roc/tests/hsapy/test_gufuncbuilding.py delete mode 100644 numba/numba/roc/tests/hsapy/test_large_code.py delete mode 100644 numba/numba/roc/tests/hsapy/test_linkage.py delete mode 100644 numba/numba/roc/tests/hsapy/test_math.py delete mode 100644 numba/numba/roc/tests/hsapy/test_matmul.py delete mode 100644 numba/numba/roc/tests/hsapy/test_memory.py delete mode 100644 numba/numba/roc/tests/hsapy/test_occupancy.py delete mode 100644 
numba/numba/roc/tests/hsapy/test_positioning.py delete mode 100644 numba/numba/roc/tests/hsapy/test_reduction.py delete mode 100644 numba/numba/roc/tests/hsapy/test_scan.py delete mode 100644 numba/numba/roc/tests/hsapy/test_simple.py delete mode 100644 numba/numba/roc/tests/hsapy/test_ufuncbuilding.py delete mode 100644 numba/numba/roc/vectorizers.py delete mode 100644 numba/numba/runtests.py delete mode 100644 numba/numba/runtime/__init__.py delete mode 100644 numba/numba/runtime/_nrt_python.c delete mode 100644 numba/numba/runtime/_nrt_pythonmod.c delete mode 100644 numba/numba/runtime/context.py delete mode 100644 numba/numba/runtime/nrt.c delete mode 100644 numba/numba/runtime/nrt.h delete mode 100644 numba/numba/runtime/nrt.py delete mode 100644 numba/numba/runtime/nrtdynmod.py delete mode 100644 numba/numba/runtime/nrtopt.py delete mode 100644 numba/numba/scripts/__init__.py delete mode 100644 numba/numba/scripts/generate_lower_listing.py delete mode 100644 numba/numba/serialize.py delete mode 100644 numba/numba/servicelib/__init__.py delete mode 100644 numba/numba/servicelib/service.py delete mode 100644 numba/numba/servicelib/threadlocal.py delete mode 100644 numba/numba/sigutils.py delete mode 100644 numba/numba/six.py delete mode 100644 numba/numba/smartarray.py delete mode 100644 numba/numba/special.py delete mode 100644 numba/numba/stencil.py delete mode 100644 numba/numba/stencilparfor.py delete mode 100644 numba/numba/targets/__init__.py delete mode 100644 numba/numba/targets/arraymath.py delete mode 100644 numba/numba/targets/arrayobj.py delete mode 100644 numba/numba/targets/base.py delete mode 100644 numba/numba/targets/boxing.py delete mode 100644 numba/numba/targets/builtins.py delete mode 100644 numba/numba/targets/callconv.py delete mode 100644 numba/numba/targets/cffiimpl.py delete mode 100644 numba/numba/targets/cmathimpl.py delete mode 100644 numba/numba/targets/codegen.py delete mode 100644 numba/numba/targets/cpu.py delete mode 100644 
numba/numba/targets/descriptors.py delete mode 100644 numba/numba/targets/enumimpl.py delete mode 100644 numba/numba/targets/externals.py delete mode 100644 numba/numba/targets/fastmathpass.py delete mode 100644 numba/numba/targets/imputils.py delete mode 100644 numba/numba/targets/intrinsics.py delete mode 100644 numba/numba/targets/iterators.py delete mode 100644 numba/numba/targets/linalg.py delete mode 100644 numba/numba/targets/listobj.py delete mode 100644 numba/numba/targets/mathimpl.py delete mode 100644 numba/numba/targets/mergesort.py delete mode 100644 numba/numba/targets/npdatetime.py delete mode 100644 numba/numba/targets/npyfuncs.py delete mode 100644 numba/numba/targets/npyimpl.py delete mode 100644 numba/numba/targets/numbers.py delete mode 100644 numba/numba/targets/operatorimpl.py delete mode 100644 numba/numba/targets/optional.py delete mode 100644 numba/numba/targets/options.py delete mode 100644 numba/numba/targets/polynomial.py delete mode 100644 numba/numba/targets/printimpl.py delete mode 100644 numba/numba/targets/quicksort.py delete mode 100644 numba/numba/targets/randomimpl.py delete mode 100644 numba/numba/targets/rangeobj.py delete mode 100644 numba/numba/targets/registry.py delete mode 100644 numba/numba/targets/removerefctpass.py delete mode 100644 numba/numba/targets/setobj.py delete mode 100644 numba/numba/targets/slicing.py delete mode 100644 numba/numba/targets/smartarray.py delete mode 100644 numba/numba/targets/tupleobj.py delete mode 100644 numba/numba/targets/ufunc_db.py delete mode 100644 numba/numba/testing/__init__.py delete mode 100644 numba/numba/testing/__main__.py delete mode 100644 numba/numba/testing/ddt.py delete mode 100644 numba/numba/testing/loader.py delete mode 100644 numba/numba/testing/main.py delete mode 100644 numba/numba/testing/notebook.py delete mode 100644 numba/numba/tests/__init__.py delete mode 100644 numba/numba/tests/annotation_usecases.py delete mode 100644 numba/numba/tests/cache_usecases.py 
delete mode 100644 numba/numba/tests/cffi_usecases.py delete mode 100644 numba/numba/tests/cfunc_cache_usecases.py delete mode 100644 numba/numba/tests/compile_with_pycc.py delete mode 100644 numba/numba/tests/complex_usecases.py delete mode 100644 numba/numba/tests/ctypes_usecases.py delete mode 100644 numba/numba/tests/dummy_module.py delete mode 100644 numba/numba/tests/enum_usecases.py delete mode 100644 numba/numba/tests/matmul_usecase.py delete mode 100644 numba/numba/tests/npyufunc/__init__.py delete mode 100644 numba/numba/tests/npyufunc/cache_usecases.py delete mode 100644 numba/numba/tests/npyufunc/test_caching.py delete mode 100644 numba/numba/tests/npyufunc/test_dufunc.py delete mode 100644 numba/numba/tests/npyufunc/test_errors.py delete mode 100644 numba/numba/tests/npyufunc/test_gufunc.py delete mode 100644 numba/numba/tests/npyufunc/test_parallel_env_variable.py delete mode 100644 numba/numba/tests/npyufunc/test_parallel_low_work.py delete mode 100644 numba/numba/tests/npyufunc/test_parallel_ufunc_issues.py delete mode 100644 numba/numba/tests/npyufunc/test_ufunc.py delete mode 100644 numba/numba/tests/npyufunc/test_ufuncbuilding.py delete mode 100644 numba/numba/tests/npyufunc/test_vectorize_decor.py delete mode 100644 numba/numba/tests/pdlike_usecase.py delete mode 100644 numba/numba/tests/pycc_distutils_usecase/setup_distutils.py delete mode 100644 numba/numba/tests/pycc_distutils_usecase/setup_setuptools.py delete mode 100644 numba/numba/tests/pycc_distutils_usecase/source_module.py delete mode 100644 numba/numba/tests/recursion_usecases.py delete mode 100644 numba/numba/tests/serialize_usecases.py delete mode 100644 numba/numba/tests/support.py delete mode 100644 numba/numba/tests/test_alignment.py delete mode 100644 numba/numba/tests/test_annotations.py delete mode 100644 numba/numba/tests/test_api.py delete mode 100644 numba/numba/tests/test_array_analysis.py delete mode 100644 numba/numba/tests/test_array_attr.py delete mode 100644 
numba/numba/tests/test_array_constants.py delete mode 100644 numba/numba/tests/test_array_exprs.py delete mode 100644 numba/numba/tests/test_array_iterators.py delete mode 100644 numba/numba/tests/test_array_manipulation.py delete mode 100644 numba/numba/tests/test_array_methods.py delete mode 100644 numba/numba/tests/test_array_reductions.py delete mode 100644 numba/numba/tests/test_array_return.py delete mode 100644 numba/numba/tests/test_auto_constants.py delete mode 100644 numba/numba/tests/test_blackscholes.py delete mode 100644 numba/numba/tests/test_buffer_protocol.py delete mode 100644 numba/numba/tests/test_builtins.py delete mode 100644 numba/numba/tests/test_casting.py delete mode 100644 numba/numba/tests/test_cffi.py delete mode 100644 numba/numba/tests/test_cfunc.py delete mode 100644 numba/numba/tests/test_cgutils.py delete mode 100644 numba/numba/tests/test_chained_assign.py delete mode 100644 numba/numba/tests/test_closure.py delete mode 100644 numba/numba/tests/test_codegen.py delete mode 100644 numba/numba/tests/test_compile_cache.py delete mode 100644 numba/numba/tests/test_complex.py delete mode 100644 numba/numba/tests/test_comprehension.py delete mode 100644 numba/numba/tests/test_config.py delete mode 100644 numba/numba/tests/test_conversion.py delete mode 100644 numba/numba/tests/test_copy_propagate.py delete mode 100644 numba/numba/tests/test_ctypes.py delete mode 100644 numba/numba/tests/test_dataflow.py delete mode 100644 numba/numba/tests/test_datamodel.py delete mode 100644 numba/numba/tests/test_debug.py delete mode 100644 numba/numba/tests/test_debuginfo.py delete mode 100644 numba/numba/tests/test_del.py delete mode 100644 numba/numba/tests/test_deprecations.py delete mode 100644 numba/numba/tests/test_dicts.py delete mode 100644 numba/numba/tests/test_dispatcher.py delete mode 100644 numba/numba/tests/test_dummyarray.py delete mode 100644 numba/numba/tests/test_dyn_array.py delete mode 100644 numba/numba/tests/test_dyn_func.py 
delete mode 100644 numba/numba/tests/test_enums.py delete mode 100644 numba/numba/tests/test_errorhandling.py delete mode 100644 numba/numba/tests/test_errormodels.py delete mode 100644 numba/numba/tests/test_exceptions.py delete mode 100644 numba/numba/tests/test_extended_arg.py delete mode 100644 numba/numba/tests/test_extending.py delete mode 100644 numba/numba/tests/test_extending_types.py delete mode 100644 numba/numba/tests/test_fancy_indexing.py delete mode 100644 numba/numba/tests/test_fastmath.py delete mode 100644 numba/numba/tests/test_flow_control.py delete mode 100644 numba/numba/tests/test_func_interface.py delete mode 100644 numba/numba/tests/test_func_lifetime.py delete mode 100644 numba/numba/tests/test_generators.py delete mode 100644 numba/numba/tests/test_gil.py delete mode 100644 numba/numba/tests/test_globals.py delete mode 100644 numba/numba/tests/test_hashing.py delete mode 100644 numba/numba/tests/test_import.py delete mode 100644 numba/numba/tests/test_indexing.py delete mode 100644 numba/numba/tests/test_inlining.py delete mode 100644 numba/numba/tests/test_interproc.py delete mode 100644 numba/numba/tests/test_intwidth.py delete mode 100644 numba/numba/tests/test_ir.py delete mode 100644 numba/numba/tests/test_itanium_mangler.py delete mode 100644 numba/numba/tests/test_iteration.py delete mode 100644 numba/numba/tests/test_jitclasses.py delete mode 100644 numba/numba/tests/test_jitmethod.py delete mode 100644 numba/numba/tests/test_linalg.py delete mode 100644 numba/numba/tests/test_lists.py delete mode 100644 numba/numba/tests/test_llvm_version_check.py delete mode 100644 numba/numba/tests/test_locals.py delete mode 100644 numba/numba/tests/test_looplifting.py delete mode 100644 numba/numba/tests/test_mandelbrot.py delete mode 100644 numba/numba/tests/test_mangling.py delete mode 100644 numba/numba/tests/test_mathlib.py delete mode 100644 numba/numba/tests/test_maxmin.py delete mode 100644 numba/numba/tests/test_multi3.py delete mode 
100644 numba/numba/tests/test_nan.py delete mode 100644 numba/numba/tests/test_nested_calls.py delete mode 100644 numba/numba/tests/test_np_functions.py delete mode 100644 numba/numba/tests/test_npdatetime.py delete mode 100644 numba/numba/tests/test_nrt.py delete mode 100644 numba/numba/tests/test_nrt_refct.py delete mode 100644 numba/numba/tests/test_numberctor.py delete mode 100644 numba/numba/tests/test_numconv.py delete mode 100644 numba/numba/tests/test_numpy_support.py delete mode 100644 numba/numba/tests/test_numpyadapt.py delete mode 100644 numba/numba/tests/test_obj_lifetime.py delete mode 100644 numba/numba/tests/test_object_mode.py delete mode 100644 numba/numba/tests/test_objects.py delete mode 100644 numba/numba/tests/test_operators.py delete mode 100644 numba/numba/tests/test_optional.py delete mode 100644 numba/numba/tests/test_overlap.py delete mode 100644 numba/numba/tests/test_parfors.py delete mode 100644 numba/numba/tests/test_pipeline.py delete mode 100644 numba/numba/tests/test_polynomial.py delete mode 100644 numba/numba/tests/test_print.py delete mode 100644 numba/numba/tests/test_profiler.py delete mode 100644 numba/numba/tests/test_pycc.py delete mode 100644 numba/numba/tests/test_python_int.py delete mode 100644 numba/numba/tests/test_random.py delete mode 100644 numba/numba/tests/test_range.py delete mode 100644 numba/numba/tests/test_recarray_usecases.py delete mode 100644 numba/numba/tests/test_record_dtype.py delete mode 100644 numba/numba/tests/test_recursion.py delete mode 100644 numba/numba/tests/test_remove_dead.py delete mode 100644 numba/numba/tests/test_return_values.py delete mode 100755 numba/numba/tests/test_runtests.py delete mode 100644 numba/numba/tests/test_serialize.py delete mode 100644 numba/numba/tests/test_sets.py delete mode 100644 numba/numba/tests/test_slices.py delete mode 100644 numba/numba/tests/test_smart_array.py delete mode 100644 numba/numba/tests/test_sort.py delete mode 100644 
numba/numba/tests/test_stencils.py delete mode 100644 numba/numba/tests/test_storeslice.py delete mode 100644 numba/numba/tests/test_support.py delete mode 100644 numba/numba/tests/test_svml.py delete mode 100644 numba/numba/tests/test_sys_stdin_assignment.py delete mode 100644 numba/numba/tests/test_target_overloadselector.py delete mode 100644 numba/numba/tests/test_threadsafety.py delete mode 100644 numba/numba/tests/test_tracing.py delete mode 100644 numba/numba/tests/test_tuples.py delete mode 100644 numba/numba/tests/test_typeconv.py delete mode 100644 numba/numba/tests/test_typeinfer.py delete mode 100644 numba/numba/tests/test_typenames.py delete mode 100644 numba/numba/tests/test_typeof.py delete mode 100644 numba/numba/tests/test_types.py delete mode 100644 numba/numba/tests/test_typingerror.py delete mode 100644 numba/numba/tests/test_ufuncs.py delete mode 100644 numba/numba/tests/test_unicode_literals.py delete mode 100644 numba/numba/tests/test_unicode_names.py delete mode 100644 numba/numba/tests/test_unpack_sequence.py delete mode 100644 numba/numba/tests/test_unsafe_intrinsics.py delete mode 100644 numba/numba/tests/test_usecases.py delete mode 100644 numba/numba/tests/test_utils.py delete mode 100644 numba/numba/tests/test_vectorization_type_inference.py delete mode 100644 numba/numba/tests/test_warnings.py delete mode 100644 numba/numba/tests/test_wrapper.py delete mode 100644 numba/numba/tests/timsort.py delete mode 100644 numba/numba/tests/true_div_usecase.py delete mode 100644 numba/numba/tests/usecases.py delete mode 100644 numba/numba/tracing.py delete mode 100644 numba/numba/transforms.py delete mode 100644 numba/numba/typeconv/__init__.py delete mode 100644 numba/numba/typeconv/_typeconv.cpp delete mode 100644 numba/numba/typeconv/castgraph.py delete mode 100644 numba/numba/typeconv/rules.py delete mode 100644 numba/numba/typeconv/test.cpp delete mode 100644 numba/numba/typeconv/typeconv.cpp delete mode 100644 
numba/numba/typeconv/typeconv.hpp delete mode 100644 numba/numba/typeconv/typeconv.py delete mode 100644 numba/numba/typeinfer.py delete mode 100644 numba/numba/types/__init__.py delete mode 100644 numba/numba/types/abstract.py delete mode 100644 numba/numba/types/common.py delete mode 100644 numba/numba/types/containers.py delete mode 100644 numba/numba/types/functions.py delete mode 100644 numba/numba/types/iterators.py delete mode 100644 numba/numba/types/misc.py delete mode 100644 numba/numba/types/npytypes.py delete mode 100644 numba/numba/types/scalars.py delete mode 100644 numba/numba/typing/__init__.py delete mode 100644 numba/numba/typing/arraydecl.py delete mode 100644 numba/numba/typing/bufproto.py delete mode 100644 numba/numba/typing/builtins.py delete mode 100644 numba/numba/typing/cffi_utils.py delete mode 100644 numba/numba/typing/cmathdecl.py delete mode 100644 numba/numba/typing/collections.py delete mode 100644 numba/numba/typing/context.py delete mode 100644 numba/numba/typing/ctypes_utils.py delete mode 100644 numba/numba/typing/enumdecl.py delete mode 100644 numba/numba/typing/listdecl.py delete mode 100644 numba/numba/typing/mathdecl.py delete mode 100644 numba/numba/typing/npdatetime.py delete mode 100644 numba/numba/typing/npydecl.py delete mode 100644 numba/numba/typing/operatordecl.py delete mode 100644 numba/numba/typing/randomdecl.py delete mode 100644 numba/numba/typing/setdecl.py delete mode 100644 numba/numba/typing/templates.py delete mode 100644 numba/numba/typing/typeof.py delete mode 100644 numba/numba/unittest_support.py delete mode 100644 numba/numba/unsafe/__init__.py delete mode 100644 numba/numba/unsafe/ndarray.py delete mode 100644 numba/numba/unsafe/tuple.py delete mode 100644 numba/numba/utils.py delete mode 100644 numba/requirements.txt delete mode 100644 numba/run_coverage.py delete mode 100755 numba/runtests.py delete mode 100644 numba/setup.py delete mode 100644 numba/tutorials/Numba First Steps.ipynb delete mode 
100644 numba/tutorials/Numba types.ipynb delete mode 100644 numba/tutorials/Numpy and numba.ipynb delete mode 100644 numba/versioneer.py diff --git a/numba/.binstar.yml b/numba/.binstar.yml deleted file mode 100644 index 2184eed29..000000000 --- a/numba/.binstar.yml +++ /dev/null @@ -1,79 +0,0 @@ - -## The package attribure specifies a binstar package namespace to build the package to. -## This can be specified here or on the command line -package: numba - -## You can also specify the account to upload to, -## you must be an admin of that account, this -## defaults to your user account -# user: USERNAME - -#=============================================================================== -# Build Matrix Options -# Thes options may be a single item, a list or empty -# The resulting number of builds is [platform * engine * env] -#=============================================================================== - -# The platforms to build on. -# platform defaults to linux-64 -platform: - - linux-64 - - linux-32 - - win-32 - - win-64 - - osx-64 - -# The engine are the initial conda packages you want to run with -engine: - - python=2.6 argparse funcsigs unittest2 - #- python=2.7 funcsigs - #- python=3.3 - - python=3.4 - -## The env param is an environment variable list -#env: - #- MY_ENV=A CC=gcc - #- MY_ENV=B - -#=============================================================================== -# Scrip options -# Thes options may be broken out into the before_script, script and after_script -# or not, that is up to you -#=============================================================================== - -# Run before the script -before_script: - - conda config --add channels numba - - conda install -q --yes numpy llvmlite jinja2 - -# Put your main computations here! 
-script: - - python setup.py build - - python setup.py build_ext --inplace - - python -m numba.testing -v -b -m - -## This will run after the script regardless of the result of script -## BINSTAR_BUILD_RESULT=[succcess|failure] -# after_script: -# - echo "The build was a $BINSTAR_BUILD_RESULT" | tee artifact1.txt -## This will be run only after a successfull build -# after_success: -# - echo "after_success!" -## This will be run only after a build failure -# after_failure: -# - echo "after_failure!" - -#=============================================================================== -# Build Results -# Build results are split into two categories: artifacts and targets -# You may omit either key and stiff have a successfull build -# They may be a string, list and contain any bash glob -#=============================================================================== - -## Build Targets: Upload these files to your binstar package -## build targets may be a list of files (globs allows) to upload -## The special build targets 'conda' and 'pypi' may be used to -## upload conda builds -## e.g. 
conda is an alias for /opt/anaconda/conda-bld//*.tar.bz2 -#build_targets: - #- conda diff --git a/numba/.coveragerc b/numba/.coveragerc deleted file mode 100644 index 96c7601ca..000000000 --- a/numba/.coveragerc +++ /dev/null @@ -1,22 +0,0 @@ -# configuration file used by run_coverage.py -[run] -branch = True -source = numba -concurrency = multiprocessing -parallel = True - -[report] - -omit = - */__main__.py - # Vendored packages - numba/appdirs.py - numba/six.py - numba/testing/ddt.py - numba/_version.py - -exclude_lines = - pragma: no cover - if __name__ == .__main__.: - -[html] diff --git a/numba/.gitattributes b/numba/.gitattributes deleted file mode 100644 index 972ba2b7f..000000000 --- a/numba/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -numba/_version.py export-subst diff --git a/numba/.github/ISSUE_TEMPLATE.md b/numba/.github/ISSUE_TEMPLATE.md deleted file mode 100644 index d6c5eb916..000000000 --- a/numba/.github/ISSUE_TEMPLATE.md +++ /dev/null @@ -1,47 +0,0 @@ - - -## Feature request - - - -## Reporting a bug - - - -- [ ] I am using the latest released version of Numba (most recent is visible in - the change log (https://github.com/numba/numba/blob/master/CHANGE_LOG). -- [ ] I have included below a minimal working reproducer (if you are unsure how - to write one see http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports). 
- - diff --git a/numba/.github/PULL_REQUEST_TEMPLATE.md b/numba/.github/PULL_REQUEST_TEMPLATE.md deleted file mode 100644 index e4392c420..000000000 --- a/numba/.github/PULL_REQUEST_TEMPLATE.md +++ /dev/null @@ -1,35 +0,0 @@ - diff --git a/numba/.gitignore b/numba/.gitignore deleted file mode 100644 index 63a648e17..000000000 --- a/numba/.gitignore +++ /dev/null @@ -1,26 +0,0 @@ -*.pyc -*.o -*.so -*.dylib -*.pyd -*.pdb -*.egg-info -*.sw[po] -*.out -*.ll -.coverage -.nfs* -tags -MANIFEST - -build/ -docs/_build/ -docs/gh-pages/ -dist/ -htmlcov/ -.idea/ -.vscode/ -.ipynb_checkpoints/ -__pycache__/ - -docs/source/developer/autogen* diff --git a/numba/.travis.yml b/numba/.travis.yml deleted file mode 100644 index 4522c62a2..000000000 --- a/numba/.travis.yml +++ /dev/null @@ -1,53 +0,0 @@ -# References https://gist.github.com/dan-blanchard/7045057 -# and https://docs.travis-ci.com/user/trusty-ci-environment/ - -dist: trusty - -matrix: - include: - # Longest build first - # Parametrize the conda env name so multiple builds can - # work with multiple environments on the same machine in parallel. 
- - os: osx - env: PYTHON=3.6 NUMPY=1.12 BUILD_DOC=yes CONDA_ENV=travisci - - env: PYTHON=3.5 NUMPY=1.11 RUN_COVERAGE=yes CONDA_ENV=travisci - - env: PYTHON=3.6 NUMPY=1.12 BUILD_DOC=yes CONDA_ENV=travisci - - env: PYTHON=3.6 NUMPY=1.13 CONDA_ENV=travisci - - env: PYTHON=3.6 NUMPY=1.14 CONDA_ENV=travisci - - env: PYTHON=3.6 NUMPY=1.14 CONDA_ENV=travisci TEST_SVML=yes - - env: PYTHON=3.5 NUMPY=1.10 CONDA_ENV=travisci - - env: PYTHON=2.7 NUMPY=1.9 CONDA_ENV=travisci - - env: PYTHON=2.7 NUMPY=1.14 CONDA_ENV=travisci - - env: PYTHON=3.7 NUMPY=1.14 CONDA_ENV=travisci USE_C3I_TEST_CHANNEL=yes - - env: PYTHON=3.6 NUMPY=1.14 CONDA_ENV=travisci VANILLA_INSTALL=yes - -branches: - only: - - master - - travis - -before_install: - - buildscripts/incremental/install_miniconda.sh - - export PATH=$HOME/miniconda3/bin:$PATH - - buildscripts/incremental/setup_conda_environment.sh -install: - - buildscripts/incremental/build.sh - -script: - - buildscripts/incremental/test.sh - -after_success: - - buildscripts/incremental/after_success.sh - -notifications: - email: false - on_success: "change" - on_failure: "always" # "change" - slack: - secure: SGgsT4DevPiF/u4y3HBorUYCVWan7EjSnEBmuGPnGkepv8JxfOnbbeM3ca2TSWa601J4OP2K2JhehUGZSn6YkTx5XKOzqF2pddZQX8j0B4htDgV2qvcY/aIUFz8UW2uQAuguP6OlHPmAj5KgF5raJ34rkmTd8UgObL6jAan2Kzg= - webhooks: - urls: - - https://webhooks.gitter.im/e/bd67cc227432d99bf1f1 - on_success: change - on_failure: always - on_start: false diff --git a/numba/AUTHORS b/numba/AUTHORS deleted file mode 100644 index 08a89bf95..000000000 --- a/numba/AUTHORS +++ /dev/null @@ -1,60 +0,0 @@ -Numba proof of concept was originally written by -Travis Oliphant - -If you have contributed to Numba add your name to this file: - -ahmadia -Alberto Valverde -Antoine Pitrou -Bengt Lüers -Björn Linse -Christoph Gohlke -Dag Sverre Seljebotn -Dan Christensen -Dan Yamins -David Warde-Farley -Falcon Dai -Francesc Alted -Frederic -Gaëtan de Menten -GFKjunior -Graham Markall -Hernan Grecco -Ilan Schnell 
-James Bergstra -Jay Bourque -Jens Timmerman -Jim Garrison -Jon Riehl (Resilient Science) -Juan Luis Cano Rodríguez -kichik -Lars Buitinck -Laurent Fasnacht -liuzhenhai <1989lzhh@gmail.com> -Maggie Mari -majidaldo -Mark Florisson -Mark Wiebe -Martin Fiers -Martin Spacek -Meador Inge -Michael Joyce -Matthew Goodman -Ondřej Čertík -Óscar Villellas Guillén -Pablo Jiménez Mateo -Phillip Cloud -Scott Chadde -Shiquan Wang -Siu Kwan Lam -Stan Seibert -Stefan Seefeld -Thomas Kluyver -timo -Travis E. Oliphant -Uri Laserson -Valentin Haenel -Yaroslav Halchenko -Yauhen Yakimovich -Yuval Langer - diff --git a/numba/CHANGE_LOG b/numba/CHANGE_LOG deleted file mode 100644 index ff3e39c24..000000000 --- a/numba/CHANGE_LOG +++ /dev/null @@ -1,1963 +0,0 @@ -Version 0.39.0 --------------- - -Here are the highlights for the Numba 0.39.0 release. - -* This is the first version that supports Python 3.7. -* With help from Intel, we have fixed the issues with SVML support (related - issues #2938, #2998, #3006). -* List has gained support for containing reference-counted types like NumPy - arrays and `list`. Note, list still cannot hold heterogeneous types. -* We have made a significant change to the internal calling-convention, - which should be transparent to most users, to allow for a future feature that - will permitting jumping back into python-mode from a nopython-mode function. - This also fixes a limitation to `print` that disabled its use from nopython - functions that were deep in the call-stack. -* For CUDA GPU support, we added a `__cuda_array_interface__` following the - NumPy array interface specification to allow Numba to consume externally - defined device arrays. We have opened a corresponding pull request to CuPy to - test out the concept and be able to use a CuPy GPU array. 
-* The Numba dispatcher `inspect_types()` method now supports the kwarg `pretty` - which if set to `True` will produce ANSI/HTML output, showing the annotated - types, when invoked from ipython/jupyter-notebook respectively. -* The NumPy functions `ndarray.dot`, `np.percentile` and `np.nanpercentile`, and - `np.unique` are now supported. -* Numba now supports the use of a per-project configuration file to permanently - set behaviours typically set via `NUMBA_*` family environment variables. -* Support for the `ppc64le` architecture has been added. - -Enhancements: - -* PR #2793: Simplify and remove javascript from html_annotate templates. -* PR #2840: Support list of refcounted types -* PR #2902: Support for np.unique -* PR #2926: Enable fence for all architecture and add developer notes -* PR #2928: Making error about untyped list more informative. -* PR #2930: Add configuration file and color schemes. -* PR #2932: Fix encoding to 'UTF-8' in `check_output` decode. -* PR #2938: Python 3.7 compat: _Py_Finalizing becomes _Py_IsFinalizing() -* PR #2939: Comprehensive SVML unit test -* PR #2946: Add support for `ndarray.dot` method and tests. -* PR #2953: percentile and nanpercentile -* PR #2957: Add new 3.7 opcode support. -* PR #2963: Improve alias analysis to be more comprehensive -* PR #2984: Support for namedtuples in array analysis -* PR #2986: Fix environment propagation -* PR #2990: Improve function call matching for intrinsics -* PR #3002: Second pass at error rewrites (interpreter errors). -* PR #3004: Add numpy.empty to the list of pure functions. -* PR #3008: Augment SVML detection with llvmlite SVML patch detection. -* PR #3012: Make use of the common spelling of heterogeneous/homogeneous. -* PR #3032: Fix pycc ctypes test due to mismatch in calling-convention -* PR #3039: Add SVML detection to Numba environment diagnostic tool. 
-* PR #3041: This adds @needs_blas to tests that use BLAS
-* PR #3056: Require llvmlite>=0.24.0
-
-CUDA Enhancements:
-
-* PR #2860: __cuda_array_interface__
-* PR #2910: More CUDA intrinsics
-* PR #2929: Add Flag To Prevent Unnecessary D->H Copies
-* PR #3037: Add CUDA IPC support on non-peer-accessible devices
-
-CI Enhancements:
-
-* PR #3021: Update appveyor config.
-* PR #3040: Add fault handler to all builds
-* PR #3042: Add catchsegv
-* PR #3077: Adds optional number of processes for `-m` in testing
-
-Fixes:
-
-* PR #2897: Fix line position of delete statement in numba ir
-* PR #2905: Fix for #2862
-* PR #3009: Fix optional type returning in recursive call
-* PR #3019: workaround and unittest for issue #3016
-* PR #3035: [TESTING] Attempt delayed removal of Env
-* PR #3048: [WIP] Fix cuda tests failure on buildfarm
-* PR #3054: Make test work on 32-bit
-* PR #3062: Fix cuda.In freeing devary before the kernel launch
-* PR #3073: Workaround #3072
-* PR #3076: Avoid ignored exception due to missing globals at interpreter teardown
-
-Documentation Updates:
-
-* PR #2966: Fix syntax in env var docs.
-* PR #2967: Fix typo in CUDA kernel layout example.
-* PR #2970: Fix docstring copy paste error.
-
-Contributors:
-
-The following people contributed to this release.
-
-* Anton Malakhov
-* Ehsan Totoni (core dev)
-* Julia Tatz
-* Matthias Bussonnier
-* Nick White
-* Ray Donnelly
-* Siu Kwan Lam (core dev)
-* Stan Seibert (core dev)
-* Stuart Archibald (core dev)
-* Todd A. Anderson (core dev)
-* Rik-de-Kort
-* rjenc29
-
-
-Version 0.38.1
---------------
-
-This is a critical bug fix release addressing:
-https://github.com/numba/numba/issues/3006
-
-The bug does not impact users using conda packages from Anaconda or Intel Python
-Distribution (but it does impact conda-forge). It does not impact users of pip
-using wheels from PyPI.
-
-This only impacts a small number of users where:
-
- * The ICC runtime (specifically libsvml) is present in the user's environment.
- * The user is using an llvmlite statically linked against a version of LLVM - that has not been patched with SVML support. - * The platform is 64-bit. - -The release fixes a code generation path that could lead to the production of -incorrect results under the above situation. - -Fixes: - -* PR #3007: Augment SVML detection with llvmlite SVML patch detection. - -Contributors: - -The following people contributed to this release. - -* Stuart Archibald (core dev) - - -Version 0.38.0 --------------- - -Following on from the bug fix focus of the last release, this release swings -back towards the addition of new features and usability improvements based on -community feedback. This release is comparatively large! Three key features/ -changes to note are: - - * Numba (via llvmlite) is now backed by LLVM 6.0, general vectorization is - improved as a result. A significant long standing LLVM bug that was causing - corruption was also found and fixed. - * Further considerable improvements in vectorization are made available as - Numba now supports Intel's short vector math library (SVML). - Try it out with `conda install -c numba icc_rt`. - * CUDA 8.0 is now the minimum supported CUDA version. - -Other highlights include: - - * Bug fixes to `parallel=True` have enabled more vectorization opportunities - when using the ParallelAccelerator technology. - * Much effort has gone into improving error reporting and the general usability - of Numba. This includes highlighted error messages and performance tips - documentation. Try it out with `conda install colorama`. - * A number of new NumPy functions are supported, `np.convolve`, `np.correlate` - `np.reshape`, `np.transpose`, `np.permutation`, `np.real`, `np.imag`, and - `np.searchsorted` now supports the`side` kwarg. Further, `np.argsort` now - supports the `kind` kwarg with `quicksort` and `mergesort` available. 
- * The Numba extension API has gained the ability to operate more easily with
-   functions from Cython modules through the use of
-   `numba.extending.get_cython_function_address` to obtain function addresses
-   for direct use in `ctypes.CFUNCTYPE`.
- * Numba now allows the passing of jitted functions (and containers of jitted
-   functions) as arguments to other jitted functions.
- * The CUDA functionality has gained support for a larger selection of bit
-   manipulation intrinsics, also SELP, and has had a number of bugs fixed.
- * Initial work to support the PPC64LE platform has been added, full support is
-   however waiting on the LLVM 6.0.1 release as it contains critical patches
-   not present in 6.0.0.
-   It is hoped that any remaining issues will be fixed in the next release.
- * The capacity for advanced users/compiler engineers to define their own
-   compilation pipelines.
-
-Enhancements:
-
-* PR #2660: Support bools from cffi in nopython.
-* PR #2741: Enhance error message for undefined variables.
-* PR #2744: Add diagnostic error message to test suite discovery failure.
-* PR #2748: Added Intel SVML optimizations as opt-out choice working by default
-* PR #2762: Support transpose with axes arguments.
-* PR #2777: Add support for np.correlate and np.convolve
-* PR #2779: Implement np.random.permutation
-* PR #2801: Passing jitted functions as args
-* PR #2802: Support np.real() and np.imag()
-* PR #2807: Expose `import_cython_function`
-* PR #2821: Add kwarg 'side' to np.searchsorted
-* PR #2822: Adds stable argsort
-* PR #2832: Fixups for llvmlite 0.23/llvm 6
-* PR #2836: Support `index` method on tuples
-* PR #2839: Support for np.transpose and np.reshape.
-* PR #2843: Custom pipeline
-* PR #2847: Replace signed array access indices in unsigned prange loop body
-* PR #2859: Add support for improved error reporting.
-* PR #2880: This adds a github issue template.
-* PR #2881: Build recipe to clone Intel ICC runtime.
-* PR #2882: Update TravisCI to test SVML -* PR #2893: Add reference to the data buffer in array.ctypes object -* PR #2895: Move to CUDA 8.0 - -Fixes: - -* PR #2737: Fix #2007 (part 1). Empty array handling in np.linalg. -* PR #2738: Fix install_requires to allow pip getting pre-release version -* PR #2740: Fix 2208. Generate better error message. -* PR #2765: Fix Bit-ness -* PR #2780: PowerPC reference counting memory fences -* PR #2805: Fix six imports. -* PR #2813: Fix #2812: gufunc scalar output bug. -* PR #2814: Fix the build post #2727 -* PR #2831: Attempt to fix #2473 -* PR #2842: Fix issue with test discovery and broken CUDA drivers. -* PR #2850: Add rtsys init guard and test. -* PR #2852: Skip vectorization test with targets that are not x86 -* PR #2856: Prevent printing to stdout in `test_extending.py` -* PR #2864: Correct C code to prevent compiler warnings. -* PR #2889: Attempt to fix #2386. -* PR #2891: Removed test skipping for inspect_cfg -* PR #2898: Add guard to parallel test on unsupported platforms -* PR #2907: Update change log for PPC64LE LLVM dependency. -* PR #2911: Move build requirement to llvmlite>=0.23.0dev0 -* PR #2912: Fix random permutation test. -* PR #2914: Fix MD list syntax in issue template. - -Documentation Updates: - -* PR #2739: Explicitly state default value of error_model in docstring -* PR #2803: DOC: parallel vectorize requires signatures -* PR #2829: Add Python 2.7 EOL plan to docs -* PR #2838: Use automatic numbering syntax in list. -* PR #2877: Add performance tips documentation. -* PR #2883: Fix #2872: update rng doc about thread/fork-safety -* PR #2908: Add missing link and ref to docs. -* PR #2909: Tiny typo correction - -ParallelAccelerator enhancements/fixes: - -* PR #2727: Changes to enable vectorization in ParallelAccelerator. 
-* PR #2816: Array analysis for transpose with arbitrary arguments -* PR #2874: Fix dead code eliminator not to remove a call with side-effect -* PR #2886: Fix ParallelAccelerator arrayexpr repr - -CUDA enhancements: - -* PR #2734: More Constants From cuda.h -* PR #2767: Add len(..) Support to DeviceNDArray -* PR #2778: Add More Device Array API Functions to CUDA Simulator -* PR #2824: Add CUDA Primitives for Population Count -* PR #2835: Emit selp Instructions to Avoid Branching -* PR #2867: Full support for CUDA device attributes - -CUDA fixes: -* PR #2768: Don't Compile Code on Every Assignment -* PR #2878: Fixes a Win64 issue with the test in Pr/2865 - -Contributors: - -The following people contributed to this release. - -* Abutalib Aghayev -* Alex Olivas -* Anton Malakhov -* Dong-hee Na -* Ehsan Totoni (core dev) -* John Zwinck -* Josh Wilson -* Kelsey Jordahl -* Nick White -* Olexa Bilaniuk -* Rik-de-Kort -* Siu Kwan Lam (core dev) -* Stan Seibert (core dev) -* Stuart Archibald (core dev) -* Thomas Arildsen -* Todd A. Anderson (core dev) - - -Version 0.37.0 --------------- - -This release focuses on bug fixing and stability but also adds a few new -features including support for Numpy 1.14. The key change for Numba core was the -long awaited addition of the final tranche of thread safety improvements that -allow Numba to be run concurrently on multiple threads without hitting known -thread safety issues inside LLVM itself. Further, a number of fixes and -enhancements went into the CUDA implementation and ParallelAccelerator gained -some new features and underwent some internal refactoring. 
- -Misc enhancements: - -* PR #2627: Remove hacks to make llvmlite threadsafe -* PR #2672: Add ascontiguousarray -* PR #2678: Add Gitter badge -* PR #2691: Fix #2690: add intrinsic to convert array to tuple -* PR #2703: Test runner feature: failed-first and last-failed -* PR #2708: Patch for issue #1907 -* PR #2732: Add support for array.fill - -Misc Fixes: - -* PR #2610: Fix #2606 lowering of optional.setattr -* PR #2650: Remove skip for win32 cosine test -* PR #2668: Fix empty_like from readonly arrays. -* PR #2682: Fixes 2210, remove _DisableJitWrapper -* PR #2684: Fix #2340, generator error yielding bool -* PR #2693: Add travis-ci testing of NumPy 1.14, and also check on Python 2.7 -* PR #2694: Avoid type inference failure due to a typing template rejection -* PR #2695: Update llvmlite version dependency. -* PR #2696: Fix tuple indexing codegeneration for empty tuple -* PR #2698: Fix #2697 by deferring deletion in the simplify_CFG loop. -* PR #2701: Small fix to avoid tempfiles being created in the current directory -* PR #2725: Fix 2481, LLVM IR parsing error due to mutated IR -* PR #2726: Fix #2673: incorrect fork error msg. -* PR #2728: Alternative to #2620. Remove dead code ByteCodeInst.get. -* PR #2730: Add guard for test needing SciPy/BLAS - -Documentation updates: - -* PR #2670: Update communication channels -* PR #2671: Add docs about diagnosing loop vectorizer -* PR #2683: Add docs on const arg requirements and on const mem alloc -* PR #2722: Add docs on numpy support in cuda -* PR #2724: Update doc: warning about unsupported arguments - -ParallelAccelerator enhancements/fixes: - -Parallel support for `np.arange` and `np.linspace`, also `np.mean`, `np.std` -and `np.var` are added. This was performed as part of a general refactor and -cleanup of the core ParallelAccelerator code. 
- -* PR #2674: Core pa -* PR #2704: Generate Dels after parfor sequential lowering -* PR #2716: Handle matching directly supported functions - -CUDA enhancements: - -* PR #2665: CUDA DeviceNDArray: Support numpy tranpose API -* PR #2681: Allow Assigning to DeviceNDArrays -* PR #2702: Make DummyArray do High Dimensional Reshapes -* PR #2714: Use CFFI to Reuse Code - -CUDA fixes: - -* PR #2667: Fix CUDA DeviceNDArray slicing -* PR #2686: Fix #2663: incorrect offset when indexing cuda array. -* PR #2687: Ensure Constructed Stream Bound -* PR #2706: Workaround for unexpected warp divergence due to exception raising - code -* PR #2707: Fix regression: cuda test submodules not loading properly in - runtests -* PR #2731: Use more challenging values in slice tests. -* PR #2720: A quick testsuite fix to not run the new cuda testcase in the - multiprocess pool - -Contributors: - -The following people contributed to this release. - -* Coutinho Menezes Nilo -* Daniel -* Ehsan Totoni -* Nick White -* Paul H. Liu -* Siu Kwan Lam -* Stan Seibert -* Stuart Archibald -* Todd A. Anderson - - -Version 0.36.2 --------------- - -This is a bugfix release that provides minor changes to address: - -* PR #2645: Avoid CPython bug with ``exec`` in older 2.7.x. -* PR #2652: Add support for CUDA 9. - - -Version 0.36.1 --------------- - -This release continues to add new features to the work undertaken in partnership -with Intel on ParallelAccelerator technology. Other changes of note include the -compilation chain being updated to use LLVM 5.0 and the production of conda -packages using conda-build 3 and the new compilers that ship with it. - -NOTE: A version 0.36.0 was tagged for internal use but not released. - -ParallelAccelerator: - -NOTE: The ParallelAccelerator technology is under active development and should -be considered experimental. 
- -New features relating to ParallelAccelerator, from work undertaken with Intel, -include the addition of the `@stencil` decorator for ease of implementation of -stencil-like computations, support for general reductions, and slice and -range fusion for parallel slice/bit-array assignments. Documentation on both the -use and implementation of the above has been added. Further, a new debug -environment variable `NUMBA_DEBUG_ARRAY_OPT_STATS` is made available to give -information about which operators/calls are converted to parallel for-loops. - -ParallelAccelerator features: - -* PR #2457: Stencil Computations in ParallelAccelerator -* PR #2548: Slice and range fusion, parallelizing bitarray and slice assignment -* PR #2516: Support general reductions in ParallelAccelerator - -ParallelAccelerator fixes: - -* PR #2540: Fix bug #2537 -* PR #2566: Fix issue #2564. -* PR #2599: Fix nested multi-dimensional parfor type inference issue -* PR #2604: Fixes for stencil tests and cmath sin(). -* PR #2605: Fixes issue #2603. - -Additional features of note: - -This release of Numba (and llvmlite) is updated to use LLVM version 5.0 as the -compiler back end, the main change to Numba to support this was the addition of -a custom symbol tracker to avoid the calls to LLVM's `ExecutionEngine` that was -crashing when asking for non-existent symbol addresses. Further, the conda -packages for this release of Numba are built using conda build version 3 and the -new compilers/recipe grammar that are present in that release. - -* PR #2568: Update for LLVM 5 -* PR #2607: Fixes abort when getting address to "nrt_unresolved_abort" -* PR #2615: Working towards conda build 3 - -Thanks to community feedback and bug reports, the following fixes were also -made. - -Misc fixes/enhancements: - -* PR #2534: Add tuple support to np.take. -* PR #2551: Rebranding fix -* PR #2552: relative doc links -* PR #2570: Fix issue #2561, handle missing successor on loop exit -* PR #2588: Fix #2555. 
Disable libpython.so linking on linux -* PR #2601: Update llvmlite version dependency. -* PR #2608: Fix potential cache file collision -* PR #2612: Fix NRT test failure due to increased overhead when running in coverage -* PR #2619: Fix dubious pthread_cond_signal not in lock -* PR #2622: Fix `np.nanmedian` for all NaN case. -* PR #2633: Fix markdown in CONTRIBUTING.md -* PR #2635: Make the dependency on compilers for AOT optional. - -CUDA support fixes: - -* PR #2523: Fix invalid cuda context in memory transfer calls in another thread -* PR #2575: Use CPU to initialize xoroshiro states for GPU RNG. Fixes #2573 -* PR #2581: Fix cuda gufunc mishandling of scalar arg as array and out argument - - -Version 0.35.0 --------------- - -This release includes some exciting new features as part of the work -performed in partnership with Intel on ParallelAccelerator technology. -There are also some additions made to Numpy support and small but -significant fixes made as a result of considerable effort spent chasing bugs -and implementing stability improvements. - - -ParallelAccelerator: - -NOTE: The ParallelAccelerator technology is under active development and should -be considered experimental. - -New features relating to ParallelAccelerator, from work undertaken with Intel, -include support for a larger range of `np.random` functions in `parallel` -mode, printing Numpy arrays in no Python mode, the capacity to initialize Numpy -arrays directly from list comprehensions, and the axis argument to `.sum()`. -Documentation on the ParallelAccelerator technology implementation has also -been added. Further, a large amount of work on equivalence relations was -undertaken to enable runtime checks of broadcasting behaviours in parallel mode. - -ParallelAccelerator features: - -* PR #2400: Array comprehension -* PR #2405: Support printing Numpy arrays -* PR #2438: from Support more np.random functions in ParallelAccelerator -* PR #2482: Support for sum with axis in nopython mode. 
-* PR #2487: Adding developer documentation for ParallelAccelerator technology.
-* PR #2492: Core PA refactor adds assertions for broadcast semantics
-
-ParallelAccelerator fixes:
-
-* PR #2478: Rename cfg before parfor translation (#2477)
-* PR #2479: Fix broken array comprehension tests on unsupported platforms
-* PR #2484: Fix array comprehension test on win64
-* PR #2506: Fix for 32-bit machines.
-
-
-Additional features of note:
-
-Support for `np.take`, `np.finfo`, `np.iinfo` and `np.MachAr` in no Python
-mode is added. Further, three new environment variables are added, two for
-overriding CPU target/features and another to warn if `parallel=True` was set
-but no such transform was possible.
-
-* PR #2490: Implement np.take and ndarray.take
-* PR #2493: Display a warning if parallel=True is set but not possible.
-* PR #2513: Add np.MachAr, np.finfo, np.iinfo
-* PR #2515: Allow environ overriding of cpu target and cpu features.
-
-
-Due to expansion of the test farm and a focus on fixing bugs, the following
-fixes were also made.
-
-Misc fixes/enhancements:
-
-* PR #2455: add contextual information to runtime errors
-* PR #2470: Fixes #2458, poor performance in np.median
-* PR #2471: Ensure LLVM threadsafety in {g,}ufunc building.
-* PR #2494: Update doc theme
-* PR #2503: Remove hacky code added in 2482 and feature enhancement
-* PR #2505: Serialise env mutation tests during multithreaded testing.
-* PR #2520: Fix failing cpu-target override tests
-
-CUDA support fixes:
-
-* PR #2504: Enable CUDA toolkit version testing
-* PR #2509: Disable tests generating code unavailable in lower CC versions.
-* PR #2511: Fix Windows 64 bit CUDA tests.
-
-
-Version 0.34.0
---------------
-
-This release adds a significant set of new features arising from combined work
-with Intel on ParallelAccelerator technology. It also adds list comprehension
-and closure support, support for Numpy 1.13 and a new, faster, CUDA reduction
-algorithm. 
For Linux users this release is the first to be built on Centos 6, -which will be the new base platform for future releases. Finally a number of -thread-safety, type inference and other smaller enhancements and bugs have been -fixed. - - -ParallelAccelerator features: - -NOTE: The ParallelAccelerator technology is under active development and should -be considered experimental. - -The ParallelAccelerator technology is accessed via a new "nopython" mode option -"parallel". The ParallelAccelerator technology attempts to identify operations -which have parallel semantics (for instance adding a scalar to a vector), fuse -together adjacent such operations, and then parallelize their execution across -a number of CPU cores. This is essentially auto-parallelization. - -In addition to the auto-parallelization feature, explicit loop based -parallelism is made available through the use of `prange` in place of `range` -as a loop iterator. - -More information and examples on both auto-parallelization and `prange` are -available in the documentation and examples directory respectively. - -As part of the necessary work for ParallelAccelerator, support for closures -and list comprehensions is added: - -* PR #2318: Transfer ParallelAccelerator technology to Numba -* PR #2379: ParallelAccelerator Core Improvements -* PR #2367: Add support for len(range(...)) -* PR #2369: List comprehension -* PR #2391: Explicit Parallel Loop Support (prange) - -The ParallelAccelerator features are available on all supported platforms and -Python versions with the exceptions of (with view of supporting in a future -release): - -* The combination of Windows operating systems with Python 2.7. -* Systems running 32 bit Python. 
- - -CUDA support enhancements: - -* PR #2377: New GPU reduction algorithm - - -CUDA support fixes: - -* PR #2397: Fix #2393, always set alignment of cuda static memory regions - - -Misc Fixes: - -* PR #2373, Issue #2372: 32-bit compatibility fix for parfor related code -* PR #2376: Fix #2375 missing stdint.h for py2.7 vc9 -* PR #2378: Fix deadlock in parallel gufunc when kernel acquires the GIL. -* PR #2382: Forbid unsafe casting in bitwise operation -* PR #2385: docs: fix Sphinx errors -* PR #2396: Use 64-bit RHS operand for shift -* PR #2404: Fix threadsafety logic issue in ufunc compilation cache. -* PR #2424: Ensure consistent iteration order of blocks for type inference. -* PR #2425: Guard code to prevent the use of 'parallel' on win32 + py27 -* PR #2426: Basic test for Enum member type recovery. -* PR #2433: Fix up the parfors tests with respect to windows py2.7 -* PR #2442: Skip tests that need BLAS/LAPACK if scipy is not available. -* PR #2444: Add test for invalid array setitem -* PR #2449: Make the runtime initialiser threadsafe -* PR #2452: Skip CFG test on 64bit windows - - -Misc Enhancements: - -* PR #2366: Improvements to IR utils -* PR #2388: Update README.rst to indicate the proper version of LLVM -* PR #2394: Upgrade to llvmlite 0.19.* -* PR #2395: Update llvmlite version to 0.19 -* PR #2406: Expose environment object to ufuncs -* PR #2407: Expose environment object to target-context inside lowerer -* PR #2413: Add flags to pass through to conda build for buildbot -* PR #2414: Add cross compile flags to local recipe -* PR #2415: A few cleanups for rewrites -* PR #2418: Add getitem support for Enum classes -* PR #2419: Add support for returning enums in vectorize -* PR #2421: Add copyright notice for Intel contributed files. 
-* PR #2422: Patch code base to work with np 1.13 release
-* PR #2448: Adds in warning message when using 'parallel' if cache=True
-* PR #2450: Add test for keyword arg on .sum-like and .cumsum-like array
-  methods
-
-
-Version 0.33.0
---------------
-
-This release resolved several performance issues caused by atomic
-reference counting operations inside loop bodies. New optimization
-passes have been added to reduce the impact of these operations. We
-observe speed improvements between 2x-10x in affected programs due to
-the removal of unnecessary reference counting operations.
-
-There are also several enhancements to the CUDA GPU support:
-
-* A GPU random number generator based on `xoroshiro128+ algorithm `_ is added.
-  See details and examples in :ref:`documentation `.
-* ``@cuda.jit`` CUDA kernels can now call ``@jit`` and ``@njit``
-  CPU functions and they will automatically be compiled as CUDA device
-  functions.
-* CUDA IPC memory API is exposed for sharing memory between processes.
-  See usage details in :ref:`documentation `.
-
-Reference counting enhancements:
-
-* PR #2346, Issue #2345, #2248: Add extra refcount pruning after inlining
-* PR #2349: Fix refct pruning not removing refct op with tail call.
-* PR #2352, Issue #2350: Add refcount pruning pass for function that does not need refcount
-
-CUDA support enhancements:
-
-* PR #2023: Supports CUDA IPC for device array
-* PR #2343, Issue #2335: Allow CPU jit decorated function to be used as cuda device function
-* PR #2347: Add random number generator support for CUDA device code
-* PR #2361: Update autotune table for CC: 5.3, 6.0, 6.1, 6.2
-
-Misc fixes:
-
-* PR #2362: Avoid test failure due to typing to int32 on 32-bit platforms
-* PR #2359: Fixed nogil example that threw a TypeError when executed.
-* PR #2357, Issue #2356: Fix fragile test that depends on how the script is executed. 
-* PR #2355: Fix cpu dispatcher referenced as attribute of another module
-* PR #2354: Fixes an issue with caching when function needs NRT and refcount pruning
-* PR #2342, Issue #2339: Add warnings to inspection when it is used on unserialized cached code
-* PR #2329, Issue #2250: Better handling of missing op codes
-
-Misc enhancements:
-
-* PR #2360: Adds missing values in error message interp.
-* PR #2353: Handle when get_host_cpu_features() raises RuntimeError
-* PR #2351: Enable SVML for erf/erfc/gamma/lgamma/log2
-* PR #2344: Expose error_model setting in jit decorator
-* PR #2337: Align blocking terminate support for fork() with new TBB version
-* PR #2336: Bump llvmlite version to 0.18
-* PR #2330: Core changes in PR #2318
-
-
-Version 0.32.0
---------------
-
-In this release, we are upgrading to LLVM 4.0. A lot of work has been done
-to fix many race-condition issues inside LLVM when the compiler is
-used concurrently, which is likely when Numba is used with Dask.
-
-Improvements:
-
-* PR #2322: Suppress test error due to unknown but consistent error with tgamma
-* PR #2320: Update llvmlite dependency to 0.17
-* PR #2308: Add details to error message on why cuda support is disabled.
-* PR #2302: Add os x to travis
-* PR #2294: Disable remove_module on MCJIT due to memory leak inside LLVM
-* PR #2291: Split parallel tests and recycle workers to tame memory usage
-* PR #2253: Remove the pointer-stuffing hack for storing meminfos in lists
-
-Fixes:
-
-* PR #2331: Fix a bug in the GPU array indexing
-* PR #2326: Fix #2321 docs referring to non-existing function.
-* PR #2316: Fixing more race-condition problems
-* PR #2315: Fix #2314. Relax strict type check to allow optional type.
-* PR #2310: Fix race condition due to concurrent compilation and cache loading
-* PR #2304: Fix intrinsic 1st arg not a typing.Context as stated by the docs. 
-* PR #2287: Fix int64 atomic min-max -* PR #2286: Fix #2285 `@overload_method` not linking dependent libs -* PR #2303: Missing import statements to interval-example.rst - - -Version 0.31.0 --------------- - -In this release, we added preliminary support for debugging with GDB -version >= 7.0. The feature is enabled by setting the ``debug=True`` compiler -option, which causes GDB compatible debug info to be generated. -The CUDA backend also gained limited debugging support so that source locations -are showed in memory-checking and profiling tools. -For details, see :ref:`numba-troubleshooting`. - -Also, we added the ``fastmath=True`` compiler option to enable unsafe -floating-point transformations, which allows LLVM to auto-vectorize more code. - -Other important changes include upgrading to LLVM 3.9.1 and adding support for -Numpy 1.12. - -Improvements: - -* PR #2281: Update for numpy1.12 -* PR #2278: Add CUDA atomic.{max, min, compare_and_swap} -* PR #2277: Add about section to conda recipies to identify license and other - metadata in Anaconda Cloud -* PR #2271: Adopt itanium C++-style mangling for CPU and CUDA targets -* PR #2267: Add fastmath flags -* PR #2261: Support dtype.type -* PR #2249: Changes for llvm3.9 -* PR #2234: Bump llvmlite requirement to 0.16 and add install_name_tool_fixer to - mviewbuf for OS X -* PR #2230: Add python3.6 to TravisCi -* PR #2227: Enable caching for gufunc wrapper -* PR #2170: Add debugging support -* PR #2037: inspect_cfg() for easier visualization of the function operation - -Fixes: - -* PR #2274: Fix nvvm ir patch in mishandling "load" -* PR #2272: Fix breakage to cuda7.5 -* PR #2269: Fix caching of copy_strides kernel in cuda.reduce -* PR #2265: Fix #2263: error when linking two modules with dynamic globals -* PR #2252: Fix path separator in test -* PR #2246: Fix overuse of memory in some system with fork -* PR #2241: Fix #2240: __module__ in dynamically created function not a str -* PR #2239: Fix fingerprint computation 
failure preventing fallback - - -Version 0.30.1 --------------- - -This is a bug-fix release to enable Python 3.6 support. In addition, -there is now early Intel TBB support for parallel ufuncs when building from -source with TBBROOT defined. The TBB feature is not enabled in our official -builds. - -Fixes: - -* PR #2232: Fix name clashes with _Py_hashtable_xxx in Python 3.6. - -Improvements: - -* PR #2217: Add Intel TBB threadpool implementation for parallel ufunc. - - -Version 0.30.0 --------------- - -This release adds preliminary support for Python 3.6, but no official build is -available yet. A new system reporting tool (``numba --sysinfo``) is added to -provide system information to help core developers in replication and debugging. -See below for other improvements and bug fixes. - -Improvements: - -* PR #2209: Support Python 3.6. -* PR #2175: Support ``np.trace()``, ``np.outer()`` and ``np.kron()``. -* PR #2197: Support ``np.nanprod()``. -* PR #2190: Support caching for ufunc. -* PR #2186: Add system reporting tool. - -Fixes: - -* PR #2214, Issue #2212: Fix memory error with ndenumerate and flat iterators. -* PR #2206, Issue #2163: Fix ``zip()`` consuming extra elements in early - exhaustion. -* PR #2185, Issue #2159, #2169: Fix rewrite pass affecting objmode fallback. -* PR #2204, Issue #2178: Fix annotation for liftedloop. -* PR #2203: Fix Appveyor segfault with Python 3.5. -* PR #2202, Issue #2198: Fix target context not initialized when loading from - ufunc cache. -* PR #2172, Issue #2171: Fix optional type unpacking. -* PR #2189, Issue #2188: Disable freezing of big (>1MB) global arrays. -* PR #2180, Issue #2179: Fix invalid variable version in looplifting. -* PR #2156, Issue #2155: Fix divmod, floordiv segfault on CUDA. - - -Version 0.29.0 --------------- - -This release extends the support of recursive functions to include direct and -indirect recursion without explicit function type annotations. See new example -in `examples/mergesort.py`. 
Newly supported numpy features include array -stacking functions, np.linalg.eig* functions, np.linalg.matrix_power, np.roots -and array to array broadcasting in assignments. - -This release depends on llvmlite 0.14.0 and supports CUDA 8 but it is not -required. - -Improvements: - -* PR #2130, #2137: Add type-inferred recursion with docs and examples. -* PR #2134: Add ``np.linalg.matrix_power``. -* PR #2125: Add ``np.roots``. -* PR #2129: Add ``np.linalg.{eigvals,eigh,eigvalsh}``. -* PR #2126: Add array-to-array broadcasting. -* PR #2069: Add hstack and related functions. -* PR #2128: Allow for vectorizing a jitted function. (thanks to @dhirschfeld) -* PR #2117: Update examples and make them test-able. -* PR #2127: Refactor interpreter class and its results. - -Fixes: - -* PR #2149: Workaround MSVC9.0 SP1 fmod bug kb982107. -* PR #2145, Issue #2009: Fixes kwargs for jitclass ``__init__`` method. -* PR #2150: Fix slowdown in objmode fallback. -* PR #2050, Issue #1259: Fix liveness problem with some generator loops. -* PR #2072, Issue #1995: Right shift of unsigned LHS should be logical. -* PR #2115, Issue #1466: Fix inspect_types() error due to mangled variable name. -* PR #2119, Issue #2118: Fix array type created from record-dtype. -* PR #2122, Issue #1808: Fix returning a generator due to datamodel error. - - -Version 0.28.1 --------------- - -This is a bug-fix release to resolve packaging issues with setuptools -dependency. - - -Version 0.28.0 --------------- - -Amongst other improvements, this version improves again the level of -support for linear algebra -- functions from the :mod:`numpy.linalg` -module. Also, our random generator is now guaranteed to be thread-safe -and fork-safe. - -Improvements: - -* PR #2019: Add the ``@intrinsic`` decorator to define low-level - subroutines callable from JIT functions (this is considered - a private API for now). -* PR #2059: Implement ``np.concatenate`` and ``np.stack``. 
-* PR #2048: Make random generation fork-safe and thread-safe, producing - independent streams of random numbers for each thread or process. -* PR #2031: Add documentation of floating-point pitfalls. -* Issue #2053: Avoid polling in parallel CPU target (fixes severe performance - regression on Windows). -* Issue #2029: Make default arguments fast. -* PR #2052: Add logging to the CUDA driver. -* PR #2049: Implement the built-in ``divmod()`` function. -* PR #2036: Implement the ``argsort()`` method on arrays. -* PR #2046: Improving CUDA memory management by deferring deallocations - until certain thresholds are reached, so as to avoid breaking asynchronous - execution. -* PR #2040: Switch the CUDA driver implementation to use CUDA's - "primary context" API. -* PR #2017: Allow ``min(tuple)`` and ``max(tuple)``. -* PR #2039: Reduce fork() detection overhead in CUDA. -* PR #2021: Handle structured dtypes with titles. -* PR #1996: Rewrite looplifting as a transformation on Numba IR. -* PR #2014: Implement ``np.linalg.matrix_rank``. -* PR #2012: Implement ``np.linalg.cond``. -* PR #1985: Rewrite even trivial array expressions, which opens the door - for other optimizations (for example, ``array ** 2`` can be converted - into ``array * array``). -* PR #1950: Have ``typeof()`` always raise ValueError on failure. - Previously, it would either raise or return None, depending on the input. -* PR #1994: Implement ``np.linalg.norm``. -* PR #1987: Implement ``np.linalg.det`` and ``np.linalg.slogdet``. -* Issue #1979: Document integer width inference and how to workaround. -* PR #1938: Numba is now compatible with LLVM 3.8. -* PR #1967: Restrict ``np.linalg`` functions to homogeneous dtypes. Users - wanting to pass mixed-typed inputs have to convert explicitly, which - makes the performance implications more obvious. - -Fixes: - -* PR #2006: ``array(float32) ** int`` should return ``array(float32)``. -* PR #2044: Allow reshaping empty arrays. 
-* Issue #2051: Fix refcounting issue when concatenating tuples. -* Issue #2000: Make Numpy optional for setup.py, to allow ``pip install`` - to work without Numpy pre-installed. -* PR #1989: Fix assertion in ``Dispatcher.disable_compile()``. -* Issue #2028: Ignore filesystem errors when caching from multiple processes. -* Issue #2003: Allow unicode variable and function names (on Python 3). -* Issue #1998: Fix deadlock in parallel ufuncs that reacquire the GIL. -* PR #1997: Fix random crashes when AOT compiling on certain Windows platforms. -* Issue #1988: Propagate jitclass docstring. -* Issue #1933: Ensure array constants are emitted with the right alignment. - - -Version 0.27.0 --------------- - -Improvements: - -* Issue #1976: improve error message when non-integral dimensions are given - to a CUDA kernel. -* PR #1970: Optimize the power operator with a static exponent. -* PR #1710: Improve contextual information for compiler errors. -* PR #1961: Support printing constant strings. -* PR #1959: Support more types in the print() function. -* PR #1823: Support ``compute_50`` in CUDA backend. -* PR #1955: Support ``np.linalg.pinv``. -* PR #1896: Improve the ``SmartArray`` API. -* PR #1947: Support ``np.linalg.solve``. -* Issue #1943: Improve error message when an argument fails typing.4 -* PR #1927: Support ``np.linalg.lstsq``. -* PR #1934: Use system functions for hypot() where possible, instead of our - own implementation. -* PR #1929: Add cffi support to ``@cfunc`` objects. -* PR #1932: Add user-controllable thread pool limits for parallel CPU target. -* PR #1928: Support self-recursion when the signature is explicit. -* PR #1890: List all lowering implementations in the developer docs. -* Issue #1884: Support ``np.lib.stride_tricks.as_strided()``. - -Fixes: - -* Issue #1960: Fix sliced assignment when source and destination areas are - overlapping. -* PR #1963: Make CUDA print() atomic. -* PR #1956: Allow 0d array constants. 
-* Issue #1945: Allow using Numpy ufuncs in AOT compiled code. -* Issue #1916: Fix documentation example for ``@generated_jit``. -* Issue #1926: Fix regression when caching functions in an IPython session. -* Issue #1923: Allow non-intp integer arguments to carray() and farray(). -* Issue #1908: Accept non-ASCII unicode docstrings on Python 2. -* Issue #1874: Allow ``del container[key]`` in object mode. -* Issue #1913: Fix set insertion bug when the lookup chain contains deleted - entries. -* Issue #1911: Allow function annotations on jitclass methods. - - -Version 0.26.0 --------------- - -This release adds support for ``cfunc`` decorator for exporting numba jitted -functions to 3rd party API that takes C callbacks. Most of the overhead of -using jitclasses inside the interpreter are eliminated. Support for -decompositions in ``numpy.linalg`` are added. Finally, Numpy 1.11 is -supported. - -Improvements: - -* PR #1889: Export BLAS and LAPACK wrappers for pycc. -* PR #1888: Faster array power. -* Issue #1867: Allow "out" keyword arg for dufuncs. -* PR #1871: ``carray()`` and ``farray()`` for creating arrays from pointers. -* PR #1855: ``@cfunc`` decorator for exporting as ctypes function. -* PR #1862: Add support for ``numpy.linalg.qr``. -* PR #1851: jitclass support for '_' and '__' prefixed attributes. -* PR #1842: Optimize jitclass in Python interpreter. -* Issue #1837: Fix CUDA simulator issues with device function. -* PR #1839: Add support for decompositions from ``numpy.linalg``. -* PR #1829: Support Python enums. -* PR #1828: Add support for ``numpy.random.rand()``` and - ``numpy.random.randn()`` -* Issue #1825: Use of 0-darray in place of scalar index. -* Issue #1824: Scalar arguments to object mode gufuncs. -* Issue #1813: Let bitwise bool operators return booleans, not integers. -* Issue #1760: Optional arguments in generators. -* PR #1780: Numpy 1.11 support. 
- - -Version 0.25.0 --------------- - -This release adds support for ``set`` objects in nopython mode. It also -adds support for many missing Numpy features and functions. It improves -Numba's compatibility and performance when using a distributed execution -framework such as dask, distributed or Spark. Finally, it removes -compatibility with Python 2.6, Python 3.3 and Numpy 1.6. - -Improvements: - -* Issue #1800: Add erf(), erfc(), gamma() and lgamma() to CUDA targets. -* PR #1793: Implement more Numpy functions: np.bincount(), np.diff(), - np.digitize(), np.histogram(), np.searchsorted() as well as NaN-aware - reduction functions (np.nansum(), np.nanmedian(), etc.) -* PR #1789: Optimize some reduction functions such as np.sum(), np.prod(), - np.median(), etc. -* PR #1752: Make CUDA features work in dask, distributed and Spark. -* PR #1787: Support np.nditer() for fast multi-array indexing with - broadcasting. -* PR #1799: Report JIT-compiled functions as regular Python functions - when profiling (allowing to see the filename and line number where a - function is defined). -* PR #1782: Support np.any() and np.all(). -* Issue #1788: Support the iter() and next() built-in functions. -* PR #1778: Support array.astype(). -* Issue #1775: Allow the user to set the target CPU model for AOT compilation. -* PR #1758: Support creating random arrays using the ``size`` parameter - to the np.random APIs. -* PR #1757: Support len() on array.flat objects. -* PR #1749: Remove Numpy 1.6 compatibility. -* PR #1748: Remove Python 2.6 and 3.3 compatibility. -* PR #1735: Support the ``not in`` operator as well as operator.contains(). -* PR #1724: Support homogeneous sets in nopython mode. -* Issue #875: make compilation of array constants faster. - -Fixes: - -* PR #1795: Fix a massive performance issue when calling Numba functions - with distributed, Spark or a similar mechanism using serialization. -* Issue #1784: Make jitclasses usable with NUMBA_DISABLE_JIT=1. 
-* Issue #1786: Allow using linear algebra functions when profiling. -* Issue #1796: Fix np.dot() memory leak on non-contiguous inputs. -* PR #1792: Fix static negative indexing of tuples. -* Issue #1771: Use fallback cache directory when __pycache__ isn't writable, - such as when user code is installed in a system location. -* Issue #1223: Use Numpy error model in array expressions (e.g. division - by zero returns ``inf`` or ``nan`` instead of raising an error). -* Issue #1640: Fix np.random.binomial() for large n values. -* Issue #1643: Improve error reporting when passing an invalid spec to - ``@jitclass``. -* PR #1756: Fix slicing with a negative step and an omitted start. - - -Version 0.24.0 --------------- - -This release introduces several major changes, including the ``@generated_jit`` -decorator for flexible specializations as with Julia's "``@generated``" macro, -or the SmartArray array wrapper type that allows seamless transfer of array -data between the CPU and the GPU. - -This will be the last version to support Python 2.6, Python 3.3 and Numpy 1.6. - -Improvements: - -* PR #1723: Improve compatibility of JIT functions with the Python profiler. -* PR #1509: Support array.ravel() and array.flatten(). -* PR #1676: Add SmartArray type to support transparent data management in - multiple address spaces (host & GPU). -* PR #1689: Reduce startup overhead of importing Numba. -* PR #1705: Support registration of CFFI types as corresponding to known - Numba types. -* PR #1686: Document the extension API. -* PR #1698: Improve warnings raised during type inference. -* PR #1697: Support np.dot() and friends on non-contiguous arrays. -* PR #1692: cffi.from_buffer() improvements (allow more pointer types, - allow non-Numpy buffer objects). -* PR #1648: Add the ``@generated_jit`` decorator. -* PR #1651: Implementation of np.linalg.inv using LAPACK. Thanks to - Matthieu Dartiailh. -* PR #1674: Support np.diag(). 
-* PR #1673: Improve error message when looking up an attribute on an - unknown global. -* Issue #1569: Implement runtime check for the LLVM locale bug. -* PR #1612: Switch to LLVM 3.7 in sync with llvmlite. -* PR #1624: Allow slice assignment of sequence to array. -* PR #1622: Support slicing tuples with a constant slice. - -Fixes: - -* Issue #1722: Fix returning an optional boolean (bool or None). -* Issue #1734: NRT decref bug when variable is del'ed before being defined, - leading to a possible memory leak. -* PR #1732: Fix tuple getitem regression for CUDA target. -* PR #1718: Mishandling of optional to optional casting. -* PR #1714: Fix .compile() on a JIT function not respecting ._can_compile. -* Issue #1667: Fix np.angle() on arrays. -* Issue #1690: Fix slicing with an omitted stop and a negative step value. -* PR #1693: Fix gufunc bug in handling scalar formal arg with non-scalar - input value. -* PR #1683: Fix parallel testing under Windows. -* Issue #1616: Use system-provided versions of C99 math where possible. -* Issue #1652: Reductions of bool arrays (e.g. sum() or mean()) should - return integers or floats, not bools. -* Issue #1664: Fix regression when indexing a record array with a constant - index. -* PR #1661: Disable AVX on old Linux kernels. -* Issue #1636: Allow raising an exception looked up on a module. - - -Version 0.23.1 --------------- - -This is a bug-fix release to address several regressions introduced -in the 0.23.0 release, and a couple other issues. - -Fixes: - -* Issue #1645: CUDA ufuncs were broken in 0.23.0. -* Issue #1638: Check tuple sizes when passing a list of tuples. -* Issue #1630: Parallel ufunc would keep eating CPU even after finishing - under Windows. -* Issue #1628: Fix ctypes and cffi tests under Windows with Python 3.5. -* Issue #1627: Fix xrange() support. -* PR #1611: Rewrite variable liveness analysis. -* Issue #1610: Allow nested calls between explicitly-typed ufuncs. -* Issue #1593: Fix `*args` in object mode. 
- - -Version 0.23.0 --------------- - -This release introduces JIT classes using the new ``@jitclass`` decorator, -allowing user-defined structures for nopython mode. Other improvements -and bug fixes are listed below. - -Improvements: - -* PR #1609: Speed up some simple math functions by inlining them - in their caller -* PR #1571: Implement JIT classes -* PR #1584: Improve typing of array indexing -* PR #1583: Allow printing booleans -* PR #1542: Allow negative values in np.reshape() -* PR #1560: Support vector and matrix dot product, including ``np.dot()`` - and the ``@`` operator in Python 3.5 -* PR #1546: Support field lookup on record arrays and scalars (i.e. - ``array['field']`` in addition to ``array.field``) -* PR #1440: Support the HSA wavebarrier() and activelanepermute_wavewidth() - intrinsics -* PR #1540: Support np.angle() -* PR #1543: Implement CPU multithreaded gufuncs (target="parallel") -* PR #1551: Allow scalar arguments in np.where(), np.empty_like(). -* PR #1516: Add some more examples from NumbaPro -* PR #1517: Support np.sinc() - -Fixes: - -* Issue #1603: Fix calling a non-cached function from a cached function -* Issue #1594: Ensure a list is homogeneous when unboxing -* Issue #1595: Replace deprecated use of get_pointer_to_function() -* Issue #1586: Allow tests to be run by different users on the same machine -* Issue #1587: Make CudaAPIError picklable -* Issue #1568: Fix using Numba from inside Visual Studio 2015 -* Issue #1559: Fix serializing a jit function referring a renamed module -* PR #1508: Let reshape() accept integer argument(s), not just a tuple -* Issue #1545: Improve error checking when unboxing list objects -* Issue #1538: Fix array broadcasting in CUDA gufuncs -* Issue #1526: Fix a reference count handling bug - - -Version 0.22.1 --------------- - -This is a bug-fix release to resolve some packaging issues and other -problems found in the 0.22.0 release. 
- -Fixes: - -* PR #1515: Include MANIFEST.in in MANIFEST.in so that sdist still works from - source tar files. -* PR #1518: Fix reference counting bug caused by hidden alias -* PR #1519: Fix erroneous assert when passing nopython=True to guvectorize. -* PR #1521: Fix cuda.test() - -Version 0.22.0 --------------- - -This release features several highlights: Python 3.5 support, Numpy 1.10 -support, Ahead-of-Time compilation of extension modules, additional -vectorization features that were previously only available with the -proprietary extension NumbaPro, improvements in array indexing. - -Improvements: - -* PR #1497: Allow scalar input type instead of size-1 array to @guvectorize -* PR #1480: Add distutils support for AOT compilation -* PR #1460: Create a new API for Ahead-of-Time (AOT) compilation -* PR #1451: Allow passing Python lists to JIT-compiled functions, and - reflect mutations on function return -* PR #1387: Numpy 1.10 support -* PR #1464: Support cffi.FFI.from_buffer() -* PR #1437: Propagate errors raised from Numba-compiled ufuncs; also, - let "division by zero" and other math errors produce a warning instead - of exiting the function early -* PR #1445: Support a subset of fancy indexing -* PR #1454: Support "out-of-line" CFFI modules -* PR #1442: Improve array indexing to support more kinds of basic slicing -* PR #1409: Support explicit CUDA memory fences -* PR #1435: Add support for vectorize() and guvectorize() with HSA -* PR #1432: Implement numpy.nonzero() and numpy.where() -* PR #1416: Add support for vectorize() and guvectorize() with CUDA, - as originally provided in NumbaPro -* PR #1424: Support in-place array operators -* PR #1414: Python 3.5 support -* PR #1404: Add the parallel ufunc functionality originally provided in - NumbaPro -* PR #1393: Implement sorting on arrays and lists -* PR #1415: Add functions to estimate the occupancy of a CUDA kernel -* PR #1360: The JIT cache now stores the compiled object code, yielding - even larger 
speedups. -* PR #1402: Fixes for the ARMv7 (armv7l) architecture under Linux -* PR #1400: Add the cuda.reduce() decorator originally provided in NumbaPro - -Fixes: - -* PR #1483: Allow np.empty_like() and friends on non-contiguous arrays -* Issue #1471: Allow caching JIT functions defined in IPython -* PR #1457: Fix flat indexing of boolean arrays -* PR #1421: Allow calling Numpy ufuncs, without an explicit output, on - non-contiguous arrays -* Issue #1411: Fix crash when unpacking a tuple containing a Numba-allocated array -* Issue #1394: Allow unifying range_state32 and range_state64 -* Issue #1373: Fix code generation error on lists of bools - - -Version 0.21.0 --------------- - -This release introduces support for AMD's Heterogeneous System Architecture, -which allows memory to be shared directly between the CPU and the GPU. -Other major enhancements are support for lists and the introduction of -an opt-in compilation cache. - -Improvements: - -* PR #1391: Implement print() for CUDA code -* PR #1366: Implement integer typing enhancement proposal (NBEP 1) -* PR #1380: Support the one-argument type() builtin -* PR #1375: Allow boolean evaluation of lists and tuples -* PR #1371: Support array.view() in CUDA mode -* PR #1369: Support named tuples in nopython mode -* PR #1250: Implement numpy.median(). 
-* PR #1289: Make dispatching faster when calling a JIT-compiled function - from regular Python -* Issue #1226: Improve performance of integer power -* PR #1321: Document features supported with CUDA -* PR #1345: HSA support -* PR #1343: Support lists in nopython mode -* PR #1356: Make Numba-allocated memory visible to tracemalloc -* PR #1363: Add an environment variable NUMBA_DEBUG_TYPEINFER -* PR #1051: Add an opt-in, per-function compilation cache - -Fixes: - -* Issue #1372: Some array expressions would fail rewriting when involved - the same variable more than once, or a unary operator -* Issue #1385: Allow CUDA local arrays to be declared anywhere in a function -* Issue #1285: Support datetime64 and timedelta64 in Numpy reduction functions -* Issue #1332: Handle the EXTENDED_ARG opcode. -* PR #1329: Handle the ``in`` operator in object mode -* Issue #1322: Fix augmented slice assignment on Python 2 -* PR #1357: Fix slicing with some negative bounds or step values. - - -Version 0.20.0 --------------- - -This release updates Numba to use LLVM 3.6 and CUDA 7 for CUDA support. -Following the platform deprecation in CUDA 7, Numba's CUDA feature is no -longer supported on 32-bit platforms. The oldest supported version of -Windows is Windows 7. 
- -Improvements: - -* Issue #1203: Support indexing ndarray.flat -* PR #1200: Migrate cgutils to llvmlite -* PR #1190: Support more array methods: .transpose(), .T, .copy(), .reshape(), .view() -* PR #1214: Simplify setup.py and avoid manual maintenance -* PR #1217: Support datetime64 and timedelta64 constants -* PR #1236: Reload environment variables when compiling -* PR #1225: Various speed improvements in generated code -* PR #1252: Support cmath module in CUDA -* PR #1238: Use 32-byte aligned allocator to optimize for AVX -* PR #1258: Support numpy.frombuffer() -* PR #1274: Use TravisCI container infrastructure for lower wait time -* PR #1279: Micro-optimize overload resolution in call dispatch -* Issue #1248: Improve error message when return type unification fails - -Fixes: - -* Issue #1131: Handling of negative zeros in np.conjugate() and np.arccos() -* Issue #1188: Fix slow array return -* Issue #1164: Avoid warnings from CUDA context at shutdown -* Issue #1229: Respect the writeable flag in arrays -* Issue #1244: Fix bug in refcount pruning pass -* Issue #1251: Fix partial left-indexing of Fortran contiguous array -* Issue #1264: Fix compilation error in array expression -* Issue #1254: Fix error when yielding array objects -* Issue #1276: Fix nested generator use - - -Version 0.19.2 --------------- - -This release fixes the source distribution on pypi. The only change is in the -setup.py file. We do not plan to provide a conda package as this release is -essentially the same as 0.19.1 for conda users. - - -Version 0.19.1 --------------- - -* Issue #1196: - - * fix double-free segfault due to redundant variable deletion in the - Numba IR (#1195) - * fix use-after-delete in array expression rewrite pass - - -Version 0.19.0 --------------- - -This version introduces memory management in the Numba runtime, allowing to -allocate new arrays inside Numba-compiled functions. 
There is also a rework -of the ufunc infrastructure, and an optimization pass to collapse cascading -array operations into a single efficient loop. - -.. warning:: - Support for Windows XP and Vista with all compiler targets and support - for 32-bit platforms (Win/Mac/Linux) with the CUDA compiler target are - deprecated. In the next release of Numba, the oldest version of Windows - supported will be Windows 7. CPU compilation will remain supported - on 32-bit Linux and Windows platforms. - -Known issues: - -* There are some performance regressions in very short running ``nopython`` - functions due to the additional overhead incurred by memory management. - We will work to reduce this overhead in future releases. - -Features: - -* Issue #1181: Add a Frequently Asked Questions section to the documentation. -* Issue #1162: Support the ``cumsum()`` and ``cumprod()`` methods on Numpy - arrays. -* Issue #1152: Support the ``*args`` argument-passing style. -* Issue #1147: Allow passing character sequences as arguments to - JIT-compiled functions. -* Issue #1110: Shortcut deforestation and loop fusion for array expressions. -* Issue #1136: Support various Numpy array constructors, for example - numpy.zeros() and numpy.zeros_like(). -* Issue #1127: Add a CUDA simulator running on the CPU, enabled with the - NUMBA_ENABLE_CUDASIM environment variable. -* Issue #1086: Allow calling standard Numpy ufuncs without an explicit - output array from ``nopython`` functions. -* Issue #1113: Support keyword arguments when calling numpy.empty() - and related functions. -* Issue #1108: Support the ``ctypes.data`` attribute of Numpy arrays. -* Issue #1077: Memory management for array allocations in ``nopython`` mode. -* Issue #1105: Support calling a ctypes function that takes ctypes.py_object - parameters. -* Issue #1084: Environment variable NUMBA_DISABLE_JIT disables compilation - of ``@jit`` functions, instead calling into the Python interpreter - when called. 
This allows easier debugging of multiple jitted functions. -* Issue #927: Allow gufuncs with no output array. -* Issue #1097: Support comparisons between tuples. -* Issue #1075: Numba-generated ufuncs can now be called from ``nopython`` - functions. -* Issue #1062: ``@vectorize`` now allows omitting the signatures, and will - compile the required specializations on the fly (like ``@jit`` does). -* Issue #1027: Support numpy.round(). -* Issue #1085: Allow returning a character sequence (as fetched from a - structured array) from a JIT-compiled function. - -Fixes: - -* Issue #1170: Ensure ``ndindex()``, ``ndenumerate()`` and ``ndarray.flat`` - work properly inside generators. -* Issue #1151: Disallow unpacking of tuples with the wrong size. -* Issue #1141: Specify install dependencies in setup.py. -* Issue #1106: Loop-lifting would fail when the lifted loop does not - produce any output values for the function tail. -* Issue #1103: Fix mishandling of some inputs when a JIT-compiled function - is called with multiple array layouts. -* Issue #1089: Fix range() with large unsigned integers. -* Issue #1088: Install entry-point scripts (numba, pycc) from the conda - build recipe. -* Issue #1081: Constant structured scalars now work properly. -* Issue #1080: Fix automatic promotion of booleans to integers. - - -Version 0.18.2 --------------- - -Bug fixes: - -* Issue #1073: Fixes missing template file for HTML annotation -* Issue #1074: Fixes CUDA support on Windows machine due to NVVM API mismatch - - -Version 0.18.1 --------------- - -Version 0.18.0 is not officially released. - -This version removes the old deprecated and undocumented ``argtypes`` and -``restype`` arguments to the ``@jit`` decorator. Function signatures -should always be passed as the first argument to ``@jit``. 
- -Features: - -* Issue #960: Add inspect_llvm() and inspect_asm() methods to JIT-compiled - functions: they output the LLVM IR and the native assembler source of the - compiled function, respectively. -* Issue #990: Allow passing tuples as arguments to JIT-compiled functions - in ``nopython`` mode. -* Issue #774: Support two-argument round() in ``nopython`` mode. -* Issue #987: Support missing functions from the math module in nopython - mode: frexp(), ldexp(), gamma(), lgamma(), erf(), erfc(). -* Issue #995: Improve code generation for round() on Python 3. -* Issue #981: Support functions from the random and numpy.random modules - in ``nopython`` mode. -* Issue #979: Add cuda.atomic.max(). -* Issue #1006: Improve exception raising and reporting. It is now allowed - to raise an exception with an error message in ``nopython`` mode. -* Issue #821: Allow ctypes- and cffi-defined functions as arguments to - ``nopython`` functions. -* Issue #901: Allow multiple explicit signatures with ``@jit``. The - signatures must be passed in a list, as with ``@vectorize``. -* Issue #884: Better error message when a JIT-compiled function is called - with the wrong types. -* Issue #1010: Simpler and faster CUDA argument marshalling thanks to a - refactoring of the data model. -* Issue #1018: Support arrays of scalars inside Numpy structured types. -* Issue #808: Reduce Numba import time by half. -* Issue #1021: Support the buffer protocol in ``nopython`` mode. - Buffer-providing objects, such as ``bytearray``, ``array.array`` or - ``memoryview`` support array-like operations such as indexing and iterating. - Furthermore, some standard attributes on the ``memoryview`` object are - supported. -* Issue #1030: Support nested arrays in Numpy structured arrays. -* Issue #1033: Implement the inspect_types(), inspect_llvm() and inspect_asm() - methods for CUDA kernels. -* Issue #1029: Support Numpy structured arrays with CUDA as well. 
-* Issue #1034: Support for generators in nopython and object mode. -* Issue #1044: Support default argument values when calling Numba-compiled - functions. -* Issue #1048: Allow calling Numpy scalar constructors from CUDA functions. -* Issue #1047: Allow indexing a multi-dimensional array with a single integer, - to take a view. -* Issue #1050: Support len() on tuples. -* Issue #1011: Revive HTML annotation. - -Fixes: - -* Issue #977: Assignment optimization was too aggressive. -* Issue #561: One-argument round() now returns an int on Python 3. -* Issue #1001: Fix an unlikely bug where two closures with the same name - and id() would compile to the same LLVM function name, despite different - closure values. -* Issue #1006: Fix reference leak when a JIT-compiled function is disposed of. -* Issue #1017: Update instructions for CUDA in the README. -* Issue #1008: Generate shorter LLVM type names to avoid segfaults with CUDA. -* Issue #1005: Properly clean up references when raising an exception from - object mode. -* Issue #1041: Fix incompatibility between Numba and the third-party - library "future". -* Issue #1053: Fix the size attribute of CUDA shared arrays. - - -Version 0.17.0 --------------- - -The major focus in this release has been a rewrite of the documentation. -The new documentation is better structured and has more detailed coverage -of Numba features and APIs. It can be found online at -http://numba.pydata.org/numba-doc/dev/index.html - -Features: - -* Issue #895: LLVM can now inline nested function calls in ``nopython`` mode. -* Issue #863: CUDA kernels can now infer the types of their arguments - ("autojit"-like). -* Issue #833: Support numpy.{min,max,argmin,argmax,sum,mean,var,std} - in ``nopython`` mode. -* Issue #905: Add a ``nogil`` argument to the ``@jit`` decorator, to - release the GIL in ``nopython`` mode. -* Issue #829: Add a ``identity`` argument to ``@vectorize`` and - ``@guvectorize``, to set the identity value of the ufunc. 
-* Issue #843: Allow indexing 0-d arrays with the empty tuple. -* Issue #933: Allow named arguments, not only positional arguments, when - calling a Numba-compiled function. -* Issue #902: Support numpy.ndenumerate() in ``nopython`` mode. -* Issue #950: AVX is now enabled by default except on Sandy Bridge and - Ivy Bridge CPUs, where it can produce slower code than SSE. -* Issue #956: Support constant arrays of structured type. -* Issue #959: Indexing arrays with floating-point numbers isn't allowed - anymore. -* Issue #955: Add support for 3D CUDA grids and thread blocks. -* Issue #902: Support numpy.ndindex() in ``nopython`` mode. -* Issue #951: Numpy number types (``numpy.int8``, etc.) can be used as - constructors for type conversion in ``nopython`` mode. - -Fixes: - -* Issue #889: Fix ``NUMBA_DUMP_ASSEMBLY`` for the CUDA backend. -* Issue #903: Fix calling of stdcall functions with ctypes under Windows. -* Issue #908: Allow lazy-compiling from several threads at once. -* Issue #868: Wrong error message when multiplying a scalar by a non-scalar. -* Issue #917: Allow vectorizing with datetime64 and timedelta64 in the - signature (only with unit-less values, though, because of a Numpy limitation). -* Issue #431: Allow overloading of cuda device function. -* Issue #917: Print out errors occurred in object mode ufuncs. -* Issue #923: Numba-compiled ufuncs now inherit the name and doc of the - original Python function. -* Issue #928: Fix boolean return value in nested calls. -* Issue #915: ``@jit`` called with an explicit signature with a mismatching - type of arguments now raises an error. -* Issue #784: Fix the truth value of NaNs. -* Issue #953: Fix using shared memory in more than one function (kernel or - device). -* Issue #970: Fix an uncommon double to uint64 conversion bug on CentOS5 - 32-bit (C compiler issue). - - -Version 0.16.0 --------------- - -This release contains a major refactor to switch from llvmpy to `llvmlite `_ -as our code generation backend. 
The switch is necessary to reconcile -different compiler requirements for LLVM 3.5 (needs C++11) and Python -extensions (need specific compiler versions on Windows). As a bonus, we have -found the use of llvmlite speeds up compilation by a factor of 2! - -Other Major Changes: - -* Faster dispatch for numpy structured arrays -* Optimized array.flat() -* Improved CPU feature selection -* Fix constant tuple regression in macro expansion code - -Known Issues: - -* AVX code generation is still disabled by default due to performance - regressions when operating on misaligned NumPy arrays. We hope to have a - workaround in the future. -* In *extremely* rare circumstances, a `known issue with LLVM 3.5 `_ - code generation can cause an ELF relocation error on 64-bit Linux systems. - - -Version 0.15.1 --------------- - -(This was a bug-fix release that superceded version 0.15 before it was -announced.) - -Fixes: - -* Workaround for missing __ftol2 on Windows XP. -* Do not lift loops for compilation that contain break statements. -* Fix a bug in loop-lifting when multiple values need to be returned to - the enclosing scope. -* Handle the loop-lifting case where an accumulator needs to be updated when - the loop count is zero. - -Version 0.15 ------------- - -Features: - -* Support for the Python ``cmath`` module. (NumPy complex functions were - already supported.) -* Support for ``.real``, ``.imag``, and `.conjugate()`` on non-complex - numbers. -* Add support for ``math.isfinite()`` and ``math.copysign()``. -* Compatibility mode: If enabled (off by default), a failure to compile in - object mode will fall back to using the pure Python implementation of the - function. -* *Experimental* support for serializing JIT functions with cloudpickle. -* Loop-jitting in object mode now works with loops that modify scalars that - are accessed after the loop, such as accumulators. -* ``@vectorize`` functions can be compiled in object mode. 
-* Numba can now be built using the `Visual C++ Compiler for Python 2.7 `_ - on Windows platforms. -* CUDA JIT functions can be returned by factory functions with variables in - the closure frozen as constants. -* Support for "optional" types in nopython mode, which allow ``None`` to be a - valid value. - -Fixes: - -* If nopython mode compilation fails for any reason, automatically fall back - to object mode (unless nopython=True is passed to @jit) rather than raise - an exeception. -* Allow function objects to be returned from a function compiled in object - mode. -* Fix a linking problem that caused slower platform math functions (such as - ``exp()``) to be used on Windows, leading to performance regressions against - NumPy. -* ``min()`` and ``max()`` no longer accept scalars arguments in nopython mode. -* Fix handling of ambigous type promotion among several compiled versions of a - JIT function. The dispatcher will now compile a new version to resolve the - problem. (issue #776) -* Fix float32 to uint64 casting bug on 32-bit Linux. -* Fix type inference to allow forced casting of return types. -* Allow the shape of a 1D ``cuda.shared.array`` and ``cuda.local.array`` to be - a one-element tuple. -* More correct handling of signed zeros. -* Add custom implementation of ``atan2()`` on Windows to handle special cases - properly. -* Eliminated race condition in the handling of the pagelocked staging area - used when transferring CUDA arrays. -* Fix non-deterministic type unification leading to varying performance. - (issue #797) - - -Version 0.14 ------------- - -Features: - -* Support for nearly all the Numpy math functions (including comparison, - logical, bitwise and some previously missing float functions) in nopython mode. -* The Numpy datetime64 and timedelta64 dtypes are supported in nopython mode - with Numpy 1.7 and later. -* Support for Numpy math functions on complex numbers in nopython mode. -* ndarray.sum() is supported in nopython mode. 
-* Better error messages when unsupported types are used in Numpy math functions. -* Set NUMBA_WARNINGS=1 in the environment to see which functions are compiled - in object mode vs. nopython mode. -* Add support for the two-argument pow() builtin function in nopython mode. -* New developer documentation describing how Numba works, and how to - add new types. -* Support for Numpy record arrays on the GPU. (Note: Improper alignment of dtype - fields will cause an exception to be raised.) -* Slices on GPU device arrays. -* GPU objects can be used as Python context managers to select the active - device in a block. -* GPU device arrays can be bound to a CUDA stream. All subsequent operations - (such as memory copies) will be queued on that stream instead of the default. - This can prevent unnecessary synchronization with other streams. - -Fixes: - -* Generation of AVX instructions has been disabled to avoid performance bugs - when calling external math functions that may use SSE instructions, - especially on OS X. -* JIT functions can be removed by the garbage collector when they are no - longer accessible. -* Various other reference counting fixes to prevent memory leaks. -* Fixed handling of exception when input argument is out of range. -* Prevent autojit functions from making unsafe numeric conversions when - called with different numeric types. -* Fix a compilation error when an unhashable global value is accessed. -* Gracefully handle failure to enable faulthandler in the IPython Notebook. -* Fix a bug that caused loop lifting to fail if the loop was inside an - ``else`` block. -* Fixed a problem with selecting CUDA devices in multithreaded programs on - Linux. -* The ``pow()`` function (and ``**`` operation) applied to two integers now - returns an integer rather than a float. -* Numpy arrays using the object dtype no longer cause an exception in the - autojit. 
-* Attempts to write to a global array will cause compilation to fall back - to object mode, rather than attempt and fail at nopython mode. -* ``range()`` works with all negative arguments (ex: ``range(-10, -12, -1)``) - -Version 0.13.4 --------------- - -Features: - -* Setting and deleting attributes in object mode -* Added documentation of supported and currently unsupported numpy ufuncs -* Assignment to 1-D numpy array slices -* Closure variables and functions can be used in object mode -* All numeric global values in modules can be used as constants in JIT - compiled code -* Support for the start argument in enumerate() -* Inplace arithmetic operations (+=, -=, etc.) -* Direct iteration over a 1D numpy array (e.g. "for x in array: ...") - in nopython mode - -Fixes: - -* Support for NVIDIA compute capability 5.0 devices (such as the GTX 750) -* Vectorize no longer crashes/gives an error when bool\_ is used as return type -* Return the correct dictionary when globals() is used in JIT functions -* Fix crash bug when creating dictionary literals in object -* Report more informative error message on import if llvmpy is too old -* Temporarily disable pycc --header, which generates incorrect function - signatures. - -Version 0.13.3 --------------- - -Features: - -* Support for enumerate() and zip() in nopython mode -* Increased LLVM optimization of JIT functions to -O1, enabling automatic - vectorization of compiled code in some cases -* Iteration over tuples and unpacking of tuples in nopython mode -* Support for dict and set (Python >= 2.7) literals in object mode - -Fixes: - -* JIT functions have the same __name__ and __doc__ as the original function. -* Numerous improvements to better match the data types and behavior of Python - math functions in JIT compiled code on different platforms. -* Importing Numba will no longer throw an exception if the CUDA driver is - present, but cannot be initialized. 
-* guvectorize now properly supports functions with scalar arguments. -* CUDA driver is lazily initialized - -Version 0.13.2 --------------- - -Features: - -* @vectorize ufunc now can generate SIMD fast path for unit strided array -* Added cuda.gridsize -* Added preliminary exception handling (raise exception class) - -Fixes: - -* UNARY_POSITIVE -* Handling of closures and dynamically generated functions -* Global None value - -Version 0.13.1 --------------- - -Features: - -* Initial support for CUDA array slicing - -Fixes: - -* Indirectly fixes numbapro when the system has a incompatible CUDA driver -* Fix numba.cuda.detect -* Export numba.intp and numba.intc - -Version 0.13 ------------- - -Features: - -* Opensourcing NumbaPro CUDA python support in `numba.cuda` -* Add support for ufunc array broadcasting -* Add support for mixed input types for ufuncs -* Add support for returning tuple from jitted function - -Fixes: - -* Fix store slice bytecode handling for Python2 -* Fix inplace subtract -* Fix pycc so that correct header is emitted -* Allow vectorize to work on functions with jit decorator - - -Version 0.12.2 --------------- - -Fixes: - -* Improved NumPy ufunc support in nopython mode -* Misc bug fixes - - -Version 0.12.1 --------------- - -This version fixed many regressions reported by user for the 0.12 release. -This release contains a new loop-lifting mechanism that specializes certains -loop patterns for nopython mode compilation. This avoid direct support -for heap-allocating and other very dynamic operations. - -Improvements: - -* Add loop-lifting--jit-ing loops in nopython for object mode code. This allows - functions to allocate NumPy arrays and use Python objects, while the tight - loops in the function can still be compiled in nopython mode. Any arrays that - the tight loop uses should be created before the loop is entered. 
- -Fixes: - -* Add support for majority of "math" module functions -* Fix for...else handling -* Add support for builtin round() -* Fix tenary if...else support -* Revive "numba" script -* Fix problems with some boolean expressions -* Add support for more NumPy ufuncs - - -Version 0.12 ------------- - -Version 0.12 contains a big refactor of the compiler. The main objective for -this refactor was to simplify the code base to create a better foundation for -further work. A secondary objective was to improve the worst case performance -to ensure that compiled functions in object mode never run slower than pure -Python code (this was a problem in several cases with the old code base). This -refactor is still a work in progress and further testing is needed. - -Main improvements: - -* Major refactor of compiler for performance and maintenance reasons -* Better fallback to object mode when native mode fails -* Improved worst case performance in object mode - -The public interface of numba has been slightly changed. The idea is to -make it cleaner and more rational: - -* jit decorator has been modified, so that it can be called without a signature. - When called without a signature, it behaves as the old autojit. Autojit - has been deprecated in favour of this approach. -* Jitted functions can now be overloaded. -* Added a "njit" decorator that behaves like "jit" decorator with nopython=True. -* The numba.vectorize namespace is gone. The vectorize decorator will - be in the main numba namespace. -* Added a guvectorize decorator in the main numba namespace. It is - similiar to numba.vectorize, but takes a dimension signature. It - generates gufuncs. This is a replacement for the GUVectorize gufunc - factory which has been deprecated. 
- -Main regressions (will be fixed in a future release): - -* Creating new NumPy arrays is not supported in nopython mode -* Returning NumPy arrays is not supported in nopython mode -* NumPy array slicing is not supported in nopython mode -* lists and tuples are not supported in nopython mode -* string, datetime, cdecimal, and struct types are not implemented yet -* Extension types (classes) are not supported in nopython mode -* Closures are not supported -* Raise keyword is not supported -* Recursion is not support in nopython mode - -Version 0.11 ------------- -* Experimental support for NumPy datetime type - -Version 0.10 ------------- -* Annotation tool (./bin/numba --annotate --fancy) (thanks to Jay Bourque) -* Open sourced prange -* Support for raise statement -* Pluggable array representation -* Support for enumerate and zip (thanks to Eugene Toder) -* Better string formatting support (thanks to Eugene Toder) -* Builtins min(), max() and bool() (thanks to Eugene Toder) -* Fix some code reloading issues (thanks to Björn Linse) -* Recognize NumPy scalar objects (thanks to Björn Linse) - - -Version 0.9 ------------ -* Improved math support -* Open sourced generalized ufuncs -* Improved array expressions - -Version 0.8 ------------ -* Support for autojit classes - * Inheritance not yet supported -* Python 3 support for pycc -* Allow retrieval of ctypes function wrapper - * And hence support retrieval of a pointer to the function -* Fixed a memory leak of array slicing views - -Version 0.7.2 -------------- -* Official Python 3 support (python 3.2 and 3.3) -* Support for intrinsics and instructions -* Various bug fixes (see https://github.com/numba/numba/issues?milestone=7&state=closed) - -Version 0.7.1 -------------- -* Various bug fixes - -Version 0.7 ------------ -* Open sourced single-threaded ufunc vectorizer -* Open sourced NumPy array expression compilation -* Open sourced fast NumPy array slicing -* Experimental Python 3 support -* Support for typed 
containers - * typed lists and tuples -* Support for iteration over objects -* Support object comparisons -* Preliminary CFFI support - * Jit calls to CFFI functions (passed into autojit functions) - * TODO: Recognize ffi_lib.my_func attributes -* Improved support for ctypes -* Allow declaring extension attribute types as through class attributes -* Support for type casting in Python - * Get the same semantics with or without numba compilation -* Support for recursion - * For jit methods and extension classes -* Allow jit functions as C callbacks -* Friendlier error reporting -* Internal improvements -* A variety of bug fixes - -Version 0.6.1 --------------- -* Support for bitwise operations - -Version 0.6 --------------- -* Python 2.6 support -* Programmable typing - * Allow users to add type inference for external code -* Better NumPy type inference - * outer, inner, dot, vdot, tensordot, nonzero, where, - binary ufuncs + methods (reduce, accumulate, reduceat, outer) -* Type based alias analysis - * Support for strict aliasing -* Much faster autojit dispatch when calling from Python -* Faster numerical loops through data and stride pre-loading -* Integral overflow and underflow checking for conversions from objects -* Make Meta dependency optional - -Version 0.5 --------------- -* SSA-based type inference - * Allows variable reuse - * Allow referring to variables before lexical definition -* Support multiple comparisons -* Support for template types -* List comprehensions -* Support for pointers -* Many bug fixes -* Added user documentation - -Version 0.4 --------------- - -Version 0.3.2 --------------- - -* Add support for object arithmetic (issue 56). -* Bug fixes (issue 55). 
- -Version 0.3 --------------- -* Changed default compilation approach to ast -* Added support for cross-module linking -* Added support for closures (can jit inner functions and return them) (see examples/closure.py) -* Added support for dtype structures (can access elements of structure with attribute access) (see examples/structures.py) -* Added support for extension types (numba classes) (see examples/numbaclasses.py) -* Added support for general Python code (use nopython to raise an error if Python C-API is used to avoid unexpected slowness because of lack of implementation defaulting to generic Python) -* Fixed many bugs -* Added support to detect math operations. -* Added with python and with nopython contexts -* Added more examples - -Many features need to be documented still. Look at examples and tests for more information. - - -Version 0.2 --------------- -* Added an ast approach to compilation -* Removed d, f, i, b from numba namespace (use f8, f4, i4, b1) -* Changed function to autojit2 -* Added autojit function to decorate calls to the function and use types of the variable to create compiled versions. -* changed keyword arguments to jit and autojit functions to restype and argtypes to be consistent with ctypes module. -* Added pycc -- a python to shared library compiler diff --git a/numba/CONTRIBUTING.md b/numba/CONTRIBUTING.md deleted file mode 100644 index 09dccaa4a..000000000 --- a/numba/CONTRIBUTING.md +++ /dev/null @@ -1,20 +0,0 @@ - -We welcome people who want to make contributions to Numba, big or small! -Even simple documentation improvements are encouraged. - -# Asking questions - -The development mailing-list is at [Google Groups]( -https://groups.google.com/a/continuum.io/forum/#!forum/numba-users). -If you prefer, there is a bidirectional [Gmane mirror]( -http://news.gmane.org/gmane.comp.python.numba.user/) allowing NNTP access. 
- -# Contributing patches - -Please fork the Numba repository on Github, and create a new branch -containing your work. When you are done, open a pull request. - -# Further reading - -Please read the [contributing guide]( -http://numba.pydata.org/numba-doc/dev/developer/contributing.html). diff --git a/numba/LICENSE b/numba/LICENSE deleted file mode 100644 index 7d19426e7..000000000 --- a/numba/LICENSE +++ /dev/null @@ -1,24 +0,0 @@ -Copyright (c) 2012, Anaconda, Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - -Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/numba/LICENSES.third-party b/numba/LICENSES.third-party deleted file mode 100644 index c07c0562f..000000000 --- a/numba/LICENSES.third-party +++ /dev/null @@ -1,78 +0,0 @@ -The Numba source tree includes vendored libraries governed by the following -licenses. - - -appdirs -------- - -# This is the MIT license - -Copyright (c) 2010 ActiveState Software Inc. - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - -ddt ---- - -Copyright © 2015 Carles Barrobés and additional contributors. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the “Software”), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -([MIT License](http://mit-license.org/)) - - -six ---- - -Copyright (c) 2010-2016 Benjamin Peterson - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/numba/MANIFEST.in b/numba/MANIFEST.in deleted file mode 100644 index bc21816e6..000000000 --- a/numba/MANIFEST.in +++ /dev/null @@ -1,10 +0,0 @@ -include MANIFEST.in -include README.rst setup.py runtests.py versioneer.py CHANGE_LOG AUTHORS LICENSE - -recursive-include numba *.c *.cpp *.h *.hpp *.inc -recursive-include docs *.ipynb *.txt *.py Makefile *.rst -recursive-include examples *.py - -prune docs/_build -prune docs/gh-pages -include numba/_version.py diff --git a/numba/README.rst b/numba/README.rst deleted file mode 100644 index 8cf5cb343..000000000 --- a/numba/README.rst +++ /dev/null @@ -1,143 +0,0 @@ -***** -Numba -***** - -.. image:: https://badges.gitter.im/numba/numba.svg - :target: https://gitter.im/numba/numba?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge - :alt: Gitter - -A compiler for Python array and numerical functions -################################################### - -Numba is an Open Source NumPy-aware optimizing compiler for Python -sponsored by Anaconda, Inc. It uses the -remarkable LLVM compiler infrastructure to compile Python syntax to -machine code. - -It is aware of NumPy arrays as typed memory regions and so can speed-up -code using NumPy arrays. Other, less well-typed code will be translated -to Python C-API calls effectively removing the "interpreter" but not removing -the dynamic indirection. - -Numba is also not a tracing JIT. It *compiles* your code before it gets -run either using run-time type information or type information you provide -in the decorator. - -Numba is a mechanism for producing machine code from Python syntax and typed -data structures such as those that exist in NumPy. 
- - -Dependencies -============ - -* llvmlite -* numpy (version 1.9 or higher) -* funcsigs (for Python 2) - - -Installing -========== - -The easiest way to install numba and get updates is by using the Anaconda -Distribution: https://www.anaconda.com/download - -:: - - $ conda install numba - -If you wanted to compile Numba from source, -it is recommended to use conda environment to maintain multiple isolated -development environments. To create a new environment for Numba development:: - - $ conda create -p ~/dev/mynumba python numpy llvmlite - -To select the installed version, append "=VERSION" to the package name, -where, "VERSION" is the version number. For example:: - - $ conda create -p ~/dev/mynumba python=2.7 numpy=1.9 llvmlite - -to use Python 2.7 and Numpy 1.9. - -If you need CUDA support, you should also install the CUDA toolkit:: - - $ conda install cudatoolkit - -This installs the CUDA Toolkit version 8.0, which requires driver version 375.x -or later to be installed. - -Custom Python Environments --------------------------- - -If you're not using conda, you will need to build llvmlite yourself: - -Building and installing llvmlite -'''''''''''''''''''''''''''''''' - -See https://github.com/numba/llvmlite for the most up-to-date instructions. -You will need a build of LLVM 6.0.x. - -:: - - $ git clone https://github.com/numba/llvmlite - $ cd llvmlite - $ python setup.py install - -Installing Numba -'''''''''''''''' - -:: - - $ git clone https://github.com/numba/numba.git - $ cd numba - $ pip install -r requirements.txt - $ python setup.py build_ext --inplace - $ python setup.py install - -or simply - -:: - - $ pip install numba - -If you want to enable CUDA support, you will need to install CUDA Toolkit 8.0. 
-After installing the toolkit, you might have to specify environment variables -in order to override the standard search paths: - -NUMBAPRO_CUDA_DRIVER - Path to the CUDA driver shared library -NUMBAPRO_NVVM - Path to the CUDA libNVVM shared library file -NUMBAPRO_LIBDEVICE - Path to the CUDA libNVVM libdevice directory which contains .bc files - - -Documentation -============= - -http://numba.pydata.org/numba-doc/dev/index.html - - -Mailing Lists -============= - -Join the numba mailing list numba-users@continuum.io: -https://groups.google.com/a/continuum.io/d/forum/numba-users - -or access it through the Gmane mirror: -http://news.gmane.org/gmane.comp.python.numba.user - -Some old archives are at: http://librelist.com/browser/numba/ - - -Website -======= - -See if our sponsor can help you (which can help this project): https://www.anaconda.com - -http://numba.pydata.org - - -Continuous Integration -====================== - -https://travis-ci.org/numba/numba diff --git a/numba/appveyor.yml b/numba/appveyor.yml deleted file mode 100644 index 8dfb8c169..000000000 --- a/numba/appveyor.yml +++ /dev/null @@ -1,44 +0,0 @@ -# Environment loosely based on https://github.com/conda/conda/blob/master/appveyor.yml - -environment: - - global: - # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the - # /E:ON and /V:ON options are not enabled in the batch script intepreter - # See: http://stackoverflow.com/a/13751649/163740 - CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\buildscripts\\appveyor\\run_with_env.cmd" - - matrix: - # Since appveyor is quite slow, we only use a single configuration - # and a subset of the test suite (see test_script below) - - PYTHON: "3.6" - ARCH: "64" - NUMPY: "1.14" - CONDA_ENV: testenv - -init: - # Use AppVeyor's provided Miniconda: https://www.appveyor.com/docs/installed-software#python - - if "%ARCH%" == "64" set MINICONDA=C:\Miniconda35-x64 - - if "%ARCH%" == "32" set MINICONDA=C:\Miniconda35 - - set 
PATH=%MINICONDA%;%MINICONDA%/Scripts;%MINICONDA%/Library/bin;%PATH% - -install: - # Update conda - - conda update -q -y conda - - buildscripts\\incremental\\setup_conda_environment.cmd - # Build the package - - buildscripts\\incremental\\build.cmd - -build: false - -before_test: - # Run system info tool - - ps: pushd bin - - ps: python numba -s - - ps: popd - -test_script: - # Run a subset of the test suite, as AppVeyor is quite slow. - # %CMD_IN_ENV% is needed for distutils/setuptools-based tests - # on certain build configurations. - - "%CMD_IN_ENV% python runtests.py -b --tags important" diff --git a/numba/benchmarks/README b/numba/benchmarks/README deleted file mode 100644 index eca5a45c7..000000000 --- a/numba/benchmarks/README +++ /dev/null @@ -1,20 +0,0 @@ -This directory contains python script for benchmarking the performance of -numba. - -Running the benchmark ------------------------ - - python runall.py - - -Adding new bencharmk ---------------------- - -The "runall.py" will discover scripts if name prefix "bm_". Each benchmark -script should contains two no-argument functions: "python_main" and -"numba_main". They represent the entry point for the benchmark for python -code and numba code, respectively. The timing is produced by `numba.utils -.benchmark`. The best time is reported and it is normalized against the -python timing. 
- - diff --git a/numba/benchmarks/bm_euler.py b/numba/benchmarks/bm_euler.py deleted file mode 100644 index ddb196970..000000000 --- a/numba/benchmarks/bm_euler.py +++ /dev/null @@ -1,63 +0,0 @@ -# Modified from a stackoverflow post by Hyperboreus: -# http://stackoverflow.com/questions/6964392/speed-comparison-with-project-euler-c-vs-python-vs-erlang-vs-haskell -from __future__ import print_function, division, absolute_import -import math -from numba import jit -from numba.utils import benchmark - - -def py_factorCount(n): - square = math.sqrt(n) - isquare = int (square) - count = -1 if isquare == square else 0 - for candidate in range(1, isquare + 1): - if not n % candidate: - count += 2 - return count - - -def py_euler(): - triangle = 1 - index = 1 - while py_factorCount(triangle) < 1001: - index += 1 - triangle += index - return triangle - - -@jit("intp(intp)", nopython=True) -def factorCount(n): - square = math.sqrt(n) - isquare = int (square) - count = -1 if isquare == square else 0 - for candidate in range(1, isquare + 1): - if not n % candidate: - count += 2 - return count - - -@jit("intp()", nopython=True) -def euler(): - triangle = 1 - index = 1 - while factorCount(triangle) < 1001: - index += 1 - triangle += index - return triangle - -answer = 842161320 - - -def numba_main(): - result = euler() - assert result == answer - - -def python_main(): - result = py_euler() - assert result == answer - - -if __name__ == '__main__': - print(benchmark(python_main)) - print(benchmark(numba_main)) diff --git a/numba/benchmarks/bm_laplace2d.py b/numba/benchmarks/bm_laplace2d.py deleted file mode 100644 index 6881b2fa7..000000000 --- a/numba/benchmarks/bm_laplace2d.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import absolute_import, print_function, division - -import numpy as np -from numba import jit -from numba.utils import benchmark - - -def jacobi_relax_core(A, Anew): - error = 0.0 - n = A.shape[0] - m = A.shape[1] - - for j in range(1, n - 1): - for i in 
range(1, m - 1): - Anew[j, i] = 0.25 * ( A[j, i + 1] + A[j, i - 1] \ - + A[j - 1, i] + A[j + 1, i]) - error = max(error, abs(Anew[j, i] - A[j, i])) - return error - - -numba_jacobi_relax_core = jit("float64[:,::1], float64[:,::1]", nopython=True)\ - (jacobi_relax_core) - - -def run(fn): - NN = 1024 - NM = 1024 - - A = np.zeros((NN, NM), dtype=np.float64) - Anew = np.zeros((NN, NM), dtype=np.float64) - - n = NN - m = NM - iter_max = 10 - - tol = 1.0e-6 - error = 1.0 - - for j in range(n): - A[j, 0] = 1.0 - Anew[j, 0] = 1.0 - - it = 0 - - while error > tol and it < iter_max: - error = fn(A, Anew) - - # swap A and Anew - tmp = A - A = Anew - Anew = tmp - it += 1 - - -def python_main(): - run(jacobi_relax_core) - - -def numba_main(): - run(numba_jacobi_relax_core) - - -if __name__ == '__main__': - print(benchmark(python_main)) - print(benchmark(numba_main)) \ No newline at end of file diff --git a/numba/benchmarks/runall.py b/numba/benchmarks/runall.py deleted file mode 100644 index 897ab5bb8..000000000 --- a/numba/benchmarks/runall.py +++ /dev/null @@ -1,90 +0,0 @@ -#! 
/usr/bin/env python -from __future__ import print_function, division, absolute_import -import os -import numpy as np -from matplotlib import pyplot -from numba.utils import benchmark - -BENCHMARK_PREFIX = 'bm_' - - -def discover_files(startdir=os.curdir): - for root, dirs, files in os.walk(startdir): - for path in files: - if path.startswith(BENCHMARK_PREFIX): - fullpath = os.path.join(root, path) - yield fullpath - - -try: - from importlib import import_module -except ImportError: - # Approximative fallback for Python < 2.7 - def import_module(modulename): - module = __import__(modulename) - for comp in modulename.split('.')[:-1]: - module = getattr(module, comp) - return module - - -def discover_modules(): - for fullpath in discover_files(): - path = os.path.relpath(fullpath) - root, ext = os.path.splitext(path) - if ext != '.py': - continue - modulename = root.replace(os.path.sep, '.') - yield import_module(modulename) - - -def discover(): - for m in discover_modules(): - yield m.main - - -def run(mod): - - name = mod.__name__[len(BENCHMARK_PREFIX):] - print('running', name, end=' ...\n') - - bmr = benchmark(mod.python_main) - python_best = bmr.best - print('\tpython', python_best, 'seconds') - - bmr = benchmark(mod.numba_main) - numba_best = bmr.best - print('\tnumba', numba_best, 'seconds') - - print('\tspeedup', python_best / numba_best) - - return name, numba_best / python_best - - -def main(): - # Generate timings - labels = [] - scores = [] - for mod in discover_modules(): - label, result = run(mod) - labels.append(label) - scores.append(result) - - # Plot - width = 0.8 - ind = np.arange(len(labels)) - fig, ax = pyplot.subplots() - - ax.bar(ind, scores, width) - - # Draw horizontal line at y=1 - ax.axhline(y=1, xmax=ind[-1], color='r') - - ax.set_ylabel('Normalized to CPython') - ax.set_title('Numba Benchmark') - ax.set_xticks(ind + (width/2)) - ax.set_xticklabels(labels) - - pyplot.show() - -if __name__ == '__main__': - main() diff --git a/numba/bin/numba 
b/numba/bin/numba deleted file mode 100755 index 0e54a0a60..000000000 --- a/numba/bin/numba +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -from __future__ import print_function, division, absolute_import - -from numba.numba_entry import main - -if __name__ == "__main__": - main() diff --git a/numba/buildscripts/appveyor/run_with_env.cmd b/numba/buildscripts/appveyor/run_with_env.cmd deleted file mode 100644 index 3a56e3e84..000000000 --- a/numba/buildscripts/appveyor/run_with_env.cmd +++ /dev/null @@ -1,90 +0,0 @@ -:: From https://github.com/ogrisel/python-appveyor-demo -:: -:: To build extensions for 64 bit Python 3, we need to configure environment -:: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: -:: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) -:: -:: To build extensions for 64 bit Python 2, we need to configure environment -:: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: -:: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) -:: -:: 32 bit builds, and 64-bit builds for 3.5 and beyond, do not require specific -:: environment configurations. -:: -:: Note: this script needs to be run with the /E:ON and /V:ON flags for the -:: cmd interpreter, at least for (SDK v7.0) -:: -:: More details at: -:: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows -:: http://stackoverflow.com/a/13751649/163740 -:: -:: Author: Olivier Grisel -:: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ -:: -:: Notes about batch files for Python people: -:: -:: Quotes in values are literally part of the values: -:: SET FOO="bar" -:: FOO is now five characters long: " b a r " -:: If you don't want quotes, don't include them on the right-hand side. -:: -:: The CALL lines at the end of this file look redundant, but if you move them -:: outside of the IF clauses, they do not run properly in the SET_SDK_64==Y -:: case, I don't know why. 
-@ECHO OFF - -SET COMMAND_TO_RUN=%* -SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows -SET WIN_WDK=c:\Program Files (x86)\Windows Kits\10\Include\wdf - -:: Extract the major and minor versions, and allow for the minor version to be -:: more than 9. This requires the version number to have two dots in it. -SET MAJOR_PYTHON_VERSION=%PYTHON:~0,1% -IF "%PYTHON:~3,1%" == "." ( - SET MINOR_PYTHON_VERSION=%PYTHON:~2,1% -) ELSE ( - SET MINOR_PYTHON_VERSION=%PYTHON:~2,2% -) - -:: Based on the Python version, determine what SDK version to use, and whether -:: to set the SDK for 64-bit. -IF %MAJOR_PYTHON_VERSION% == 2 ( - SET WINDOWS_SDK_VERSION="v7.0" - SET SET_SDK_64=Y -) ELSE ( - IF %MAJOR_PYTHON_VERSION% == 3 ( - SET WINDOWS_SDK_VERSION="v7.1" - IF %MINOR_PYTHON_VERSION% LEQ 4 ( - SET SET_SDK_64=Y - ) ELSE ( - SET SET_SDK_64=N - IF EXIST "%WIN_WDK%" ( - :: See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/ - REN "%WIN_WDK%" 0wdf - ) - ) - ) ELSE ( - ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" - EXIT 1 - ) -) - -IF %ARCH% == 64 ( - IF %SET_SDK_64% == Y ( - ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture - SET DISTUTILS_USE_SDK=1 - SET MSSdk=1 - "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% - "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release - ECHO Executing: %COMMAND_TO_RUN% - call %COMMAND_TO_RUN% || EXIT 1 - ) ELSE ( - ECHO Using default MSVC build environment for 64 bit architecture - ECHO Executing: %COMMAND_TO_RUN% - call %COMMAND_TO_RUN% || EXIT 1 - ) -) ELSE ( - ECHO Using default MSVC build environment for 32 bit architecture - ECHO Executing: %COMMAND_TO_RUN% - call %COMMAND_TO_RUN% || EXIT 1 -) diff --git a/numba/buildscripts/condarecipe.local/bld.bat b/numba/buildscripts/condarecipe.local/bld.bat deleted file mode 100644 index 5be39e7b6..000000000 --- 
a/numba/buildscripts/condarecipe.local/bld.bat +++ /dev/null @@ -1,4 +0,0 @@ -%PYTHON% buildscripts/remove_unwanted_files.py -%PYTHON% setup.py build install --single-version-externally-managed --record=record.txt - -exit /b %errorlevel% diff --git a/numba/buildscripts/condarecipe.local/build.sh b/numba/buildscripts/condarecipe.local/build.sh deleted file mode 100644 index 7338df6ee..000000000 --- a/numba/buildscripts/condarecipe.local/build.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -$PYTHON buildscripts/remove_unwanted_files.py - -MACOSX_DEPLOYMENT_TARGET=10.10 $PYTHON setup.py build install --single-version-externally-managed --record=record.txt diff --git a/numba/buildscripts/condarecipe.local/license.txt b/numba/buildscripts/condarecipe.local/license.txt deleted file mode 100644 index 7d19426e7..000000000 --- a/numba/buildscripts/condarecipe.local/license.txt +++ /dev/null @@ -1,24 +0,0 @@ -Copyright (c) 2012, Anaconda, Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - -Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/numba/buildscripts/condarecipe.local/mandel.py b/numba/buildscripts/condarecipe.local/mandel.py deleted file mode 100644 index 486c534c9..000000000 --- a/numba/buildscripts/condarecipe.local/mandel.py +++ /dev/null @@ -1,43 +0,0 @@ -from numba import autojit -import numpy as np -#from pylab import imshow, jet, show, ion - -@autojit -def mandel(x, y, max_iters): - """ - Given the real and imaginary parts of a complex number, - determine if it is a candidate for membership in the Mandelbrot - set given a fixed number of iterations. 
- """ - i = 0 - c = complex(x,y) - z = 0.0j - for i in range(max_iters): - z = z*z + c - if (z.real*z.real + z.imag*z.imag) >= 4: - return i - - return 255 - -@autojit -def create_fractal(min_x, max_x, min_y, max_y, image, iters): - height = image.shape[0] - width = image.shape[1] - - pixel_size_x = (max_x - min_x) / width - pixel_size_y = (max_y - min_y) / height - for x in range(width): - real = min_x + x * pixel_size_x - for y in range(height): - imag = min_y + y * pixel_size_y - color = mandel(real, imag, iters) - image[y, x] = color - - return image - -image = np.zeros((500, 750), dtype=np.uint8) -create_fractal(-2.0, 1.0, -1.0, 1.0, image, 20) -#jet() -#ion() -#show() -print("mandel OK") diff --git a/numba/buildscripts/condarecipe.local/meta.yaml b/numba/buildscripts/condarecipe.local/meta.yaml deleted file mode 100644 index 44a00902e..000000000 --- a/numba/buildscripts/condarecipe.local/meta.yaml +++ /dev/null @@ -1,56 +0,0 @@ -package: - name: numba - version: {{ GIT_DESCRIBE_TAG }} - -source: - path: ../.. - -build: - number: {{ GIT_DESCRIBE_NUMBER|int }} - entry_points: - - pycc = numba.pycc:main - - numba = numba.numba_entry:main - script_env: - - PY_VCRUNTIME_REDIST - -requirements: - # build and run dependencies are duplicated to avoid setuptools issues - # when we also set install_requires in setup.py - build: - - {{ compiler('c') }} - - {{ compiler('cxx') }} - host: - - python - - numpy x.x - - setuptools - # On channel https://anaconda.org/numba/ - - llvmlite 0.24.* - - funcsigs # [py27] - - singledispatch # [py27] - run: - - python - - numpy x.x - # On channel https://anaconda.org/numba/ - - llvmlite 0.24.* - - funcsigs # [py27] - - singledispatch # [py27] -test: - requires: - - jinja2 - # Required to test optional Numba features - - cffi - - scipy - - ipython - - setuptools - - faulthandler # [py27] - # Need these for AOT. 
Do not init msvc as it may not be present - - {{ compiler('c') }} # [not win] - - {{ compiler('cxx') }} # [not win] - files: - - mandel.py - -about: - home: http://numba.pydata.org/ - license: BSD - license_file: LICENSE - summary: a just-in-time Python function compiler based on LLVM diff --git a/numba/buildscripts/condarecipe.local/run_test.bat b/numba/buildscripts/condarecipe.local/run_test.bat deleted file mode 100644 index 69ad217a7..000000000 --- a/numba/buildscripts/condarecipe.local/run_test.bat +++ /dev/null @@ -1,18 +0,0 @@ -set NUMBA_DEVELOPER_MODE=1 -set NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING=1 -set PYTHONFAULTHANDLER=1 - -@rem Check Numba executables are there -pycc -h -numba -h - -@rem Run system info tool -numba -s - -@rem Check test discovery works -python -m numba.tests.test_runtests - -@rem Run the whole test suite -python -m numba.runtests -b -m -- %TESTS_TO_RUN% - -if errorlevel 1 exit 1 diff --git a/numba/buildscripts/condarecipe.local/run_test.sh b/numba/buildscripts/condarecipe.local/run_test.sh deleted file mode 100644 index 2daa081d2..000000000 --- a/numba/buildscripts/condarecipe.local/run_test.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -set -e - -export NUMBA_DEVELOPER_MODE=1 -export NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING=1 -export PYTHONFAULTHANDLER=1 - -unamestr=`uname` -if [[ "$unamestr" == 'Linux' ]]; then - SEGVCATCH=catchsegv -elif [[ "$unamestr" == 'Darwin' ]]; then - SEGVCATCH="" -else - echo Error -fi - -# limit CPUs in use on PPC64LE, fork() issues -# occur on high core count systems -archstr=`uname -m` -if [[ "$archstr" == 'ppc64le' ]]; then - TEST_NPROCS=16 -fi - -# Check Numba executables are there -pycc -h -numba -h - -# run system info tool -numba -s - -# Check test discovery works -python -m numba.tests.test_runtests - -# Run the whole test suite -echo "Running: $SEGVCATCH python -m numba.runtests -b -m $TEST_NPROCS -- $TESTS_TO_RUN" -$SEGVCATCH python -m numba.runtests -b -m $TEST_NPROCS -- $TESTS_TO_RUN diff 
--git a/numba/buildscripts/condarecipe_clone_icc_rt/bld.bat b/numba/buildscripts/condarecipe_clone_icc_rt/bld.bat deleted file mode 100644 index e2fd587c5..000000000 --- a/numba/buildscripts/condarecipe_clone_icc_rt/bld.bat +++ /dev/null @@ -1,3 +0,0 @@ -%PYTHON% build.py - -exit /b %errorlevel% diff --git a/numba/buildscripts/condarecipe_clone_icc_rt/build.sh b/numba/buildscripts/condarecipe_clone_icc_rt/build.sh deleted file mode 100644 index 9ed12875c..000000000 --- a/numba/buildscripts/condarecipe_clone_icc_rt/build.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -set -x - -${PYTHON} build.py diff --git a/numba/buildscripts/condarecipe_clone_icc_rt/meta.yaml b/numba/buildscripts/condarecipe_clone_icc_rt/meta.yaml deleted file mode 100644 index c1d98d7cc..000000000 --- a/numba/buildscripts/condarecipe_clone_icc_rt/meta.yaml +++ /dev/null @@ -1,34 +0,0 @@ -{% set version = "2018.0.2" %} # this is the intel version to get -{% set build_number = "0" %} # and the build number from the intel version - -package: - name: icc_rt - version: {{ version }} - -build: - number: {{ build_number }} - -source: - - url: https://anaconda.org/intel/icc_rt/{{ version }}/download/win-64/icc_rt-{{version}}-intel_{{build_number}}.tar.bz2 # [win] - - md5: 8ba0b0d3fbffdb0cd9febbf90752c4ad # [win] - - sha256: e2fbb6452d6544325fa2548c11c7a0989083ce6240c64e83963cf44e564a10dc # [win] - - - url: https://anaconda.org/intel/icc_rt/{{ version }}/download/osx-64/icc_rt-{{version}}-intel_{{build_number}}.tar.bz2 # [osx] - - md5: 6c6c59bd819785dfd281f3107ad39ad0 # [osx] - - sha256: 292334590092c973737eabf138dd28f75542054cef7ce04d762de4087504b8c2 # [osx] - - - url: https://anaconda.org/intel/icc_rt/{{ version }}/download/linux-64/icc_rt-{{version}}-intel_{{build_number}}.tar.bz2 # [linux] - - md5: b814ae46604a6ef3f140f00c3f0b9d25 # [linux] - - sha256: 68d8097b340ed8c876101590a5c1b69beb302491439b9771bedeed3c8012a520 # [linux] - - - path: scripts - -requirements: - build: - - python>=3.6 - -about: - 
license: "Intel" - license_family: "Proprietary" - license_file: LICENSE.txt -summary: Intel ICC runtime. diff --git a/numba/buildscripts/condarecipe_clone_icc_rt/scripts/build.py b/numba/buildscripts/condarecipe_clone_icc_rt/scripts/build.py deleted file mode 100644 index 11eac6282..000000000 --- a/numba/buildscripts/condarecipe_clone_icc_rt/scripts/build.py +++ /dev/null @@ -1,33 +0,0 @@ -import sys -import os -import shutil - -libdir = {'w': 'Library', - 'l': 'lib', - 'd': 'lib'} - - -def run(): - src_dir = os.environ.get('SRC_DIR') - prefix = os.environ.get('PREFIX') - - libd = libdir.get(sys.platform[0], None) - assert libd is not None - - # remove 'lib' from the prefix so a direct copy from the original - # package can be made - lib_dir = os.path.join(prefix, libd) - shutil.rmtree(lib_dir) - # copy in the original package lib dir - shutil.copytree(os.path.join(src_dir, libd), lib_dir) - - # and copy the license - info_dir = os.path.join(src_dir, 'info') - shutil.copy(os.path.join(info_dir, 'LICENSE.txt'), src_dir) - shutil.rmtree(info_dir) - - -if __name__ == "__main__": - args = sys.argv - assert len(args) == 1 - run() diff --git a/numba/buildscripts/incremental/after_success.sh b/numba/buildscripts/incremental/after_success.sh deleted file mode 100755 index 63c55b63d..000000000 --- a/numba/buildscripts/incremental/after_success.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -source activate $CONDA_ENV - -# Make sure any error below is reported as such -set -v -e - -if [ "$RUN_COVERAGE" == "yes" ]; then - coverage combine - codecov -fi diff --git a/numba/buildscripts/incremental/build.cmd b/numba/buildscripts/incremental/build.cmd deleted file mode 100644 index 645a15625..000000000 --- a/numba/buildscripts/incremental/build.cmd +++ /dev/null @@ -1,10 +0,0 @@ - -call activate %CONDA_ENV% - -@rem Build numba extensions without silencing compile errors -python setup.py build_ext -q --inplace - -@rem Install numba locally for use in `numba -s` sys info tool at 
test time -python -m pip install -e . - -if %errorlevel% neq 0 exit /b %errorlevel% diff --git a/numba/buildscripts/incremental/build.sh b/numba/buildscripts/incremental/build.sh deleted file mode 100755 index 65a2d994b..000000000 --- a/numba/buildscripts/incremental/build.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -source activate $CONDA_ENV - -# Make sure any error below is reported as such -set -v -e - -# Build numba extensions without silencing compile errors -python setup.py build_ext -q --inplace -# (note we don't install to avoid problems with extra long Windows paths -# during distutils-dependent tests -- e.g. test_pycc) - -# Install numba locally for use in `numba -s` sys info tool at test time -python -m pip install -e . diff --git a/numba/buildscripts/incremental/install_miniconda.sh b/numba/buildscripts/incremental/install_miniconda.sh deleted file mode 100755 index a6b19b8af..000000000 --- a/numba/buildscripts/incremental/install_miniconda.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -set -v -e - -# Install Miniconda -unamestr=`uname` -if [[ "$unamestr" == 'Linux' ]]; then - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh -elif [[ "$unamestr" == 'Darwin' ]]; then - wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh -else - echo Error -fi -chmod +x miniconda.sh -./miniconda.sh -b diff --git a/numba/buildscripts/incremental/setup_conda_environment.cmd b/numba/buildscripts/incremental/setup_conda_environment.cmd deleted file mode 100644 index 6c18290ce..000000000 --- a/numba/buildscripts/incremental/setup_conda_environment.cmd +++ /dev/null @@ -1,30 +0,0 @@ -@rem The cmd /C hack circumvents a regression where conda installs a conda.bat -@rem script in non-root environments. 
-set CONDA_INSTALL=cmd /C conda install -q -y -set PIP_INSTALL=pip install -q - -@echo on - -@rem Deactivate any environment -call deactivate -@rem Display root environment (for debugging) -conda list -@rem Clean up any left-over from a previous build -conda remove --all -q -y -n %CONDA_ENV% -@rem Scipy, CFFI, jinja2 and IPython are optional dependencies, but exercised in the test suite -conda create -n %CONDA_ENV% -q -y python=%PYTHON% numpy=%NUMPY% cffi pip scipy jinja2 ipython - -call activate %CONDA_ENV% -@rem Install latest llvmlite build -%CONDA_INSTALL% -c numba llvmlite -@rem Install required backports for older Pythons -if %PYTHON% LSS 3.4 (%CONDA_INSTALL% enum34) -if %PYTHON% LSS 3.4 (%PIP_INSTALL% singledispatch) -if %PYTHON% LSS 3.3 (%CONDA_INSTALL% -c numba funcsigs) -@rem Install dependencies for building the documentation -if "%BUILD_DOC%" == "yes" (%CONDA_INSTALL% sphinx pygments) -if "%BUILD_DOC%" == "yes" (%PIP_INSTALL% sphinx_bootstrap_theme) -@rem Install dependencies for code coverage (codecov.io) -if "%RUN_COVERAGE%" == "yes" (%PIP_INSTALL% codecov) - -if %errorlevel% neq 0 exit /b %errorlevel% diff --git a/numba/buildscripts/incremental/setup_conda_environment.sh b/numba/buildscripts/incremental/setup_conda_environment.sh deleted file mode 100755 index 9b79f74bd..000000000 --- a/numba/buildscripts/incremental/setup_conda_environment.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash - -set -v -e - -CONDA_INSTALL="conda install -q -y" -PIP_INSTALL="pip install -q" - - -EXTRA_CHANNELS="" -if [ "${USE_C3I_TEST_CHANNEL}" == "yes" ]; then - EXTRA_CHANNELS="${EXTRA_CHANNELS} -c c3i_test" -fi - - -# Deactivate any environment -source deactivate -# Display root environment (for debugging) -conda list -# Clean up any left-over from a previous build -# (note workaround for https://github.com/conda/conda/issues/2679: -# `conda env remove` issue) -conda remove --all -q -y -n $CONDA_ENV - -# If VANILLA_INSTALL is yes, then only Python, NumPy and pip are 
installed, this -# is to catch tests/code paths that require an optional package and are not -# guarding against the possibility that it does not exist in the environment. -# Create a base env first and then add to it... - -conda create -n $CONDA_ENV -q -y ${EXTRA_CHANNELS} python=$PYTHON numpy=$NUMPY pip - -if [ "${VANILLA_INSTALL}" != "yes" ]; then - # Scipy, CFFI, jinja2, IPython and pygments are optional dependencies, but exercised in the test suite - $CONDA_INSTALL ${EXTRA_CHANNELS} cffi scipy jinja2 ipython pygments -fi - -set +v -source activate $CONDA_ENV -set -v - -# Install the compiler toolchain -if [[ $(uname) == Linux ]]; then - if [[ "$CONDA_SUBDIR" == "linux-32" ]]; then - $CONDA_INSTALL gcc_linux-32 gxx_linux-32 - else - $CONDA_INSTALL gcc_linux-64 gxx_linux-64 - fi -elif [[ $(uname) == Darwin ]]; then - $CONDA_INSTALL clang_osx-64 clangxx_osx-64 -fi - -# Install latest llvmlite build -$CONDA_INSTALL -c numba llvmlite -# Install enum34 and singledispatch for Python < 3.4 -if [ $PYTHON \< "3.4" ]; then $CONDA_INSTALL enum34; fi -if [ $PYTHON \< "3.4" ]; then $PIP_INSTALL singledispatch; fi -# Install funcsigs for Python < 3.3 -if [ $PYTHON \< "3.3" ]; then $CONDA_INSTALL -c numba funcsigs; fi -# Install dependencies for building the documentation -if [ "$BUILD_DOC" == "yes" ]; then $CONDA_INSTALL sphinx pygments; fi -if [ "$BUILD_DOC" == "yes" ]; then $PIP_INSTALL sphinx_bootstrap_theme; fi -# Install dependencies for code coverage (codecov.io) -if [ "$RUN_COVERAGE" == "yes" ]; then $PIP_INSTALL codecov; fi -# Install SVML -if [ "$TEST_SVML" == "yes" ]; then $CONDA_INSTALL -c numba icc_rt; fi - -if [ $PYTHON \< "3.0" ]; then $CONDA_INSTALL faulthandler; fi diff --git a/numba/buildscripts/incremental/test.cmd b/numba/buildscripts/incremental/test.cmd deleted file mode 100644 index 26c896cff..000000000 --- a/numba/buildscripts/incremental/test.cmd +++ /dev/null @@ -1,32 +0,0 @@ - -call activate %CONDA_ENV% - -@rem Ensure that the documentation builds 
without warnings -if "%BUILD_DOC%" == "yes" python setup.py build_doc -@rem Run system info tool -pushd bin -numba -s -popd - -@rem switch off color messages -set NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING=1 -@rem switch on developer mode -set NUMBA_DEVELOPER_MODE=1 -@rem enable the faulthandler -set PYTHONFAULTHANDLER=1 - -@rem First check that the test discovery works -python -m numba.tests.test_runtests -@rem Now run the Numba test suite -@rem Note that coverage is run from the checkout dir to match the "source" -@rem directive in .coveragerc -if "%RUN_COVERAGE%" == "yes" ( - set PYTHONPATH=. - coverage erase - coverage run runtests.py -b -m -- numba.tests -) else ( - set NUMBA_ENABLE_CUDASIM=1 - python -m numba.runtests -b -m -- numba.tests -) - -if %errorlevel% neq 0 exit /b %errorlevel% diff --git a/numba/buildscripts/incremental/test.sh b/numba/buildscripts/incremental/test.sh deleted file mode 100755 index 0a92ba717..000000000 --- a/numba/buildscripts/incremental/test.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash - -source activate $CONDA_ENV - -# Make sure any error below is reported as such -set -v -e - -# Ensure that the documentation builds without warnings -pushd docs -if [ "$BUILD_DOC" == "yes" ]; then make SPHINXOPTS=-W clean html; fi -popd -# Run system info tool -pushd bin -numba -s -popd - -# switch off color messages -export NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING=1 -# switch on developer mode -export NUMBA_DEVELOPER_MODE=1 -# enable the fault handler -export PYTHONFAULTHANDLER=1 - -unamestr=`uname` -if [[ "$unamestr" == 'Linux' ]]; then - SEGVCATCH=catchsegv -elif [[ "$unamestr" == 'Darwin' ]]; then - SEGVCATCH="" -else - echo Error -fi - -# limit CPUs in use on PPC64LE, fork() issues -# occur on high core count systems -archstr=`uname -m` -if [[ "$archstr" == 'ppc64le' ]]; then - TEST_NPROCS=16 -fi - -# First check that the test discovery works -python -m numba.tests.test_runtests -# Now run the Numba test suite -# Note that coverage is run 
from the checkout dir to match the "source" -# directive in .coveragerc -if [ "$RUN_COVERAGE" == "yes" ]; then - export PYTHONPATH=. - coverage erase - $SEGVCATCH coverage run runtests.py -b -m $TEST_NPROCS -- numba.tests -else - NUMBA_ENABLE_CUDASIM=1 $SEGVCATCH python -m numba.runtests -b -m $TEST_NPROCS -- numba.tests -fi diff --git a/numba/buildscripts/remove_unwanted_files.py b/numba/buildscripts/remove_unwanted_files.py deleted file mode 100644 index b9605b302..000000000 --- a/numba/buildscripts/remove_unwanted_files.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -Workaround for a conda-build bug where failing to compile some Python files -results in a build failure. - -See https://github.com/conda/conda-build/issues/1001 -""" - -import os -import sys - - -py2_only_files = [] - -py3_only_files = [ - 'numba/tests/annotation_usecases.py', - ] - - -def remove_files(basedir): - """ - Remove unwanted files from the current source tree - """ - if sys.version_info >= (3,): - removelist = py2_only_files - msg = "Python 2-only file" - else: - removelist = py3_only_files - msg = "Python 3-only file" - for relpath in removelist: - path = os.path.join(basedir, relpath) - print("Removing %s %r" % (msg, relpath)) - os.remove(path) - - -if __name__ == "__main__": - remove_files('.') diff --git a/numba/codecov.yml b/numba/codecov.yml deleted file mode 100644 index c293219ba..000000000 --- a/numba/codecov.yml +++ /dev/null @@ -1,22 +0,0 @@ -# Configuration for codecov.io -# When editing this file, please validate its contents using: -# curl -X POST --data-binary @- https://codecov.io/validate < codecov.yml - -comment: - layout: "header, diff, changes, uncovered" - -coverage: - ignore: - - "numba/cuda/.*" - - "numba/hsa/.*" - - status: - project: - default: - # The build fails if total project coverage drops by more than 3% - target: auto - threshold: "3%" - # These checks can mark a build failed if too much new code - # is not covered (which happens often with JITted functions). 
- changes: false - patch: false diff --git a/numba/condatestall.py b/numba/condatestall.py deleted file mode 100644 index 182ace5c7..000000000 --- a/numba/condatestall.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -Uses conda to run and test all supported python + numpy versions. -""" - -from __future__ import print_function -import itertools -import subprocess -import os -import sys - -if '-q' in sys.argv[1:]: - NPY = '18', -else: - NPY = '16', '17', '18' -PY = '26', '27', '33' -RECIPE_DIR = "./buildscripts/condarecipe.local" - - -def main(): - failfast = '-v' in sys.argv[1:] - - args = "conda build %s --no-binstar-upload" % RECIPE_DIR - - failures = [] - for py, npy in itertools.product(PY, NPY): - if py == '33' and npy == '16': - # Skip python3 + numpy16 - continue - - os.environ['CONDA_PY'] = py - os.environ['CONDA_NPY'] = npy - - try: - subprocess.check_call(args.split()) - except subprocess.CalledProcessError as e: - failures.append((py, npy, e)) - if failfast: - break - - print("=" * 80) - if failures: - for py, npy, err in failures: - print("Test failed for python %s numpy %s" % (py, npy)) - print(err) - else: - print("All Passed") - - -if __name__ == '__main__': - main() - diff --git a/numba/docs/Makefile b/numba/docs/Makefile deleted file mode 100644 index b60d7c1d6..000000000 --- a/numba/docs/Makefile +++ /dev/null @@ -1,177 +0,0 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -j1 -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = _build - -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) -$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. 
If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) -endif - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source -# the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source - -.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext - -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " singlehtml to make a single large HTML file" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " devhelp to make HTML files and a Devhelp project" - @echo " epub to make an epub" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " latexpdf to make LaTeX files and run them through pdflatex" - @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" - @echo " text to make text files" - @echo " man to make manual pages" - @echo " texinfo to make Texinfo files" - @echo " info to make Texinfo files and run them through makeinfo" - @echo " gettext to make PO message catalogs" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " xml to make Docutils-native XML files" - @echo " pseudoxml to make pseudoxml-XML files for display purposes" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - -clean: - rm -rf $(BUILDDIR)/* - -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. 
The HTML pages are in $(BUILDDIR)/html." - -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -singlehtml: - $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml - @echo - @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." - -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." - -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Numba.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Numba.qhc" - -devhelp: - $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp - @echo - @echo "Build finished." - @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/Numba" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Numba" - @echo "# devhelp" - -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." 
- -latexpdf: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -latexpdfja: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through platex and dvipdfmx..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text - @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -man: - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." - -texinfo: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo - @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." - @echo "Run \`make' in that directory to run these through makeinfo" \ - "(use \`make info' here to do that automatically)." - -info: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo "Running Texinfo files through makeinfo..." - make -C $(BUILDDIR)/texinfo info - @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." - -gettext: - $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale - @echo - @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." - -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." 
- -xml: - $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml - @echo - @echo "Build finished. The XML files are in $(BUILDDIR)/xml." - -pseudoxml: - $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml - @echo - @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/numba/docs/_static/numba_blue_icon_rgb.png b/numba/docs/_static/numba_blue_icon_rgb.png deleted file mode 100644 index 6dd940173bfd4713d0dfa5014d526f14380ee191..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8527 zcma)Cc{~(a`$zU=5H4j;5f!5dL(Gutsw~BjYG|@X5yrk{or=)w%Gk}VE=IN%j5TB_ zW6Z5AF~~MEiLs1rtiQv1-~WF;pZS>coM%7Z=bYm?9@|=5hzK4P37Lf6+oo zKMu{ig@5#yd0N;{-^pt{MtufWcbgh&>_vWHYb%t2@&ES^d|~&Mz(aB*p^WCaYO=SY z(O6!g3*pDyrlvc;93S1?)Yz#Xj9d%Z&>Q$EVYZvrvP-Y+zu_7W=c^ZBUPtZyCvLOZ zyTQ-#C+j`?b#8iPl(rHj7TX?SlDMW-x;)mmViO)DF{d1PuWjeEG#{5dcc?72@=Ij| z&+eH!MKNB+S)J2}qV`){jh9+?d|Eko(|<|MokA@uQP5A6qxC+ldoI@+-#4af1dB0j z!yhTW6f?nHmukM>5j~u+NfhFWh5UK=M}&H#pIM&bKrh`^&xlL@B>u3AcZyoss@}jm z6-sQLY2-iK+h<4(hEa~y*Ie%Ma>Jkwj~q({(-6*g(>)E&SER!Ek|4_?VlG}O)*Y|z zV{BX2lkml%P%CS?efbN;h}N&w`w?114j>`Vxj}_*9a@Q?Z{`-sY7@JtGd3r@^sC@kYik zV}w|^>anP-mEz4~Uuz;pf1))DXERpX*7q^Q-w3E-peHB;OFSg5c{8n+f3d& zUM6?B>eRYT`dxCyqq5Ps!ZjFktA^mWE6a9zS+R_&$a1fDAQcFV-yIbAM1zMH{~zD13AD!QsKKZ!{vYPGGgS zB73MKdOVV&-{1vhhMX54sfD~gp2&7lHl+5L6|5D`h5OMRjLs7m$}T_FX8d-i0Aak7 zKU$-Q(erN}WhxteTitcF9Coco@$$)jnITi#j2tFCCfqBzs|q=Q^Ni%DX6>=R!aCo^|rdS%LU-J^AW9>|;p# zt)s4d9>kCn6*GI3NU}UR4vw9VLu>J{ueloa* zqV!}M?*=5zLx!diY|nV0#(zhz{3&{K{Y|Zk{YW*-uese&YHMWnqMZ^2gznf%gPJ`- z@78DePS~uvVsoza!*(`Tt+zUB%D=J~0gpMZYqmpK!;XIyONMB*wmIntiGYnlB!cx=7su9^40}+TF*pXb&zM{dG-a_U#w{Cz;;765D7o^mDlaZ91 zE|m>6R*4Vs4f zwRExklIsD32kzG$i}ZMS@eifJj#MSSSnCONMY22s0+Q7pd1P2KShnMU(F@n5x%Mha z$*hc`i73bBXIr+Vqbm;I&SknNz;AF?74sCWVQY^01W$E4x9eZu;B@j zf6YA82+SB4n^vNz>yWZqr}elE_%Tl)tPC1GLL_$ICnDNc)i6$u8^*&&%s=21KZx&~ 
z2hMO!a$k!;0`bCk@qHEgB5)9(x#29UI=lw?`jdcoHs-#L?zWqTAlc`laf%6RMN&fp8v6P?+%TsEqKd8f^h3BbjYNVCjfwzLVwmvK!oE%_-P6kU z3a*Uw{i6Kq6ClKzN$$F+-VZ91_f}o^1BxtT)#=>FD~BHMEf7`z0&9Cr4 zVVD_&fZ!yv{BbvZ$Yu_$hjrjZ(&Dt|#r}ms&_H=HMn?iLraniEmW}uvnoTPV=^lWhbbw7`ToAMZ)x;rNn+Ejq*e8? z@#K{^3MMTtIa*|=qX=K=K7w1P5L%R<3m$p`sV5z#R2ws+v>eq-rs)0oVtq>;{`sc6 zUv$+dKWUt|56k_!-FEZwx^JVWbVyr7%a5(PW`Q*^?(JL%Q5w-HS%4jGQKz8 z8s`mth2U<0d*<@}AsjeGV})@XRqOk;o2>OE#EhD}x>oTesA)iwD~c?daezMUu+C2` zk)=Z-U$T+VgyR&?P20yvww{P~GB$5@+BDKys}dWI=ia0$KOFN!Y}*{rs{D2vL;e8gP!XBHn^3SP_K7*nMbuPq*Ec8Fp)EAAQ( z+bAov-^vO%m@JL$%0U#_Oeov)$aY`(qyeUvcZvD06uLTBA7n<3Hbw!+(Zxp#Q`}kky!Z-JEjHe*a_z4Edjrz#^((>;T&FaGxWj`*ix*l+ zBmXJ4Lpu9BabC!{9;?W6uAkm^(ID2DYNp|UQEkgqPfb&dV5~%^Q7dF!yoJGSqK`QF zsGnHfvV7}r>{Cm>ol(V}qMM_a1NkfO8>>(*hu%h&z@FIgWsGe`e?tBJr16W=OWT-< z8KNOLfcAb662koFOhy-WZr8xp9}l+Eb1%uq7N}3r2m!|Z$&0QMGNIj*9?j2?$MY#O zV>@PZ$w{`EeH#jOqud^N?V+~IQ$IB@{RV_(qqxZIioHv&Rq@HWwibyMFbkydXI;M? zebk%S40s=H;y1MrIWK-BJ0;F2xICBfdzSN1_uY_sYD3ruWSh~=vLDb)%}Un#&l}Ry zE6Tx5xvQEK3->uAlfhCA0N(ulvyt+)qj&Mu*t>j2zyb}ujxuvQVJtjW@|nlLyxyCM zl9;w-G+$QiPg7XB0CtOR^iqB5iv!9!d_Yj*V<@^da^B^v))%>xSn#krw_zu8WVRy|F z#|%d2cMRV-kzR<_YP>U)4DZ%1)`%)U!X2I^`emo6p~=QSZ~N)If}%)3aiO+Fix*dH z1x)J{$tCxbX7_}`Rt~%j20)y>v%V=`1^H_2EMtyyWD8aZnR`ggabZgEgj{> zjUHp^{zmuA69}lwVqfNJI>&{=tsj}Tz+YJGG81A-cl|R63MHiVbdT@%&&c$6K~zy0 zMNN#xy5MG>euyjT9p8hQ){$)1ci*G?M}=|yB?-)((F8U)Ac7Yat&}JYRTS>N0VbDT z3)Cekm{zaEmzW7(_s|!>zJ+nU7g=6cFnzc`K2i4lN}R+rCF_O+|BB|2MQBL1M&WK9 zQ3>0;Q!2n6E}as?WMaOc=J$`74x=h0y`d}@Y9c+&!G#KPt`U_GS_(ZQ#~&cm@m^Jo z!4+jxCcr}snuco9jrmtRzs@x0_Dpt>c?^^&%8^pjaL+LZpQlQcBNkz!uJZ*u{*jSC zf5pF+F_C73=8m20mhSBv=QcPg#=ZG&=cdG!Z^bb3^|aAhp{JZ&s3My}OaF$%8RzF2 zlcz7lAO3#Ay6D_;G6nWRsq&g`v#@G?EXY!OXLRo|%E;1ENT~+jCAXvo_`)CM0>Ot5 zVe-tJhJ9Dv1(!cNa+*ufWH&7(j6MzZ`Vv&Hr}09T<7q4+do(7uFB^)r+#m?pGg20! 
zB&vQb*39~Cmt8QT2Y<@0_P50gVHgPEp=4FexDMVqL}oIXlKPKgNgH}HrBDd-M*I}R z&x&7BPmku^#gT@WH`_)E`e*8s+Fya}px)g2tKrg4R6Qc5BZ-BOxYzgQeeck&)ZeTC1rwJ&2a#QURrWX@b@u1b^wP(f*YzVj z;eG@73nsEBPsvO+ZFTilrrA8$iFmlJ|3htT{ruUZ12at#_KU}VQS|;{+OMI^;A<7v z9_@{N63je5)OW4!T*{z>w7^QU*-p#YH{}trJ)A~Eeeu?wX^H3xs=3_RJ86^=Fn%l2 z)rcaTo(N0a@J|0u@hfY8=i~P3XN3FNJh0Z3xzP5!xb4Vg*TTZtDD>Ef?n{_7<2(59FIy&t<@HyT zH7P3IJN8uz#q5jW_?R10sw_6m^i*&=mM7T6U#!3Z_!8L9B8YnR!$`*1ELth(WK6^W zo!XvEqf2KYv?hD-a7VoK)C;&py0k0}$zr{|jPLp-Bo&SpP27O%Q^fd}|6-0b*=HLf zsulfmeCDp5E1_HtJWUaw?qHuLMFgsouru%8SG?=dHah+ZX>?+-Y9phszMMQo%}PvS zbgcHSCJ8p(_Grw=4Z?l?nm3EDHH+`J%3a-yQa<1p)*_2dN zAYb+@Q8IaNv6iv!+9-Wr?rUYJe1R8ltZ@h5AGZWqR%VoV{qKxx>3%_4olrgDfx^|^ zND=PM1jXY^_xtbO!yK7rQ9V&OdNiu}fmLeX%ev?efq)j>!b^6G!qtL%C%Y5F{yu%x z4&O(^zv1s>4qfP;pH(FVdQfi_^)V+7>s41VGp)+a3|&WSvfGRnetoM8bHETqSAQE} zvVf-w;-{9<2GWICTpu;p^l}i1#&o`?P7Boa+d8f&TvjI}fh%fFG;e4=`fp`^)%u_) zzYc{eA++(MDO%12P$&6o3B|9HaOHK$1AG#ru%_bYt&j(1PP&5?cHhpr3rD5Yj;w9F zdqVr>5nhqEa9Mk2!(Sh zMr8(^9)^#eXwA$=LeCoyT)3)hV$z_-JELzOYmM#yBl6)Holb_-lkC~4mLIe4j~auc z_#aKm&owkHp=P2Vkrl2vxOc2+NRWkY($>0IG=+BvFb_B~-0Nb0BJ9?B&+T@c_Vof) z%1`!$N3dUv@@|9o@rGYJ$A_RE!b|U3BznPNMMFp*x)9v%c+~8X&C1c*&PX}PEpbMQ zeR-GvY6wDaTXmN9_P0s%TZ5``f`Qzd_Y@y`60##uquil}hNHO9BZF^rX;Y)826BWj zXTxM1N^iK3>+ZD7U)l?|dzzc?{AJb0DFBZ`g-#WeA5tYv{pZ~| zk!P7K)$TOiLtOGh$Ggv1_GGx<1)b$7XzlScCe=PWRuVKHD}-VSzI_D*mgiECJM!ru zkBe^Up!1>c(ELrt$*K4=$T~M{(GBO=s(#lX;DccNKQoXSY*^mPnibBIZi{RMX9VF%SgF>Ree*QM{P29mbFP(a!K$X{ZbzTfOM z0=*wfM}bOiN4f}#nI5x0X(zlSz0X=@xj)KiM}nOEj&SBd%@LoB&Y?6d%X0e)`!@WX zZyv*fE^E6{aCKKaa!rS~??I+BH?vuCd#U?!Tju;-9vB4$RH)Tv#a4!dANIb$1zN*K z-DO3|`OB~E78ME1E^r-6QKWfP+Yv&0qa|q?7`U{dyCXd{QHlZGQcBD0O(f)*=ATDI z|8;~LM!G=C>g~Ipo!eo3ch){LS5l3F9zZl_#YL8CpQLEKix~KIN16fs1s&|LF0HUj zuhdC|V~JSyRRRgGLk^KlV zPLpJu)j#Zz2#MSF*c!N=siniq^F(5ttP2|5L@ZI_%SmM3MzGg?A{g?82`{dy7adY3O?|9g z2d%qlTd7zGZsYd}Il?gtr^NV4oEg%C{8Aru_JA@CETPDh4^Z>WkhXZUoQ*YeoL!Or z()=rcUoth`D7dVvdN~m!0fIyn#lNst2jXIkA0>8zwbp27A3<(dAcUp=yF$azsi%lo 
zIRa?Ns*-H3xkFw(?yuWtB^h^@f%LapKc3WQ;~)F+V`vJrHFUB4fx@UEbt-6*1XZh0 zvV(9pQ-A-oufhU3ww5x;X41bU`AL6?TU7GaghYpW)2Y9=rzF zbv(VZSjeMybbe6^Zfb%C^`X9HYl)Ojyaw3FGG-&ihux!u%OLlqL01zhiDE+UcapKv z4FfKMU>|xo+jwQ)`9lO&o6}9O9of?p14{(Ct8CxlCSP@L?I-r{3 z!D8zWSeMLk9 z_B>#ixZW12Ht-x2|C>^BnSK7-{2(uYw*g6z1-ZPtapeXFY{;NyujuH1#AM6tkiL_$ z86NVmKf6@V=H5&UzE6aD0=$_y^|Q|oO$JG*HY@LYK5<7Eth$d?ESxXkHQ6T4%B^71 zR?4I2rT$43(*0wsy6aC3?Gv5OsY590Gue_p_tDL~la*j8KgY-L8jHfJ z9SCEzF@0xPp1`Gn!KD~jCnWEK6`lymILJ}oCwIX>pB@Al`)j{wDFahQ?Z?Yl2J?eV zTPen4%@7$q(iAR?DqfM(IzE8cC^E%HXzj9jcE&V(Und|XaXBeD#IC1`dz{Pg|ww%@Zt9|M&P0(o0Oc1_PR z7ANC1(usvSoGTS8j~`SYi#bD5_N%Aj$7dU-A3p#YjRWA>Z8HKHK+S{K#-D=+KJ)oD zYUz;5&+q{N5m-AEO}kT;0Et{3XVoE_{tiQhA^nlgf`KQqG(#B8FqfnN)iwYlRnsg( zj4bVhRRSmn0585gsl#BCi4~L&ErPk|n6$X<7Gp{Tul#B0Pq`md@U{*?5<)}1zU+~& zFD2XL6mA`HoB=L9{yGEc6J=?<^C1Vy8HSVylEBFw+MIl)or>3hs(|;fu>P=AY$d?A z#~)dSa|*O-jWpCe^7j}*(1QcR<&Ax(|Lc8~dr-mGiBtP@Qh}v^Rc`W?T{-_epy;7y z$PgZ8A~;oP08A|Ef%RyBR<>>UkO3-6C==P8#*s6P0?V#0hrk++ma)ub78a$j1P(5F z7(YvUFt6*eA9mpRp)_#H<{t6sWmlUaIiz41yZp3j8qj}$$YPPEgYN^Nf~We~=?BW^ zu4EY+$oZPS#`~0pzvS|2jC%BQ z4GhDn@Lvzn55YdQIWpD{ViA<*4H)>^G6=rHv9SbJ24yD)R0T&oWd?E|=ZN3ZrK>iB z=Ny?K^DZ=iFo5%iesHrQG8p4DTdCWU;QcNF*G1Tlu;>TvJ)59vU`obW8Z{4mvS&c= z1U2*bxdb+Y8U!w;{6<6FTbQ+#Qj!I7W%PRsN6Y+yH6TKSjob9IHD90MvuC6~PL|(| z0jS}}beF(P1V;Q8yzd*NUqCTQ>ZCGCg~52zLCV}baw|YBbC|O&C7>Cz+b8ceH~Ab~ zD^7w+qlwXwL)*u7_Zh$&q+H&8F@Pyf^RF}HZ)L1_8ogCw&#{cgx`8`%0?__!-!9Gg z28@FvkdJ7STfW8`xW=l z>8yHc=_9q$4l#fx4#^w&g)hH;<9HZ%D9T3V|NUPE`;6c}4O%!3apcq%T}U|oA#I;z NX>NV7!t{3B{{hqXjVAyA diff --git a/numba/docs/_templates/EMPTY b/numba/docs/_templates/EMPTY deleted file mode 100644 index e69de29bb..000000000 diff --git a/numba/docs/gh-pages.py b/numba/docs/gh-pages.py deleted file mode 100644 index 3c8093d06..000000000 --- a/numba/docs/gh-pages.py +++ /dev/null @@ -1,150 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -"""Script to commit the doc build outputs into the github-pages repo. 
- -Use: - - gh-pages.py [tag] - -If no tag is given, the current output of 'git describe' is used. If given, -that is how the resulting directory will be named. - -In practice, you should use either actual clean tags from a current build or -something like 'current' as a stable URL for the most current version of the """ -from __future__ import print_function, division, absolute_import - -#----------------------------------------------------------------------------- -# Imports -#----------------------------------------------------------------------------- -import os -import re -import shutil -import sys -from os import chdir as cd -from os.path import join as pjoin - -from subprocess import Popen, PIPE, CalledProcessError, check_call - -#----------------------------------------------------------------------------- -# Globals -#----------------------------------------------------------------------------- - -pages_dir = 'gh-pages' -html_dir = '_build/html' -pdf_dir = '_build/latex' -pages_repo = 'git@github.com:numba/numba-doc.git' - -#----------------------------------------------------------------------------- -# Functions -#----------------------------------------------------------------------------- -def sub_environment(): - """Return an environment dict for executing subcommands in.""" - env = os.environ.copy() - # Force untranslated messages for regex matching - env['LANG'] = 'C' - return env - - -def sh(cmd): - """Execute command in a subshell, return status code.""" - return check_call(cmd, shell=True, env=sub_environment()) - - -def sh2(cmd): - """Execute command in a subshell, return stdout. 
- - Stderr is unbuffered from the subshell.x""" - p = Popen(cmd, stdout=PIPE, shell=True, env=sub_environment()) - out = p.communicate()[0] - retcode = p.returncode - if retcode: - raise CalledProcessError(retcode, cmd) - else: - return out.rstrip() - - -def sh3(cmd): - """Execute command in a subshell, return stdout, stderr - - If anything appears in stderr, print it out to sys.stderr""" - p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True, - env=sub_environment()) - out, err = p.communicate() - retcode = p.returncode - if retcode: - raise CalledProcessError(retcode, cmd) - else: - return out.rstrip(), err.rstrip() - - -def init_repo(path): - """clone the gh-pages repo if we haven't already.""" - sh("git clone %s %s"%(pages_repo, path)) - here = os.getcwd() - cd(path) - sh('git checkout gh-pages') - cd(here) - -#----------------------------------------------------------------------------- -# Script starts -#----------------------------------------------------------------------------- -if __name__ == '__main__': - # The tag can be given as a positional argument - try: - tag = sys.argv[1] - except IndexError: - try: - tag = sh2('git describe --exact-match').decode() - except CalledProcessError: - tag = "dev" # Fallback - print("Using dev") - - startdir = os.getcwd() - if not os.path.exists(pages_dir): - # init the repo - init_repo(pages_dir) - else: - # ensure up-to-date before operating - cd(pages_dir) - sh('git checkout gh-pages') - sh('git pull') - cd(startdir) - - dest = pjoin(pages_dir, tag) - - # don't `make html` here, because gh-pages already depends on html in Makefile - # sh('make html') - if tag != 'dev': - # only build pdf for non-dev targets - #sh2('make pdf') - pass - - # This is pretty unforgiving: we unconditionally nuke the destination - # directory, and then copy the html tree in there - shutil.rmtree(dest, ignore_errors=True) - shutil.copytree(html_dir, dest) - if tag != 'dev': - #shutil.copy(pjoin(pdf_dir, 'ipython.pdf'), pjoin(dest, 
'ipython.pdf')) - pass - - try: - cd(pages_dir) - status = sh2('git status | head -1').decode() - branch = re.match('\#?\s*On branch (.*)$', status).group(1) - if branch != 'gh-pages': - e = 'On %r, git branch is %r, MUST be "gh-pages"' % (pages_dir, - branch) - raise RuntimeError(e) - - sh('git add -A %s' % tag) - sh('git commit -m"Updated doc release: %s"' % tag) - print() - print('Most recent 3 commits:') - sys.stdout.flush() - sh('git --no-pager log --oneline HEAD~3..') - finally: - cd(startdir) - - print() - print('Now verify the build in: %r' % dest) - print("If everything looks good, 'git push'") diff --git a/numba/docs/make.bat b/numba/docs/make.bat deleted file mode 100644 index 3016f942f..000000000 --- a/numba/docs/make.bat +++ /dev/null @@ -1,242 +0,0 @@ -@ECHO OFF - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set BUILDDIR=_build -set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . -set I18NSPHINXOPTS=%SPHINXOPTS% . -if NOT "%PAPER%" == "" ( - set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% - set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% -) - -if "%1" == "" goto help - -if "%1" == "help" ( - :help - echo.Please use `make ^` where ^ is one of - echo. html to make standalone HTML files - echo. dirhtml to make HTML files named index.html in directories - echo. singlehtml to make a single large HTML file - echo. pickle to make pickle files - echo. json to make JSON files - echo. htmlhelp to make HTML files and a HTML help project - echo. qthelp to make HTML files and a qthelp project - echo. devhelp to make HTML files and a Devhelp project - echo. epub to make an epub - echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter - echo. text to make text files - echo. man to make manual pages - echo. texinfo to make Texinfo files - echo. gettext to make PO message catalogs - echo. 
changes to make an overview over all changed/added/deprecated items - echo. xml to make Docutils-native XML files - echo. pseudoxml to make pseudoxml-XML files for display purposes - echo. linkcheck to check all external links for integrity - echo. doctest to run all doctests embedded in the documentation if enabled - goto end -) - -if "%1" == "clean" ( - for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i - del /q /s %BUILDDIR%\* - goto end -) - - -%SPHINXBUILD% 2> nul -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -if "%1" == "html" ( - %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/html. - goto end -) - -if "%1" == "dirhtml" ( - %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. - goto end -) - -if "%1" == "singlehtml" ( - %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. - goto end -) - -if "%1" == "pickle" ( - %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can process the pickle files. - goto end -) - -if "%1" == "json" ( - %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can process the JSON files. - goto end -) - -if "%1" == "htmlhelp" ( - %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp - if errorlevel 1 exit /b 1 - echo. 
- echo.Build finished; now you can run HTML Help Workshop with the ^ -.hhp project file in %BUILDDIR%/htmlhelp. - goto end -) - -if "%1" == "qthelp" ( - %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can run "qcollectiongenerator" with the ^ -.qhcp project file in %BUILDDIR%/qthelp, like this: - echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Numba.qhcp - echo.To view the help file: - echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Numba.ghc - goto end -) - -if "%1" == "devhelp" ( - %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. - goto end -) - -if "%1" == "epub" ( - %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The epub file is in %BUILDDIR%/epub. - goto end -) - -if "%1" == "latex" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "latexpdf" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - cd %BUILDDIR%/latex - make all-pdf - cd %BUILDDIR%/.. - echo. - echo.Build finished; the PDF files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "latexpdfja" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - cd %BUILDDIR%/latex - make all-pdf-ja - cd %BUILDDIR%/.. - echo. - echo.Build finished; the PDF files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "text" ( - %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The text files are in %BUILDDIR%/text. - goto end -) - -if "%1" == "man" ( - %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The manual pages are in %BUILDDIR%/man. 
- goto end -) - -if "%1" == "texinfo" ( - %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. - goto end -) - -if "%1" == "gettext" ( - %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The message catalogs are in %BUILDDIR%/locale. - goto end -) - -if "%1" == "changes" ( - %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes - if errorlevel 1 exit /b 1 - echo. - echo.The overview file is in %BUILDDIR%/changes. - goto end -) - -if "%1" == "linkcheck" ( - %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck - if errorlevel 1 exit /b 1 - echo. - echo.Link check complete; look for any errors in the above output ^ -or in %BUILDDIR%/linkcheck/output.txt. - goto end -) - -if "%1" == "doctest" ( - %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest - if errorlevel 1 exit /b 1 - echo. - echo.Testing of doctests in the sources finished, look at the ^ -results in %BUILDDIR%/doctest/output.txt. - goto end -) - -if "%1" == "xml" ( - %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The XML files are in %BUILDDIR%/xml. - goto end -) - -if "%1" == "pseudoxml" ( - %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. - goto end -) - -:end diff --git a/numba/docs/source/conf.py b/numba/docs/source/conf.py deleted file mode 100644 index 277f658d7..000000000 --- a/numba/docs/source/conf.py +++ /dev/null @@ -1,300 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# Numba documentation build configuration file, created by -# sphinx-quickstart on Tue Dec 30 11:55:40 2014. -# -# This file is execfile()d with the current directory set to its -# containing dir. 
-# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -import sys -import os -import sphinx_bootstrap_theme - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -try: - # Numba is installed - import numba -except ImportError: - # Numba is run from its source checkout - sys.path.insert(0, os.path.abspath('../..')) - import numba - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.intersphinx', - 'sphinx.ext.todo', - #'sphinx.ext.mathjax', - 'sphinx.ext.autodoc', - #'sphinx.ext.graphviz', -] - -todo_include_todos = True - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['../_templates'] - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'Numba' -copyright = u'2012, Anaconda, Inc.' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -version = '.'.join(numba.__version__.split('.')[:2]) -# The full version, including alpha/beta/rc tags. -release = numba.__version__ - -# The language for content autogenerated by Sphinx. 
Refer to documentation -# for a list of supported languages. -#language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = [] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -#html_theme = 'default' - -# pip install sphinx_bootstrap_theme -html_theme = 'bootstrap' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -html_theme_options = { - 'bootswatch_theme': "paper", -} - -# Add any paths that contain custom themes here, relative to this directory. -html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". 
-#html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -html_logo = "../_static/numba_blue_icon_rgb.png" - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['../_static'] - -# Add any extra paths that contain custom files (such as robots.txt or -# .htaccess) here, relative to this directory. These files are copied -# directly to the root of the documentation. -#html_extra_path = [] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_domain_indices = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False - -# If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
-#html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None - -# Output file base name for HTML help builder. -htmlhelp_basename = 'Numbadoc' - - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - ('index', 'numba.tex', u'Numba Documentation', - u'Anaconda', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# If true, show page references after internal links. -#latex_show_pagerefs = False - -# If true, show URL addresses after external links. -#latex_show_urls = False - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_domain_indices = True - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'numba', 'Numba Documentation', - ['Anaconda'], 1) -] - -# If true, show URL addresses after external links. 
-#man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ('index', 'Numba', 'Numba Documentation', - 'Anaconda', 'Numba', 'One line description of project.', - 'Miscellaneous'), -] - -# Documents to append as an appendix to all manuals. -#texinfo_appendices = [] - -# If false, no module index is generated. -#texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False - - -# Configuration for intersphinx: refer to the Python standard library -# and the Numpy documentation. -intersphinx_mapping = { - 'python': ('https://docs.python.org/3', None), - 'numpy': ('http://docs.scipy.org/doc/numpy', None), - 'llvmlite': ('http://llvmlite.pydata.org/en/latest/', None), - } - - -# -- Custom autogeneration ------------------------------------------------ - -def _autogenerate(): - from numba.scripts.generate_lower_listing import gen_lower_listing - - basedir = os.path.dirname(__file__) - gen_lower_listing(os.path.join(basedir, - 'developer/autogen_lower_listing.rst')) - - -_autogenerate() diff --git a/numba/docs/source/cuda-reference/host.rst b/numba/docs/source/cuda-reference/host.rst deleted file mode 100644 index 1a8887430..000000000 --- a/numba/docs/source/cuda-reference/host.rst +++ /dev/null @@ -1,152 +0,0 @@ -CUDA Host API -============= - -Device Management ------------------ - -Device detection and enquiry -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The following functions are available for querying the available hardware: - -.. autofunction:: numba.cuda.is_available - -.. 
autofunction:: numba.cuda.detect - -Context management -~~~~~~~~~~~~~~~~~~ - -CUDA Python functions execute within a CUDA context. Each CUDA device in a -system has an associated CUDA context, and Numba presently allows only one context -per thread. For further details on CUDA Contexts, refer to the `CUDA Driver API -Documentation on Context Management -`_ and the -`CUDA C Programming Guide Context Documentation -`_. CUDA Contexts -are instances of the :class:`~numba.cuda.cudadrv.driver.Context` class: - -.. autoclass:: numba.cuda.cudadrv.driver.Context - :members: reset, get_memory_info, push, pop - -The following functions can be used to get or select the context: - -.. autofunction:: numba.cuda.current_context -.. autofunction:: numba.cuda.require_context - -The following functions affect the current context: - -.. autofunction:: numba.cuda.synchronize -.. autofunction:: numba.cuda.close - -Device management -~~~~~~~~~~~~~~~~~ - -Numba maintains a list of supported CUDA-capable devices: - -.. attribute:: numba.cuda.gpus - - An indexable list of supported CUDA devices. This list is indexed by integer - device ID. - -Alternatively, the current device can be obtained: - -.. function:: numba.cuda.gpus.current - - Return the currently-selected device. - -Getting a device through :attr:`numba.cuda.gpus` always provides an instance of -:class:`numba.cuda.cudadrv.devices._DeviceContextManager`, which acts as a -context manager for the selected device: - -.. autoclass:: numba.cuda.cudadrv.devices._DeviceContextManager - -One may also select a context and device or get the current device using the -following three functions: - -.. autofunction:: numba.cuda.select_device -.. autofunction:: numba.cuda.get_current_device -.. autofunction:: numba.cuda.list_devices - -The :class:`numba.cuda.cudadrv.driver.Device` class can be used to enquire about -the functionality of the selected device: - -.. 
class:: numba.cuda.cudadrv.driver.Device - - The device associated with a particular context. - - .. attribute:: compute_capability - - A tuple, *(major, minor)* indicating the supported compute capability. - - .. attribute:: id - - The integer ID of the device. - - .. attribute:: name - - The name of the device (e.g. "GeForce GTX 970") - - .. method:: reset - - Delete the context for the device. This will destroy all memory - allocations, events, and streams created within the context. - -Measurement ------------ - -.. _cuda-profiling: - -Profiling -~~~~~~~~~ - -The NVidia Visual Profiler can be used directly on executing CUDA Python code - -it is not a requirement to insert calls to these functions into user code. -However, these functions can be used to allow profiling to be performed -selectively on specific portions of the code. For further information on -profiling, see the `NVidia Profiler User's Guide -`_. - -.. autofunction:: numba.cuda.profile_start -.. autofunction:: numba.cuda.profile_stop -.. autofunction:: numba.cuda.profiling - -Events -~~~~~~ - -Events can be used to monitor the progress of execution and to record the -timestamps of specific points being reached. Event creation returns immediately, -and the created event can be queried to determine if it has been reached. For -further information, see the `CUDA C Programming Guide Events section -`_. - -The following functions are used for creating and measuring the time between -events: - -.. autofunction:: numba.cuda.event -.. autofunction:: numba.cuda.event_elapsed_time - -Events are instances of the :class:`numba.cuda.cudadrv.driver.Event` class: - -.. autoclass:: numba.cuda.cudadrv.driver.Event - :members: query, record, synchronize, wait - -Stream Management ------------------ - -Streams allow concurrency of execution on a single device within a given -context. Queued work items in the same stream execute sequentially, but work -items in different streams may execute concurrently. 
Most operations involving a -CUDA device can be performed asynchronously using streams, including data -transfers and kernel execution. For further details on streams, see the `CUDA C -Programming Guide Streams section -`_. - -To create a stream: - -.. autofunction:: numba.cuda.stream - -Streams are instances of :class:`numba.cuda.cudadrv.driver.Stream`: - -.. autoclass:: numba.cuda.cudadrv.driver.Stream - :members: synchronize, auto_synchronize - diff --git a/numba/docs/source/cuda-reference/index.rst b/numba/docs/source/cuda-reference/index.rst deleted file mode 100644 index 789604226..000000000 --- a/numba/docs/source/cuda-reference/index.rst +++ /dev/null @@ -1,8 +0,0 @@ -CUDA Python Reference -===================== - -.. toctree:: - - host.rst - kernel.rst - memory.rst diff --git a/numba/docs/source/cuda-reference/kernel.rst b/numba/docs/source/cuda-reference/kernel.rst deleted file mode 100644 index 2de2100fd..000000000 --- a/numba/docs/source/cuda-reference/kernel.rst +++ /dev/null @@ -1,305 +0,0 @@ -CUDA Kernel API -=============== - -Kernel declaration ------------------- - -The ``@cuda.jit`` decorator is used to create a CUDA kernel: - -.. autofunction:: numba.cuda.jit - -.. autoclass:: numba.cuda.compiler.AutoJitCUDAKernel - :members: inspect_asm, inspect_llvm, inspect_types, specialize, extensions - -Individual specialized kernels are instances of -:class:`numba.cuda.compiler.CUDAKernel`: - -.. autoclass:: numba.cuda.compiler.CUDAKernel - :members: bind, ptx, device, inspect_llvm, inspect_asm, inspect_types - -Intrinsic Attributes and Functions ----------------------------------- - -The remainder of the attributes and functions in this section may only be called -from within a CUDA Kernel. - -Thread Indexing -~~~~~~~~~~~~~~~ - -.. attribute:: numba.cuda.threadIdx - - The thread indices in the current thread block, accessed through the - attributes ``x``, ``y``, and ``z``. 
Each index is an integer spanning the - range from 0 inclusive to the corresponding value of the attribute in - :attr:`numba.cuda.blockDim` exclusive. - -.. attribute:: numba.cuda.blockIdx - - The block indices in the grid of thread blocks, accessed through the - attributes ``x``, ``y``, and ``z``. Each index is an integer spanning the - range from 0 inclusive to the corresponding value of the attribute in - :attr:`numba.cuda.gridDim` exclusive. - -.. attribute:: numba.cuda.blockDim - - The shape of a block of threads, as declared when instantiating the - kernel. This value is the same for all threads in a given kernel, even - if they belong to different blocks (i.e. each block is "full"). - -.. attribute:: numba.cuda.gridDim - - The shape of the grid of blocks, accessed through the attributes ``x``, - ``y``, and ``z``. - -.. attribute:: numba.cuda.laneid - - The thread index in the current warp, as an integer spanning the range - from 0 inclusive to the :attr:`numba.cuda.warpsize` exclusive. - -.. attribute:: numba.cuda.warpsize - - The size in threads of a warp on the GPU. Currently this is always 32. - -.. function:: numba.cuda.grid(ndim) - - Return the absolute position of the current thread in the entire - grid of blocks. *ndim* should correspond to the number of dimensions - declared when instantiating the kernel. If *ndim* is 1, a single integer - is returned. If *ndim* is 2 or 3, a tuple of the given number of - integers is returned. - - Computation of the first integer is as follows:: - - cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x - - and is similar for the other two indices, but using the ``y`` and ``z`` - attributes. - -.. function:: numba.cuda.gridsize(ndim) - - Return the absolute size (or shape) in threads of the entire grid of - blocks. *ndim* should correspond to the number of dimensions declared when - instantiating the kernel. 
- - Computation of the first integer is as follows:: - - cuda.blockDim.x * cuda.gridDim.x - - and is similar for the other two indices, but using the ``y`` and ``z`` - attributes. - -Memory Management -~~~~~~~~~~~~~~~~~ - -.. function:: numba.cuda.shared.array(shape, dtype) - - Creates an array in the shared memory space of the CUDA kernel with - the given ``shape`` and ``dtype``. - - Returns an array with its content uninitialized. - - .. note:: All threads in the same thread block see the same array. - -.. function:: numba.cuda.local.array(shape, dtype) - - Creates an array in the local memory space of the CUDA kernel with the - given ``shape`` and ``dtype``. - - Returns an array with its content uninitialized. - - .. note:: Each thread sees a unique array. - -.. function:: numba.cuda.const.array_like(ary) - - Copies the ``ary`` into constant memory space on the CUDA kernel at compile - time. - - Returns an array like the ``ary`` argument. - - .. note:: All threads and blocks see the same array. - -Synchronization and Atomic Operations -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. function:: numba.cuda.atomic.add(array, idx, value) - - Perform ``array[idx] += value``. Support int32, int64, float32 and - float64 only. The ``idx`` argument can be an integer or a tuple of integer - indices for indexing into multiple dimensional arrays. The number of elements - in ``idx`` must match the number of dimensions of ``array``. - - Returns the value of ``array[idx]`` before storing the new value. - Behaves like an atomic load. - -.. function:: numba.cuda.atomic.max(array, idx, value) - - Perform ``array[idx] = max(array[idx], value)``. Support int32, int64, - float32 and float64 only. The ``idx`` argument can be an integer or a - tuple of integer indices for indexing into multiple dimensional arrays. - The number of elements in ``idx`` must match the number of dimensions of - ``array``. - - Returns the value of ``array[idx]`` before storing the new value. 
- Behaves like an atomic load. - - -.. function:: numba.cuda.syncthreads - - Synchronize all threads in the same thread block. This function implements - the same pattern as barriers in traditional multi-threaded programming: this - function waits until all threads in the block call it, at which point it - returns control to all its callers. - -.. function:: numba.cuda.syncthreads_count(predicate) - - An extension to :attr:`numba.cuda.syncthreads` where the return value is a count - of the threads where ``predicate`` is true. - -.. function:: numba.cuda.syncthreads_and(predicate) - - An extension to :attr:`numba.cuda.syncthreads` where 1 is returned if ``predicate`` is - true for all threads or 0 otherwise. - -.. function:: numba.cuda.syncthreads_or(predicate) - - An extension to :attr:`numba.cuda.syncthreads` where 1 is returned if ``predicate`` is - true for any thread or 0 otherwise. - - .. warning:: All syncthreads functions must be called by every thread in the - thread-block. Failing to do so may result in undefined behavior. - -Memory Fences -~~~~~~~~~~~~~ - -The memory fences are used to guarantee the effect of memory operations -are visible by other threads within the same thread-block, the same GPU device, -and the same system (across GPUs on global memory). Memory loads and stores -are guaranteed to not move across the memory fences by optimization passes. - -.. warning:: The memory fences are considered to be advanced API and most - usercases should use the thread barrier (e.g. ``syncthreads()``). - - - -.. function:: numba.cuda.threadfence - - A memory fence at device level (within the GPU). - -.. function:: numba.cuda.threadfence_block - - A memory fence at thread block level. - -.. function:: numba.cuda.threadfence_system - - - A memory fence at system level (across GPUs). - -Warp Intrinsics -~~~~~~~~~~~~~~~~~~ - -All warp level operations require at least CUDA 9. 
The argument ``membermask`` is -a 32 bit integer mask with each bit corresponding to a thread in the warp, with 1 -meaning the thread is in the subset of threads within the function call. The -``membermask`` must be all 1 if the GPU compute capability is below 7.x. - -.. function:: numba.cuda.syncwarp(membermask) - - Synchronize a masked subset of the threads in a warp. - -.. function:: numba.cuda.all_sync(membermask, predicate) - - If the ``predicate`` is true for all threads in the masked warp, then - a non-zero value is returned, otherwise 0 is returned. - -.. function:: numba.cuda.any_sync(membermask, predicate) - - If the ``predicate`` is true for any thread in the masked warp, then - a non-zero value is returned, otherwise 0 is returned. - -.. function:: numba.cuda.eq_sync(membermask, predicate) - - If the boolean ``predicate`` is the same for all threads in the masked warp, - then a non-zero value is returned, otherwise 0 is returned. - -.. function:: numba.cuda.ballot_sync(membermask, predicate) - - Returns a mask of all threads in the warp whose ``predicate`` is true, - and are within the given mask. - -.. function:: numba.cuda.shfl_sync(membermask, value, src_lane) - - Shuffles ``value`` across the masked warp and returns the ``value`` - from ``src_lane``. If this is outside the warp, then the - given ``value`` is returned. - -.. function:: numba.cuda.shfl_up_sync(membermask, value, delta) - - Shuffles ``value`` across the masked warp and returns the ``value`` - from ``laneid - delta``. If this is outside the warp, then the - given ``value`` is returned. - -.. function:: numba.cuda.shfl_down_sync(membermask, value, delta) - - Shuffles ``value`` across the masked warp and returns the ``value`` - from ``laneid + delta``. If this is outside the warp, then the - given ``value`` is returned. - -.. 
function:: numba.cuda.shfl_xor_sync(membermask, value, lane_mask) - - Shuffles ``value`` across the masked warp and returns the ``value`` - from ``laneid ^ lane_mask``. - -.. function:: numba.cuda.match_any_sync(membermask, value, lane_mask) - - Returns a mask of threads that have the same ``value`` as the given ``value`` - from within the masked warp. - -.. function:: numba.cuda.match_all_sync(membermask, value, lane_mask) - - Returns a tuple of (mask, pred), where mask is a mask of threads that have - the same ``value`` as the given ``value`` from within the masked warp, if they - all have the same value, otherwise it is 0. And pred is a boolean of whether - or not all threads in the masked warp have the same value. - - -Integer Intrinsics -~~~~~~~~~~~~~~~~~~ - -A subset of the CUDA Math API's integer intrinsics are available. For further -documentation, including semantics, please refer to the `CUDA Toolkit -documentation -`_. - - -.. function:: numba.cuda.popc - - Returns the number of set bits in the given value. - -.. function:: numba.cuda.brev - - Reverses the bit pattern of an integer value, for example 0b10110110 - becomes 0b01101101. - -.. function:: numba.cuda.clz - - Counts the number of leading zeros in a value. - -.. function:: numba.cuda.ffs - - Find the position of the least significant bit set to 1 in an integer. - -Control Flow Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -A subset of CUDA's control flow instructions are directly available as -intrinsics. Avoiding branches is a key way to improve CUDA performance, and -using these intrinsics means you don't have to rely on the ``nvcc`` optimizer -identifying and removing branches. For further documentation, including -semantics, please refer to the `relevant CUDA Toolkit documentation -`_. - - -.. function:: numba.cuda.selp - - Select between two expressions, depending on the value of the first - argument. Similar to LLVM's ``select`` instruction. 
diff --git a/numba/docs/source/cuda-reference/memory.rst b/numba/docs/source/cuda-reference/memory.rst deleted file mode 100644 index caf201227..000000000 --- a/numba/docs/source/cuda-reference/memory.rst +++ /dev/null @@ -1,21 +0,0 @@ -Memory Management -================= - -.. autofunction:: numba.cuda.to_device -.. autofunction:: numba.cuda.device_array -.. autofunction:: numba.cuda.device_array_like -.. autofunction:: numba.cuda.pinned_array -.. autofunction:: numba.cuda.mapped_array -.. autofunction:: numba.cuda.pinned -.. autofunction:: numba.cuda.mapped - -Device Objects --------------- - -.. autoclass:: numba.cuda.cudadrv.devicearray.DeviceNDArray - :members: copy_to_device, copy_to_host, is_c_contiguous, is_f_contiguous, - ravel, reshape, split -.. autoclass:: numba.cuda.cudadrv.devicearray.DeviceRecord - :members: copy_to_device, copy_to_host -.. autoclass:: numba.cuda.cudadrv.devicearray.MappedNDArray - :members: copy_to_device, copy_to_host, split diff --git a/numba/docs/source/cuda/cuda_array_interface.rst b/numba/docs/source/cuda/cuda_array_interface.rst deleted file mode 100644 index 99c9ae754..000000000 --- a/numba/docs/source/cuda/cuda_array_interface.rst +++ /dev/null @@ -1,76 +0,0 @@ -.. _cuda-array-interface: - -==================== -CUDA Array Interface -==================== - -The *cuda array interface* is created for interoperability between different -implementations of GPU array-like objects in various projects. The idea is -borrowed from the `numpy array interface`_. - - -.. note:: - Currently, we only define the Python-side interface. In the future, we may - add a C-side interface for efficient exchange of the information in - compiled code. - - -Python Interface Specification -============================== - -.. note:: Experimental feature. Specification may change. 
- -The ``__cuda_array_interface__`` attribute is a dictionary-like object that -must contain the following entries: - -- **shape**: ``(integer, ...)`` - - A tuple of `int` (or `long`) representing the size of each dimension. - -- **typestr**: `str` - - The type string. This has the same definition as *typestr* in the - `numpy array interface`_. - -- **data**: `(integer, boolean)` - - The **data** is a 2-tuple. The first element data pointer - as a Python `int` (or `long`). The data must be device-accessible. - The second element is the read-only flag as a Python `bool`. - - Because the user of the interface may or may not be in the same context, - the most common case is to use ``cuPointerGetAttribute`` with - ``CU_POINTER_ATTRIBUTE_DEVICE_POINTER`` in the CUDA driver API (or the - equivalent CUDA Runtime API) to retrieve a device pointer that - is usable in the currently active context. - -- **version**: `integer` - - An integer for the version of the interface being exported. - The current version is *0* since it is still experimental. - - -The following are optional entries: - -- **strides**: ``None`` or ``(integer, ...)`` - - A tuple of `int` (or `long`) representing the number of bytes to skip to - access the next element at each dimension. If it is ``None``, the array is - assumed to be in C-contiguous layout. - -- **descr** - - This is for describing more complicated types. This follows the same - specification as in the `numpy array interface`_. - - -Additional information about the data pointer can be retrieved using -``cuPointerGetAttribute`` or ``cudaPointerGetAttributes``. Such information -include: - -- the CUDA context that owns the pointer; -- is the pointer host-accessible? -- is the pointer a managed memory? - - -.. 
_numpy array interface: https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.interface.html#__array_interface__ diff --git a/numba/docs/source/cuda/cudapysupported.rst b/numba/docs/source/cuda/cudapysupported.rst deleted file mode 100644 index 23901cd87..000000000 --- a/numba/docs/source/cuda/cudapysupported.rst +++ /dev/null @@ -1,207 +0,0 @@ -======================================== -Supported Python features in CUDA Python -======================================== - -This page lists the Python features supported in the CUDA Python. This includes -all kernel and device functions compiled with ``@cuda.jit`` and other higher -level Numba decorators that targets the CUDA GPU. - -Language -======== - -Execution Model ---------------- - -CUDA Python maps directly to the *single-instruction multiple-thread* -execution (SIMT) model of CUDA. Each instruction is implicitly -executed by multiple threads in parallel. With this execution model, array -expressions are less useful because we don't want multiple threads to perform -the same task. Instead, we want threads to perform a task in a cooperative -fashion. - -For details please consult the -`CUDA Programming Guide -`_. - -Constructs ----------- - -The following Python constructs are not supported: - -* Exception handling (``try .. except``, ``try .. finally``) -* Context management (the ``with`` statement) -* Comprehensions (either list, dict, set or generator comprehensions) -* Generator (any ``yield`` statements) - -The ``raise`` and ``assert`` statements are supported. -See :ref:`nopython language support `. - -Built-in types -=============== - -The following built-in types support are inherited from CPU nopython mode. - -* int -* float -* complex -* bool -* None -* tuple - -See :ref:`nopython built-in types `. 
- - -Built-in functions -================== - -The following built-in functions are supported: - -* :func:`abs` -* :class:`bool` -* :class:`complex` -* :func:`enumerate` -* :class:`float` -* :class:`int`: only the one-argument form -* :func:`len` -* :func:`min`: only the multiple-argument form -* :func:`max`: only the multiple-argument form -* :class:`range`: semantics are similar to those of Python 3 even in Python 2: - a range object is returned instead of an array of values. -* :func:`round` -* :func:`zip` - - -Standard library modules -======================== - - -``cmath`` ---------- - -The following functions from the :mod:`cmath` module are supported: - -* :func:`cmath.acos` -* :func:`cmath.acosh` -* :func:`cmath.asin` -* :func:`cmath.asinh` -* :func:`cmath.atan` -* :func:`cmath.atanh` -* :func:`cmath.cos` -* :func:`cmath.cosh` -* :func:`cmath.exp` -* :func:`cmath.isfinite` -* :func:`cmath.isinf` -* :func:`cmath.isnan` -* :func:`cmath.log` -* :func:`cmath.log10` -* :func:`cmath.phase` -* :func:`cmath.polar` -* :func:`cmath.rect` -* :func:`cmath.sin` -* :func:`cmath.sinh` -* :func:`cmath.sqrt` -* :func:`cmath.tan` -* :func:`cmath.tanh` - -``math`` --------- - -The following functions from the :mod:`math` module are supported: - -* :func:`math.acos` -* :func:`math.asin` -* :func:`math.atan` -* :func:`math.arctan` -* :func:`math.acosh` -* :func:`math.asinh` -* :func:`math.atanh` -* :func:`math.cos` -* :func:`math.sin` -* :func:`math.tan` -* :func:`math.hypot` -* :func:`math.cosh` -* :func:`math.sinh` -* :func:`math.tanh` -* :func:`math.atan2` -* :func:`math.erf` -* :func:`math.erfc` -* :func:`math.exp` -* :func:`math.expm1` -* :func:`math.fabs` -* :func:`math.gamma` -* :func:`math.lgamma` -* :func:`math.log` -* :func:`math.log10` -* :func:`math.log1p` -* :func:`math.sqrt` -* :func:`math.pow` -* :func:`math.ceil` -* :func:`math.floor` -* :func:`math.copysign` -* :func:`math.fmod` -* :func:`math.isnan` -* :func:`math.isinf` - - -``operator`` ------------- - -The 
following functions from the :mod:`operator` module are supported: - -* :func:`operator.add` -* :func:`operator.and_` -* :func:`operator.div` (Python 2 only) -* :func:`operator.eq` -* :func:`operator.floordiv` -* :func:`operator.ge` -* :func:`operator.gt` -* :func:`operator.iadd` -* :func:`operator.iand` -* :func:`operator.idiv` (Python 2 only) -* :func:`operator.ifloordiv` -* :func:`operator.ilshift` -* :func:`operator.imod` -* :func:`operator.imul` -* :func:`operator.invert` -* :func:`operator.ior` -* :func:`operator.ipow` -* :func:`operator.irshift` -* :func:`operator.isub` -* :func:`operator.itruediv` -* :func:`operator.ixor` -* :func:`operator.le` -* :func:`operator.lshift` -* :func:`operator.lt` -* :func:`operator.mod` -* :func:`operator.mul` -* :func:`operator.ne` -* :func:`operator.neg` -* :func:`operator.not_` -* :func:`operator.or_` -* :func:`operator.pos` -* :func:`operator.pow` -* :func:`operator.rshift` -* :func:`operator.sub` -* :func:`operator.truediv` -* :func:`operator.xor` - - -Numpy support -============= - -Due to the CUDA programming model, dynamic memory allocation inside a kernel is -inefficient and is often not needed. Numba disallows any memory allocating features. -This disables a large number of NumPy APIs. For best performance, users should write -code such that each thread is dealing with a single element at a time. - -Supported numpy features: - -* accessing `ndarray` attributes `.shape`, `.strides`, `.ndim`, `.size`, etc.. -* scalar ufuncs that have equivalents in the `math` module; i.e. ``np.sin(x[0])``, where x is a 1D array. -* indexing and slicing works. - -Unsupported numpy features: - -* array creation APIs. -* array methods. -* functions that returns a new array. 
diff --git a/numba/docs/source/cuda/device-functions.rst b/numba/docs/source/cuda/device-functions.rst deleted file mode 100644 index 4fba8c66f..000000000 --- a/numba/docs/source/cuda/device-functions.rst +++ /dev/null @@ -1,15 +0,0 @@ - -Writing Device Functions -======================== - -CUDA device functions can only be invoked from within the device (by a kernel -or another device function). To define a device function:: - - from numba import cuda - - @cuda.jit(device=True) - def a_device_function(a, b): - return a + b - -Unlike a kernel function, a device function can return a value like normal -functions. diff --git a/numba/docs/source/cuda/device-management.rst b/numba/docs/source/cuda/device-management.rst deleted file mode 100644 index 605db5a2b..000000000 --- a/numba/docs/source/cuda/device-management.rst +++ /dev/null @@ -1,76 +0,0 @@ - -Device management -================= - -For multi-GPU machines, users may want to select which GPU to use. -By default the CUDA driver selects the fastest GPU as the device 0, -which is the default device used by Numba. - -The features introduced on this page are generally not of interest -unless working with systems hosting/offering more than one CUDA-capable GPU. - -Device Selection ----------------- - -If at all required, device selection must be done before any CUDA feature is -used. - -:: - - from numba import cuda - cuda.select_device(0) - -The device can be closed by: - -:: - - cuda.close() - -Users can then create a new context with another device. - -:: - - cuda.select_device(1) # assuming we have 2 GPUs - - -.. function:: numba.cuda.select_device(device_id) - :noindex: - - Create a new CUDA context for the selected *device_id*. *device_id* - should be the number of the device (starting from 0; the device order - is determined by the CUDA libraries). The context is associated with - the current thread. Numba currently allows only one context per thread. 
- - If successful, this function returns a device instance. - - .. XXX document device instances? - - -.. function:: numba.cuda.close - :noindex: - - Explicitly close all contexts in the current thread. - - .. note:: - Compiled functions are associated with the CUDA context. - This makes it not very useful to close and create new devices, though it - is certainly useful for choosing which device to use when the machine - has multiple GPUs. - -The Device List -=============== - -The Device List is a list of all the GPUs in the system, and can be indexed to -obtain a context manager that ensures execution on the selected GPU. - -.. attribute:: numba.cuda.gpus - :noindex: -.. attribute:: numba.cuda.cudadrv.devices.gpus - -:py:data:`.gpus` is an instance of the :class:`_DeviceList` class, from which -the current GPU context can also be retrieved: - -.. autoclass:: numba.cuda.cudadrv.devices._DeviceList - :members: current - :noindex: - diff --git a/numba/docs/source/cuda/examples.rst b/numba/docs/source/cuda/examples.rst deleted file mode 100644 index 17361ae4c..000000000 --- a/numba/docs/source/cuda/examples.rst +++ /dev/null @@ -1,86 +0,0 @@ - -======== -Examples -======== - -.. _cuda-matmul: - -Matrix multiplication -===================== - -Here is a naive implementation of matrix multiplication using a CUDA kernel:: - - @cuda.jit - def matmul(A, B, C): - """Perform square matrix multiplication of C = A * B - """ - i, j = cuda.grid(2) - if i < C.shape[0] and j < C.shape[1]: - tmp = 0. - for k in range(A.shape[1]): - tmp += A[i, k] * B[k, j] - C[i, j] = tmp - - -This implementation is straightforward and intuitive but performs poorly, -because the same matrix elements will be loaded multiple times from device -memory, which is slow (some devices may have transparent data caches, but -they may not be large enough to hold the entire inputs at once). - -It will be faster if we use a blocked algorithm to reduce accesses to the -device memory. 
CUDA provides a fast :ref:`shared memory ` -for threads in a block to cooperatively compute on a task. The following -implements a faster version of the square matrix multiplication using shared -memory:: - - from numba import cuda, float32 - - # Controls threads per block and shared memory usage. - # The computation will be done on blocks of TPBxTPB elements. - TPB = 16 - - @cuda.jit - def fast_matmul(A, B, C): - # Define an array in the shared memory - # The size and type of the arrays must be known at compile time - sA = cuda.shared.array(shape=(TPB, TPB), dtype=float32) - sB = cuda.shared.array(shape=(TPB, TPB), dtype=float32) - - x, y = cuda.grid(2) - - tx = cuda.threadIdx.x - ty = cuda.threadIdx.y - bpg = cuda.gridDim.x # blocks per grid - - if x >= C.shape[0] or y >= C.shape[1]: - # Quit if (x, y) is outside of valid C boundary - return - - # Each thread computes one element in the result matrix. - # The dot product is chunked into dot products of TPB-long vectors. - tmp = 0. - for i in range(bpg): - # Preload data into shared memory - sA[tx, ty] = A[x, ty + i * TPB] - sB[tx, ty] = B[tx + i * TPB, y] - - # Wait until all threads finish preloading - cuda.syncthreads() - - # Computes partial product on the shared memory - for j in range(TPB): - tmp += sA[tx, j] * sB[j, ty] - - # Wait until all threads finish computing - cuda.syncthreads() - - C[x, y] = tmp - -Because the shared memory is a limited resource, the code preloads a small -block at a time from the input arrays. Then, it calls -:func:`~numba.cuda.syncthreads` to wait until all threads have finished -preloading and before doing the computation on the shared memory. -It synchronizes again after the computation to ensure all threads -have finished with the data in shared memory before overwriting it -in the next loop iteration. 
- diff --git a/numba/docs/source/cuda/faq.rst b/numba/docs/source/cuda/faq.rst deleted file mode 100644 index 5cb936981..000000000 --- a/numba/docs/source/cuda/faq.rst +++ /dev/null @@ -1,20 +0,0 @@ - -.. _cudafaq: - -================================================= -CUDA Frequently Asked Questions -================================================= - -nvprof reports "No kernels were profiled" ------------------------------------------ - -When using the ``nvprof`` tool to profile Numba jitted code for the CUDA -target, the output contains ``No kernels were profiled`` but there are clearly -running kernels present, what is going on? - -This is quite likely due to the profiling data not being flushed on program -exit, see the `NVIDIA CUDA documentation -`_ for -details. To fix this simply add a call to ``numba.cuda.profile_stop()`` prior -to the exit point in your program (or wherever you want to stop profiling). -For more on CUDA profiling support in Numba, see :ref:`cuda-profiling`. diff --git a/numba/docs/source/cuda/index.rst b/numba/docs/source/cuda/index.rst deleted file mode 100644 index d12ced469..000000000 --- a/numba/docs/source/cuda/index.rst +++ /dev/null @@ -1,21 +0,0 @@ - -Numba for CUDA GPUs -=================== - -.. toctree:: - - overview.rst - kernels.rst - memory.rst - device-functions.rst - cudapysupported.rst - intrinsics.rst - random.rst - device-management.rst - examples.rst - simulator.rst - reduction.rst - ufunc.rst - ipc.rst - cuda_array_interface.rst - faq.rst diff --git a/numba/docs/source/cuda/intrinsics.rst b/numba/docs/source/cuda/intrinsics.rst deleted file mode 100644 index 1bf402a62..000000000 --- a/numba/docs/source/cuda/intrinsics.rst +++ /dev/null @@ -1,60 +0,0 @@ - -Supported Atomic Operations -=========================== - -Numba provides access to some of the atomic operations supported in CUDA, in the -:class:`numba.cuda.atomic` class. - -Those that are presently implemented are as follows: - -.. 
automodule:: numba.cuda - :members: atomic - :noindex: - -Example -''''''' - -The following code demonstrates the use of :class:`numba.cuda.atomic.max` to -find the maximum value in an array. Note that this is not the most efficient way -of finding a maximum in this case, but that it serves as an example:: - - from numba import cuda - import numpy as np - - @cuda.jit - def max_example(result, values): - """Find the maximum value in values and store in result[0]""" - tid = cuda.threadIdx.x - bid = cuda.blockIdx.x - bdim = cuda.blockDim.x - i = (bid * bdim) + tid - cuda.atomic.max(result, 0, values[i]) - - - arr = np.random.rand(16384) - result = np.zeros(1, dtype=np.float64) - - max_example[256,64](result, arr) - print(result[0]) # Found using cuda.atomic.max - print(max(arr)) # Print max(arr) for comparison (should be equal!) - - -Multiple dimension arrays are supported by using a tuple of ints for the index:: - - - @cuda.jit - def max_example_3d(result, values): - """ - Find the maximum value in values and store in result[0]. - Both result and values are 3d arrays. - """ - i, j, k = cuda.grid(3) - # Atomically store to result[0,1,2] from values[i, j, k] - cuda.atomic.max(result, (0, 1, 2), values[i, j, k]) - - arr = np.random.rand(1000).reshape(10,10,10) - result = np.zeros((3, 3, 3), dtype=np.float64) - max_example_3d[(2, 2, 2), (5, 5, 5)](result, arr) - print(result[0, 1, 2], '==', np.max(arr)) - - diff --git a/numba/docs/source/cuda/ipc.rst b/numba/docs/source/cuda/ipc.rst deleted file mode 100644 index 5db02ef40..000000000 --- a/numba/docs/source/cuda/ipc.rst +++ /dev/null @@ -1,35 +0,0 @@ -=================== -Sharing CUDA Memory -=================== - -.. _cuda-ipc-memory: - -Sharing between processes -======================= - -.. warning:: This feature is limited to Linux only. - - -Export device array to another process --------------------------------------- - -A device array can be shared with another process in the same machine using -the CUDA IPC API. 
To do so, use the ``.get_ipc_handle()`` method on the device -array to get a ``IpcArrayHandle`` object, which can be transferred to another -process. - - -.. automethod:: numba.cuda.cudadrv.devicearray.DeviceNDArray.get_ipc_handle - :noindex: - -.. autoclass:: numba.cuda.cudadrv.devicearray.IpcArrayHandle - :members: open, close - - -Import IPC memory from another process --------------------------------------- - -The following function is used to open IPC handle from another process -as a device array. - -.. automethod:: numba.cuda.open_ipc_array diff --git a/numba/docs/source/cuda/kernels.rst b/numba/docs/source/cuda/kernels.rst deleted file mode 100644 index fd139ef26..000000000 --- a/numba/docs/source/cuda/kernels.rst +++ /dev/null @@ -1,229 +0,0 @@ - -==================== -Writing CUDA Kernels -==================== - -Introduction -============ - -CUDA has an execution model unlike the traditional sequential model used -for programming CPUs. In CUDA, the code you write will be executed by -multiple threads at once (often hundreds or thousands). Your solution will -be modeled by defining a thread hierarchy of *grid*, *blocks* and *threads*. - -Numba's CUDA support exposes facilities to declare and manage this -hierarchy of threads. The facilities are largely similar to those -exposed by NVidia's CUDA C language. - -Numba also exposes three kinds of GPU memory: global :ref:`device memory -` (the large, relatively slow -off-chip memory that's connected to the GPU itself), on-chip -:ref:`shared memory ` and :ref:`local memory `. -For all but the simplest algorithms, it is important that you carefully -consider how to use and access memory in order to minimize bandwidth -requirements and contention. - - -Kernel declaration -================== - -A *kernel function* is a GPU function that is meant to be called from CPU -code (*). 
It gives it two fundamental characteristics: - -* kernels cannot explicitly return a value; all result data must be written - to an array passed to the function (if computing a scalar, you will - probably pass a one-element array); - -* kernels explicitly declare their thread hierarchy when called: i.e. - the number of thread blocks and the number of threads per block - (note that while a kernel is compiled once, it can be called multiple - times with different block sizes or grid sizes). - -At first sight, writing a CUDA kernel with Numba looks very much like -writing a :term:`JIT function` for the CPU:: - - @cuda.jit - def increment_by_one(an_array): - """ - Increment all array elements by one. - """ - # code elided here; read further for different implementations - -(*) Note: newer CUDA devices support device-side kernel launching; this feature -is called *dynamic parallelism* but Numba does not support it currently) - - -.. _cuda-kernel-invocation: - -Kernel invocation -================= - -A kernel is typically launched in the following way:: - - threadsperblock = 32 - blockspergrid = (an_array.size + (threadsperblock - 1)) // threadsperblock - increment_by_one[blockspergrid, threadsperblock](an_array) - -We notice two steps here: - -* Instantiate the kernel proper, by specifying a number of blocks - (or "blocks per grid"), and a number of threads per block. The product - of the two will give the total number of threads launched. Kernel - instantiation is done by taking the compiled kernel function - (here ``increment_by_one``) and indexing it with a tuple of integers. - -* Running the kernel, by passing it the input array (and any separate - output arrays if necessary). By default, running a kernel is synchronous: - the function returns when the kernel has finished executing and the - data is synchronized back. 
- -Choosing the block size ------------------------ - -It might seem curious to have a two-level hierarchy when declaring the -number of threads needed by a kernel. The block size (i.e. number of -threads per block) is often crucial: - -* On the software side, the block size determines how many threads - share a given area of :ref:`shared memory `. - -* On the hardware side, the block size must be large enough for full - occupation of execution units; recommendations can be found in the - `CUDA C Programming Guide`_. - -Multi-dimensional blocks and grids ----------------------------------- - -To help deal with multi-dimensional arrays, CUDA allows you to specify -multi-dimensional blocks and grids. In the example above, you could -make ``blockspergrid`` and ``threadsperblock`` tuples of one, two -or three integers. Compared to 1D declarations of equivalent sizes, -this doesn't change anything to the efficiency or behaviour of generated -code, but can help you write your algorithms in a more natural way. - - -Thread positioning -================== - -When running a kernel, the kernel function's code is executed by every -thread once. It therefore has to know which thread it is in, in order -to know which array element(s) it is responsible for (complex algorithms -may define more complex responsibilities, but the underlying principle -is the same). - -One way is for the thread to determine its position in the grid and block -and manually compute the corresponding array position:: - - @cuda.jit - def increment_by_one(an_array): - # Thread id in a 1D block - tx = cuda.threadIdx.x - # Block id in a 1D grid - ty = cuda.blockIdx.x - # Block width, i.e. number of threads per block - bw = cuda.blockDim.x - # Compute flattened index inside the array - pos = tx + ty * bw - if pos < an_array.size: # Check array boundaries - an_array[pos] += 1 - -.. 
note:: Unless you are sure the block size and grid size is a divisor - of your array size, you **must** check boundaries as shown above. - -:attr:`.threadIdx`, :attr:`.blockIdx`, :attr:`.blockDim` and :attr:`.gridDim` -are special objects provided by the CUDA backend for the sole purpose of -knowing the geometry of the thread hierarchy and the position of the -current thread within that geometry. - -These objects can be 1D, 2D or 3D, depending on how the kernel was -:ref:`invoked `. To access the value at each -dimension, use the ``x``, ``y`` and ``z`` attributes of these objects, -respectively. - -.. attribute:: numba.cuda.threadIdx - :noindex: - - The thread indices in the current thread block. For 1D blocks, the index - (given by the ``x`` attribute) is an integer spanning the range from 0 - inclusive to :attr:`numba.cuda.blockDim` exclusive. A similar rule - exists for each dimension when more than one dimension is used. - -.. attribute:: numba.cuda.blockDim - :noindex: - - The shape of the block of threads, as declared when instantiating the - kernel. This value is the same for all threads in a given kernel, even - if they belong to different blocks (i.e. each block is "full"). - -.. attribute:: numba.cuda.blockIdx - :noindex: - - The block indices in the grid of threads launched by a kernel. For a 1D grid, - the index (given by the ``x`` attribute) is an integer spanning the range - from 0 inclusive to :attr:`numba.cuda.gridDim` exclusive. A similar rule - exists for each dimension when more than one dimension is used. - -.. attribute:: numba.cuda.gridDim - :noindex: - - The shape of the grid of blocks, i.e. the total number of blocks launched - by this kernel invocation, as declared when instantiating the kernel. - -Absolute positions ------------------- - -Simple algorithms will tend to always use thread indices in the -same way as shown in the example above. Numba provides additional facilities -to automate such calculations: - -.. 
function:: numba.cuda.grid(ndim) - :noindex: - - Return the absolute position of the current thread in the entire - grid of blocks. *ndim* should correspond to the number of dimensions - declared when instantiating the kernel. If *ndim* is 1, a single integer - is returned. If *ndim* is 2 or 3, a tuple of the given number of - integers is returned. - -.. function:: numba.cuda.gridsize(ndim) - :noindex: - - Return the absolute size (or shape) in threads of the entire grid of - blocks. *ndim* has the same meaning as in :func:`.grid` above. - -With these functions, the incrementation example can become:: - - @cuda.jit - def increment_by_one(an_array): - pos = cuda.grid(1) - if pos < an_array.size: - an_array[pos] += 1 - -The same example for a 2D array and grid of threads would be:: - - @cuda.jit - def increment_a_2D_array(an_array): - x, y = cuda.grid(2) - if x < an_array.shape[0] and y < an_array.shape[1]: - an_array[x, y] += 1 - -Note the grid computation when instantiating the kernel must still be -done manually, for example:: - - from __future__ import division # for Python 2 - - threadsperblock = (16, 16) - blockspergrid_x = math.ceil(an_array.shape[0] / threadsperblock[0]) - blockspergrid_y = math.ceil(an_array.shape[1] / threadsperblock[1]) - blockspergrid = (blockspergrid_x, blockspergrid_y) - increment_a_2D_array[blockspergrid, threadsperblock](an_array) - - -Further Reading ----------------- - -Please refer to the `CUDA C Programming Guide`_ for a detailed discussion -of CUDA programming. - - -.. _CUDA C Programming Guide: http://docs.nvidia.com/cuda/cuda-c-programming-guide diff --git a/numba/docs/source/cuda/memory.rst b/numba/docs/source/cuda/memory.rst deleted file mode 100644 index 6a3519b63..000000000 --- a/numba/docs/source/cuda/memory.rst +++ /dev/null @@ -1,212 +0,0 @@ -================= -Memory management -================= - -.. 
_cuda-device-memory: - -Data transfer -============= - -Even though Numba can automatically transfer NumPy arrays to the device, -it can only do so conservatively by always transferring device memory back to -the host when a kernel finishes. To avoid the unnecessary transfer for -read-only arrays, you can use the following APIs to manually control the -transfer: - -.. autofunction:: numba.cuda.device_array - :noindex: -.. autofunction:: numba.cuda.device_array_like - :noindex: -.. autofunction:: numba.cuda.to_device - :noindex: - -In addition to the device arrays, Numba can consume any object that implements -:ref:`cuda array interface `. These objects also can be -manually converted into a Numba device array by creating a view of the GPU -buffer using the following APIs: - -.. autofunction:: numba.cuda.as_cuda_array - :noindex: -.. autofunction:: numba.cuda.is_cuda_array - :noindex: - - -Device arrays -------------- - -Device array references have the following methods. These methods are to be -called in host code, not within CUDA-jitted functions. - -.. autoclass:: numba.cuda.cudadrv.devicearray.DeviceNDArray - :members: copy_to_host, is_c_contiguous, is_f_contiguous, ravel, reshape - :noindex: - - -.. note:: DeviceNDArray defines the :ref:`cuda array interface `. - - -Pinned memory -============= - -.. autofunction:: numba.cuda.pinned - :noindex: -.. autofunction:: numba.cuda.pinned_array - :noindex: - -Streams -======= - -.. autofunction:: numba.cuda.stream - :noindex: - -CUDA streams have the following methods: - -.. autoclass:: numba.cuda.cudadrv.driver.Stream - :members: synchronize, auto_synchronize - :noindex: - -.. _cuda-shared-memory: - -Shared memory and thread synchronization -======================================== - -A limited amount of shared memory can be allocated on the device to speed -up access to data, when necessary. That memory will be shared (i.e. 
both -readable and writable) amongst all threads belonging to a given block -and has faster access times than regular device memory. It also allows -threads to cooperate on a given solution. You can think of it as a -manually-managed data cache. - -The memory is allocated once for the duration of the kernel, unlike -traditional dynamic memory management. - -.. function:: numba.cuda.shared.array(shape, type) - :noindex: - - Allocate a shared array of the given *shape* and *type* on the device. - This function must be called on the device (i.e. from a kernel or - device function). *shape* is either an integer or a tuple of integers - representing the array's dimensions and must be a simple constant - expression. *type* is a :ref:`Numba type ` of the elements - needing to be stored in the array. - - The returned array-like object can be read and written to like any normal - device array (e.g. through indexing). - - A common pattern is to have each thread populate one element in the - shared array and then wait for all threads to finish using :func:`.syncthreads`. - - -.. function:: numba.cuda.syncthreads() - :noindex: - - Synchronize all threads in the same thread block. This function - implements the same pattern as `barriers `_ - in traditional multi-threaded programming: this function waits - until all threads in the block call it, at which point it returns - control to all its callers. - -.. seealso:: - :ref:`Matrix multiplication example `. - -.. _cuda-local-memory: - -Local memory -============ - -Local memory is an area of memory private to each thread. Using local -memory helps allocate some scratchpad area when scalar local variables -are not enough. The memory is allocated once for the duration of the kernel, -unlike traditional dynamic memory management. - -.. function:: numba.cuda.local.array(shape, type) - :noindex: - - Allocate a local array of the given *shape* and *type* on the device. 
- *shape* is either an integer or a tuple of integers representing the array's - dimensions and must be a simple constant expression. *type* is a - :ref:`Numba type ` of the elements needing to be stored in the - array. The array is private to the current thread. An array-like object is - returned which can be read and written to like any standard array - (e.g. through indexing). - -Constant memory -=============== - -Constant memory is an area of memory that is read only, cached and off-chip, it -is accessible by all threads and is host allocated. A method of -creating an array in constant memory is through the use of: - -.. function:: numba.cuda.const.array_like(arr) - :noindex: - - Allocate and make accessible an array in constant memory based on array-like - *arr*. - -SmartArrays (experimental) -========================== - -Numba provides an Array-like data type that manages data movement to -and from the device automatically. It can be used as a drop-in replacement for -`numpy.ndarray` in most cases, and is supported by Numba's JIT-compiler for both -'host' and 'cuda' target. - -.. comment: function:: numba.SmartArray(obj=None, copy=True, - shape=None, dtype=None, order=None, where='host') - -.. autoclass:: numba.SmartArray - :members: __init__, get, mark_changed - - -Thus, `SmartArray` objects may be passed as function arguments to jit-compiled -functions. Whenever a cuda.jit-compiled function is being executed, it will -trigger a data transfer to the GPU (unless the data are already there). But instead -of transferring the data back to the host after the function completes, it leaves -the data on the device and merely updates the host-side if there are any external -references to that. -Thus, if the next operation is another invocation of a cuda.jit-compiled function, -the data does not need to be transferred again, making the compound operation more -efficient (and making the use of the GPU advantageous even for smaller data sizes). 
- -Deallocation Behavior -===================== - -Deallocation of all CUDA resources is tracked on a per-context basis. -When the last reference to a device memory is dropped, the underlying memory -is scheduled to be deallocated. The deallocation does not occur immediately. -It is added to a queue of pending deallocations. This design has two benefits: - -1. Resource deallocation API may cause the device to synchronize; thus, breaking - any asynchronous execution. Deferring the deallocation could avoid latency - in performance critical code section. -2. Some deallocation errors may cause all the remaining deallocations to fail. - Continued deallocation errors can cause critical errors at the CUDA driver - level. In some cases, this could mean a segmentation fault in the CUDA - driver. In the worst case, this could cause the system GUI to freeze and - could only recover with a system reset. When an error occurs during a - deallocation, the remaining pending deallocations are cancelled. Any - deallocation error will be reported. When the process is terminated, the - CUDA driver is able to release all allocated resources by the terminated - process. - -The deallocation queue is flushed automatically as soon as the following events -occur: - -- An allocation failed due to out-of-memory error. Allocation is retried after - flushing all deallocations. -- The deallocation queue has reached its maximum size, which defaults to 10. - User can override by setting the environment variable - `NUMBA_CUDA_MAX_PENDING_DEALLOCS_COUNT`. For example, - `NUMBA_CUDA_MAX_PENDING_DEALLOCS_COUNT=20`, increases the limit to 20. -- The maximum accumulated byte size of resources that are pending deallocation - is reached. This defaults to 20% of the device memory capacity. - User can override by setting the environment variable - `NUMBA_CUDA_MAX_PENDING_DEALLOCS_RATIO`. For example, - `NUMBA_CUDA_MAX_PENDING_DEALLOCS_RATIO=0.5` sets the limit to 50% of the - capacity. 
- -Sometimes, it is desired to defer resource deallocation until a code section -ends. Most often, users want to avoid any implicit synchronization due to -deallocation. This can be done by using the following context manager: - -.. autofunction:: numba.cuda.defer_cleanup \ No newline at end of file diff --git a/numba/docs/source/cuda/overview.rst b/numba/docs/source/cuda/overview.rst deleted file mode 100644 index 09f10b0eb..000000000 --- a/numba/docs/source/cuda/overview.rst +++ /dev/null @@ -1,59 +0,0 @@ -======== -Overview -======== - -Numba supports CUDA GPU programming by directly compiling a restricted subset -of Python code into CUDA kernels and device functions following the CUDA -execution model. Kernels written in Numba appear to have direct access -to NumPy arrays. NumPy arrays are transferred between the CPU and the -GPU automatically. - - -Terminology -=========== - -Several important terms in the topic of CUDA programming are listed here: - -- *host*: the CPU -- *device*: the GPU -- *host memory*: the system main memory -- *device memory*: onboard memory on a GPU card -- *kernels*: a GPU function launched by the host and executed on the device -- *device function*: a GPU function executed on the device which can only be - called from the device (i.e. from a kernel or another device function) - - -Programming model -================= - -Most CUDA programming facilities exposed by Numba map directly to the CUDA -C language offered by NVidia. Therefore, it is recommended you read the -official `CUDA C programming guide `_. - - -Requirements -============ - -Supported GPUs --------------- - -Numba supports CUDA-enabled GPUs with compute capability 2.0 or above with an -up-to-date Nvidia driver. - -Software --------- - -You will need the CUDA toolkit installed. If you are using Conda, just -type:: - - $ conda install cudatoolkit - - -Missing CUDA Features -===================== - -Numba does not implement all features of CUDA, yet. 
Some missing features -are listed below: - -* dynamic parallelism -* texture memory diff --git a/numba/docs/source/cuda/random.rst b/numba/docs/source/cuda/random.rst deleted file mode 100644 index 9a5bc5149..000000000 --- a/numba/docs/source/cuda/random.rst +++ /dev/null @@ -1,66 +0,0 @@ - -.. _cuda-random: - -Random Number Generation -======================== - -Numba provides a random number generation algorithm that can be executed on -the GPU. Due to technical issues with how NVIDIA implemented cuRAND, however, -Numba's GPU random number generator is not based on cuRAND. Instead, Numba's -GPU RNG is an implementation of the `xoroshiro128+ algorithm -`_. The xoroshiro128+ algorithm has a period of -``2**128 - 1``, which is shorter than the period of the XORWOW algorithm -used by default in cuRAND, but xoroshiro128+ still passes the BigCrush tests -of random number generator quality. - -When using any RNG on the GPU, it is important to make sure that each thread -has its own RNG state, and they have been initialized to produce non-overlapping -sequences. The numba.cuda.random module provides a host function to do this, -as well as CUDA device functions to obtain uniformly or normally distributed -random numbers. - -.. note:: Numba (like cuRAND) uses the - `Box-Muller transform ` - to generate normally distributed random numbers from a uniform generator. - However, Box-Muller generates pairs of random numbers, and the current - implementation only returns one of them. As a result, generating normally - distributed values is half the speed of uniformly distributed values. - -.. 
automodule:: numba.cuda.random - :members: create_xoroshiro128p_states, init_xoroshiro128p_states, xoroshiro128p_uniform_float32, xoroshiro128p_uniform_float64, xoroshiro128p_normal_float32, xoroshiro128p_normal_float64 - :noindex: - -Example -''''''' - -Here is a sample program that uses the random number generator:: - - from __future__ import print_function, absolute_import - - from numba import cuda - from numba.cuda.random import create_xoroshiro128p_states, xoroshiro128p_uniform_float32 - import numpy as np - - @cuda.jit - def compute_pi(rng_states, iterations, out): - """Find the maximum value in values and store in result[0]""" - thread_id = cuda.grid(1) - - # Compute pi by drawing random (x, y) points and finding what - # fraction lie inside a unit circle - inside = 0 - for i in range(iterations): - x = xoroshiro128p_uniform_float32(rng_states, thread_id) - y = xoroshiro128p_uniform_float32(rng_states, thread_id) - if x**2 + y**2 <= 1.0: - inside += 1 - - out[thread_id] = 4.0 * inside / iterations - - threads_per_block = 64 - blocks = 24 - rng_states = create_xoroshiro128p_states(threads_per_block * blocks, seed=1) - out = np.zeros(threads_per_block * blocks, dtype=np.float32) - - compute_pi[blocks, threads_per_block](rng_states, 10000, out) - print('pi:', out.mean()) diff --git a/numba/docs/source/cuda/reduction.rst b/numba/docs/source/cuda/reduction.rst deleted file mode 100644 index 295b5f9e7..000000000 --- a/numba/docs/source/cuda/reduction.rst +++ /dev/null @@ -1,37 +0,0 @@ -GPU Reduction -============== - -Writing a reduction algorithm for CUDA GPU can be tricky. Numba provides a -``@reduce`` decorator for converting simple binary operation into a reduction -kernel. 
- -``@reduce`` ------------- - -Example:: - - import numpy - from numba import cuda - - @cuda.reduce - def sum_reduce(a, b): - return a + b - - A = (numpy.arange(1234, dtype=numpy.float64)) + 1 - expect = A.sum() # numpy sum reduction - got = sum_reduce(A) # cuda sum reduction - assert expect == got - -User can also use a lambda function:: - - sum_reduce = cuda.reduce(lambda a, b: a + b) - -class Reduce -------------- - -The ``reduce`` decorator creates an instance of the ``Reduce`` class. -(Currently, ``reduce`` is an alias to ``Reduce``, but this behavior is not -guaranteed.) - -.. autoclass:: numba.cuda.Reduce - :members: __init__, __call__ diff --git a/numba/docs/source/cuda/simulator.rst b/numba/docs/source/cuda/simulator.rst deleted file mode 100644 index f196c7426..000000000 --- a/numba/docs/source/cuda/simulator.rst +++ /dev/null @@ -1,85 +0,0 @@ - -.. _simulator: - -================================================= -Debugging CUDA Python with the CUDA Simulator -================================================= - -Numba includes a CUDA Simulator that implements most of the semantics in CUDA -Python using the Python interpreter and some additional Python code. This can -be used to debug CUDA Python code, either by adding print statements to your -code, or by using the debugger to step through the execution of an individual -thread. - -Execution of kernels is performed by the simulator one block at a time. One -thread is spawned for each thread in the block, and scheduling of the execution -of these threads is left up to the operating system. - -Using the simulator -=================== - -The simulator is enabled by setting the environment variable -:envvar:`NUMBA_ENABLE_CUDASIM` to 1. CUDA Python code may then be executed as -normal. The easiest way to use the debugger inside a kernel is to only stop a -single thread, otherwise the interaction with the debugger is difficult to -handle. 
For example, the kernel below will stop in the thread ``<<<(3,0,0), (1, -0, 0)>>>``:: - - @cuda.jit - def vec_add(A, B, out): - x = cuda.threadIdx.x - bx = cuda.blockIdx.x - bdx = cuda.blockDim.x - if x == 1 and bx == 3: - from pdb import set_trace; set_trace() - i = bx * bdx + x - out[i] = A[i] + B[i] - -when invoked with a one-dimensional grid and one-dimensional blocks. - -Supported features -================== - -The simulator aims to provide as complete a simulation of execution on a real -GPU as possible - in particular, the following are supported: - -* Atomic operations -* Constant memory -* Local memory -* Shared memory: declarations of shared memory arrays must be on separate source - lines, since the simulator uses source line information to keep track of - allocations of shared memory across threads. -* :func:`.syncthreads` is supported - however, in the case where divergent - threads enter different :func:`.syncthreads` calls, the launch will not fail, - but unexpected behaviour will occur. A future version of the simulator may - detect this condition. -* The stream API is supported, but all operations occur sequentially and - synchronously, unlike on a real device. Synchronising on a stream is therefore - a no-op. -* The event API is also supported, but provides no meaningful timing - information. -* Data transfer to and from the GPU - in particular, creating array objects with - :func:`.device_array` and :func:`.device_array_like`. The APIs for pinned memory - :func:`.pinned` and :func:`.pinned_array` are also supported, but no pinning - takes place. -* The driver API implementation of the list of GPU contexts (``cuda.gpus`` and - ``cuda.cudadrv.devices.gpus``) is supported, and reports a single GPU context. - This context can be closed and reset as the real one would. -* The :func:`.detect` function is supported, and reports one device called - `SIMULATOR`. 
- -Some limitations of the simulator include: - -* It does not perform type checking/type inference. If any argument types to a - jitted function are incorrect, or if the specification of the type of any - local variables are incorrect, this will not be detected by the simulator. -* Only one GPU is simulated. -* Multithreaded accesses to a single GPU are not supported, and will result in - unexpected behaviour. -* Most of the driver API is unimplemented. -* It is not possible to link PTX code with CUDA Python functions. -* Warps and warp-level operations are not yet implemented. - -Obviously, the speed of the simulator is also much lower than that of a real -device. It may be necessary to reduce the size of input data and the size of the -CUDA grid in order to make debugging with the simulator tractable. diff --git a/numba/docs/source/cuda/ufunc.rst b/numba/docs/source/cuda/ufunc.rst deleted file mode 100644 index 9983006f6..000000000 --- a/numba/docs/source/cuda/ufunc.rst +++ /dev/null @@ -1,154 +0,0 @@ -CUDA Ufuncs and Generalized Ufuncs -================================== - -This page describes the CUDA ufunc-like object. - -To support the programming pattern of CUDA programs, CUDA Vectorize and -GUVectorize cannot produce a conventional ufunc. Instead, a ufunc-like -object is returned. This object is a close analog but not fully -compatible with a regular NumPy ufunc. The CUDA ufunc adds support for -passing intra-device arrays (already on the GPU device) to reduce -traffic over the PCI-express bus. It also accepts a `stream` keyword -for launching in asynchronous mode. 
- -Example: Basic Example ------------------------- - -:: - - import math - from numba import vectorize, cuda - import numpy as np - - @vectorize(['float32(float32, float32, float32)', - 'float64(float64, float64, float64)'], - target='cuda') - def cu_discriminant(a, b, c): - return math.sqrt(b ** 2 - 4 * a * c) - - N = 1e+4 - dtype = np.float32 - - # prepare the input - A = np.array(np.random.sample(N), dtype=dtype) - B = np.array(np.random.sample(N) + 10, dtype=dtype) - C = np.array(np.random.sample(N), dtype=dtype) - - D = cu_discriminant(A, B, C) - - print(D) # print result - -Example: Calling Device Functions ----------------------------------- - -All CUDA ufunc kernels have the ability to call other CUDA device functions:: - - from numba import vectorize, cuda - - # define a device function - @cuda.jit('float32(float32, float32, float32)', device=True, inline=True) - def cu_device_fn(x, y, z): - return x ** y / z - - # define a ufunc that calls our device function - @vectorize(['float32(float32, float32, float32)'], target='cuda') - def cu_ufunc(x, y, z): - return cu_device_fn(x, y, z) - - -Generalized CUDA ufuncs ------------------------ - -Generalized ufuncs may be executed on the GPU using CUDA, analogous to -the CUDA ufunc functionality. This may be accomplished as follows:: - - from numba import guvectorize - - @guvectorize(['void(float32[:,:], float32[:,:], float32[:,:])'], - '(m,n),(n,p)->(m,p)', target='cuda') - def matmulcore(A, B, C): - ... - -There are times when the gufunc kernel uses too many of a GPU's -resources, which can cause the kernel launch to fail. The user can -explicitly control the maximum size of the thread block by setting -the `max_blocksize` attribute on the compiled gufunc object. - -:: - - from numba import guvectorize - - @guvectorize(..., target='cuda') - def very_complex_kernel(A, B, C): - ... - - very_complex_kernel.max_blocksize = 32 # limits to 32 threads per block - -.. 
comment - - Example: A Chunk at a Time - --------------------------- - - Partitioning your data into chunks allows computation and memory transfer - to be overlapped. This can increase the throughput of your ufunc and - enables your ufunc to operate on data that is larger than the memory - capacity of your GPU. For example: - - :: - - import math - from numba import vectorize, cuda - import numpy as np - - # the ufunc kernel - def discriminant(a, b, c): - return math.sqrt(b ** 2 - 4 * a * c) - - cu_discriminant = vectorize(['float32(float32, float32, float32)', - 'float64(float64, float64, float64)'], - target='cuda')(discriminant) - - N = 1e+8 - dtype = np.float32 - - # prepare the input - A = np.array(np.random.sample(N), dtype=dtype) - B = np.array(np.random.sample(N) + 10, dtype=dtype) - C = np.array(np.random.sample(N), dtype=dtype) - D = np.empty(A.shape, dtype=A.dtype) - - # create a CUDA stream - stream = cuda.stream() - - chunksize = 1e+6 - chunkcount = N // chunksize - - # partition numpy arrays into chunks - # no copying is performed - sA = np.split(A, chunkcount) - sB = np.split(B, chunkcount) - sC = np.split(C, chunkcount) - sD = np.split(D, chunkcount) - - device_ptrs = [] - - with stream.auto_synchronize(): - # every operation in this context with be launched asynchronously - # by using the CUDA stream - - # for each chunk - for a, b, c, d in zip(sA, sB, sC, sD): - # transfer to device - dA = cuda.to_device(a, stream) - dB = cuda.to_device(b, stream) - dC = cuda.to_device(c, stream) - dD = cuda.to_device(d, stream, copy=False) # no copying - # launch kernel - cu_discriminant(dA, dB, dC, out=dD, stream=stream) - # retrieve result - dD.copy_to_host(d, stream) - # store device pointers to prevent them from freeing before - # the kernel is scheduled - device_ptrs.extend([dA, dB, dC, dD]) - - # data is ready at this point inside D diff --git a/numba/docs/source/developer/architecture.rst b/numba/docs/source/developer/architecture.rst deleted file mode 
100644 index bce96d975..000000000 --- a/numba/docs/source/developer/architecture.rst +++ /dev/null @@ -1,944 +0,0 @@ - -.. _architecture: - -================== -Numba architecture -================== - -Introduction -============ - -Numba is a compiler for Python bytecode with optional type-specialization. - -Suppose you enter a function like this into the standard Python interpreter -(henceforward referred to as "CPython"):: - - def add(a, b): - return a + b - -The interpreter will immediately parse the function and convert it into a -bytecode representation that describes how the CPython interpreter should -execute the function at a low level. For the example above, it looks -something like this:: - - >>> import dis - >>> dis.dis(add) - 2 0 LOAD_FAST 0 (a) - 3 LOAD_FAST 1 (b) - 6 BINARY_ADD - 7 RETURN_VALUE - - -CPython uses a stack-based interpreter (much like an HP calculator), so the -code first pushes two local variables onto the stack. The ``BINARY_ADD`` -opcode pops the top two arguments off the stack and makes a Python C API -function call that is equivalent to calling ``a.__add__(b)``. The result is -then pushed onto the top of the interpreter stack. Finally, the -``RETURN_VALUE`` opcode returns value on the top of the stack as the result of -the function call. - -Numba can take this bytecode and compile it to machine code that performs the -same operations as the CPython interpreter, treating ``a`` and ``b`` as -generic Python objects. The full semantics of Python are preserved, and the -compiled function can be used with any kind of objects that have the add -operator defined. When a Numba function is compiled this way, we say that it -has been compiled in :term:`object mode`, because the code still manipulates -Python objects. - -Numba code compiled in object mode is not much faster than executing the -original Python function in the CPython interpreter. 
However, if we -specialize the function to only run with certain data types, Numba can -generate much shorter and more efficient code that manipulates the data -natively without any calls into the Python C API. When code has been compiled -for specific data types so that the function body no longer relies on the -Python runtime, we say the function has been compiled in :term:`nopython mode`. -Numeric code compiled in nopython mode can be hundreds of times faster -than the original Python. - - -Compiler architecture -===================== - -Like many compilers, Numba can be conceptually divided into a -*frontend* and a *backend*. - -The Numba *frontend* comprises the stages which analyze the Python bytecode, -translate it to :term:`Numba IR` and perform various transformations and -analysis steps on the IR. One of the key steps is :term:`type inference`. -The frontend must succeed in typing all variables unambiguously in order -for the backend to generate code in :term:`nopython mode`, because the -backend uses type information to match appropriate code generators with -the values they operate on. - -The Numba *backend* walks the Numba IR resulting from the frontend analyses -and exploits the type information deduced by the type inference phase to -produce the right LLVM code for each encountered operation. After LLVM -code is produced, the LLVM library is asked to optimize it and generate -native processor code for the final, native function. - -There are other pieces besides the compiler frontend and backend, such -as the caching machinery for JIT functions. Those pieces are not considered -in this document. - - -Contexts -======== - -Numba is quite flexible, allowing it to generate code for different hardware -architectures like CPUs and GPUs. In order to support these different -applications, Numba uses a *typing context* and a *target context*. 
- -A *typing context* is used in the compiler frontend to perform type inference -on operations and values in the function. Similar typing contexts could be -used for many architectures because for nearly all cases, typing inference -is hardware-independent. However, Numba currently has a different typing -context for each target. - -A *target context* is used to generate the specific instruction sequence -required to operate on the Numba types identified during type inference. -Target contexts are architecture-specific and are flexible in defining -the execution model and available Python APIs. For example, Numba has a "cpu" -and a "cuda" context for those two kinds of architecture, and a "parallel" -context which produces multithreaded CPU code. - - -Compiler stages -=============== - -The :func:`~numba.jit` decorator in Numba ultimately calls -``numba.compiler.compile_extra()`` which compiles the Python function in a -multi-stage process, described below. - -Stage 1: Analyze bytecode -------------------------- - -At the start of compilation, the function bytecode is passed to an instance of -the Numba interpreter (``numba.interpreter``). The interpreter object -analyzes the bytecode to find the control flow graph (``numba.controlflow``). -The control flow graph (CFG) describes the ways that execution can move from one -block to the next inside the function as a result of loops and branches. - -The data flow analysis (``numba.dataflow``) takes the control flow graph and -traces how values get pushed and popped off the Python interpreter stack for -different code paths. This is important to understand the lifetimes of -variables on the stack, which are needed in Stage 2. - -If you set the environment variable ``NUMBA_DUMP_CFG`` to 1, Numba will dump -the results of the control flow graph analysis to the screen. 
Our ``add()`` -example is pretty boring, since there is only one statement block:: - - CFG adjacency lists: - {0: []} - CFG dominators: - {0: set([0])} - CFG post-dominators: - {0: set([0])} - CFG back edges: [] - CFG loops: - {} - CFG node-to-loops: - {0: []} - -A function with more complex flow control will have a more interesting -control flow graph. This function:: - - def doloops(n): - acc = 0 - for i in range(n): - acc += 1 - if n == 10: - break - return acc - -compiles to this bytecode:: - - 9 0 LOAD_CONST 1 (0) - 3 STORE_FAST 1 (acc) - - 10 6 SETUP_LOOP 46 (to 55) - 9 LOAD_GLOBAL 0 (range) - 12 LOAD_FAST 0 (n) - 15 CALL_FUNCTION 1 - 18 GET_ITER - >> 19 FOR_ITER 32 (to 54) - 22 STORE_FAST 2 (i) - - 11 25 LOAD_FAST 1 (acc) - 28 LOAD_CONST 2 (1) - 31 INPLACE_ADD - 32 STORE_FAST 1 (acc) - - 12 35 LOAD_FAST 0 (n) - 38 LOAD_CONST 3 (10) - 41 COMPARE_OP 2 (==) - 44 POP_JUMP_IF_FALSE 19 - - 13 47 BREAK_LOOP - 48 JUMP_ABSOLUTE 19 - 51 JUMP_ABSOLUTE 19 - >> 54 POP_BLOCK - - 14 >> 55 LOAD_FAST 1 (acc) - 58 RETURN_VALUE - -The corresponding CFG for this bytecode is:: - - CFG adjacency lists: - {0: [6], 6: [19], 19: [54, 22], 22: [19, 47], 47: [55], 54: [55], 55: []} - CFG dominators: - {0: set([0]), - 6: set([0, 6]), - 19: set([0, 6, 19]), - 22: set([0, 6, 19, 22]), - 47: set([0, 6, 19, 22, 47]), - 54: set([0, 6, 19, 54]), - 55: set([0, 6, 19, 55])} - CFG post-dominators: - {0: set([0, 6, 19, 55]), - 6: set([6, 19, 55]), - 19: set([19, 55]), - 22: set([22, 55]), - 47: set([47, 55]), - 54: set([54, 55]), - 55: set([55])} - CFG back edges: [(22, 19)] - CFG loops: - {19: Loop(entries=set([6]), exits=set([54, 47]), header=19, body=set([19, 22]))} - CFG node-to-loops: - {0: [], 6: [], 19: [19], 22: [19], 47: [], 54: [], 55: []} - -The numbers in the CFG refer to the bytecode offsets shown just to the left -of the opcode names above. - -.. 
_arch_generate_numba_ir: - -Stage 2: Generate the Numba IR ------------------------------- - -Once the control flow and data analyses are complete, the Numba interpreter -can step through the bytecode and translate it into a Numba-internal -intermediate representation. This translation process changes the function -from a stack machine representation (used by the Python interpreter) to a -register machine representation (used by LLVM). - -Although the IR is stored in memory as a tree of objects, it can be serialized -to a string for debugging. If you set the environment variable -``NUMBA_DUMP_IR`` equal to 1, the Numba IR will be dumped to the screen. For -the ``add()`` function described above, the Numba IR looks like:: - - label 0: - a = arg(0, name=a) ['a'] - b = arg(1, name=b) ['b'] - $0.3 = a + b ['$0.3', 'a', 'b'] - del b [] - del a [] - $0.4 = cast(value=$0.3) ['$0.3', '$0.4'] - del $0.3 [] - return $0.4 ['$0.4'] - -The ``del`` instructions are produced by :ref:`live variable analysis`. -Those instructions ensure references are not leaked. -In :term:`nopython mode`, some objects are tracked by the numba runtime and -some are not. For tracked objects, a dereference operation is emitted; -otherwise, the instruction is a no-op. -In :term:`object mode` each variable contains an owned reference to a PyObject. - - -Stage 3: Macro expansion ------------------------- - -Now that the function has been translated into the Numba IR, macro expansion can -be performed. Macro expansion converts specific attributes that are known to -Numba into IR nodes representing function calls. This is initiated in the -``numba.compiler.translate_stage`` function, and is implemented in -``numba.macro``. - -Examples of attributes that are macro-expanded include the CUDA intrinsics for -grid, block and thread dimensions and indices. 
For example, the assignment to -``tx`` in the following function:: - - @cuda.jit(argtypes=[f4[:]]) - def f(a): - tx = cuda.threadIdx.x - -has the following representation after translation to Numba IR:: - - $0.1 = global(cuda: ) ['$0.1'] - $0.2 = getattr(value=$0.1, attr=threadIdx) ['$0.1', '$0.2'] - del $0.1 [] - $0.3 = getattr(value=$0.2, attr=x) ['$0.2', '$0.3'] - del $0.2 [] - tx = $0.3 ['$0.3', 'tx'] - -After macro expansion, the ``$0.3 = getattr(value=$0.2, attr=x)`` IR node is -translated into:: - - $0.3 = call tid.x(, ) ['$0.3'] - -which represents an instance of the ``Intrinsic`` IR node for calling the -``tid.x`` intrinsic function. - -.. _`rewrite-untyped-ir`: - -Stage 4: Rewrite untyped IR ---------------------------- - -Before running type inference, it may be desired to run certain -transformations on the Numba IR. One such example is to detect ``raise`` -statements which have an implicitly constant argument, so as to -support them in :term:`nopython mode`. Let's say you compile the -following function with Numba:: - - def f(x): - if x == 0: - raise ValueError("x cannot be zero") - -If you set the :envvar:`NUMBA_DUMP_IR` environment variable to ``1``, -you'll see the IR being rewritten before the type inference phase:: - - REWRITING: - del $0.3 [] - $12.1 = global(ValueError: ) ['$12.1'] - $const12.2 = const(str, x cannot be zero) ['$const12.2'] - $12.3 = call $12.1($const12.2) ['$12.1', '$12.3', '$const12.2'] - del $const12.2 [] - del $12.1 [] - raise $12.3 ['$12.3'] - ____________________________________________________________ - del $0.3 [] - $12.1 = global(ValueError: ) ['$12.1'] - $const12.2 = const(str, x cannot be zero) ['$const12.2'] - $12.3 = call $12.1($const12.2) ['$12.1', '$12.3', '$const12.2'] - del $const12.2 [] - del $12.1 [] - raise ('x cannot be zero') [] - - -.. _arch_type_inference: - -Stage 5: Infer types --------------------- - -Now that the Numba IR has been generated and macro-expanded, type analysis -can be performed. 
The types of the function arguments can be taken either -from the explicit function signature given in the ``@jit`` decorator -(such as ``@jit('float64(float64, float64)')``), or they can be taken from -the types of the actual function arguments if compilation is happening -when the function is first called. - -The type inference engine is found in ``numba.typeinfer``. Its job is to -assign a type to every intermediate variable in the Numba IR. The result of -this pass can be seen by setting the :envvar:`NUMBA_DUMP_ANNOTATION` -environment variable to 1: - -.. code-block:: python - - -----------------------------------ANNOTATION----------------------------------- - # File: archex.py - # --- LINE 4 --- - - @jit(nopython=True) - - # --- LINE 5 --- - - def add(a, b): - - # --- LINE 6 --- - # label 0 - # a = arg(0, name=a) :: int64 - # b = arg(1, name=b) :: int64 - # $0.3 = a + b :: int64 - # del b - # del a - # $0.4 = cast(value=$0.3) :: int64 - # del $0.3 - # return $0.4 - - return a + b - - -If type inference fails to find a consistent type assignment for all the -intermediate variables, it will label every variable as type ``pyobject`` and -fall back to object mode. Type inference can fail when unsupported Python -types, language features, or functions are used in the function body. - - -.. _`rewrite-typed-ir`: - -Stage 6a: Rewrite typed IR --------------------------- - -This pass's purpose is to perform any high-level optimizations that still -require, or could at least benefit from, Numba IR type information. - -One example of a problem domain that isn't as easily optimized once -lowered is the domain of multidimensional array operations. When -Numba lowers an array operation, Numba treats the operation like a -full ufunc kernel. During lowering a single array operation, Numba -generates an inline broadcasting loop that creates a new result array. -Then Numba generates an application loop that applies the operator -over the array inputs. 
Recognizing and rewriting these loops once -they are lowered into LLVM is hard, if not impossible. - -An example pair of optimizations in the domain of array operators is -loop fusion and shortcut deforestation. When the optimizer -recognizes that the output of one array operator is being fed into -another array operator, and only to that array operator, it can fuse -the two loops into a single loop. The optimizer can further eliminate -the temporary array allocated for the initial operation by directly -feeding the result of the first operation into the second, skipping -the store and load to the intermediate array. This elimination is -known as shortcut deforestation. Numba currently uses the rewrite -pass to implement these array optimizations. For more information, -please consult the ":ref:`case-study-array-expressions`" subsection, -later in this document. - -One can see the result of rewriting by setting the -:envvar:`NUMBA_DUMP_IR` environment variable to a non-zero value (such -as 1). 
The following example shows the output of the rewrite pass as -it recognizes an array expression consisting of a multiply and add, -and outputs a fused kernel as a special operator, :func:`arrayexpr`:: - - ______________________________________________________________________ - REWRITING: - a0 = arg(0, name=a0) ['a0'] - a1 = arg(1, name=a1) ['a1'] - a2 = arg(2, name=a2) ['a2'] - $0.3 = a0 * a1 ['$0.3', 'a0', 'a1'] - del a1 [] - del a0 [] - $0.5 = $0.3 + a2 ['$0.3', '$0.5', 'a2'] - del a2 [] - del $0.3 [] - $0.6 = cast(value=$0.5) ['$0.5', '$0.6'] - del $0.5 [] - return $0.6 ['$0.6'] - ____________________________________________________________ - a0 = arg(0, name=a0) ['a0'] - a1 = arg(1, name=a1) ['a1'] - a2 = arg(2, name=a2) ['a2'] - $0.5 = arrayexpr(ty=array(float64, 1d, C), expr=('+', [('*', [Var(a0, test.py (14)), Var(a1, test.py (14))]), Var(a2, test.py (14))])) ['$0.5', 'a0', 'a1', 'a2'] - del a0 [] - del a1 [] - del a2 [] - $0.6 = cast(value=$0.5) ['$0.5', '$0.6'] - del $0.5 [] - return $0.6 ['$0.6'] - ______________________________________________________________________ - -Following this rewrite, Numba lowers the array expression into a new -ufunc-like function that is inlined into a single loop that only -allocates a single result array. - - -.. _`parallel-accelerator`: - -Stage 6b: Perform Automatic Parallelization -------------------------------------------- - -This pass is only performed if the ``parallel`` option in the :func:`~numba.jit` -decorator is set to ``True``. This pass finds parallelism implicit in the -semantics of operations in the Numba IR and replaces those operations -with explicitly parallel representations of those operations using a -special `parfor` operator. Then, optimizations are performed to maximize -the number of parfors that are adjacent to each other such that they can -then be fused together into one parfor that takes only one pass over the -data and will thus typically have better cache performance. 
Finally, -during lowering, these parfor operators are converted to a form similar -to guvectorize to implement the actual parallelism. - -The automatic parallelization pass has a number of sub-passes, many of -which are controllable using a dictionary of options passed via the -``parallel`` keyword argument to :func:`~numba.jit`:: - - { 'comprehension': True/False, # parallel comprehension - 'prange': True/False, # parallel for-loop - 'numpy': True/False, # parallel numpy calls - 'reduction': True/False, # parallel reduce calls - 'setitem': True/False, # parallel setitem - 'stencil': True/False, # parallel stencils - 'fusion': True/False, # enable fusion or not - } - -The default is set to `True` for all of them. The sub-passes are -described in more detail in the following paragraphs. - -#. CFG Simplification - Sometimes Numba IR will contain chains of blocks containing no loops which - are merged in this sub-pass into single blocks. This sub-pass simplifies - subsequent analysis of the IR. - -#. Numpy canonicalization - Some Numpy operations can be written as operations on Numpy objects (e.g. - ``arr.sum()``), or as calls to Numpy taking those objects (e.g. - ``numpy.sum(arr)``). This sub-pass converts all such operations to the - latter form for cleaner subsequent analysis. - -#. Array analysis - A critical requirement for later parfor fusion is that parfors have - identical iteration spaces and these iteration spaces typically correspond - to the sizes of the dimensions of Numpy arrays. In this sub-pass, the IR is - analyzed to determine equivalence classes for the dimensions of Numpy - arrays. Consider the example, ``a = b + 1``, where ``a`` and ``b`` are both - Numpy arrays. Here, we know that each dimension of ``a`` must have the same - equivalence class as the corresponding dimension of ``b``. Typically, - routines rich in Numpy operations will enable equivalence classes to be - fully known for all arrays created within a function. 
- - Array analysis will also reason about size equivalence for slice selection, - and boolean array masking (one dimensional only). For example, it is able to - infer that ``a[1 : n-1]`` is of the same size as ``b[0 : n-2]``. - - Array analysis may also insert safety assumptions to ensure pre-conditions - related to array sizes are met before an operation can be parallelized. - For example, ``np.dot(X, w)`` between a 2-D matrix ``X`` and a 1-D vector ``w`` - requires that the second dimension of ``X`` is of the same size as ``w``. - Usually this kind of runtime check is automatically inserted, but if array - analysis can infer such equivalence, it will skip them. - - Users can even help array analysis by turning implicit knowledge about - array sizes into explicit assertions. For example, in the code below: - - .. code-block:: python - - @numba.njit(parallel=True) - def logistic_regression(Y, X, w, iterations): - assert(X.shape == (Y.shape[0], w.shape[0])) - for i in range(iterations): - w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X) - return w - - Making the explicit assertion helps eliminate all bounds checks in the - rest of the function. - -#. ``prange()`` to parfor - The use of prange (:ref:`numba-prange`) in a for loop is an explicit - indication from the programmer that all iterations of the for loop can - execute in parallel. In this sub-pass, we analyze the CFG to locate loops - and to convert those loops controlled by a prange object to the explicit - `parfor` operator. Each explicit parfor operator consists of: - - a. A list of loop nest information that describes the iteration space of the - parfor. Each entry in the loop nest list contains an indexing variable, - the start of the range, the end of the range, and the step value for each - iteration. - #. An initialization (init) block which contains instructions to be executed - one time before the parfor begins executing. - #. 
A loop body comprising a set of basic blocks that correspond to the body - of the loop and compute one point in the iteration space. - #. The index variables used for each dimension of the iteration space. - - For parfor `pranges`, the loop nest is a single entry where the start, - stop, and step fields come from the specified `prange`. The init block is - empty for `prange` parfors and the loop body is the set of blocks in the - loop minus the loop header. - - With parallelization on, array comprehensions (:ref:`pysupported-comprehension`) - will also be translated to prange so as to run in parallel. This behavior - can be disabled by setting ``parallel={'comprehension': False}``. - - Likewise, the overall `prange` to `parfor` translation can be disabled by - setting ``parallel={'prange': False}``, in which case `prange` is treated the - same as `range`. - -#. Numpy to parfor - In this sub-pass, Numpy functions such as ``ones``, ``zeros``, ``dot``, most - of the random number generating functions, arrayexprs (from Section - :ref:`rewrite-typed-ir`), and Numpy reductions are converted to parfors. - Generally, this conversion creates the loop nest list, whose length is equal - to the number of dimensions of the left-hand side of the assignment - instruction in the IR. The number and size of the dimensions of the - left-hand-side array is taken from the array analysis information generated - in sub-pass 3 above. An instruction to create the result Numpy array is - generated and stored in the new parfor's init block. A basic block is - created for the loop body and an instruction is generated and added to the - end of that block to store the result of the computation into the array at - the current point in the iteration space. The result stored into the array - depends on the operation that is being converted. For example, for ``ones``, - the value stored is a constant 1. 
For calls to generate a random array, the - value comes from a call to the same random number function but with the size - parameter dropped and therefore returning a scalar. For arrayexpr operators, - the arrayexpr tree is converted to Numba IR and the value at the root of that - expression tree is used to write into the output array. The translation from - Numpy functions and arrayexpr operators to `parfor` can be disabled by - setting ``parallel={'numpy': False}``. - - For reductions, the loop nest list is similarly created using the array - analysis information for the array being reduced. In the init block, the - initial value is assigned to the reduction variable. The loop body consists - of a single block in which the next value in the iteration space is fetched - and the reduction operation is applied to that value and the current - reduction value and the result stored back into the reduction value. - The translation of reduction functions to `parfor` can be disabled by - setting ``parallel={'reduction': False}``. - - Setting the :envvar:`NUMBA_DEBUG_ARRAY_OPT_STATS` environment variable to - 1 will show some statistics about parfor conversions in general. - -#. Setitem to parfor - Setting a range of array elements using a slice or boolean array selection - can also run in parallel. Statement such as ``A[P] = B[Q]`` - (or a simpler case ``A[P] = c``, where ``c`` is a scalar) is translated to - `parfor` if one of the following conditions is met: - - a. ``P`` and ``Q`` are slices or multi-dimensional selector involving - scalar and slices, and ``A[P]`` and ``B[Q]`` are considered size - equivalent by array analysis. Only 2-value slice/range is supported, - 3-value with a step will not be translated to `parfor`. - #. ``P`` and ``Q`` are the same boolean array. - - This translation can be disabled by setting ``parallel={'setitem': False}``. - -#. Simplification - Performs a copy propagation and dead code elimination pass. - -#. 
Fusion - This sub-pass first processes each basic block and does a reordering of the - instructions within the block with the goal of pushing parfors lower in the - block and lifting non-parfors towards the start of the block. In practice, - this approach does a good job of getting parfors adjacent to each other in - the IR, which enables more parfors to then be fused. During parfor fusion, - each basic block is repeatedly scanned until no further fusion is possible. - During this scan, each set of adjacent instructions are considered. - Adjacent instructions are fused together if: - - a. they are both parfors - #. the parfors' loop nests are the same size and the array equivalence - classes for each dimension of the loop nests are the same, and - #. the first parfor does not create a reduction variable used by the - second parfor. - - The two parfors are fused together by adding the second parfor's init block - to the first's, merging the two parfors' loop bodies together and replacing - the instances of the second parfor's loop index variables in the second - parfor's body with the loop index variables for the first parfor. - Fusion can be disabled by setting ``parallel={'fusion': False}``. - - Setting the :envvar:`NUMBA_DEBUG_ARRAY_OPT_STATS` environment variable to - 1 will show some statistics about parfor fusions. - -#. Push call objects and compute parfor parameters - In the lowering phase described in Section :ref:`lowering`, each parfor - becomes a separate function executed in parallel in ``guvectorize`` - (:ref:`guvectorize`) style. Since parfors may use variables defined - previously in a function, when those parfors become separate functions, - those variables must be passed to the parfor function as parameters. In - this sub-pass, a use-def scan is made over each parfor body and liveness - information is used to determine which variables are used but not defined by - the parfor. 
That list of variables is stored here in the parfor for use - during lowering. Function variables are a special case in this process - since function variables cannot be passed to functions compiled in nopython - mode. Instead, for function variables, this sub-pass pushes the assignment - instruction to the function variable into the parfor body so that those do - not need to be passed as parameters. - - To see the intermediate IR between the above sub-passes and other debugging - information, set the :envvar:`NUMBA_DEBUG_ARRAY_OPT` environment variable to - 1. For the example in Section :ref:`rewrite-typed-ir`, the following IR with - a parfor is generated during this stage:: - - ______________________________________________________________________ - label 0: - a0 = arg(0, name=a0) ['a0'] - a0_sh_attr0.0 = getattr(attr=shape, value=a0) ['a0', 'a0_sh_attr0.0'] - $consta00.1 = const(int, 0) ['$consta00.1'] - a0size0.2 = static_getitem(value=a0_sh_attr0.0, index_var=$consta00.1, index=0) ['$consta00.1', 'a0_sh_attr0.0', 'a0size0.2'] - a1 = arg(1, name=a1) ['a1'] - a1_sh_attr0.3 = getattr(attr=shape, value=a1) ['a1', 'a1_sh_attr0.3'] - $consta10.4 = const(int, 0) ['$consta10.4'] - a1size0.5 = static_getitem(value=a1_sh_attr0.3, index_var=$consta10.4, index=0) ['$consta10.4', 'a1_sh_attr0.3', 'a1size0.5'] - a2 = arg(2, name=a2) ['a2'] - a2_sh_attr0.6 = getattr(attr=shape, value=a2) ['a2', 'a2_sh_attr0.6'] - $consta20.7 = const(int, 0) ['$consta20.7'] - a2size0.8 = static_getitem(value=a2_sh_attr0.6, index_var=$consta20.7, index=0) ['$consta20.7', 'a2_sh_attr0.6', 'a2size0.8'] - ---begin parfor 0--- - index_var = parfor_index.9 - LoopNest(index_variable=parfor_index.9, range=0,a0size0.2,1 correlation=5) - init block: - $np_g_var.10 = global(np: ) ['$np_g_var.10'] - $empty_attr_attr.11 = getattr(attr=empty, value=$np_g_var.10) ['$empty_attr_attr.11', '$np_g_var.10'] - $np_typ_var.12 = getattr(attr=float64, value=$np_g_var.10) ['$np_g_var.10', '$np_typ_var.12'] - $0.5 = 
call $empty_attr_attr.11(a0size0.2, $np_typ_var.12, kws=(), func=$empty_attr_attr.11, vararg=None, args=[Var(a0size0.2, test2.py (7)), Var($np_typ_var.12, test2.py (7))]) ['$0.5', '$empty_attr_attr.11', '$np_typ_var.12', 'a0size0.2'] - label 1: - $arg_out_var.15 = getitem(value=a0, index=parfor_index.9) ['$arg_out_var.15', 'a0', 'parfor_index.9'] - $arg_out_var.16 = getitem(value=a1, index=parfor_index.9) ['$arg_out_var.16', 'a1', 'parfor_index.9'] - $arg_out_var.14 = $arg_out_var.15 * $arg_out_var.16 ['$arg_out_var.14', '$arg_out_var.15', '$arg_out_var.16'] - $arg_out_var.17 = getitem(value=a2, index=parfor_index.9) ['$arg_out_var.17', 'a2', 'parfor_index.9'] - $expr_out_var.13 = $arg_out_var.14 + $arg_out_var.17 ['$arg_out_var.14', '$arg_out_var.17', '$expr_out_var.13'] - $0.5[parfor_index.9] = $expr_out_var.13 ['$0.5', '$expr_out_var.13', 'parfor_index.9'] - ----end parfor 0---- - $0.6 = cast(value=$0.5) ['$0.5', '$0.6'] - return $0.6 ['$0.6'] - ______________________________________________________________________ - - .. _`lowering`: - -Stage 7a: Generate nopython LLVM IR ------------------------------------ - -If type inference succeeds in finding a Numba type for every intermediate -variable, then Numba can (potentially) generate specialized native code. This -process is called :term:`lowering`. The Numba IR tree is translated into -LLVM IR by using helper classes from `llvmlite `_. -The machine-generated LLVM IR can seem unnecessarily verbose, but the LLVM -toolchain is able to optimize it quite easily into compact, efficient code. - -The basic lowering algorithm is generic, but the specifics of how particular -Numba IR nodes are translated to LLVM instructions is handled by the -target context selected for compilation. The default target context is -the "cpu" context, defined in ``numba.targets.cpu``. - -The LLVM IR can be displayed by setting the :envvar:`NUMBA_DUMP_LLVM` environment -variable to 1. 
For the "cpu" context, our ``add()`` example would look like: - -.. code-block:: llvm - - define i32 @"__main__.add$1.int64.int64"(i64* %"retptr", - {i8*, i32}** %"excinfo", - i8* %"env", - i64 %"arg.a", i64 %"arg.b") - { - entry: - %"a" = alloca i64 - %"b" = alloca i64 - %"$0.3" = alloca i64 - %"$0.4" = alloca i64 - br label %"B0" - B0: - store i64 %"arg.a", i64* %"a" - store i64 %"arg.b", i64* %"b" - %".8" = load i64* %"a" - %".9" = load i64* %"b" - %".10" = add i64 %".8", %".9" - store i64 %".10", i64* %"$0.3" - %".12" = load i64* %"$0.3" - store i64 %".12", i64* %"$0.4" - %".14" = load i64* %"$0.4" - store i64 %".14", i64* %"retptr" - ret i32 0 - } - -The post-optimization LLVM IR can be output by setting -:envvar:`NUMBA_DUMP_OPTIMIZED` to 1. The optimizer shortens the code -generated above quite significantly: - -.. code-block:: llvm - - define i32 @"__main__.add$1.int64.int64"(i64* nocapture %retptr, - { i8*, i32 }** nocapture readnone %excinfo, - i8* nocapture readnone %env, - i64 %arg.a, i64 %arg.b) - { - entry: - %.10 = add i64 %arg.b, %arg.a - store i64 %.10, i64* %retptr, align 8 - ret i32 0 - } - -If created during :ref:`parallel-accelerator`, parfor operations are -lowered in the following manner. First, instructions in the parfor's init -block are lowered into the existing function using the normal lowering code. -Second, the loop body of the parfor is turned into a separate GUFunc. -Third, code is emitted for the current function to call the parallel GUFunc. - -To create a GUFunc from the parfor body, the signature of the GUFunc is -created by taking the parfor parameters as identified in step 9 of -Stage :ref:`parallel-accelerator` and adding to that a special `schedule` -parameter, across which the GUFunc will be parallelized. The schedule -parameter is in effect a static schedule mapping portions of the parfor -iteration space to Numba threads and so the length of the schedule -array is the same as the number of configured Numba threads. 
To make -this process easier and somewhat less dependent on changes to Numba IR, -this stage creates a Python function as text that contains the parameters -to the GUFunc and iteration code that takes the current schedule entry -and loops through the specified portion of the iteration space. In the -body of that loop, a special sentinel is inserted for subsequent easy -location. This code that handles the processing of the iteration space -is then ``eval``'ed into existence and the Numba compiler's run_frontend -function is called to generate IR. That IR is scanned to locate the -sentinel and the sentinel is replaced with the loop body of the parfor. -Then, the process of creating the parallel GUFunc is completed by -compiling this merged IR with the Numba compiler's ``compile_ir`` function. - -To call the parallel GUFunc, the static schedule must be created. -Code is inserted to call a function named ``do_scheduling.`` This function -is called with the size of each of the parfor's dimensions and the number -`N` of configured Numba threads (:envvar:`NUMBA_NUM_THREADS`). -The ``do_scheduling`` function will divide -the iteration space into N approximately equal sized regions (linear for -1D, rectangular for 2D, or hyperrectangles for 3+D) and the resulting -schedule is passed to the parallel GUFunc. The number of threads -dedicated to a given dimension of the full iteration space is roughly -proportional to the ratio of the size of the given dimension to the sum -of the sizes of all the dimensions of the iteration space. - -Parallel reductions are not natively provided by GUFuncs but the parfor -lowering strategy allows us to use GUFuncs in a way that reductions can -be performed in parallel. To accomplish this, for each reduction variable -computed by a parfor, the parallel GUFunc and the code that calls it are -modified to make the scalar reduction variable into an array of reduction -variables whose length is equal to the number of Numba threads. 
In addition, -the GUFunc still contains a scalar version of the reduction variable that -is updated by the parfor body during each iteration. One time at the -end of the GUFunc this local reduction variable is copied into the -reduction array. In this way, false sharing of the reduction array is -prevented. Code is also inserted into the main -function after the parallel GUFunc has returned that does a reduction -across this smaller reduction array and this final reduction value is -then stored into the original scalar reduction variable. - -The GUFunc corresponding to the example from Section :ref:`parallel-accelerator` -can be seen below:: - - ______________________________________________________________________ - label 0: - sched.29 = arg(0, name=sched) ['sched.29'] - a0 = arg(1, name=a0) ['a0'] - a1 = arg(2, name=a1) ['a1'] - a2 = arg(3, name=a2) ['a2'] - _0_5 = arg(4, name=_0_5) ['_0_5'] - $3.1.24 = global(range: ) ['$3.1.24'] - $const3.3.21 = const(int, 0) ['$const3.3.21'] - $3.4.23 = getitem(value=sched.29, index=$const3.3.21) ['$3.4.23', '$const3.3.21', 'sched.29'] - $const3.6.28 = const(int, 1) ['$const3.6.28'] - $3.7.27 = getitem(value=sched.29, index=$const3.6.28) ['$3.7.27', '$const3.6.28', 'sched.29'] - $const3.8.32 = const(int, 1) ['$const3.8.32'] - $3.9.31 = $3.7.27 + $const3.8.32 ['$3.7.27', '$3.9.31', '$const3.8.32'] - $3.10.36 = call $3.1.24($3.4.23, $3.9.31, kws=[], func=$3.1.24, vararg=None, args=[Var($3.4.23, (2)), Var($3.9.31, (2))]) ['$3.1.24', '$3.10.36', '$3.4.23', '$3.9.31'] - $3.11.30 = getiter(value=$3.10.36) ['$3.10.36', '$3.11.30'] - jump 1 [] - label 1: - $28.2.35 = iternext(value=$3.11.30) ['$28.2.35', '$3.11.30'] - $28.3.25 = pair_first(value=$28.2.35) ['$28.2.35', '$28.3.25'] - $28.4.40 = pair_second(value=$28.2.35) ['$28.2.35', '$28.4.40'] - branch $28.4.40, 2, 3 ['$28.4.40'] - label 2: - $arg_out_var.15 = getitem(value=a0, index=$28.3.25) ['$28.3.25', '$arg_out_var.15', 'a0'] - $arg_out_var.16 = getitem(value=a1, index=$28.3.25) 
['$28.3.25', '$arg_out_var.16', 'a1'] - $arg_out_var.14 = $arg_out_var.15 * $arg_out_var.16 ['$arg_out_var.14', '$arg_out_var.15', '$arg_out_var.16'] - $arg_out_var.17 = getitem(value=a2, index=$28.3.25) ['$28.3.25', '$arg_out_var.17', 'a2'] - $expr_out_var.13 = $arg_out_var.14 + $arg_out_var.17 ['$arg_out_var.14', '$arg_out_var.17', '$expr_out_var.13'] - _0_5[$28.3.25] = $expr_out_var.13 ['$28.3.25', '$expr_out_var.13', '_0_5'] - jump 1 [] - label 3: - $const44.1.33 = const(NoneType, None) ['$const44.1.33'] - $44.2.39 = cast(value=$const44.1.33) ['$44.2.39', '$const44.1.33'] - return $44.2.39 ['$44.2.39'] - ______________________________________________________________________ - - -Stage 7b: Generate object mode LLVM IR --------------------------------------- - -If type inference fails to find Numba types for all values inside a function, -the function will be compiled in object mode. The generated LLVM will be -significantly longer, as the compiled code will need to make calls to the -`Python C API `_ to perform basically all -operations. The optimized LLVM for our example ``add()`` function is: - -.. 
code-block:: llvm - - @PyExc_SystemError = external global i8 - @".const.Numba_internal_error:_object_mode_function_called_without_an_environment" = internal constant [73 x i8] c"Numba internal error: object mode function called without an environment\00" - @".const.name_'a'_is_not_defined" = internal constant [24 x i8] c"name 'a' is not defined\00" - @PyExc_NameError = external global i8 - @".const.name_'b'_is_not_defined" = internal constant [24 x i8] c"name 'b' is not defined\00" - - define i32 @"__main__.add$1.pyobject.pyobject"(i8** nocapture %retptr, { i8*, i32 }** nocapture readnone %excinfo, i8* readnone %env, i8* %arg.a, i8* %arg.b) { - entry: - %.6 = icmp eq i8* %env, null - br i1 %.6, label %entry.if, label %entry.endif, !prof !0 - - entry.if: ; preds = %entry - tail call void @PyErr_SetString(i8* @PyExc_SystemError, i8* getelementptr inbounds ([73 x i8]* @".const.Numba_internal_error:_object_mode_function_called_without_an_environment", i64 0, i64 0)) - ret i32 -1 - - entry.endif: ; preds = %entry - tail call void @Py_IncRef(i8* %arg.a) - tail call void @Py_IncRef(i8* %arg.b) - %.21 = icmp eq i8* %arg.a, null - br i1 %.21, label %B0.if, label %B0.endif, !prof !0 - - B0.if: ; preds = %entry.endif - tail call void @PyErr_SetString(i8* @PyExc_NameError, i8* getelementptr inbounds ([24 x i8]* @".const.name_'a'_is_not_defined", i64 0, i64 0)) - tail call void @Py_DecRef(i8* null) - tail call void @Py_DecRef(i8* %arg.b) - ret i32 -1 - - B0.endif: ; preds = %entry.endif - %.30 = icmp eq i8* %arg.b, null - br i1 %.30, label %B0.endif1, label %B0.endif1.1, !prof !0 - - B0.endif1: ; preds = %B0.endif - tail call void @PyErr_SetString(i8* @PyExc_NameError, i8* getelementptr inbounds ([24 x i8]* @".const.name_'b'_is_not_defined", i64 0, i64 0)) - tail call void @Py_DecRef(i8* %arg.a) - tail call void @Py_DecRef(i8* null) - ret i32 -1 - - B0.endif1.1: ; preds = %B0.endif - %.38 = tail call i8* @PyNumber_Add(i8* %arg.a, i8* %arg.b) - %.39 = icmp eq i8* %.38, null - 
br i1 %.39, label %B0.endif1.1.if, label %B0.endif1.1.endif, !prof !0 - - B0.endif1.1.if: ; preds = %B0.endif1.1 - tail call void @Py_DecRef(i8* %arg.a) - tail call void @Py_DecRef(i8* %arg.b) - ret i32 -1 - - B0.endif1.1.endif: ; preds = %B0.endif1.1 - tail call void @Py_DecRef(i8* %arg.b) - tail call void @Py_DecRef(i8* %arg.a) - tail call void @Py_IncRef(i8* %.38) - tail call void @Py_DecRef(i8* %.38) - store i8* %.38, i8** %retptr, align 8 - ret i32 0 - } - - declare void @PyErr_SetString(i8*, i8*) - - declare void @Py_IncRef(i8*) - - declare void @Py_DecRef(i8*) - - declare i8* @PyNumber_Add(i8*, i8*) - - -The careful reader might notice several unnecessary calls to ``Py_IncRef`` -and ``Py_DecRef`` in the generated code. Currently Numba isn't able to -optimize those away. - -Object mode compilation will also attempt to identify loops which can be -extracted and statically-typed for "nopython" compilation. This process is -called *loop-lifting*, and results in the creation of a hidden nopython mode -function just containing the loop which is then called from the original -function. Loop-lifting helps improve the performance of functions that -need to access uncompilable code (such as I/O or plotting code) but still -contain a time-intensive section of compilable code. - -Stage 8: Compile LLVM IR to machine code ----------------------------------------- - -In both :term:`object mode` and :term:`nopython mode`, the generated LLVM IR -is compiled by the LLVM JIT compiler and the machine code is loaded into -memory. A Python wrapper is also created (defined in -``numba.dispatcher.Dispatcher``) which can do the dynamic dispatch to the -correct version of the compiled function if multiple type specializations -were generated (for example, for both ``float32`` and ``float64`` versions -of the same function). - -The machine assembly code generated by LLVM can be dumped to the screen by -setting the :envvar:`NUMBA_DUMP_ASSEMBLY` environment variable to 1: - -.. 
code-block:: gas - - .globl __main__.add$1.int64.int64 - .align 16, 0x90 - .type __main__.add$1.int64.int64,@function - __main__.add$1.int64.int64: - addq %r8, %rcx - movq %rcx, (%rdi) - xorl %eax, %eax - retq - -The assembly output will also include the generated wrapper function that -translates the Python arguments to native data types. diff --git a/numba/docs/source/developer/contributing.rst b/numba/docs/source/developer/contributing.rst deleted file mode 100644 index 17b81f1a6..000000000 --- a/numba/docs/source/developer/contributing.rst +++ /dev/null @@ -1,298 +0,0 @@ - -Contributing to Numba -===================== - -We welcome people who want to make contributions to Numba, big or small! -Even simple documentation improvements are encouraged. If you have -questions, don't hesitate to ask them (see below). - - -Communication -------------- - -Mailing-list -'''''''''''' - -We have a public mailing-list that you can e-mail at numba-users@anaconda.com. -If you have any questions about contributing to Numba, it is ok to ask them -on this mailing-list. You can subscribe and read the archives on -`Google Groups `_, -and there is also a `Gmane mirror `_ -allowing NNTP access. - -Real-time Chat -'''''''''''''' - -Numba uses Gitter for public real-time chat. To help improve the -signal-to-noise ratio, we have two channels: - -* `numba/numba `_: General Numba discussion, - questions, and debugging help. -* `numba/numba-dev `_: Discussion of PRs, - planning, release coordination, etc. - -Both channels are public, but we may ask that discussions on numba-dev move to -the numba channel. This is simply to ensure that numba-dev is easy for core -developers to keep up with. - -Note that the Github issue tracker is the best place to report bugs. Bug -reports in chat are difficult to track and likely to be lost. - -Weekly Meetings -''''''''''''''' - -The core Numba developers have a weekly video conference to discuss roadmap, -feature planning, and outstanding issues. 
These meetings are invite only, but -minutes will be taken and will be posted to the -`Numba wiki `_. - -.. _report-numba-bugs: - -Bug tracker -'''''''''''' - -We use the `Github issue tracker `_ -to track both bug reports and feature requests. If you report an issue, -please include specifics: - -* what you are trying to do; -* which operating system you have and which version of Numba you are running; -* how Numba is misbehaving, e.g. the full error traceback, or the unexpected - results you are getting; -* as far as possible, a code snippet that allows full reproduction of your - problem. - -Getting set up --------------- - -If you want to contribute, we recommend you fork our `Github repository -`_, then create a branch representing -your work. When your work is ready, you should submit it as a pull -request from the Github interface. - -If you want, you can submit a pull request even when you haven't finished -working. This can be useful to gather feedback, or to stress your changes -against the :ref:`continuous integration ` platorm. In this -case, please prepend ``[WIP]`` to your pull request's title. - -.. _buildenv: - -Build environment -''''''''''''''''' - -Numba has a number of dependencies (mostly `Numpy `_ -and `llvmlite `_) with non-trivial build -instructions. Unless you want to build those dependencies yourself, we -recommend you use `Conda `_ to -create a dedicated development environment and install precompiled versions -of those dependencies there. - -First add the Binstar ``numba`` channel so as to get development builds of -the llvmlite library:: - - $ conda config --add channels numba - -Then create an environment with the right dependencies:: - - $ /conda create -n numbaenv python=3.5 llvmlite numpy - -.. note:: - This installs an environment based on Python 3.5, but you can of course - choose another version supported by Numba. - -To activate the environment for the current shell session:: - - $ source /activate numbaenv - -.. 
note:: - Those instructions are for a standard Linux shell. You may need to - adapt them for other platforms. - -Once the environment is activated, you have a dedicated Python with the -required dependencies:: - - $ python - Python 3.4.2 |Continuum Analytics, Inc.| (default, Oct 21 2014, 17:16:37) - [GCC 4.4.7 20120313 (Red Hat 4.4.7-1)] on linux - Type "help", "copyright", "credits" or "license" for more information. - >>> import llvmlite - >>> llvmlite.__version__ - '0.2.0-3-g9f60cd1' - -Building Numba -'''''''''''''' - -For a convenient development workflow, we recommend you build Numba inside -its source checkout:: - - $ python setup.py build_ext --inplace - -This assumes you have a working C compiler and runtime on your development -system. You will have to run this command again whenever you modify -C files inside the Numba source tree. - -Running tests -''''''''''''' - -Numba is validated using a test suite comprised of various kind of tests -(unit tests, functional tests). The test suite is written using the -standard :py:mod:`unittest` framework. - -The tests can be executed via ``python -m numba.runtests``. If you are -running Numba from a source checkout, you can type ``./runtests.py`` -as a shortcut. Various options are supported to influence test running -and reporting. Pass ``-h`` or ``--help`` to get a glimpse at those options. -Examples: - -* to list all available tests:: - - $ python -m numba.runtests -l - -* to list tests from a specific (sub-)suite:: - - $ python -m numba.runtests -l numba.tests.test_usecases - -* to run those tests:: - - $ python -m numba.runtests numba.tests.test_usecases - -* to run all tests in parallel, using multiple sub-processes:: - - $ python -m numba.runtests -m - -* For a detailed list of all options:: - - $ python -m numba.runtests -h - -The numba test suite can take a long time to complete. 
When you want to avoid -the long wait, it is useful to focus on the failing tests first with the -following test runner options: - -* The ``--failed-first`` option is added to capture the list of failed tests - and to re-execute them first:: - - $ python -m numba.runtests --failed-first -mvb - -* The ``--last-failed`` option is used with ``--failed-first`` to execute - the previously failed tests only:: - - $ python -m numba.runtests --last-failed -mvb - -Development rules ------------------ - -Code reviews -'''''''''''' - -Any non-trivial change should go through a code review by one or several of -the core developers. The recommended process is to submit a pull request -on github. - -A code review should try to assess the following criteria: - -* general design and correctness -* code structure and maintainability -* coding conventions -* docstrings, comments -* test coverage - -Coding conventions -'''''''''''''''''' - -All Python code should follow :pep:`8`. Our C code doesn't have a -well-defined coding style (would it be nice to follow :pep:`7`?). -Code and documentation should generally fit within 80 columns, for -maximum readability with all existing tools (such as code review UIs). - -Stability -''''''''' - -The repository's ``master`` branch is expected to be stable at all times. -This translates into the fact that the test suite passes without errors -on all supported platforms (see below). This also means that a pull request -also needs to pass the test suite before it is merged in. - -.. _travis_ci: - -Platform support -'''''''''''''''' - -Numba is to be kept compatible with Python 2.7, 3.4 and 3.5 under -at least Linux, OS X and Windows. Also, Numpy versions 1.7 and upwards -are supported. - -We don't expect invidual contributors to test those combinations -themselves! Instead, we have a continuous integration platform. Part of -the platform is hosted at `Travis-CI `_. 
-Each time you submit a pull request, a corresponding build will be started -at Travis-CI and check that Numba builds and tests without any errors. -You can expect this to take less than 20 minutes. - -Some platforms (such as Windows) cannot be hosted by Travis-CI, and the -Numba team has therefore access to a separate platform provided by -`Anaconda, Inc. `_, our sponsor. We hope parts of that -infrastructure can be made public in the future. - - -Documentation -------------- - -The numba documentation is split over two repositories: - -* This documentation is in the ``docs`` directory inside the - `Numba repository `_. - -* The `Numba homepage `_ has its sources in a - separate repository at https://github.com/numba/numba-webpage - - -Main documentation -'''''''''''''''''' - -This documentation is under the ``docs`` directory of the `Numba repository`_. -It is built with `Sphinx `_, which is available -using conda or pip. - -To build the documentation, you need the bootstrap theme:: - - $ pip install sphinx_bootstrap_theme - -You can edit the source files under ``docs/source/``, after which you can -build and check the documentation:: - - $ make html - $ open _build/html/index.html - -Core developers can upload this documentation to the Numba website -at http://numba.pydata.org by using the ``gh-pages.py`` script under ``docs``:: - - $ python gh-pages.py version # version can be 'dev' or '0.16' etc - -then verify the repository under the ``gh-pages`` directory and use -``git push``. - -Web site homepage -''''''''''''''''' - -The Numba homepage on http://numba.pydata.org can be fetched from here: -https://github.com/numba/numba-webpage - -After pushing documentation to a new version, core developers will want to -update the website. 
Some notable files: - -* ``index.rst`` # Update main page -* ``_templates/sidebar_versions.html`` # Update sidebar links -* ``doc.rst`` # Update after adding a new version for numba docs -* ``download.rst`` # Updata after uploading new numba version to pypi - -After updating run:: - - $ make html - -and check out ``_build/html/index.html``. To push updates to the Web site:: - - $ python _scripts/gh-pages.py - -then verify the repository under the ``gh-pages`` directory. Make sure the -``CNAME`` file is present and contains a single line for ``numba.pydata.org``. -Finally, use ``git push`` to update the website. diff --git a/numba/docs/source/developer/custom_pipeline.rst b/numba/docs/source/developer/custom_pipeline.rst deleted file mode 100644 index f027d4b39..000000000 --- a/numba/docs/source/developer/custom_pipeline.rst +++ /dev/null @@ -1,33 +0,0 @@ -.. _arch-pipeline: - -========================= -Notes on Custom Pipelines -========================= - -.. warning:: The custom pipeline feature is for expert use only. Modifying - the compiler behavior can invalidate internal assumptions in the - numba source code. - - -For library developers looking for a way to extend or modify the compiler -behavior, you can do so by defining a custom compiler pipeline by inheriting -from ``numba.compiler.BasePipeline``. The default numba pipeline is defined -as ``numba.compiler.Pipeline``, implementing the ``.define_pipelines()`` -method, which adds the *nopython-mode*, *object-mode* and *interpreted-mode* -pipelines. These three pipelines are defined in ``BasePipeline`` by the -methods ``.define_nopython_pipeline``, ``.define_objectmode_pipeline`` -and ``.define_interpreted_pipeline``, respectively.. - -To use a custom subclass of ``BasePipeline``, supply it as the -``pipeline_class`` keyword argument to the ``@jit`` and ``@generated_jit`` -decorators. By doing so, the effect of the custom pipeline is limited to the -function being decorated. 
- -Below are the common methods available to implementors of the ``BasePipeline`` -class: - -.. autoclass:: numba.compiler.BasePipeline - :members: add_cleanup_stage, add_lowering_stage, add_optimization_stage, - add_pre_typing_stage, add_preprocessing_stage, add_typing_stage, - define_nopython_pipeline, define_objectmode_pipeline, - define_interpreted_pipeline diff --git a/numba/docs/source/developer/dispatching.rst b/numba/docs/source/developer/dispatching.rst deleted file mode 100644 index ca50e6d66..000000000 --- a/numba/docs/source/developer/dispatching.rst +++ /dev/null @@ -1,267 +0,0 @@ - -======================= -Polymorphic dispatching -======================= - -Functions compiled using :func:`~numba.jit` or :func:`~numba.vectorize` -are open-ended: they can be called with many different input types and -have to select (possibly compile on-the-fly) the right low-level -specialization. We hereby explain how this mechanism is implemented. - - -Requirements -============ - -JIT-compiled functions can take several arguments and each of them is -taken into account when selecting a specialization. Thus it is a -form of multiple dispatch, more complex than single dispatch. - -Each argument weighs in the selection based on its :ref:`Numba type -`. Numba types are often more granular than Python types: -for example, Numba types Numpy arrays differently depending on their -dimensionality and their layout (C-contiguous, etc.). - -Once a Numba type is inferred for each argument, a specialization must -be chosen amongst the available ones; or, if not suitable specialization -is found, a new one must be compiled. This is not a trivial decision: -there can be multiple specializations compatible with a given concrete -signature (for example, say a two-argument function has compiled -specializations for ``(float64, float64)`` and ``(complex64, complex64)``, -and it is called with ``(float32, float32)``). 
- -Therefore, there are two crucial steps in the dispatch mechanism: - -1. infer the Numba types of the concrete arguments -2. select the best available specialization (or choose to compile a new one) - for the inferred Numba types - -Compile-time vs. run-time -------------------------- - -This document discusses dispatching when it is done at runtime, i.e. -when a JIT-compiled function is called from pure Python. In that context, -performance is important. To stay in the realm of normal function call -overhead in Python, the overhead of dispatching should stay under a -microsecond. Of course, *the faster the better*... - -When a JIT-compiled function is called from another JIT-compiled -function (in :term:`nopython mode`), the polymorphism is resolved at -compile-time, using a non-performance critical mechanism, bearing zero -runtime performance overhead. - -.. note:: - In practice, the performance-critical parts described here are coded in C. - - -Type resolution -=============== - -The first step is therefore to infer, at call-time, a Numba type for each -of the function's concrete arguments. Given the finer granularity of -Numba types compared to Python types, one cannot simply lookup an object's -class and key a dictionary with it to obtain the corresponding Numba type. - -Instead, there is a machinery to inspect the object and, based on its -Python type, query various properties to infer the appropriate Numba -type. This can be more or less complex: for example, a Python ``int`` -argument will always infer to a Numba ``intp`` (a pointer-sized integer), -but a Python ``tuple`` argument can infer to multiple Numba types (depending -on the tuple's size and the concrete type of each of its elements). - -The Numba type system is high-level and written in pure Python; there is -a pure Python machinery, based on a generic function, to do said inference -(in :mod:`numba.typing.typeof`). That machinery is used for compile-time -inference, e.g. on constants. 
Unfortunately, it is too slow for run-time -value-based dispatching. It is only used as a fallback for rarely used -(or difficult to infer) types, and exhibits multiple-microsecond overhead. - -Typecodes ---------- - -The Numba type system is really too high-level to be manipulated efficiently -from C code. Therefore, the C dispatching layer uses another representation -based on integer typecodes. Each Numba type gets a unique integer typecode -when constructed; also, an interning system ensure no two instances of same -type are created. The dispatching layer is therefore able to *eschew* -the overhead of the Numba type system by working with simple integer -typecodes, amenable to well-known optimizations (fast hash tables, etc.). - -The goal of the type resolution step becomes: infer a Numba *typecode* -for each of the function's concrete arguments. Ideally, it doesn't deal -with Numba types anymore... - -Hard-coded fast paths ---------------------- - -While eschewing the abstraction and object-orientation overhead of the type -system, the integer typecodes still have the same conceptual complexity. -Therefore, an important technique to speed up inference is to first go -through checks for the most important types, and hard-code a fast resolution -for each of them. - -Several types benefit from such an optimization, notably: - -* basic Python scalars (``bool``, ``int``, ``float``, ``complex``); -* basic Numpy scalars (the various kinds of integer, floating-point, - complex numbers); -* Numpy arrays of certain dimensionalities and basic element types. - -Each of those fast paths ideally uses a hard-coded result value or a direct -table lookup after a few simple checks. - -However, we can't apply that technique to all argument types; there would -be an explosion of ad-hoc internal caches, and it would become difficult to -maintain. 
Besides, the recursive application of hard-coded fast paths -would not necessarily combine into a low overhead (in the nested tuple -case, for example). - -Fingerprint-based typecode cache --------------------------------- - -For non-so-trivial types (imagine a tuple, or a Numpy ``datetime64`` array, -for example), the hard-coded fast paths don't match. Another mechanism -then kicks in, more generic. - -The principle here is to examine each argument value, as the pure Python -machinery would do, and to describe its Numba type unambiguously. The -difference is that *we don't actually compute a Numba type*. Instead, we -compute a simple bytestring, a low-level possible denotation of that -Numba type: a *fingerprint*. The fingerprint format is designed to be -short and extremely simple to compute from C code (in practice, it has -a bytecode-like format). - -Once the fingerprint is computed, it is looked up in a cache mapping -fingerprints to typecodes. The cache is a hash table, and the lookup -is fast thanks to the fingerprints being generally very short (rarely -more than 20 bytes). - -If the cache lookup fails, the typecode must first be computed using the -slow pure Python machinery. Luckily, this would only happen once: on -subsequent calls, the cached typecode would be returned for the given -fingerprint. - -In rare cases, a fingerprint cannot be computed efficiently. This is -the case for some types which cannot be easily inspected from C: for -example ``cffi`` function pointers. Then, the slow Pure Python machinery -is invoked at each function call with such an argument. - -.. note:: - Two fingerprints may denote a single Numba type. This does not make - the mechanism incorrect; it only creates more cache entries. - - -Summary -------- - -Type resolution of a function argument involves the following mechanisms -in order: - -* Try a few hard-coded fast paths, for common simple types. 
-* If the above failed, compute a fingerprint for the argument and lookup - its typecode in a cache. -* If all the above failed, invoke the pure Python machinery which will - determine a Numba type for the argument (and look up its typecode). - - -Specialization selection -======================== - -At the previous step, an integer typecode has been determined for each -concrete argument to the JIT-compiled function. Now it remains to match -that concrete signature against each of the available specializations for -the function. There can be three outcomes: - -* There is a satisfying best match: the corresponding specialization - is then invoked (it will handle argument unboxing and other details). -* There is a tie between two or more "best matches": an exception is raised, - refusing to solve the ambiguity. -* There is no satisfying match: a new specialization is compiled tailored - for the concrete argument types that were inferred. - -The selection works by looping over all available specializations, and -computing the compatibility of each concrete argument type with the -corresponding type in the specialization's intended signature. Specifically, -we are interested in: - -1. Whether the concrete argument type is allowed to convert implicitly to - the specialization's argument type; -2. If so, at what semantic (user-visible) cost the conversion comes. - -Implicit conversion rules -------------------------- - -There are five possible kinds of implicit conversion from a source type -to a destination type (note this is an asymmetric relationship): - -1. *exact match*: the two types are identical; this is the ideal case, - since the specialization would behave exactly as intended; -2. *same-kind promotion*: the two types belong to the same "kind" (for - example ``int32`` and ``int64`` are two integer types), and the source - type can be converted losslessly to the destination type (e.g. from - ``int32`` to ``int64``, but not the reverse); -3. 
*safe conversion*: the two types belong to different kinds, but the - source type can be reasonably converted to the destination type - (e.g. from ``int32`` to ``float64``, but not the reverse); -4. *unsafe conversion*: a conversion is available from the source type - to the destination type, but it may lose precision, magnitude, or - another desirable quality. -5. *no conversion*: there is no correct or reasonably efficient way to - convert between the two types (for example between an ``int64`` and a - ``datetime64``, or a C-contiguous array and a Fortran-contiguous array). - -When a specialization is examined, the latter two cases eliminate it from -the final choice: i.e. when at least one argument has *no conversion* or -only an *unsafe conversion* to the signature's argument type. - -.. note:: - However, if the function is compiled with explicit signatures - in the :func:`~numba.jit` call (and therefore it is not allowed to compile - new specializations), *unsafe conversion* is allowed. - -Candidates and best match -------------------------- - -If a specialization is not eliminated by the rule above, it enters the -list of *candidates* for the final choice. Those candidates are ranked -by an ordered 4-uple of integers: ``(number of unsafe conversions, -number of safe conversions, number of same-kind promotions, number of -exact matches)`` (note the sum of the tuple's elements is equal to the -number of arguments). The best match is then the #1 result in sorted -ascending order, thereby preferring exact matches over promotions, -promotions over safe conversions, safe conversions over unsafe conversions. - -Implementation --------------- - -The above-described mechanism works on integer typecodes, not on Numba -types. It uses an internal hash table storing the possible conversion -kind for each pair of compatible types. 
The internal hash table is in part -built at startup (for built-in trivial types such as ``int32``, ``int64`` -etc.), in part filled dynamically (for arbitrarily complex types such -as array types: for example to allow using a C-contiguous 2D array where -a function expects a non-contiguous 2D array). - -Summary -------- - -Selecting the right specialization involves the following steps: - -* Examine each available specialization and match it against the concrete - argument types. -* Eliminate any specialization where at least one argument doesn't offer - sufficient compatibility. -* If there are remaining candidates, choose the best one in terms of - preserving the types' semantics. - - -Miscellaneous -============= - -Some `benchmarks of dispatch performance -`_ -exist in the `Numba benchmarks `_ -repository. - -Some unit tests of specific aspects of the machinery are available -in :mod:`numba.tests.test_typeinfer` and :mod:`numba.tests.test_typeof`. -Higher-level dispatching tests are in :mod:`numba.tests.test_dispatcher`. diff --git a/numba/docs/source/developer/environment.rst b/numba/docs/source/developer/environment.rst deleted file mode 100644 index b890f644b..000000000 --- a/numba/docs/source/developer/environment.rst +++ /dev/null @@ -1,58 +0,0 @@ - -================== -Environment Object -================== - -The Environment object (Env) is used to maintain references to python objects -that are needed to support compiled functions for both object-mode and -nopython-mode. - -In nopython-mode, the Env is used for: - -* Storing pyobjects for reconstruction from native values, - such as: - * for printing native values of NumPy arrays; - * for returning or yielding native values back to the interpreter. - -In object-mode, the Env is used for: - -* storing constant values referenced in the code. -* storing a reference to the function's global dictionary to load global - values. 
- - -The Implementation -================== - -The Env is implemented in two parts. In ``_dynfunc.c``, the Env is defined -as ``EnvironmentObject`` as a Python C-extension type. In ``lowering.py``, -the `EnvironmentObject`` (exported as ``_dynfunc.Environment``) is extended -to support necessary operations needed at lowering. - - -Serialization -------------- - -The Env supports being pickled. Compilation cache files and ahead-of-time -compiled modules serialize all the used Envs for recreation at the runtime. - -Usage ------ - -At the start of the lowering for a function or a generator, an Env is created. -Throughout the compilation, the Env is mutated to attach additional -information. The compiled code references an Env via a global variable in -the emitted LLVM IR. The global variable is zero-initialized with "common" -linkage, which is the default linkage for C global values. The use of this -linkage allows multiple definitions of the global variable to be merged into -a single definition when the modules are linked together. The name of the -global variable is computed from the name of the function -(see ``FunctionDescriptor.env_name`` and ``.get_env_name()`` of the target -context). - -The Env is initialized when the compiled-function is loaded. The JIT engine -finds the address of the associated global variable for the Env and stores the -address of the Env into it. For cached functions, the same process applies. -For ahead-of-time compiled functions, the module initializer in the generated -library is responsible for initializing the global variables of all the Envs -in the module. diff --git a/numba/docs/source/developer/generators.rst b/numba/docs/source/developer/generators.rst deleted file mode 100644 index dd15950d3..000000000 --- a/numba/docs/source/developer/generators.rst +++ /dev/null @@ -1,307 +0,0 @@ - -.. 
_arch-generators: - -=================== -Notes on generators -=================== - -Numba recently gained support for compiling generator functions. This -document explains some of the implementation choices. - - -Terminology -=========== - -For clarity, we distinguish between *generator functions* and -*generators*. A generator function is a function containing one or -several ``yield`` statements. A generator (sometimes also called "generator -iterator") is the return value of a generator function; it resumes -execution inside its frame each time :py:func:`next` is called. - -A *yield point* is the place where a ``yield`` statement is called. -A *resumption point* is the place just after a *yield point* where execution -is resumed when :py:func:`next` is called again. - - -Function analysis -================= - -Suppose we have the following simple generator function:: - - def gen(x, y): - yield x + y - yield x - y - -Here is its CPython bytecode, as printed out using :py:func:`dis.dis`:: - - 7 0 LOAD_FAST 0 (x) - 3 LOAD_FAST 1 (y) - 6 BINARY_ADD - 7 YIELD_VALUE - 8 POP_TOP - - 8 9 LOAD_FAST 0 (x) - 12 LOAD_FAST 1 (y) - 15 BINARY_SUBTRACT - 16 YIELD_VALUE - 17 POP_TOP - 18 LOAD_CONST 0 (None) - 21 RETURN_VALUE - -When compiling this function with :envvar:`NUMBA_DUMP_IR` set to 1, the -following information is printed out:: - - ----------------------------------IR DUMP: gen---------------------------------- - label 0: - x = arg(0, name=x) ['x'] - y = arg(1, name=y) ['y'] - $0.3 = x + y ['$0.3', 'x', 'y'] - $0.4 = yield $0.3 ['$0.3', '$0.4'] - del $0.4 [] - del $0.3 [] - $0.7 = x - y ['$0.7', 'x', 'y'] - del y [] - del x [] - $0.8 = yield $0.7 ['$0.7', '$0.8'] - del $0.8 [] - del $0.7 [] - $const0.9 = const(NoneType, None) ['$const0.9'] - $0.10 = cast(value=$const0.9) ['$0.10', '$const0.9'] - del $const0.9 [] - return $0.10 ['$0.10'] - ------------------------------GENERATOR INFO: gen------------------------------- - generator state variables: ['$0.3', '$0.7', 
'x', 'y'] - yield point #1: live variables = ['x', 'y'], weak live variables = ['$0.3'] - yield point #2: live variables = [], weak live variables = ['$0.7'] - - -What does it mean? The first part is the Numba IR, as already seen in -:ref:`arch_generate_numba_ir`. We can see the two yield points (``yield $0.3`` -and ``yield $0.7``). - -The second part shows generator-specific information. To understand it -we have to understand what suspending and resuming a generator means. - -When suspending a generator, we are not merely returning a value to the -caller (the operand of the ``yield`` statement). We also have to save the -generator's *current state* in order to resume execution. In trivial use -cases, perhaps the CPU's register values or stack slots would be preserved -until the next call to next(). However, any non-trivial case will hopelessly -clobber those values, so we have to save them in a well-defined place. - -What are the values we need to save? Well, in the context of the Numba -Intermediate Representation, we must save all *live variables* at each -yield point. These live variables are computed thanks to the control -flow graph. - -Once live variables are saved and the generator is suspended, resuming -the generator simply involves the inverse operation: the live variables -are restored from the saved generator state. - -.. note:: - It is the same analysis which helps insert Numba ``del`` instructions - where appropriate. - -Let's go over the generator info again:: - - generator state variables: ['$0.3', '$0.7', 'x', 'y'] - yield point #1: live variables = ['x', 'y'], weak live variables = ['$0.3'] - yield point #2: live variables = [], weak live variables = ['$0.7'] - -Numba has computed the union of all live variables (denoted as "state -variables"). This will help define the layout of the :ref:`generator -structure `. 
Also, for each yield point, we have -computed two sets of variables: - -* the *live variables* are the variables which are used by code following - the resumption point (i.e. after the ``yield`` statement) - -* the *weak live variables* are variables which are del'ed immediately - after the resumption point; they have to be saved in :term:`object mode`, - to ensure proper reference cleanup - - -.. _generator-structure: - -The generator structure -======================= - -Layout ------- - -Function analysis helps us gather enough information to define the -layout of the generator structure, which will store the entire execution -state of a generator. Here is a sketch of the generator structure's layout, -in pseudo-code:: - - struct gen_struct_t { - int32_t resume_index; - struct gen_args_t { - arg_0_t arg0; - arg_1_t arg1; - ... - arg_N_t argN; - } - struct gen_state_t { - state_0_t state_var0; - state_1_t state_var1; - ... - state_N_t state_varN; - } - } - -Let's describe those fields in order. - -* The first member, the *resume index*, is an integer telling the generator - at which resumption point execution must resume. By convention, it can - have two special values: 0 means execution must start at the beginning of - the generator (i.e. the first time :py:func:`next` is called); -1 means - the generator is exhauted and resumption must immediately raise StopIteration. - Other values indicate the yield point's index starting from 1 - (corresponding to the indices shown in the generator info above). - -* The second member, the *arguments structure* is read-only after it is first - initialized. It stores the values of the arguments the generator function - was called with. In our example, these are the values of ``x`` and ``y``. - -* The third member, the *state structure*, stores the live variables as - computed above. 
- -Concretely, our example's generator structure (assuming the generator -function is called with floating-point numbers) is then:: - - struct gen_struct_t { - int32_t resume_index; - struct gen_args_t { - double arg0; - double arg1; - } - struct gen_state_t { - double $0.3; - double $0.7; - double x; - double y; - } - } - -Note that here, saving ``x`` and ``y`` is redundant: Numba isn't able to -recognize that the state variables ``x`` and ``y`` have the same value -as ``arg0`` and ``arg1``. - -Allocation ----------- - -How does Numba ensure the generator structure is preserved long enough? -There are two cases: - -* When a Numba-compiled generator function is called from a Numba-compiled - function, the structure is allocated on the stack by the callee. In this - case, generator instantiation is practically costless. - -* When a Numba-compiled generator function is called from regular Python - code, a CPython-compatible wrapper is instantiated that has the right - amount of allocated space to store the structure, and whose - :c:member:`~PyTypeObject.tp_iternext` slot is a wrapper around the - generator's native code. - - -Compiling to native code -======================== - -When compiling a generator function, three native functions are actually -generated by Numba: - -* An initialization function. This is the function corresponding - to the generator function itself: it receives the function arguments and - stores them inside the generator structure (which is passed by pointer). - It also initialized the *resume index* to 0, indicating that the generator - hasn't started yet. - -* A next() function. This is the function called to resume execution - inside the generator. Its single argument is a pointer to the generator - structure and it returns the next yielded value (or a special exit code - is used if the generator is exhausted, for quick checking when called - from Numba-compiled functions). - -* An optional finalizer. 
In object mode, this function ensures that all - live variables stored in the generator state are decref'ed, even if the - generator is destroyed without having been exhausted. - -The next() function -------------------- - -The next() function is the least straight-forward of the three native -functions. It starts with a trampoline which dispatches execution to the -right resume point depending on the *resume index* stored in the generator -structure. Here is how the function start may look like in our example: - -.. code-block:: llvm - - define i32 @"__main__.gen.next"( - double* nocapture %retptr, - { i8*, i32 }** nocapture readnone %excinfo, - i8* nocapture readnone %env, - { i32, { double, double }, { double, double, double, double } }* nocapture %arg.gen) - { - entry: - %gen.resume_index = getelementptr { i32, { double, double }, { double, double, double, double } }* %arg.gen, i64 0, i32 0 - %.47 = load i32* %gen.resume_index, align 4 - switch i32 %.47, label %stop_iteration [ - i32 0, label %B0 - i32 1, label %generator_resume1 - i32 2, label %generator_resume2 - ] - - ; rest of the function snipped - -(uninteresting stuff trimmed from the LLVM IR to make it more readable) - -We recognize the pointer to the generator structure in ``%arg.gen``. -The trampoline switch has three targets (one for each *resume index* 0, 1 -and 2), and a fallback target label named ``stop_iteration``. Label ``B0`` -represents the function's start, ``generator_resume1`` (resp. -``generator_resume2``) is the resumption point after the first -(resp. second) yield point. - -After generation by LLVM, the whole native assembler code for this function -may look like this (on x86-64): - -.. 
code-block:: asm - - .globl __main__.gen.next - .align 16, 0x90 - __main__.gen.next: - movl (%rcx), %eax - cmpl $2, %eax - je .LBB1_5 - cmpl $1, %eax - jne .LBB1_2 - movsd 40(%rcx), %xmm0 - subsd 48(%rcx), %xmm0 - movl $2, (%rcx) - movsd %xmm0, (%rdi) - xorl %eax, %eax - retq - .LBB1_5: - movl $-1, (%rcx) - jmp .LBB1_6 - .LBB1_2: - testl %eax, %eax - jne .LBB1_6 - movsd 8(%rcx), %xmm0 - movsd 16(%rcx), %xmm1 - movaps %xmm0, %xmm2 - addsd %xmm1, %xmm2 - movsd %xmm1, 48(%rcx) - movsd %xmm0, 40(%rcx) - movl $1, (%rcx) - movsd %xmm2, (%rdi) - xorl %eax, %eax - retq - .LBB1_6: - movl $-3, %eax - retq - -Note the function returns 0 to indicate a value is yielded, -3 to indicate -StopIteration. ``%rcx`` points to the start of the generator structure, -where the resume index is stored. diff --git a/numba/docs/source/developer/index.rst b/numba/docs/source/developer/index.rst deleted file mode 100644 index c6a0b40e4..000000000 --- a/numba/docs/source/developer/index.rst +++ /dev/null @@ -1,20 +0,0 @@ - -.. _developer-manual: - -Developer Manual -================ - -.. toctree:: - :maxdepth: 2 - - contributing.rst - architecture.rst - dispatching.rst - generators.rst - numba-runtime.rst - rewrites.rst - live_variable_analysis.rst - listings.rst - stencil.rst - custom_pipeline.rst - environment.rst diff --git a/numba/docs/source/developer/listings.rst b/numba/docs/source/developer/listings.rst deleted file mode 100644 index 1c0b55e02..000000000 --- a/numba/docs/source/developer/listings.rst +++ /dev/null @@ -1,11 +0,0 @@ -Listings -======== - -This shows listings from compiler internal registries (e.g. lowering -definitions). The information is provided as developer reference. -When possible, links to source code are provided via github links. - -.. 
toctree:: - :maxdepth: 2 - - autogen_lower_listing.rst diff --git a/numba/docs/source/developer/live_variable_analysis.rst b/numba/docs/source/developer/live_variable_analysis.rst deleted file mode 100644 index 8c5badf54..000000000 --- a/numba/docs/source/developer/live_variable_analysis.rst +++ /dev/null @@ -1,86 +0,0 @@ -.. _live variable analysis: - -====================== -Live Variable Analysis -====================== - -(Releated issue https://github.com/numba/numba/pull/1611) - -Numba uses reference-counting for garbage collection, a technique that -requires cooperation by the compiler. The Numba IR encodes the location -where a decref must be inserted. These locations are determined by live -variable analysis. The corresponding source code is the ``_insert_var_dels()`` -method in https://github.com/numba/numba/blob/master/numba/interpreter.py. - - -In Python semantic, once a variable is defined inside a function, it is alive -until the variable is explicitly deleted or the function scope is ended. -However, Numba analyzes the code to determine the minimum bound of the lifetime -of each variable by its definition and usages during compilation. -As soon as a variable is unreachable, a ``del`` instruction is inserted at the -closest basic-block (either at the start of the next block(s) or at the -end of the current block). This means variables can be released earlier than in -regular Python code. - -The behavior of the live variable analysis affects memory usage of the compiled -code. Internally, Numba does not differentiate temporary variables and user -variables. Since each operation generates at least one temporary variable, -a function can accumulate a high number of temporary variables if they are not -released as soon as possible. -Our generator implementation can benefit from early releasing of variables, -which reduces the size of the state to suspend at each yield point. 
- - -Notes on behavior of the live variable analysis -================================================ - - -Variable deleted before definition ------------------------------------ - -(Related issue: https://github.com/numba/numba/pull/1738) - -When a variable lifetime is confined within the loop body (its definition and -usage does not escape the loop body), like: - -.. code-block:: python - - def f(arr): - # BB 0 - res = 0 - # BB 1 - for i in (0, 1): - # BB 2 - t = arr[i] - if t[i] > 1: - # BB 3 - res += t[i] - # BB 4 - return res - - -Variable ``t`` is never referenced outside of the loop. -A ``del`` instruction is emitted for ``t`` at the head of the loop (BB 1) -before a variable is defined. The reason is obvious once we know the control -flow graph:: - - +------------------------------> BB4 - | - | - BB 0 --> BB 1 --> BB 2 ---> BB 3 - ^ | | - | V V - +---------------------+ - - -Variable ``t`` is defined in BB 1. In BB 2, the evaluation of -``t[i] > 1`` uses ``t``, which is the last use if execution takes the false -branch and goto BB 1. In BB 3, ``t`` is only used in ``res += t[i]``, which is -the last use if execution takes the true branch. Because BB 3, an outgoing -branch of BB 2 uses ``t``, ``t`` must be deleted at the common predecessor. -The closest point is BB 1, which does not have ``t`` defined from the incoming -edge of BB 0. - -Alternatively, if ``t`` is deleted at BB 4, we will still have to delete the -variable before its definition because BB4 can be executed without executing -the loop body (BB 2 and BB 3), where the variable is defined. diff --git a/numba/docs/source/developer/numba-runtime.rst b/numba/docs/source/developer/numba-runtime.rst deleted file mode 100644 index 738e59e8f..000000000 --- a/numba/docs/source/developer/numba-runtime.rst +++ /dev/null @@ -1,130 +0,0 @@ -.. 
_arch-numba-runtime: - -====================== -Notes on Numba Runtime -====================== - - -The *Numba Runtime (NRT)* provides the language runtime to the *nopython mode* -Python subset. NRT is a standalone C library with a Python binding. This -allows NPM runtime feature to be used without the GIL. Currently, the only -language feature implemented in NRT is memory management. - - -Memory Management -================= - -NRT implements memory management for NPM code. It uses *atomic reference count* -for threadsafe, deterministic memory management. NRT maintains a separate -``MemInfo`` structure for storing information about each allocation. - -Cooperating with CPython ------------------------- - -For NRT to cooperate with CPython, the NRT python binding provides adaptors for -converting python objects that export a memory region. When such an -object is used as an argument to a NPM function, a new ``MemInfo`` is created -and it acquires a reference to the Python object. When a NPM value is returned -to the Python interpreter, the associated ``MemInfo`` (if any) is checked. If -the ``MemInfo`` references a Python object, the underlying Python object is -released and returned instead. Otherwise, the ``MemInfo`` is wrapped in a -Python object and returned. Additional process maybe required depending on -the type. - -The current implementation supports Numpy array and any buffer-exporting types. - - -Compiler-side Cooperation -------------------------- - -NRT reference counting requires the compiler to emit incref/decref operations -according to the usage. When the reference count drops to zero, the compiler -must call the destructor routine in NRT. - - -.. _nrt-refct-opt-pass: - -Optimizations -------------- - -The compiler is allowed to emit incref/decref operations naively. It relies -on an optimization pass that to remove the redundant reference count -operations. - -The optimization pass runs on block level to avoid control flow analysis. 
-It depends on LLVM function optimization pass to simplify the control flow, -stack-to-register, and simplify instructions. It works by matching and -removing incref and decref pairs within each block. - - -Quirks ------- - -Since the `refcount optimization pass `_ requires LLVM -function optimization pass, the pass works on the LLVM IR as text. The -optimized IR is then materialized again as a new LLVM in-memory bitcode object. - - -Debugging Leaks ---------------- - -To debug reference leaks in NRT MemInfo, each MemInfo python object has a -``.refcount`` attribute for inspection. To get the MemInfo from a ndarray -allocated by NRT, use the ``.base`` attribute. - -To debug memory leaks in NRT, the ``numba.runtime.rtsys`` defines -``.get_allocation_stats()``. It returns a namedtuple containing the -number of allocation and deallocation since the start of the program. -Checking that the allocation and deallocation counters are matching is the -simplest way to know if the NRT is leaking. - - -Debugging Leaks in C --------------------- - -The start of `numba/runtime/nrt.h `_ -has these lines: - -.. code-block:: C - - /* Debugging facilities - enabled at compile-time */ - /* #undef NDEBUG */ - #if 0 - # define NRT_Debug(X) X - #else - # define NRT_Debug(X) if (0) { X; } - #endif - -Undefining NDEBUG (uncomment the ``#undef NDEBUG`` line) enables the assertion -check in NRT. - -Enabling the NRT_Debug (replace ``#if 0`` with ``#if 1``) turns on -debug print inside NRT. - - -Recursion Support -================= - -During the compilation of a pair of mutually recursive functions, one of the -functions will contain unresolved symbol references since the compiler handles -one function at a time. The memory for the unresolved symbols is allocated and -initialized to the address of the *unresolved symbol abort* function -(``nrt_unresolved_abort``) just before the machine code is -generated by LLVM. These symbols are tracked and resolved as new functions are -compiled. 
If a bug prevents the resolution of these symbols, -the abort function will be called, raising a ``RuntimeError`` exception. - -The *unresolved symbol abort* function is defined in the NRT with a zero-argument -signature. The caller is safe to call it with arbitrary number of -arguments. Therefore, it is safe to be used inplace of the intended callee. - - -Future Plan -=========== - -The plan for NRT is to make a standalone shared library that can be linked to -Numba compiled code, including use within the Python interpreter and without -the Python interpreter. To make that work, we will be doing some refactoring: - -* numba NPM code references statically compiled code in "helperlib.c". Those - functions should be moved to NRT. diff --git a/numba/docs/source/developer/rewrites.rst b/numba/docs/source/developer/rewrites.rst deleted file mode 100644 index 33d5b96c3..000000000 --- a/numba/docs/source/developer/rewrites.rst +++ /dev/null @@ -1,397 +0,0 @@ -===================================================== -Using the Numba Rewrite Pass for Fun and Optimization -===================================================== - -Overview -======== - -This section introduces intermediate representation (IR) rewrites, and -how they can be used to implement optimizations. - -As discussed earlier in ":ref:`rewrite-typed-ir`", rewriting the Numba -IR allows us to perform optimizations that would be much more -difficult to perform at the lower LLVM level. Similar to the Numba -type and lowering subsystems, the rewrite subsystem is user -extensible. This extensibility affords Numba the possibility of -supporting a wide variety of domain-specific optimizations (DSO's). - -The remaining subsections detail the mechanics of implementing a -rewrite, registering a rewrite with the rewrite registry, and provide -examples of adding new rewrites, as well as internals of the array -expression optimization pass. 
We conclude by reviewing some use cases -exposed in the examples, as well as reviewing any points where -developers should take care. - - -Rewriting Passes -================ - -Rewriting passes have a simple :func:`~Rewrite.match` and -:func:`~Rewrite.apply` interface. The division between matching and -rewriting follows how one would define a term rewrite in a declarative -domain-specific languages (DSL's). In such DSL's, one may write a -rewrite as follows:: - - => - - -The ```` and ```` symbols represent IR term -expressions, where the left-hand side presents a pattern to match, and -the right-hand side an IR term constructor to build upon matching. -Whenever the rewrite matches an IR pattern, any free variables in the -left-hand side are bound within a custom environment. When applied, -the rewrite uses the pattern matching environment to bind any free -variables in the right-hand side. - -As Python is not commonly used in a declarative capacity, Numba uses -object state to handle the transfer of information between the -matching and application steps. - - -The :class:`Rewrite` Base Class -------------------------------- - -.. class:: Rewrite - - The :class:`Rewrite` class simply defines an abstract base class - for Numba rewrites. Developers should define rewrites as - subclasses of this base type, overloading the - :func:`~Rewrite.match` and :func:`~Rewrite.apply` methods. - - .. attribute:: pipeline - - The pipeline attribute contains the - :class:`numba.compiler.Pipeline` instance that is currently - compiling the function under consideration for rewriting. - - .. method:: __init__(self, pipeline, *args, **kws) - - The base constructor for rewrites simply stashes its arguments - into attributes of the same name. 
Unless being used in - debugging or testing, rewrites should only be constructed by - the :class:`RewriteRegistry` in the - :func:`RewriteRegistry.apply` method, and the construction - interface should remain stable (though the pipeline will - commonly contain just about everything there is to know). - - .. method:: match(self, block, typemap, callmap) - - The :func:`~Rewrite.match` method takes four arguments other - than *self*: - - * *func_ir*: This is an instance of :class:`numba.ir.FunctionIR` for the - function being rewritten. - - * *block*: This is an instance of :class:`numba.ir.Block`. The - matching method should iterate over the instructions contained - in the :attr:`numba.ir.Block.body` member. - - * *typemap*: This is a Python :class:`dict` instance mapping - from symbol names in the IR, represented as strings, to Numba - types. - - * *callmap*: This is another :class:`dict` instance mapping from - calls, represented as :class:`numba.ir.Expr` instances, to - their corresponding call site type signatures, represented as - a :class:`numba.typing.templates.Signature` instance. - - The :func:`~Rewrite.match` method should return a :class:`bool` - result. A :obj:`True` result should indicate that one or more - matches were found, and the :func:`~Rewrite.apply` method will - return a new replacement :class:`numba.ir.Block` instance. A - :obj:`False` result should indicate that no matches were found, and - subsequent calls to :func:`~Rewrite.apply` will return undefined - or invalid results. - - .. method:: apply(self) - - The :func:`~Rewrite.apply` method should only be invoked - following a successful call to :func:`~Rewrite.match`. This - method takes no additional parameters other than *self*, and - should return a replacement :class:`numba.ir.Block` instance. - - As mentioned above, the behavior of calling - :func:`~Rewrite.apply` is undefined unless - :func:`~Rewrite.match` has already been called and returned - :obj:`True`. 
- - -Subclassing :class:`Rewrite` ----------------------------- - -Before going into the expectations for the overloaded methods any -:class:`Rewrite` subclass must have, let's step back a minute to -review what is taking place here. By providing an extensible -compiler, Numba opens itself to user-defined code generators which may -be incomplete, or worse, incorrect. When a code generator goes awry, -it can cause abnormal program behavior or early termination. -User-defined rewrites add a new level of complexity because they must -not only generate correct code, but the code they generate should -ensure that the compiler does not get stuck in a match/apply loop. -Non-termination by the compiler will directly lead to non-termination -of user function calls. - -There are several ways to help ensure that a rewrite terminates: - -* *Typing*: A rewrite should generally attempt to decompose composite - types, and avoid composing new types. If the rewrite is matching a - specific type, changing expression types to a lower-level type will - ensure they will no long match after the rewrite is applied. - -* *Special instructions*: A rewrite may synthesize custom operators or - use special functions in the target IR. This technique again - generates code that is no longer within the domain of the original - match, and the rewrite will terminate. - -In the ":ref:`case-study-array-expressions`" subsection, below, we'll -see how the array expression rewriter uses both of these techniques. - - -Overloading :func:`Rewrite.match` ---------------------------------- - -Every rewrite developer should seek to have their implementation of -:func:`~Rewrite.match` return a :obj:`False` value as quickly as -possible. Numba is a just-in-time compiler, and adding compilation -time ultimately adds to the user's run time. 
When a rewrite returns -:obj:`False` for a given block, the registry will no longer process that -block with that rewrite, and the compiler is that much closer to -proceeding to lowering. - -This need for timeliness has to be balanced against collecting the -necessary information to make a match for a rewrite. Rewrite -developers should be comfortable adding dynamic attributes to their -subclasses, and then having these new attributes guide construction of -the replacement basic block. - - -Overloading :func:`Rewrite.apply` ------------------------------------ - -The :func:`~Rewrite.apply` method should return a replacement -:class:`numba.ir.Block` instance to replace the basic block that -contained a match for the rewrite. As mentioned above, the IR built -by :func:`~Rewrite.apply` methods should preserve the semantics of the -user's code, but also seek to avoid generating another match for the -same rewrite or set of rewrites. - - -The Rewrite Registry -==================== - -When you want to include a rewrite in the rewrite pass, you should -register it with the rewrite registry. The :mod:`numba.rewrites` -module provides both the abstract base class and a class decorator for -hooking into the Numba rewrite subsystem. The following illustrates a -stub definition of a new rewrite:: - - from numba import rewrites - - @rewrites.register_rewrite - class MyRewrite(rewrites.Rewrite): - - def match(self, block, typemap, calltypes): - raise NotImplementedError("FIXME") - - def apply(self): - raise NotImplementedError("FIXME") - - -Developers should note that using the class decorator as shown above -will register a rewrite at import time. It is the developer's -responsibility to ensure their extensions are loaded before -compilation starts. - - -.. _`case-study-array-expressions`: - -Case study: Array Expressions -============================= - -This subsection looks at the array expression rewriter in more depth. 
-The array expression rewriter, and most of its support functionality, -are found in the :mod:`numba.npyufunc.array_exprs` module. The -rewriting pass itself is implemented in the :class:`RewriteArrayExprs` -class. In addition to the rewriter, the -:mod:`~numba.npyufunc.array_exprs` module includes a function for -lowering array expressions, -:func:`~numba.npyufunc.array_exprs._lower_array_expr`. The overall -optimization process is as follows: - -* :func:`RewriteArrayExprs.match`: The rewrite pass looks for two or - more array operations that form an array expression. - -* :func:`RewriteArrayExprs.apply`: Once an array expression is found, - the rewriter replaces the individual array operations with a new - kind of IR expression, the ``arrayexpr``. - -* :func:`numba.npyufunc.array_exprs._lower_array_expr`: During - lowering, the code generator calls - :func:`~numba.npyufunc.array_exprs._lower_array_expr` whenever it - finds an ``arrayexpr`` IR expression. - -More details on each step of the optimization are given below. - - -The :func:`RewriteArrayExprs.match` method ------------------------------------------- - -The array expression optimization pass starts by looking for array -operations, including calls to supported :class:`~numpy.ufunc`\'s and -user-defined :class:`~numba.DUFunc`\'s. Numba IR follows the -conventions of a static single assignment (SSA) language, meaning that -the search for array operators begins with looking for assignment -instructions. - -When the rewriting pass calls the :func:`RewriteArrayExprs.match` -method, it first checks to see if it can trivially reject the basic -block. If the method determines the block to be a candidate for -matching, it sets up the following state variables in the rewrite -object: - -* *crnt_block*: The current basic block being matched. - -* *typemap*: The *typemap* for the function being matched. - -* *matches*: A list of variable names that reference array expressions. 
- -* *array_assigns*: A map from assignment variable names to the actual - assignment instructions that define the given variable. - -* *const_assigns*: A map from assignment variable names to the - constant valued expression that defines the constant variable. - -At this point, the match method iterates iterates over the assignment -instructions in the input basic block. For each assignment -instruction, the matcher looks for one of two things: - -* Array operations: If the right-hand side of the assignment - instruction is an expression, and the result of that expression is - an array type, the matcher checks to see if the expression is either - a known array operation, or a call to a universal function. If an - array operator is found, the matcher stores the left-hand variable - name and the whole instruction in the *array_assigns* member. - Finally, the matcher tests to see if any operands of the array - operation have also been identified as targets of other array - operations. If one or more operands are also targets of array - operations, then the matcher will also append the left-hand side - variable name to the *matches* member. - -* Constants: Constants (even scalars) can be operands to array - operations. Without worrying about the constant being apart of an - array expression, the matcher stores constant names and values in - the *const_assigns* member. - -The end of the matching method simply checks for a non-empty *matches* -list, returning :obj:`True` if there were one or more matches, and -:obj:`False` when *matches* is empty. - - -The :func:`RewriteArrayExprs.apply` method ------------------------------------------- - -When one or matching array expressions are found by -:func:`RewriteArrayExprs.match`, the rewriting pass will call -:func:`RewriteArrayExprs.apply`. The apply method works in two -passes. 
The first pass iterates over the matches found, and builds a -map from instructions in the old basic block to new instructions in -the new basic block. The second pass iterates over the instructions -in the old basic block, copying instructions that are not changed by -the rewrite, and replacing or deleting instructions that were -identified by the first pass. - -The :func:`RewriteArrayExprs._handle_matches` implements the first -pass of the code generation portion of the rewrite. For each match, -this method builds a special IR expression that contains an expression -tree for the array expression. To compute the leaves of the -expression tree, the :func:`~RewriteArrayExprs._handle_matches` method -iterates over the operands of the identified root operation. If the -operand is another array operation, it is translated into an -expression sub-tree. If the operand is a constant, -:func:`~RewriteArrayExprs._handle_matches` copies the constant value. -Otherwise, the operand is marked as being used by an array expression. -As the method builds array expression nodes, it builds a map from old -instructions to new instructions (*replace_map*), as well as sets of -variables that may have moved (*used_vars*), and variables that should -be removed altogether (*dead_vars*). These three data structures are -returned back to the calling :func:`RewriteArrayExprs.apply` method. - -The remaining part of the :func:`RewriteArrayExprs.apply` method -iterates over the instructions in the old basic block. For each -instruction, this method either replaces, deletes, or duplicates that -instruction based on the results of -:func:`RewriteArrayExprs._handle_matches`. The following list -describes how the optimization handles individual instructions: - -* When an instruction is an assignment, - :func:`~RewriteArrayExprs.apply` checks to see if it is in the - replacement instruction map. 
When an assignment instruction is found - in the instruction map, :func:`~RewriteArrayExprs.apply` must then - check to see if the replacement instruction is also in the replacement - map. The optimizer continues this check until it either arrives at a - :obj:`None` value or an instruction that isn't in the replacement map. - Instructions that have a replacement that is :obj:`None` are deleted. - Instructions that have a non-:obj:`None` replacement are replaced. - Assignment instructions not in the replacement map are appended to the - new basic block with no changes made. - -* When the instruction is a delete instruction, the rewrite checks to - see if it deletes a variable that may still be used by a later array - expression, or if it deletes a dead variable. Delete instructions for - used variables are added to a map of deferred delete instructions that - :func:`~RewriteArrayExprs.apply` uses to move them past any uses of - that variable. The loop copies delete instructions for non-dead - variables, and ignores delete instructions for dead variables - (effectively removing them from the basic block). - -* All other instructions are appended to the new basic block. - -Finally, the :func:`~RewriteArrayExprs.apply` method returns the new -basic block for lowering. - - -The :func:`~numba.npyufunc.array_exprs._lower_array_expr` function ------------------------------------------------------------------- - -If we left things at just the rewrite, then the lowering stage of the -compiler would fail, complaining it doesn't know how to lower -``arrayexpr`` operations. We start by hooking a lowering function -into the target context whenever the :class:`RewriteArrayExprs` class -is instantiated by the compiler. This hook causes the lowering pass to -call :func:`~numba.npyufunc.array_exprs._lower_array_expr` whenever it -encounters an ``arrayexr`` operator. 
- -This function has two steps: - -* Synthesize a Python function that implements the array expression: - This new Python function essentially behaves like a Numpy - :class:`~numpy.ufunc`, returning the result of the expression on - scalar values in the broadcasted array arguments. The lowering - function accomplishes this by translating from the array expression - tree into a Python AST. - -* Compile the synthetic Python function into a kernel: At this point, - the lowering function relies on existing code for lowering ufunc and - DUFunc kernels, calling - :func:`numba.targets.numpyimpl.numpy_ufunc_kernel` after defining - how to lower calls to the synthetic function. - -The end result is similar to loop lifting in Numba's object mode. - - -Conclusions and Caveats -======================= - -We have seen how to implement rewrites in Numba, starting with the -interface, and ending with an actual optimization. The key points of -this section are: - -* When writing a good plug-in, the matcher should try to get a - go/no-go result as soon as possible. - -* The rewrite application portion can be more computationally - expensive, but should still generate code that won't cause infinite - loops in the compiler. - -* We use object state to communicate any results of matching to the - rewrite application pass. diff --git a/numba/docs/source/developer/stencil.rst b/numba/docs/source/developer/stencil.rst deleted file mode 100644 index f27447a7f..000000000 --- a/numba/docs/source/developer/stencil.rst +++ /dev/null @@ -1,170 +0,0 @@ -.. Copyright (c) 2017 Intel Corporation - SPDX-License-Identifier: BSD-2-Clause - -.. _arch-stencil: - -================= -Notes on stencils -================= - -Numba provides the :ref:`@stencil decorator ` to -represent stencil computations. This document explains how this -feature is implemented in the several different modes available in -Numba. 
Currently, calls to the stencil from non-jitted code are
-supported as well as calls from jitted code, either with or without
-the :ref:`parallel=True ` option.
-
-The stencil decorator
-=====================
-
-The stencil decorator itself just returns a ``StencilFunc`` object.
-This object encapsulates the original stencil kernel function
-as specified in the program and the options passed to the
-stencil decorator. Also of note is that after the first compilation
-of the stencil, the computed neighborhood of the stencil is
-stored in the ``StencilFunc`` object in the ``neighborhood`` attribute.
-
-Handling the three modes
-========================
-
-As mentioned above, Numba supports the calling of stencils
-from inside or outside a ``@jit`` compiled function, with or
-without the :ref:`parallel=True ` option.
-
-Outside jit context
--------------------
-
-``StencilFunc`` overrides the ``__call__`` method so that calls
-to ``StencilFunc`` objects execute the stencil::
-
-    def __call__(self, *args, **kwargs):
-        result = kwargs.get('out')
-
-        new_stencil_func = self._stencil_wrapper(result, None, *args)
-
-        if result is None:
-            return new_stencil_func.entry_point(*args)
-        else:
-            return new_stencil_func.entry_point(*args, result)
-
-First, the presence of the optional :ref:`out `
-parameter is checked. If it is present then the output array is
-stored in ``result``. Then, the call to ``_stencil_wrapper``
-generates the stencil function given the result and argument types
-and finally the generated stencil function is executed and its result
-returned.
-
-Jit without ``parallel=True``
------------------------------
-
-When constructed, a ``StencilFunc`` inserts itself into the typing
-context's set of user functions and provides the ``_type_me``
-callback. In this way, the standard Numba compiler is able to
-determine the output type and signature of a ``StencilFunc``. 
-Each ``StencilFunc`` maintains a cache of previously seen combinations -of input argument types and keyword types. If previously seen, -the ``StencilFunc`` returns the computed signature. If not previously -computed, the ``StencilFunc`` computes the return type of the stencil -by running the Numba compiler frontend on the stencil kernel and -then performing type inference on the :term:`Numba IR` (IR) to get the scalar -return type of the kernel. From that, a Numpy array type is constructed -whose element type matches that scalar return type. - -After computing the signature of the stencil for a previously -unseen combination of input and keyword types, the ``StencilFunc`` -then :ref:`creates the stencil function ` itself. -``StencilFunc`` then installs the new stencil function's definition -in the target context so that jitted code is able to call it. - -Thus, in this mode, the generated stencil function is a stand-alone -function called like a normal function from within jitted code. - -Jit with ``parallel=True`` --------------------------- - -When calling a ``StencilFunc`` from a jitted context with ``parallel=True``, -a separate stencil function as generated by :ref:`arch-stencil-create-function` -is not used. Instead, `parfors` (:ref:`parallel-accelerator`) are -created within the current function that implements the stencil. -This code again starts with the stencil kernel and does a similar kernel -size computation but then rather than standard Python looping syntax, -corresponding `parfors` are created so that the execution of the stencil -will take place in parallel. - -The stencil to `parfor` translations can also be selectively disabled -by setting ``parallel={'stencil': False}``, among other sub-options -described in :ref:`parallel-accelerator`. - -.. 
_arch-stencil-create-function:
-
-Creating the stencil function
-=============================
-
-Conceptually, a stencil function is created from the user-specified
-stencil kernel by adding looping code around the kernel, transforming
-the relative kernel indices into absolute array indices based on the
-loop indices, and replacing the kernel's ``return`` statement with
-a statement to assign the computed value into the output array.
-
-To accomplish this transformation, first, a copy of the stencil
-kernel IR is created so that subsequent modifications of the IR
-for different stencil signatures will not affect each other.
-
-Then, an approach similar to how GUFunc's are created for `parfors`
-is employed. In a text buffer, a Python function is created with
-a unique name. The input array parameter is added to the function
-definition and if the ``out`` argument type is present then an
-``out`` parameter is added to the stencil function definition.
-If the ``out`` argument is not present then first an output array
-is created with ``numpy.zeros`` having the same shape as the
-input array.
-
-The kernel is then analyzed to compute the stencil size and the
-shape of the boundary (or the ``neighborhood`` stencil decorator
-argument is used for this purpose if present).
-Then, one ``for`` loop for each dimension of the input array is
-added to the stencil function definition. The range of each
-loop is controlled by the stencil kernel size previously computed
-so that the boundary of the output image is not modified but instead
-left as is. The body of the innermost ``for`` loop is a single
-``sentinel`` statement that is easily recognized in the IR.
-A call to ``exec`` with the text buffer is used to force the
-stencil function into existence and an ``eval`` is used to get
-access to the corresponding function on which ``run_frontend`` is
-used to get the stencil function IR. 
- -Various renaming and relabeling is performed on the stencil function -IR and the kernel IR so that the two can be combined without conflict. -The relative indices in the kernel IR (i.e., ``getitem`` calls) are -replaced with expressions where the corresponding loop index variables -are added to the relative indices. The ``return`` statement in the -kernel IR is replaced with a ``setitem`` for the corresponding element -in the output array. -The stencil function IR is then scanned for the sentinel and the -sentinel replaced with the modified kernel IR. - -Next, ``compile_ir`` is used to compile the combined stencil function -IR. The resulting compile result is cached in the ``StencilFunc`` so that -other calls to the same stencil do not need to undertake this process -again. - -Exceptions raised -================= - -Various checks are performed during stencil compilation to make sure -that user-specified options do not conflict with each other or with -other runtime parameters. For example, if the user has manually -specified a ``neighborhood`` to the stencil decorator, the length of -that neighborhood must match the dimensionality of the input array. -If this is not the case, a ``ValueError`` is raised. - -If the neighborhood has not been specified then it must be inferred -and a requirement to infer the kernel is that all indices are constant -integers. If they are not, a ``ValueError`` is raised indicating that -kernel indices may not be non-constant. - -Finally, the stencil implementation detects the output array type -by running Numba type inference on the stencil kernel. If the -return type of this kernel does not match the type of the value -passed to the ``cval`` stencil decorator option then a ``ValueError`` -is raised. 
diff --git a/numba/docs/source/extending/high-level.rst b/numba/docs/source/extending/high-level.rst deleted file mode 100644 index a39294c73..000000000 --- a/numba/docs/source/extending/high-level.rst +++ /dev/null @@ -1,111 +0,0 @@ - -.. _high-level-extending: - -High-level extension API -======================== - -This extension API is exposed through the :mod:`numba.extending` module. - - -Implementing functions ----------------------- - -The ``@overload`` decorator allows you to implement arbitrary functions -for use in :term:`nopython mode` functions. The function decorated with -``@overload`` is called at compile-time with the *types* of the function's -runtime arguments. It should return a callable representing the -*implementation* of the function for the given types. The returned -implementation is compiled by Numba as if it were a normal function -decorated with ``@jit``. Additional options to ``@jit`` can be passed as -dictionary using the ``jit_options`` argument. - -For example, let's pretend Numba doesn't support the :func:`len` function -on tuples yet. Here is how to implement it using ``@overload``:: - - from numba import types - from numba.extending import overload - - @overload(len) - def tuple_len(seq): - if isinstance(seq, types.BaseTuple): - n = len(seq) - def len_impl(seq): - return n - return len_impl - - -You might wonder, what happens if :func:`len()` is called with something -else than a tuple? If a function decorated with ``@overload`` doesn't -return anything (i.e. returns None), other definitions are tried until -one succeeds. Therefore, multiple libraries may overload :func:`len()` -for different types without conflicting with each other. - -Implementing methods --------------------- - -The ``@overload_method`` decorator similarly allows implementing a -method on a type well-known to Numba. 
The following example implements -the :meth:`~numpy.ndarray.take()` method on Numpy arrays:: - - @overload_method(types.Array, 'take') - def array_take(arr, indices): - if isinstance(indices, types.Array): - def take_impl(arr, indices): - n = indices.shape[0] - res = np.empty(n, arr.dtype) - for i in range(n): - res[i] = arr[indices[i]] - return res - return take_impl - -Implementing attributes ------------------------ - -The ``@overload_attribute`` decorator allows implementing a data -attribute (or property) on a type. Only reading the attribute is -possible; writable attributes are only supported through the -:ref:`low-level API `. - -The following example implements the :attr:`~numpy.ndarray.nbytes` attribute -on Numpy arrays:: - - @overload_attribute(types.Array, 'nbytes') - def array_nbytes(arr): - def get(arr): - return arr.size * arr.itemsize - return get - -Importing Cython Functions --------------------------- - -The function ``get_cython_function_address`` obtains the address of a -C function in a Cython extension module. The address can be used to -access the C function via a :func:`ctypes.CFUNCTYPE` callback, thus -allowing use of the C function inside a Numba jitted function. For -example, suppose that you have the file ``foo.pyx``:: - - from libc.math cimport exp - - cdef api double myexp(double x): - return exp(x) - -You can access ``myexp`` from Numba in the following way:: - - import ctypes - from numba.extending import get_cython_function_address - - addr = get_cython_function_address("foo", "myexp") - functype = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_double) - myexp = functype(addr) - -The function ``myexp`` can now be used inside jitted functions, for -example:: - - @njit - def double_myexp(x): - return 2*myexp(x) - -One caveat is that if your function uses Cython's fused types, then -the function's name will be mangled. To find out the mangled name of -your function you can check the extension module's ``__pyx_capi__`` -attribute. 
diff --git a/numba/docs/source/extending/index.rst b/numba/docs/source/extending/index.rst deleted file mode 100644 index 58cd70474..000000000 --- a/numba/docs/source/extending/index.rst +++ /dev/null @@ -1,29 +0,0 @@ - -Extending Numba -=============== - -.. module:: numba.extending - -This chapter describes how to extend Numba to make it recognize and support -additional operations, functions or types. Numba provides two categories -of APIs to this end: - -* The high-level APIs provide abstracted entry points which are sufficient - for simple uses. They require little knowledge of Numba's internal - compilation chain. - -* The low-level APIs reflect Numba's internal compilation chain and allow - flexible interaction with its various layers, but require more effort - and experience with Numba internals. - -It may be helpful for readers of this chapter to also read some of the -documents in the :doc:`developer manual <../developer/index>`, especially -the :doc:`architecture document <../developer/architecture>`. - - -.. toctree:: - - high-level.rst - low-level.rst - interval-example.rst - diff --git a/numba/docs/source/extending/interval-example.rst b/numba/docs/source/extending/interval-example.rst deleted file mode 100644 index 00b0cab0c..000000000 --- a/numba/docs/source/extending/interval-example.rst +++ /dev/null @@ -1,326 +0,0 @@ - -Example: an interval type -========================= - -We will extend the Numba frontend to support a class that it does not -currently support so as to allow: - -* Passing an instance of the class to a Numba function -* Accessing attributes of the class in a Numba function -* Constructing and returning a new instance of the class from a Numba function - -(all the above in :term:`nopython mode`) - -We will mix APIs from the :ref:`high-level extension API ` -and the :ref:`low-level extension API `, depending on what is -available for a given task. 
- -The starting point for our example is the following pure Python class:: - - class Interval(object): - """ - A half-open interval on the real number line. - """ - def __init__(self, lo, hi): - self.lo = lo - self.hi = hi - - def __repr__(self): - return 'Interval(%f, %f)' % (self.lo, self.hi) - - @property - def width(self): - return self.hi - self.lo - - -Extending the typing layer -"""""""""""""""""""""""""" - -Creating a new Numba type -------------------------- - -As the ``Interval`` class is not known to Numba, we must create a new Numba -type to represent instances of it. Numba does not deal with Python types -directly: it has its own type system that allows a different level of -granularity as well as various meta-information not available with regular -Python types. - -We first create a type class ``IntervalType`` and, since we don't need the -type to be parametric, we instantiate a single type instance ``interval_type``:: - - from numba import types - - class IntervalType(types.Type): - def __init__(self): - super(IntervalType, self).__init__(name='Interval') - - interval_type = IntervalType() - - -Type inference for Python values --------------------------------- - -In itself, creating a Numba type doesn't do anything. We must teach Numba -how to infer some Python values as instances of that type. In this example, -it is trivial: any instance of the ``Interval`` class should be treated as -belonging to the type ``interval_type``:: - - from numba.extending import typeof_impl - - @typeof_impl.register(Interval) - def typeof_index(val, c): - return interval_type - -Function arguments and global values will thusly be recognized as belonging -to ``interval_type`` whenever they are instances of ``Interval``. - - -Type inference for operations ------------------------------ - -We want to be able to construct interval objects from Numba functions, so -we must teach Numba to recognize the two-argument ``Interval(lo, hi)`` -constructor. 
The arguments should be floating-point numbers::
-
-    from numba.extending import type_callable
-
-    @type_callable(Interval)
-    def type_interval(context):
-        def typer(lo, hi):
-            if isinstance(lo, types.Float) and isinstance(hi, types.Float):
-                return interval_type
-        return typer
-
-
-The :func:`type_callable` decorator specifies that the decorated function
-should be invoked when running type inference for the given callable object
-(here the ``Interval`` class itself). The decorated function must simply
-return a typer function that will be called with the argument types. The
-reason for this seemingly convoluted setup is for the typer function to have
-*exactly* the same signature as the typed callable. This allows handling
-keyword arguments correctly.
-
-The *context* argument received by the decorated function is useful in
-more sophisticated cases where computing the callable's return type
-requires resolving other types.
-
-
-Extending the lowering layer
-""""""""""""""""""""""""""""
-
-We have finished teaching Numba about our type inference additions.
-We must now teach Numba how to actually generate code and data for
-the new operations.
-
-
-Defining the data model for native intervals
---------------------------------------------
-
-As a general rule, :term:`nopython mode` does not work on Python objects
-as they are generated by the CPython interpreter. The representations
-used by the interpreter are far too inefficient for fast native code.
-Each type supported in :term:`nopython mode` therefore has to define
-a tailored native representation, also called a *data model*.
-
-A common case of data model is an immutable struct-like data model, that
-is akin to a C ``struct``. 
Our interval datatype conveniently falls in -that category, and here is a possible data model for it:: - - from numba.extending import models, register_model - - @register_model(IntervalType) - class IntervalModel(models.StructModel): - def __init__(self, dmm, fe_type): - members = [ - ('lo', types.float64), - ('hi', types.float64), - ] - models.StructModel.__init__(self, dmm, fe_type, members) - - -This instructs Numba that values of type ``IntervalType`` (or any instance -thereof) are represented as a structure of two fields ``lo`` and ``hi``, -each of them a double-precision floating-point number (``types.float64``). - -.. note:: - Mutable types need more sophisticated data models to be able to - persist their values after modification. They typically cannot be - stored and passed on the stack or in registers like immutable types do. - - -Exposing data model attributes ------------------------------- - -We want the data model attributes ``lo`` and ``hi`` to be exposed under -the same names for use in Numba functions. Numba provides a convenience -function to do exactly that:: - - from numba.extending import make_attribute_wrapper - - make_attribute_wrapper(IntervalType, 'lo', 'lo') - make_attribute_wrapper(IntervalType, 'hi', 'hi') - -This will expose the attributes in read-only mode. As mentioned above, -writable attributes don't fit in this model. - - -Exposing a property -------------------- - -As the ``width`` property is computed rather than stored in the structure, -we cannot simply expose it like we did for ``lo`` and ``hi``. We have to -re-implement it explicitly:: - - from numba.extending import overload_attribute - - @overload_attribute(IntervalType, "width") - def get_width(interval): - def getter(interval): - return interval.hi - interval.lo - return getter - -You might ask why we didn't need to expose a type inference hook for this -attribute? 
The answer is that ``@overload_attribute`` is part of the -high-level API: it combines type inference and code generation in a -single API. - - -Implementing the constructor ----------------------------- - -Now we want to implement the two-argument ``Interval`` constructor:: - - from numba.extending import lower_builtin - from numba import cgutils - - @lower_builtin(Interval, types.Float, types.Float) - def impl_interval(context, builder, sig, args): - typ = sig.return_type - lo, hi = args - interval = cgutils.create_struct_proxy(typ)(context, builder) - interval.lo = lo - interval.hi = hi - return interval._getvalue() - - -There is a bit more going on here. ``@lower_builtin`` decorates the -implementation of the given callable or operation (here the ``Interval`` -constructor) for some specific argument types. This allows defining -type-specific implementations of a given operation, which is important -for heavily overloaded functions such as :func:`len`. - -``types.Float`` is the class of all floating-point types (``types.float64`` -is an instance of ``types.Float``). It is generally more future-proof -to match argument types on their class rather than on specific instances -(however, when *returning* a type -- chiefly during the type inference -phase --, you must usually return a type instance). - -``cgutils.create_struct_proxy()`` and ``interval._getvalue()`` are a bit -of boilerplate due to how Numba passes values around. Values are passed -as instances of :class:`llvmlite.ir.Value`, which can be too limited: -LLVM structure values especially are quite low-level. A struct proxy -is a temporary wrapper around a LLVM structure value allowing to easily -get or set members of the structure. The ``_getvalue()`` call simply -gets the LLVM value out of the wrapper. - - -Boxing and unboxing -------------------- - -If you try to use an ``Interval`` instance at this point, you'll certainly -get the error *"cannot convert Interval to native value"*. 
This is because -Numba doesn't yet know how to make a native interval value from a Python -``Interval`` instance. Let's teach it how to do it:: - - from numba.extending import unbox, NativeValue - - @unbox(IntervalType) - def unbox_interval(typ, obj, c): - """ - Convert a Interval object to a native interval structure. - """ - lo_obj = c.pyapi.object_getattr_string(obj, "lo") - hi_obj = c.pyapi.object_getattr_string(obj, "hi") - interval = cgutils.create_struct_proxy(typ)(c.context, c.builder) - interval.lo = c.pyapi.float_as_double(lo_obj) - interval.hi = c.pyapi.float_as_double(hi_obj) - c.pyapi.decref(lo_obj) - c.pyapi.decref(hi_obj) - is_error = cgutils.is_not_null(c.builder, c.pyapi.err_occurred()) - return NativeValue(interval._getvalue(), is_error=is_error) - -*Unbox* is the other name for "convert a Python object to a native value" -(it fits the idea of a Python object as a sophisticated box containing -a simple native value). The function returns a ``NativeValue`` object -which gives its caller access to the computed native value, the error bit -and possibly other information. - -The snippet above makes abundant use of the ``c.pyapi`` object, which -gives access to a subset of the -`Python interpreter's C API `_. -Note the use of ``c.pyapi.err_occurred()`` to detect any errors that -may have happened when unboxing the object (try passing ``Interval('a', 'b')`` -for example). - -We also want to do the reverse operation, called *boxing*, so as to return -interval values from Numba functions:: - - from numba.extending import box - - @box(IntervalType) - def box_interval(typ, val, c): - """ - Convert a native interval structure to an Interval object. 
- """ - interval = cgutils.create_struct_proxy(typ)(c.context, c.builder, value=val) - lo_obj = c.pyapi.float_from_double(interval.lo) - hi_obj = c.pyapi.float_from_double(interval.hi) - class_obj = c.pyapi.unserialize(c.pyapi.serialize_object(Interval)) - res = c.pyapi.call_function_objargs(class_obj, (lo_obj, hi_obj)) - c.pyapi.decref(lo_obj) - c.pyapi.decref(hi_obj) - c.pyapi.decref(class_obj) - return res - - -Using it -"""""""" - -:term:`nopython mode` functions are now able to make use of Interval objects -and the various operations you have defined on them. You can try for -example the following functions:: - - from numba import jit - - @jit(nopython=True) - def inside_interval(interval, x): - return interval.lo <= x < interval.hi - - @jit(nopython=True) - def interval_width(interval): - return interval.width - - @jit(nopython=True) - def sum_intervals(i, j): - return Interval(i.lo + j.lo, i.hi + j.hi) - - -Conclusion -"""""""""" - -We have shown how to do the following tasks: - -* Define a new Numba type class by subclassing the ``Type`` class -* Define a singleton Numba type instance for a non-parametric type -* Teach Numba how to infer the Numba type of Python values of a certain class, - using ``typeof_impl.register`` -* Define the data model for a Numba type using ``StructModel`` - and ``register_model`` -* Implementing a boxing function for a Numba type using the ``@box`` decorator -* Implementing an unboxing function for a Numba type using the ``@unbox`` decorator - and the ``NativeValue`` class -* Type and implement a callable using the ``@type_callable`` and - ``@lower_builtin`` decorators -* Expose a read-only structure attribute using the ``make_attribute_wrapper`` - convenience function -* Implement a read-only property using the ``@overload_attribute`` decorator diff --git a/numba/docs/source/extending/low-level.rst b/numba/docs/source/extending/low-level.rst deleted file mode 100644 index 07fd05062..000000000 --- 
a/numba/docs/source/extending/low-level.rst
+++ /dev/null
@@ -1,179 +0,0 @@
-
-.. _low-level-extending:
-
-Low-level extension API
-=======================
-
-This extension API is available through the :mod:`numba.extending` module.
-It allows you to hook directly into the Numba compilation chain. As such,
-it distinguishes between several compilation phases:
-
-* The :term:`typing` phase deduces the types of variables in a compiled
-  function by looking at the operations performed.
-
-* The :term:`lowering` phase converts high-level Python operations into
-  low-level LLVM code. This phase exploits the typing information derived
-  by the typing phase.
-
-* *Boxing* and *unboxing* convert Python objects into native values, and
-  vice-versa. They occur at the boundaries of calling a Numba function
-  from the Python interpreter.
-
-
-Typing
-------
-
-.. XXX the API described here can be insufficient for some use cases.
-   Should we describe the whole templates menagerie?
-
-Type inference -- or simply *typing* -- is the process of assigning
-Numba types to all values involved in a function, so as to enable
-efficient code generation. Broadly speaking, typing comes in two flavours:
-typing plain Python *values* (e.g. function arguments or global variables)
-and typing *operations* (or *functions*) on known value types.
-
-.. decorator:: typeof_impl.register(cls)
-
-   Register the decorated function as typing Python values of class *cls*.
-   The decorated function will be called with the signature ``(val, c)``
-   where *val* is the Python value being typed and *c* is a context
-   object.
-
-
-.. decorator:: type_callable(func)
-
-   Register the decorated function as typing the callable *func*.
-   *func* can be either an actual Python callable or a string denoting
-   an operation internally known to Numba (for example ``'getitem'``).
-   The decorated function is called with a single *context* argument
-   and must return a typer function. 
The typer function should have - the same signature as the function being typed, and it is called - with the Numba *types* of the function arguments; it should return - either the Numba type of the function's return value, or ``None`` - if inference failed. - - -Lowering --------- - -The following decorators all take a type specification of some kind. -A type specification is usually a type class (such as ``types.Float``) -or a specific type instance (such as ``types.float64``). Some values -have a special meaning: - -* ``types.Any`` matches any type; this allows doing your own dispatching - inside the implementation - -* ``types.VarArg()`` matches any number of arguments of the - given type; it can only appear as the last type specification when - describing a function's arguments. - -A *context* argument in the following APIs is a target context providing -various utility methods for code generation (such as creating a constant, -converting from a type to another, looking up the implementation of a -specific function, etc.). A *builder* argument is a -:class:`llvmlite.ir.IRBuilder` instance for the LLVM code being generated. - -A *signature* is an object specifying the concrete type of an operation. -The ``args`` attribute of the signature is a tuple of the argument types. -The ``return_type`` attribute of the signature is the type that the -operation should return. - -.. note:: - Numba always reasons on Numba types, but the values being passed - around during lowering are LLVM values: they don't hold the required - type information, which is why Numba types are passed explicitly too. - - LLVM has its own, very low-level type system: you can access the LLVM - type of a value by looking up its ``.type`` attribute. - - -Native operations -''''''''''''''''' - -.. decorator:: lower_builtin(func, typespec, ...) - - Register the decorated function as implementing the callable *func* - for the arguments described by the given Numba *typespecs*. 
-   As with :func:`type_callable`, *func* can be either an actual Python
-   callable or a string denoting an operation internally known to Numba
-   (for example ``'getitem'``).
-
-   The decorated function is called with four arguments
-   ``(context, builder, sig, args)``. ``sig`` is the concrete signature
-   the callable is being invoked with. ``args`` is a tuple of the values
-   of the arguments the callable is being invoked with; each value in
-   ``args`` corresponds to a type in ``sig.args``. The function
-   must return a value compatible with the type ``sig.return_type``.
-
-.. decorator:: lower_getattr(typespec, name)
-
-   Register the decorated function as implementing the attribute *name*
-   of the given *typespec*. The decorated function is called with four
-   arguments ``(context, builder, typ, value)``. *typ* is the concrete
-   type the attribute is being looked up on. *value* is the value the
-   attribute is being looked up on.
-
-.. decorator:: lower_getattr_generic(typespec)
-
-   Register the decorated function as a fallback for attribute lookup
-   on a given *typespec*. Any attribute that does not have a corresponding
-   :func:`lower_getattr` declaration will go through
-   :func:`lower_getattr_generic`. The decorated function is called with
-   five arguments ``(context, builder, typ, value, name)``. *typ*
-   and *value* are as in :func:`lower_getattr`. *name* is the name
-   of the attribute being looked up.
-
-.. decorator:: lower_cast(fromspec, tospec)
-
-   Register the decorated function as converting from types described by
-   *fromspec* to types described by *tospec*. The decorated function
-   is called with five arguments ``(context, builder, fromty, toty, value)``.
-   *fromty* and *toty* are the concrete types being converted from and to,
-   respectively. *value* is the value being converted. The function
-   must return a value compatible with the type ``toty``.
-
-
-Constants
-'''''''''
-
-.. 
decorator:: lower_constant(typespec) - - Register the decorated function as implementing the creation of - constants for the Numba *typespec*. The decorated function - is called with four arguments ``(context, builder, ty, pyval)``. - *ty* is the concrete type to create a constant for. *pyval* - is the Python value to convert into a LLVM constant. - The function must return a value compatible with the type ``ty``. - - -Boxing and unboxing -''''''''''''''''''' - -In these functions, *c* is a convenience object with several attributes: - -* its ``context`` attribute is a target context as above -* its ``builder`` attribute is a :class:`llvmlite.ir.IRBuilder` as above -* its ``pyapi`` attribute is an object giving access to a subset of the - `Python interpreter's C API `_ - -An object, as opposed to a native value, is a ``PyObject *`` pointer. -Such pointers can be produced or processed by the methods in the ``pyapi`` -object. - -.. decorator:: box(typespec) - - Register the decorated function as boxing values matching the *typespec*. - The decorated function is called with three arguments ``(typ, val, c)``. - *typ* is the concrete type being boxed. *val* is the value being - boxed. The function should return a Python object, or NULL to signal - an error. - -.. decorator:: unbox(typespec) - - Register the decorated function as unboxing values matching the *typespec*. - The decorated function is called with three arguments ``(typ, obj, c)``. - *typ* is the concrete type being unboxed. *obj* is the Python object - (a ``PyObject *`` pointer, in C terms) being unboxed. The function - should return a ``NativeValue`` object giving the unboxing result value - and an optional error bit. diff --git a/numba/docs/source/glossary.rst b/numba/docs/source/glossary.rst deleted file mode 100644 index 9fe07808e..000000000 --- a/numba/docs/source/glossary.rst +++ /dev/null @@ -1,96 +0,0 @@ - -Glossary -======== - -.. 
glossary:: - - ahead-of-time compilation - AOT compilation - AOT - Compilation of a function in a separate step before running the - program code, producing an on-disk binary object which can be distributed - independently. This is the traditional kind of compilation known - in languages such as C, C++ or Fortran. - - bytecode - Python bytecode - The original form in which Python functions are executed. Python - bytecode describes a stack-machine executing abstract (untyped) - operations using operands from both the function stack and the - execution environment (e.g. global variables). - - compile-time constant - An expression whose value Numba can infer and freeze at compile-time. - Global variables and closure variables are compile-time constants. - - just-in-time compilation - JIT compilation - JIT - Compilation of a function at execution time, as opposed to - :term:`ahead-of-time compilation`. - - JIT function - Shorthand for "a function :term:`JIT-compiled ` with Numba using - the :ref:`@jit ` decorator." - - loop-lifting - loop-jitting - A feature of compilation in :term:`object mode` where a loop can be - automatically extracted and compiled in :term:`nopython mode`. This - allows functions with operations unsupported in nopython mode to see - significant performance improvements if they contain loops with only - nopython-supported operations. - - lowering - The act of translating :term:`Numba IR` into LLVM IR. The term - "lowering" stems from the fact that LLVM IR is low-level and - machine-specific while Numba IR is high-level and abstract. - - nopython mode - A Numba compilation mode that generates code that does not access the - Python C API. This compilation mode produces the highest performance - code, but requires that the native types of all values in the function - can be :term:`inferred `. Unless otherwise instructed, - the ``@jit`` decorator will automatically fall back to :term:`object - mode` if nopython mode cannot be used. 
- - Numba IR - Numba intermediate representation - A representation of a piece of Python code which is more amenable - to analysis and transformations than the original Python - :term:`bytecode`. - - object mode - A Numba compilation mode that generates code that handles all values - as Python objects and uses the Python C API to perform all operations - on those objects. Code compiled in object mode will often run - no faster than Python interpreted code, unless the Numba compiler can - take advantage of :term:`loop-jitting`. - - type inference - The process by which Numba determines the specialized types of all - values within a function being compiled. Type inference can fail - if arguments or globals have Python types unknown to Numba, or if - functions are used that are not recognized by Numba. Sucessful - type inference is a prerequisite for compilation in - :term:`nopython mode`. - - typing - The act of running :term:`type inference` on a value or operation. - - ufunc - A NumPy `universal function `_. - Numba can create new compiled ufuncs with - the :ref:`@vectorize ` decorator. - - reflection - In numba, when a mutable container is passed as argument to a nopython - function from the Python interpreter, the container object and all its - contained elements are converted into nopython values. To match the - semantics of Python, any mutation on the container inside the nopython - function must be visible in the Python interpreter. To do so, Numba - must update the container and its elements and convert them back into - Python objects during the transition back into the interpreter. - - Not to be confused with Python's "reflection" in the context of binary - operators (see https://docs.python.org/3.5/reference/datamodel.html). 
diff --git a/numba/docs/source/hsa/device-functions.rst b/numba/docs/source/hsa/device-functions.rst deleted file mode 100644 index 9433d0c9e..000000000 --- a/numba/docs/source/hsa/device-functions.rst +++ /dev/null @@ -1,15 +0,0 @@ - -Writing Device Functions -======================== - -HSA device functions can only be invoked from a kernel -or another device function. To define a device function:: - - from numba import hsa - - @hsa.jit(device=True) - def a_device_function(a, b): - return a + b - -Unlike a kernel function, a device function can return a value like normal -functions. diff --git a/numba/docs/source/hsa/device-management.rst b/numba/docs/source/hsa/device-management.rst deleted file mode 100644 index a6a45d3d0..000000000 --- a/numba/docs/source/hsa/device-management.rst +++ /dev/null @@ -1,5 +0,0 @@ - -Device management -================= - -TODO... diff --git a/numba/docs/source/hsa/examples.rst b/numba/docs/source/hsa/examples.rst deleted file mode 100644 index 9b3eb9440..000000000 --- a/numba/docs/source/hsa/examples.rst +++ /dev/null @@ -1,115 +0,0 @@ - -======== -Examples -======== - -.. _hsa-matmul: - -Matrix multiplication -===================== - -Here is a naive implementation of matrix multiplication using a HSA kernel:: - - - @hsa.jit - def matmul(A, B, C): - i = hsa.get_global_id(0) - j = hsa.get_global_id(1) - - if i >= C.shape[0] or j >= C.shape[1]: - return - - tmp = 0 - - for k in range(A.shape[1]): - tmp += A[i, k] * B[k, j] - - C[i, j] = tmp - - - -This implementation is straightforward and intuitive but performs poorly, -because the same matrix elements will be loaded multiple times from device -memory, which is slow (some devices may have transparent data caches, but -they may not be large enough to hold the entire inputs at once). - -It will be faster if we use a blocked algorithm to reduce accesses to the -device memory. HSA provides a fast :ref:`shared memory ` -for workitems in a group to cooperately compute on a task. 
The following -implements a faster version of the square matrix multiplication using shared -memory:: - - - import numpy as np - from numba import hsa - - - blocksize = 20 - gridsize = 20 - - @hsa.jit - def matmulfast(A, B, C): - x = hsa.get_global_id(0) - y = hsa.get_global_id(1) - - tx = hsa.get_local_id(0) - ty = hsa.get_local_id(1) - - sA = hsa.shared.array(shape=(blocksize, blocksize), dtype=float32) - sB = hsa.shared.array(shape=(blocksize, blocksize), dtype=float32) - - if x >= C.shape[0] or y >= C.shape[1]: - return - - tmp = 0 - - for i in range(gridsize): - # preload - sA[tx, ty] = A[x, ty + i * blocksize] - sB[tx, ty] = B[tx + i * blocksize, y] - # wait for preload to end - hsa.barrier(1) - # compute loop - for j in range(blocksize): - tmp += sA[tx, j] * sB[j, ty] - # wait for compute to end - hsa.barrier(1) - - C[x, y] = tmp - - N = gridsize * blocksize - A = np.random.random((N, N)).astype(np.float32) - B = np.random.random((N, N)).astype(np.float32) - C = np.zeros_like(A) - - griddim = gridsize, gridsize - blockdim = blocksize, blocksize - - with hsa.register(A, B, C): - ts = timer() - matmulfast[griddim, blockdim](A, B, C) - te = timer() - print("1st GPU time:", te - ts) - - with hsa.register(A, B, C): - ts = timer() - matmulfast[griddim, blockdim](A, B, C) - te = timer() - print("2nd GPU time:", te - ts) - - ts = timer() - ans = np.dot(A, B) - te = timer() - print("CPU time:", te - ts) - np.testing.assert_allclose(ans, C, rtol=1e-5) - - -Because the shared memory is a limited resources, the code preloads small -block at a time from the input arrays. Then, it calls -:func:`~numba.hsa.barrier` to wait until all threads have finished -preloading and before doing the computation on the shared memory. -It synchronizes again after the computation to ensure all threads -have finished with the data in shared memory before overwriting it -in the next loop iteration. 
- - diff --git a/numba/docs/source/hsa/index.rst b/numba/docs/source/hsa/index.rst deleted file mode 100644 index ecd1de1cb..000000000 --- a/numba/docs/source/hsa/index.rst +++ /dev/null @@ -1,13 +0,0 @@ - -Numba for HSA APUs -================== - -.. toctree:: - - overview.rst - kernels.rst - memory.rst - device-functions.rst - intrinsics.rst - device-management.rst - examples.rst diff --git a/numba/docs/source/hsa/intrinsics.rst b/numba/docs/source/hsa/intrinsics.rst deleted file mode 100644 index 618d460c1..000000000 --- a/numba/docs/source/hsa/intrinsics.rst +++ /dev/null @@ -1,40 +0,0 @@ - -Supported Atomic Operations -=========================== - -Numba provides access to some of the atomic operations supported in HSA, in the -:class:`numba.hsa.atomic` class. - -Example -''''''' - -The following code demonstrates the use of :class:`numba.hsa.atomic.add` to -count every number in [0,32) occurred in the input array in parallel:: - - from numba import hsa - import numpy as np - - @hsa.jit - def hsa_atomic_histogram(ary): - tid = hsa.get_local_id(0) - sm = hsa.shared.array(32, numba.uint32) # declare shared library - sm[tid] = 0 # init values to zero - hsa.barrier(1) # synchronize (wait for init) - loc = ary[tid] % 32 # ensure we are in range - hsa.atomic.add(sm, loc, 1) # atomic add - hsa.barrier(1) # synchronize - ary[tid] = sm[tid] # store result inplace - - ary = np.random.randint(0, 32, size=32).astype(np.uint32) - orig = ary.copy() - - # HSA version - hsa_atomic_histogram[1, 32](ary) - - # Expected behavior - gold = np.zeros_like(ary) - for i in range(orig.size): - gold[orig[i]] += 1 - - print(ary) # HSA kernel result - print(gold) # for comparison diff --git a/numba/docs/source/hsa/kernels.rst b/numba/docs/source/hsa/kernels.rst deleted file mode 100644 index 50c9b6a48..000000000 --- a/numba/docs/source/hsa/kernels.rst +++ /dev/null @@ -1,175 +0,0 @@ - -==================== -Writing HSA Kernels -==================== - -Introduction -============ - -HSA 
provides an execution model similar to OpenCL. Instructions are executed -in parallel by a group of hardware threads. In some way, this is similar to -*single-instruction-multiple-data* (SIMD) model but with the convenience that -the fine-grain scheduling is hidden from the programmer instead of programming -with SIMD vectors as a data structure. In HSA, the code you write will be -executed by multiple threads at once (often hundreds or thousands). Your -solution will -be modeled by defining a thread hierarchy of *grid*, *workgroup* and -*workitem*. - -Numba's HSA support exposes facilities to declare and manage this -hierarchy of threads. - - -Introduction for CUDA Programmers -================================== - -HSA execution model is similar to CUDA. The main difference will be the -shared memory model employed by HSA so that there are no device memory. The -GPU hardware uses the machine's main memory (or host memory in -CUDA term) directly. Therefore, you will not need ``to_device()`` and -``copy_to_host()`` in HSA programming. - -Here's a quick mapping of the CUDA terms to HSA: -* workitem is CUDA threads -* workgroup is CUDA thread block -* grid is CUDA grid - - -Kernel declaration -================== - -A *kernel function* is a GPU function that is meant to be called from CPU -code. It gives it two fundamental characteristics: - -* kernels cannot explicitly return a value; all result data must be written - to an array passed to the function (if computing a scalar, you will - probably pass a one-element array); - -* kernels explicitly declare their thread hierarchy when called: i.e. - the number of workgroups and the number of workitems per workgroup - (note that while a kernel is compiled once, it can be called multiple - times with different workgroup sizes or grid sizes). 
- -At first sight, writing a HSA kernel with Numba looks very much like -writing a :term:`JIT function` for the CPU:: - - @hsa.jit - def increment_by_one(an_array): - """ - Increment all array elements by one. - """ - # code elided here; read further for different implementations - - -.. _hsa-kernel-invocation: - -Kernel invocation -================= - -A kernel is typically launched in the following way:: - - itempergroup = 32 - groupperrange = (an_array.size + (itempergroup - 1)) // itempergroup - increment_by_one[groupperrange, itempergroup](an_array) - -We notice two steps here: - -* Instantiate the kernel proper, by specifying a number of workgroup - (or "workgroup per grid"), and a number of workitems per workgroup. The - product of the two will give the total number of workitem launched. Kernel - instantiation is done by taking the compiled kernel function - (here ``increment_by_one``) and indexing it with a tuple of integers. - -* Running the kernel, by passing it the input array (and any separate - output arrays if necessary). By default, running a kernel is synchronous: - the function returns when the kernel has finished executing and the - data is synchronized back. - -Choosing the workgroup size ---------------------------- - -It might seem curious to have a two-level hierarchy when declaring the -number of workitem needed by a kernel. The workgroup size (i.e. number of -workitem per workgroup) is often crucial: - -* On the software side, the workgroup size determines how many threads - share a given area of :ref:`shared memory `. -* On the hardware side, the workgroup size must be large enough for full - occupation of execution units. - -Multi-dimensional workgroup and grid ---------------------------------------- - -To help deal with multi-dimensional arrays, HSA allows you to specify -multi-dimensional workgroups and grids. In the example above, you could -make ``itempergroup`` and ``groupperrange`` tuples of one, two -or three integers. 
Compared to 1D declarations of equivalent sizes, -this doesn't change anything to the efficiency or behaviour of generated -code, but can help you write your algorithms in a more natural way. - - -WorkItem positioning -==================== - -When running a kernel, the kernel function's code is executed by every -thread once. It therefore has to know which thread it is in, in order -to know which array element(s) it is responsible for (complex algorithms -may define more complex responsibilities, but the underlying principle -is the same). - -One way is for the thread to determines its position in the grid and -workgroup and manually compute the corresponding array position:: - - @hsa.jit - def increment_by_one(an_array): - # workitem id in a 1D workgroup - tx = hsa.get_local_id(0) - # workgroup id in a 1D grid - ty = hsa.get_group_id(0) - # workgroup size, i.e. number of workitem per workgroup - bw = hsa.get_local_size(0) - # Compute flattened index inside the array - pos = tx + ty * bw - # The above is equivalent to pos = hsa.get_global_id(0) - if pos < an_array.size: # Check array boundaries - an_array[pos] += 1 - -.. note:: Unless you are sure the workgroup size and grid size is a divisor - of your array size, you **must** check boundaries as shown above. - -:func:`.get_local_id`, :func:`.get_local_size`, :func:`.get_group_id` and -:func:`.get_global_id` are special functions provided by the HSA backend for -the sole purpose of knowing the geometry of the thread hierarchy and the -position of the current workitem within that geometry. - -.. function:: numba.hsa.get_local_id(dim) - - Takes the index of the dimension being queried - - Returns local workitem ID in the the current workgroup for the given - dimension. For 1D workgroup, the index is an integer spanning the range - from 0 inclusive to :func:`numba.hsa.get_local_size` exclusive. - -.. 
function:: numba.hsa.get_local_size(dim) - - Takes the index of the dimension being queried - - Returns the size of the workgroup at the given dimension. - The value is declared when instantiating the kernel. - This value is the same for all workitems in a given kernel, - even if they belong to different workgroups (i.e. each workgroups is "full"). - -.. function:: numba.hsa.get_group_id(dim) - - Takes the index of the dimension being queried - - Returns the workgroup ID in the grid of workgroup launched a kernel. - -.. function:: numba.hsa.get_global_id(dim) - - Takes the index of the dimension being queried - - Returns the global workitem ID for the given dimension. Unlike `numba.hsa - .get_local_id()`, this number is unique for all workitems in a grid. - - diff --git a/numba/docs/source/hsa/memory.rst b/numba/docs/source/hsa/memory.rst deleted file mode 100644 index 7436240b1..000000000 --- a/numba/docs/source/hsa/memory.rst +++ /dev/null @@ -1,64 +0,0 @@ -================= -Memory management -================= - -.. _hsa-device-memory: - -The CPU and GPU in a APU share the same main memory. There is no distinction -between CPU and GPU memory. Even though a HSA kernel can directly consume any -data in the main memory, it is recommended to register a memory region to the -HSA runtime compatibility with HSA-compliant discrete GPUs. - -.. function:: hsa.register(*arrays) - - Register every given array. The function can be used in a *with-context* - for automically deregistration:: - - array_a = numpy.arange(10) - array_b = numpy.arange(10) - with hsa.register(array_a, array_b): - some_hsa_code(array_a, array_b) - - -.. function:: hsa.deregister(*arrays) - - Deregister every given array - -.. _hsa-shared-memory: - -Shared memory and thread synchronization -======================================== - -A limited amount of shared memory can be allocated on the device to speed -up access to data, when necessary. That memory will be shared (i.e. 
both -readable and writable) amongst all workitems belonging to a given group -and has faster access times than regular device memory. It also allows -workitems to cooperate on a given solution. You can think of it as a -manually-managed data cache. - -The memory is allocated once for the duration of the kernel, unlike -traditional dynamic memory management. - -.. function:: numba.hsa.shared.array(shape, type) - - Allocate a shared array of the given *shape* and *type* on the device. - This function must be called on the device (i.e. from a kernel or - device function). *shape* is either an integer or a tuple of integers - representing the array's dimensions. *type* is a :ref:`Numba type ` - of the elements needing to be stored in the array. - - The returned array-like object can be read and written to like any normal - device array (e.g. through indexing). - - A common pattern is to have each workitem populate one element in the - shared array and then wait for all workitems to finish using :func:` - .barrier`. - -.. function:: numba.hsa.barrier(scope) - - The ``scope`` argument specifies the level of synchronization. Set ``scope`` - to ``1`` to synchronize all workitems in the same workgroup. - -.. seealso:: - :ref:`Matrix multiplication example `. - diff --git a/numba/docs/source/hsa/overview.rst b/numba/docs/source/hsa/overview.rst deleted file mode 100644 index 537713a90..000000000 --- a/numba/docs/source/hsa/overview.rst +++ /dev/null @@ -1,51 +0,0 @@ -======== -Overview -======== - -Numba supports HSA APU programming by directly compiling a restricted subset -of Python code into HSA kernels and device functions following the HSA -execution model. Kernels written in Numba appear to have direct access -to NumPy arrays. 
- -Terminology -=========== - -Several important terms in the topic of HSA programming are listed here: - -- *kernels*: a GPU function launched by the host and executed on the device -- *device function*: a GPU function executed on the device which can only be - called from the device (i.e. from a kernel or another device function) - - -Requirements -============ - -This is a preview of the HSA feature. We only support Kavari on 64-bit -Ubuntu at this time. Please consult offical documentation at -`this documentation `_. -about system requirement. - - -Installation -============ - -Follow `this document `_ -for installation instructions to enable HSA support for the system. -Be sure to use the ``.deb`` packages to simplify the process. -Aftwards, the following libraries must be added to your ``LD_LIBRARY_PATH``: - -* libhsakmt.so.1 -* libhsa-runtime64.so -* libhsa-runtime-ext64.so - - -``libhsa-runtime64.so`` and ``libhsa-runtime-ext64.so`` are in ``/opt/hsa/lib`` -``libhsakmt.so.1`` has no default location and is available from -https://github.com/HSAFoundation/HSA-Drivers-Linux-AMD - -The current implementation uses the stable LLVM compiler from AMD. -To install, use ``.deb`` file from https://github.com/HSAFoundation/HSAIL-HLC-Stable -This will install the binaries to ``/opt/amd/bin``, which is expected by Numba. - - - diff --git a/numba/docs/source/index.rst b/numba/docs/source/index.rst deleted file mode 100644 index 15e71f4c2..000000000 --- a/numba/docs/source/index.rst +++ /dev/null @@ -1,26 +0,0 @@ -.. Numba documentation master file, created by - sphinx-quickstart on Tue Dec 30 11:55:40 2014. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Numba documentation -=================== - -This is the Numba documentation. Unless you are already acquainted -with Numba, we suggest you start with the :doc:`User manual `. - - -.. 
toctree:: - :numbered: - :maxdepth: 2 - - user/index.rst - reference/index.rst - cuda/index.rst - cuda-reference/index.rst - hsa/index.rst - extending/index.rst - developer/index.rst - proposals/index.rst - glossary.rst - release-notes.rst diff --git a/numba/docs/source/proposals/cfunc.rst b/numba/docs/source/proposals/cfunc.rst deleted file mode 100644 index 7650dfb6d..000000000 --- a/numba/docs/source/proposals/cfunc.rst +++ /dev/null @@ -1,147 +0,0 @@ -============================ -NBEP 4: Defining C callbacks -============================ - -:Author: Antoine Pitrou -:Date: April 2016 -:Status: Draft - - -Interfacing with some native libraries (for example written in C -or C++) can necessitate writing native callbacks to provide business logic -to the library. Some Python-facing libraries may also provide the -alternative of passing a ctypes-wrapped native callback instead of a -Python callback for better performance. A simple example is the -``scipy.integrate`` package where the user passes the function to be -integrated as a callback. - -Users of those libraries may want to benefit from the performance advantage -of running purely native code, while writing their code in Python. -This proposal outlines a scheme to provide such a functionality in -Numba. - - -Basic usage -=========== - -We propose adding a new decorator, ``@cfunc``, importable from the main -package. This decorator allows defining a callback as in the following -example:: - - from numba import cfunc - from numba.types import float64 - - # A callback with the C signature `double(double)` - - @cfunc(float64(float64), nopython=True) - def integrand(x): - return 1 / x - - -The ``@cfunc`` decorator returns a "C function" object holding the -resources necessary to run the given compiled function (for example its -LLVM module). This object has several attributes and methods: - -* the ``ctypes`` attribute is a ctypes function object representing - the native function. 
- -* the ``address`` attribute is the address of the native function code, as - an integer (note this can also be computed from the ``ctypes`` attribute). - -* the ``native_name`` attribute is the symbol under which the function - can be looked up inside the current process. - -* the ``inspect_llvm()`` method returns the IR for the LLVM module - in which the function is compiled. It is expected that the ``native_name`` - attribute corresponds to the function's name in the LLVM IR. - -The general signature of the decorator is ``cfunc(signature, **options)``. - -The ``signature`` must specify the argument types and return type of the -function using Numba types. In contrary to ``@jit``, the return type cannot -be omitted. - -The ``options`` are keyword-only parameters specifying compilation options. -We are expecting that the standard ``@jit`` options (``nopython``, -``forceobj``, ``cache``) can be made to work with ``@cfunc``. - - -Calling from Numba-compiled functions -------------------------------------- - -While the intended use is to pass a callback's address to foreign C -code expecting a function pointer, it should be made possible to call -the C callback from a Numba-compiled function. - - -Passing array data -================== - -Native platform ABIs as used by C or C++ don't have the notion of a shaped -array as in Numpy. One common solution is to pass a raw data pointer and -one or several size arguments (depending on dimensionality). Numba must -provide a way to rebuild an array view of this data inside the callback. 
- -:: - - from numba import cfunc, carray - from numba.types import float64, CPointer, void, intp - - # A callback with the C signature `void(double *, double *, size_t)` - - @cfunc(void(CPointer(float64), CPointer(float64), intp)) - def invert(in_ptr, out_ptr, n): - in_ = carray(in_ptr, (n,)) - out = carray(out_ptr, (n,)) - for i in range(n): - out[i] = 1 / in_[i] - - -The ``carray`` function takes ``(pointer, shape, dtype)`` arguments -(``dtype`` being optional) and returns a C-layout array view over the -data *pointer*, with the given *shape* and *dtype*. *pointer* must -be a ctypes pointer object (not a Python integer). The array's -dimensionality corresponds to the *shape* tuple's length. If *dtype* -is not given, the array's dtype corresponds to the *pointer*'s pointee -type. - -The ``farray`` function is similar except that it returns a F-layout -array view. - - -Error handling -============== - -There is no standard mechanism in C for error reporting. Unfortunately, -Numba currently doesn't handle ``try..except`` blocks, which makes it more -difficult for the user to implement the required error reporting scheme. -The current stance of this proposal is to let users guard against invalid -arguments where necessary, and do whatever is required to inform the caller -of the error. - -Based on user feedback, we can later add support for some error reporting -schemes, such as returning an integer error code depending on whether an -exception was raised, or setting ``errno``. - - -Deferred topics -=============== - -Ahead-of-Time compilation -------------------------- - -This proposal doesn't make any provision for AOT compilation of C callbacks. -It would probably necessitate a separate API (a new method on the -``numba.pycc.CC`` object), and the implementation would require exposing -a subset of the C function object's functionality from the compiled C -extension module. 
- -Opaque data pointers --------------------- - -Some libraries allow passing an opaque data pointer (``void *``) to a -user-provided callback, to provide any required context for execution -of the callback. Taking advantage of this functionality would require -adding specific support in Numba, for example the ability to do generic -conversion from ``types.voidptr`` and to take the address of a -Python-facing ``jitclass`` instance. diff --git a/numba/docs/source/proposals/extension-points.rst b/numba/docs/source/proposals/extension-points.rst deleted file mode 100644 index 89197dd17..000000000 --- a/numba/docs/source/proposals/extension-points.rst +++ /dev/null @@ -1,414 +0,0 @@ -======================== -NBEP 2: Extension points -======================== - -:Author: Antoine Pitrou -:Date: July 2015 -:Status: Draft - - -Implementing new types or functions in Numba requires hooking into -various mechanisms along the compilation chain (and potentially -outside of it). This document aims, first, at examining the -current ways of doing so and, second, at making proposals to make -extending easier. - -If some of the proposals are implemented, we should first strive -to use and exercise them internally, before exposing the APIs to the -public. - -.. note:: - This document doesn't cover CUDA or any other non-CPU backend. - - -High-level API -============== - -There is currently no high-level API, making some use cases more -complicated than they should be. - -Proposed changes ----------------- - -Dedicated module -'''''''''''''''' - -We propose the addition of a ``numba.extending`` module exposing the main -APIs useful for extending Numba. - -Implementing a function -''''''''''''''''''''''' - -We propose the addition of a ``@overload`` decorator allowing the -implementation of a given function for use in :term:`nopython mode`. -The overloading function has the same formal signature as the implemented -function, and receives the actual argument types. 
It should return a -Python function implementing the overloaded function for the given types. - -The following example implements :func:`numpy.where` with -this approach. - -.. literalinclude:: np-where-override.py - -It is also possible to implement functions already known to Numba, to -support additional types. The following example implements the -built-in function :func:`len` for tuples with this approach:: - - @overload(len) - def tuple_len(x): - if isinstance(x, types.BaseTuple): - # The tuple length is known at compile-time, so simply reify it - # as a constant. - n = len(x) - def len_impl(x): - return n - return len_impl - - -Implementing an attribute -''''''''''''''''''''''''' - -We propose the addition of a ``@overload_attribute`` decorator allowing -the implementation of an attribute getter for use in :term:`nopython mode`. - -The following example implements the ``.nbytes`` attribute on Numpy arrays:: - - @overload_attribute(types.Array, 'nbytes') - def array_nbytes(arr): - def get(arr): - return arr.size * arr.itemsize - return get - -.. note:: - The overload_attribute() signature allows for expansion to also define - setters and deleters, by letting the decorated function return a - ``getter, setter, deleter`` tuple instead of a single ``getter``. - - -Implementing a method -''''''''''''''''''''' - -We propose the addition of a ``@overload_method`` decorator allowing the -implementation of an instance method for use in :term:`nopython mode`. 
- -The following example implements the ``.take()`` method on Numpy arrays:: - - @overload_method(types.Array, 'take') - def array_take(arr, indices): - if isinstance(indices, types.Array): - def take_impl(arr, indices): - n = indices.shape[0] - res = np.empty(n, arr.dtype) - for i in range(n): - res[i] = arr[indices[i]] - return res - return take_impl - - -Exposing a structure member -''''''''''''''''''''''''''' - -We propose the addition of a ``make_attribute_wrapper()`` function exposing -an internal field as a visible read-only attribute, for those types backed -by a ``StructModel`` data model. - -For example, assuming ``PdIndexType`` is the Numba type of pandas indices, -here is how to expose the underlying Numpy array as a ``._data`` attribute:: - - @register_model(PdIndexType) - class PdIndexModel(models.StructModel): - def __init__(self, dmm, fe_type): - members = [ - ('values', fe_type.as_array), - ] - models.StructModel.__init__(self, dmm, fe_type, members) - - make_attribute_wrapper(PdIndexType, 'values', '_data') - - -Typing -====== - -Numba types ------------ - -Numba's standard types are declared in :mod:`numba.types`. To declare -a new type, one subclasses the base :class:`Type` class or one of its -existing abstract subclasses, and implements the required functionality. - -Proposed changes -'''''''''''''''' - -No change required. - - -Type inference on values ------------------------- - -Values of a new type need to be type-inferred if they can appear as -function arguments or constants. The core machinery is in -:mod:`numba.typing.typeof`. - -In the common case where some Python class or classes map exclusively -to the new type, one can extend a generic function to dispatch on said -classes, e.g.:: - - from numba.typing.typeof import typeof_impl - - @typeof_impl(MyClass) - def _typeof_myclass(val, c): - if "some condition": - return MyType(...) 
- -The ``typeof_impl`` specialization must return a Numba type instance, -or None if the value failed typing. - -(when one controls the class being type-inferred, an alternative -to ``typeof_impl`` is to define a ``_numba_type_`` property on the class) - -In the rarer case where the new type can denote various Python classes -that are impossible to enumerate, one must insert a manual check in the -fallback implementation of the ``typeof_impl`` generic function. - -Proposed changes -'''''''''''''''' - -Allow people to define a generic hook without monkeypatching the -fallback implementation. - - -Fast path for type inference on function arguments --------------------------------------------------- - -Optionally, one may want to allow a new type to participate in the -fast type resolution (written in C code) to minimize function call -overhead when a JIT-compiled function is called with the new type. -One must then insert the required checks and implementation in -the ``_typeof.c`` file, presumably inside the ``compute_fingerprint()`` -function. - -Proposed changes -'''''''''''''''' - -None. Adding generic hooks to C code embedded in a C Python extension -is too delicate a change. - - -Type inference on operations ----------------------------- - -Values resulting from various operations (function calls, operators, etc.) -are typed using a set of helpers called "templates". One can define a -new template by subclass one of the existing base classes and implement -the desired inference mechanism. The template is explicitly registered -with the type inference machinery using a decorator. - -The :class:`ConcreteTemplate` base class allows one to define inference as -a set of supported signatures for a given operation. 
The following example -types the modulo operator:: - - @builtin - class BinOpMod(ConcreteTemplate): - key = "%" - cases = [signature(op, op, op) - for op in sorted(types.signed_domain)] - cases += [signature(op, op, op) - for op in sorted(types.unsigned_domain)] - cases += [signature(op, op, op) for op in sorted(types.real_domain)] - -(note that type *instances* are used in the signatures, severely -limiting the amount of genericity that can be expressed) - -The :class:`AbstractTemplate` base class allows to define inference -programmatically, giving it full flexibility. Here is a simplistic -example of how tuple indexing (i.e. the ``__getitem__`` operator) can -be expressed:: - - @builtin - class GetItemUniTuple(AbstractTemplate): - key = "getitem" - - def generic(self, args, kws): - tup, idx = args - if isinstance(tup, types.UniTuple) and isinstance(idx, types.Integer): - return signature(tup.dtype, tup, idx) - - -The :class:`AttributeTemplate` base class allows to type the attributes -and methods of a given type. Here is an example, typing the ``.real`` -and ``.imag`` attributes of complex numbers:: - - @builtin_attr - class ComplexAttribute(AttributeTemplate): - key = types.Complex - - def resolve_real(self, ty): - return ty.underlying_float - - def resolve_imag(self, ty): - return ty.underlying_float - -.. note:: - :class:`AttributeTemplate` only works for getting attributes. Setting - an attribute's value is hardcoded in :mod:`numba.typeinfer`. - -The :class:`CallableTemplate` base class offers an easier way to parse -flexible function signatures, by letting one define a callable that has -the same definition as the function being typed. 
For example, here is how -one could hypothetically type Python's ``sorted`` function if Numba supported -lists:: - - @builtin - class Sorted(CallableTemplate): - key = sorted - - def generic(self): - def typer(iterable, key=None, reverse=None): - if reverse is not None and not isinstance(reverse, types.Boolean): - return - if key is not None and not isinstance(key, types.Callable): - return - if not isinstance(iterable, types.Iterable): - return - return types.List(iterable.iterator_type.yield_type) - - return typer - -(note you can return just the function's return type instead of the -full signature) - -Proposed changes -'''''''''''''''' - -Naming of the various decorators is quite vague and confusing. We propose -renaming ``@builtin`` to ``@infer``, ``@builtin_attr`` to ``@infer_getattr`` -and ``builtin_global`` to ``infer_global``. - -The two-step declaration for global values is a bit verbose, we propose -simplifying it by allowing the use of ``infer_global`` as a decorator:: - - @infer_global(len) - class Len(AbstractTemplate): - key = len - - def generic(self, args, kws): - assert not kws - (val,) = args - if isinstance(val, (types.Buffer, types.BaseTuple)): - return signature(types.intp, val) - -The class-based API can feel clumsy, we can add a functional API for -some of the template kinds: - -.. code-block:: python - - @type_callable(sorted) - def type_sorted(context): - def typer(iterable, key=None, reverse=None): - # [same function as above] - - return typer - - -Code generation -=============== - -Concrete representation of values of a Numba type -------------------------------------------------- - -Any concrete Numba type must be able to be represented in LLVM form -(for variable storage, argument passing, etc.). One defines that -representation by implementing a datamodel class and registering it -with a decorator. Datamodel classes for standard types are defined -in :mod:`numba.datamodel.models`. 
- -Proposed changes -'''''''''''''''' - -No change required. - -Conversion between types ------------------------- - -Implicit conversion between Numba types is currently implemented as a -monolithic sequence of choices and type checks in the -:meth:`BaseContext.cast` method. To add a new implicit conversion, one -appends a type-specific check in that method. - -Boolean evaluation is a special case of implicit conversion (the -destination type being :class:`types.Boolean`). - -.. note:: - Explicit conversion is seen as a regular operation, e.g. a constructor - call. - -Proposed changes -'''''''''''''''' - -Add a generic function for implicit conversion, with multiple dispatch -based on the source and destination types. Here is an example showing -how to write a float-to-integer conversion:: - - @lower_cast(types.Float, types.Integer) - def float_to_integer(context, builder, fromty, toty, val): - lty = context.get_value_type(toty) - if toty.signed: - return builder.fptosi(val, lty) - else: - return builder.fptoui(val, lty) - - -Implementation of an operation ------------------------------- - -Other operations are implemented and registered using a set of generic -functions and decorators. For example, here is how lookup for a the ``.ndim`` -attribute on Numpy arrays is implemented:: - - @builtin_attr - @impl_attribute(types.Kind(types.Array), "ndim", types.intp) - def array_ndim(context, builder, typ, value): - return context.get_constant(types.intp, typ.ndim) - -And here is how calling ``len()`` on a tuple value is implemented:: - - @builtin - @implement(types.len_type, types.Kind(types.BaseTuple)) - def tuple_len(context, builder, sig, args): - tupty, = sig.args - retty = sig.return_type - return context.get_constant(retty, len(tupty.types)) - -Proposed changes -'''''''''''''''' - -Review and streamine the API. Drop the requirement to write -``types.Kind(...)`` explicitly. 
Remove the separate ``@implement`` -decorator and rename ``@builtin`` to ``@lower_builtin``, ``@builtin_attr`` -to ``@lower_getattr``, etc. - -Add decorators to implement ``setattr()`` operations, named -``@lower_setattr`` and ``@lower_setattr_generic``. - - -Conversion from / to Python objects ------------------------------------ - -Some types need to be converted from or to Python objects, if they can -be passed as function arguments or returned from a function. The -corresponding boxing and unboxing operations are implemented using -a generic function. The implementations for standard Numba types -are in :mod:`numba.targets.boxing`. For example, here is the boxing -implementation for a boolean value:: - - @box(types.Boolean) - def box_bool(c, typ, val): - longval = c.builder.zext(val, c.pyapi.long) - return c.pyapi.bool_from_long(longval) - -Proposed changes -'''''''''''''''' - -Change the implementation signature from ``(c, typ, val)`` to -``(typ, val, c)``, to match the one chosen for the ``typeof_impl`` -generic function. diff --git a/numba/docs/source/proposals/index.rst b/numba/docs/source/proposals/index.rst deleted file mode 100644 index 1fad6adae..000000000 --- a/numba/docs/source/proposals/index.rst +++ /dev/null @@ -1,35 +0,0 @@ -=========================== -Numba Enhancement Proposals -=========================== - -Numba Enhancement Proposals (not really abbreviated "NEPs", since "NEP" -is already taken by the Numpy project) describe proposed changes to Numba. -They are modeled on Python Enhancement Proposals (PEPs) and Numpy Enhancement -Proposals, and are typically written up when important changes -(behavioural changes, feature additions...) to Numba are proposed. - -This page provides an overview of all proposals, making only a distinction -between the ones that have been implemented and those that have not been -implemented. - -Implemented proposals ---------------------- - -.. 
toctree:: - :maxdepth: 1 - - integer-typing.rst - - -Other proposals ---------------- - -.. toctree:: - :maxdepth: 1 - - extension-points.rst - jit-classes.rst - cfunc.rst - type-inference.rst - typing_recursion.rst - diff --git a/numba/docs/source/proposals/integer-typing.rst b/numba/docs/source/proposals/integer-typing.rst deleted file mode 100644 index 7a62fd2c2..000000000 --- a/numba/docs/source/proposals/integer-typing.rst +++ /dev/null @@ -1,186 +0,0 @@ -.. _nbep-1: - -================================= -NBEP 1: Changes in integer typing -================================= - -:Author: Antoine Pitrou -:Date: July 2015 -:Status: Final - - -Current semantics -================= - -Type inference of integers in Numba currently has some subtleties -and some corner cases. The simple case is when some variable has an obvious -Numba type (for example because it is the result of a constructor call to a -Numpy scalar type such as ``np.int64``). That case suffers no ambiguity. - -The less simple case is when a variable doesn't bear such explicit -information. This can happen because it is inferred from a built-in Python -``int`` value, or from an arithmetic operation between two integers, or -other cases yet. Then Numba has a number of rules to infer the resulting -Numba type, especially its signedness and bitwidth. - -Currently, the generic case could be summarized as: *start small, -grow bigger as required*. Concretely: - -1. Each constant or pseudo-constant is inferred using the *smallest signed - integer type* that can correctly represent it (or, possibly, ``uint64`` - for positive integers between ``2**63`` and ``2**64 - 1``). -2. The result of an operation is typed so as to ensure safe representation - in the face of overflow and other magnitude increases (for example, - ``int32 + int32`` would be typed ``int64``). -3. As an exception, a Python ``int`` used as function argument is always - typed ``intp``, a pointer-size integer. 
This is to avoid the proliferation - of compiled specializations, as otherwise various integer bitwidths - in input arguments may produce multiple signatures. - -.. note:: - The second rule above (the "respect magnitude increases" rule) - reproduces Numpy's behaviour with arithmetic on scalar values. - Numba, however, has different implementation and performance constraints - than Numpy scalars. - - It is worth noting, by the way, that Numpy arrays do not implement - said rule (i.e. ``array(int32) + array(int32)`` is typed ``array(int32)``, - not ``array(int64)``). Probably because this makes performance more - controllable. - -This has several non-obvious side-effects: - -1. It is difficult to predict the precise type of a value inside a function, - after several operations. The basic operands in an expression tree - may for example be ``int8`` but the end result may be ``int64``. Whether - this is desirable or not is an open question; it is good for correctness, - but potentially bad for performance. - -2. In trying to follow the correctness over predictability rule, some values - can actually leave the integer realm. For example, ``int64 + uint64`` - is typed ``float64`` in order to avoid magnitude losses (but incidentally - will lose precision on large integer values...), again following Numpy's - semantics for scalars. This is usually not intended by the user. - -3. More complicated scenarios can produce unexpected errors at the type unification - stage. 
An example is at `Github issue 1299 `_, - the gist of which is reproduced here:: - - @jit(nopython=True) - def f(): - variable = 0 - for i in range(1): - variable = variable + 1 - return np.arange(variable) - - At the time of this writing, this fails compiling, on a 64-bit system, - with the error:: - - numba.errors.TypingError: Failed at nopython (nopython frontend) - Can't unify types of variable '$48.4': $48.4 := {array(int32, 1d, C), array(int64, 1d, C)} - - People expert with Numba's type unification system can understand why. - But the user is caught in mystery. - - -Proposal: predictable width-conserving typing -============================================= - -We propose to turn the current typing philosophy on its head. Instead -of "*start small and grow as required*", we propose "*start big and keep -the width unchanged*". - -Concretely: - -1. The typing of Python ``int`` values used as function arguments doesn't - change, as it works satisfyingly and doesn't surprise the user. - -2. The typing of integer *constants* (and pseudo-constants) changes to match - the typing of integer arguments. That is, every non-explicitly typed - integer constant is typed ``intp``, the pointer-sized integer; except for - the rare cases where ``int64`` (on 32-bit systems) or ``uint64`` is - required. - -3. Operations on integers promote bitwidth to ``intp``, if smaller, otherwise - they don't promote. For example, on a 32-bit machine, ``int8 + int8`` - is typed ``int32``, as is ``int32 + int32``. However, ``int64 + int64`` - is typed ``int64``. - -4. Furthermore, mixed operations between signed and unsigned fall back to - signed, while following the same bitwidth rule. For example, on a - 32-bit machine, ``int8 + uint16`` is typed ``int32``, as is - ``uint32 + int32``. - - -Proposal impact -=============== - -Semantics ---------- - -With this proposal, the semantics become clearer. 
Regardless of whether -the arguments and constants of a function were explicitly typed or not, -the results of various expressions at any point in the function have -easily predictable types. - -When using built-in Python ``int``, the user gets acceptable magnitude -(32 or 64 bits depending on the system's bitness), and the type remains -the same accross all computations. - -When explicitly using smaller bitwidths, intermediate results don't -suffer from magnitude loss, since their bitwidth is promoted to ``intp``. - -There is also less potential for annoyances with the type unification -system as demonstrated above. The user would have to force several -different types to be faced with such an error. - -One potential cause for concern is the discrepancy with Numpy's scalar -semantics; but at the same time this brings Numba scalar semantics closer -to array semantics (both Numba's and Numpy's), which seems a desirable -outcome as well. - -It is worth pointing out that some sources of integer numbers, such -as the ``range()`` built-in, always yield 32-bit integers or larger. -This proposal could be an opportunity to standardize them on ``intp``. - -Performance ------------ - -Except in trivial cases, it seems unlikely that the current "best fit" -behaviour for integer constants really brings a performance benefit. After -all, most integers in Numba code would either be stored in arrays (with -well-known types, chosen by the user) or be used as indices, where a ``int8`` -is highly unlikely to fare better than a ``intp`` (actually, it may be worse, -if LLVM isn't able to optimize away the required sign-extension). - -As a side note, the default use of ``intp`` rather than ``int64`` -ensures that 32-bit systems won't suffer from poor arithmetic performance. - -Implementation --------------- - -Optimistically, this proposal may simplify some Numba internals a bit. -Or, at least, it doesn't threaten to make them significantly more complicated. 
- -Limitations ------------ - -This proposal doesn't really solve the combination of signed and unsigned -integers. It is geared mostly at solving the bitwidth issues, which are -a somewhat common cause of pain for users. Unsigned integers are in -practice very uncommon in Numba-compiled code, except when explicitly -asked for, and therefore much less of a pain point. - -On the bitwidth front, 32-bit systems could still show discrepancies based -on the values of constants: if a constant is too large to fit in 32 bits, -it is typed ``int64``, which propagates through other computations. -This would be a reminiscence of the current behaviour, but rarer and much -more controlled still. - -Long-term horizon ------------------ - -While we believe this proposal makes Numba's behaviour more regular and more -predictable, it also pulls it further from general compatibility with pure -Python semantics, where users can assume arbitrary-precision integers without -any truncation issues. diff --git a/numba/docs/source/proposals/jit-classes.rst b/numba/docs/source/proposals/jit-classes.rst deleted file mode 100644 index 3f80a0432..000000000 --- a/numba/docs/source/proposals/jit-classes.rst +++ /dev/null @@ -1,231 +0,0 @@ -=================== -NBEP 3: JIT Classes -=================== - -:Author: Siu Kwan Lam -:Date: Dec 2015 -:Status: Draft - -Introduction -============ - -Numba does not yet support user-defined classes. -Classes provide useful abstraction and promote modularity when used -right. In the simplest sense, a class specifies the set of data and -operations as attributes and methods, respectively. -A class instance is an instantiation of that class. -This proposal will focus on supporting this simple usecase of classes--with -just attributes and methods. Other features, such as class methods, static -methods, and inheritance are deferred to another proposal, but we believe -these features can be easily implemented given the foundation described here. 
- - -Proposal: jit-classes -===================== - -A JIT-classes is more restricted than a Python class. -We will focus on the following operations on a class and its instance: - -* Instantiation: create an instance of a class using the class object as the - constructor: ``cls(*args, **kwargs)`` -* Destruction: remove resources allocated during instantiation and release - all references to other objects. -* Attribute access: loading and storing attributes using ``instance.attr`` - syntax. -* Method access: loading methods using ``instance.method`` syntax. - -With these operations, a class object (not the instance) does not need to be -materialize. Using the class object as a constructor is fully resolved (a -runtime implementation is picked) during the typing phase in the compiler. -This means **a class object will not be first class**. On the other hand, -implementating a first-class class object will require an -"interface" type, or the type of class. - -The instantiation of a class will allocate resources for storing the data -attributes. This is described in the "Storage model" section. Methods are -never stored in the instance. They are information attached to the class. -Since a class object only exists in the type domain, the methods will also be -fully resolved at the typing phase. Again, numba do not have first-class -function value and each function type maps uniquely to each function -implementation (this needs to be changed to support function value as argument). - -A class instance can contain other NRT reference-counted object as attributes. -To properly clean up an instance, a destructor is called when the reference -count of the instance is dropped to zero. This is described in the -"Reference count and descructor" section. - -Storage model -~~~~~~~~~~~~~ - -For compatibility with C, attributes are stored in a simple plain-old-data -structure. 
Each attribute are stored in a user-defined order in a padded -(for proper alignment), contiguous memory region. An instance that contains -three fields of int32, float32, complex64 will be compatible with the following -C structure:: - - struct { - int32 field0; - float32 field1; - complex64 field2; - }; - -This will also be comptabile with an aligned numpy structure dtype. - - -Methods -~~~~~~~ - -Methods are regular function that can be bounded to an instance. -They can be compiled as regular function by numba. -The operation ``getattr(instance, name)`` (getting an attribute ``name`` from -``instance``) binds the instance to the requested method at runtime. - - -The special ``__init__`` method is also handled like regular functions. - - -``__del__`` is not supported at this time. - - -Reference count and destructor -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -An instance of jit-class is reference-counted by NRT. Since it may contain -other NRT tracked object, it must call a destructor when its reference count -dropped to zero. The destructor will decrement the reference count of all -attributes by one. - -At this time, there is no support for user defined ``__del__`` method. - -Proper cleanup for cyclic reference is not handled at this time. -Cycles will cause memory leak. - -Type inference -~~~~~~~~~~~~~~ - -So far we have not described the type of the attributes or the methods. -Type information is necessary to materailize the instance (e.g. allocate the -storage). The simplest way is to let user provide the type of each attributes -as well as the ordering; for instance:: - - dct = OrderedDict() - dct['x'] = int32 - dct['y'] = float32 - -Allowing user to supply an ordered dictionary will provide the name, ordering -and types of the attributes. However, this statically typed semantic is not as -flexible as the Python semantic which behaves like a generic class. - -Inferring the type of attributes is difficult. 
In a previous attempt to -implement JIT classes, the ``__init__`` method is specialized to capture -the type stored into the attributes. Since the method can contain arbitrary -logic, the problem can become a dependent typing problem if types are assigned -conditionally depending on the value. (Very few languages implement dependent -typing and those that does are mostly theorem provers.) - -Example: typing function using an OrderedDict ---------------------------------------------- - -.. code-block:: python - - spec = OrderedDict() - spec['x'] = numba.int32 - spec['y'] = numba.float32 - - @jitclass(spec) - class Vec(object): - def __init__(self, x, y): - self.x = x - self.y = y - - def add(self, dx, dy): - self.x += dx - self.y += dy - -Example: typing function using a list of 2-tuples -------------------------------------------------- - -.. code-block:: python - - spec = [('x', numba.int32), - ('y', numba.float32)] - - @jitclass(spec) - class Vec(object): - ... - -Creating multiple jitclasses from a single class object -------------------------------------------------------- - -The `jitclass(spec)` decorator creates a new jitclass type even when applied to -the same class object and the same type specification. - -.. code-block:: python - - class Vec(object): - ... - - Vec1 = jitclass(spec)(Vec) - Vec2 = jitclass(spec)(Vec) - # Vec1 and Vec2 are two different jitclass types - -Usage from the Interpreter -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When constructing a new instance of a jitclass, a "box" is created that wraps -the underlying jitclass instance from numba. Attributes and methods are -accessible from the interpreter. The actual implementation will be in numba -compiled code. Any Python object is converted to its native -representation for consumption in numba. Similarly, the returned value is -converted to its Python representation. As a result, there may be overhead in -manipulating jitclass instances in the interpreter. 
This overhead is minimal -and should be easily amortized by more efficient computation in the compiled -methods. - -Support for property, staticmethod and classmethod -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The use of ``property`` is accepted for getter and setter only. Deleter is not -supported. - -The use of ``staticmethod`` is not supported. - -The use of ``classmethod`` is not supported. - -Inheritance -~~~~~~~~~~~ - -Class inhertance is not considered in this proposal. The only accepted base -class for a jitclass is `object`. - -Supported targets -~~~~~~~~~~~~~~~~~~ - -Only the CPU target (including the parallel target) is supported. -GPUs (e.g. CUDA and HSA) targets are supported via an immutable version of the -jitclass instance, which will be described in a separate NBEP. - - -Other properties -~~~~~~~~~~~~~~~~ - -Given: - -.. code-block:: python - - spec = [('x', numba.int32), - ('y', numba.float32)] - - @jitclass(spec) - class Vec(object): - ... - -* ``isinstance(Vec(1, 2), Vec)`` is True. -* ``type(Vec(1, 2))`` may not be ``Vec``. - -Future enhancements -~~~~~~~~~~~~~~~~~~~ - -This proposal has only described the basic semantic and functionality of a -jitclass. Additional features will be described in future enhancement -proposals. diff --git a/numba/docs/source/proposals/np-where-override.py b/numba/docs/source/proposals/np-where-override.py deleted file mode 100644 index 7f7e7956e..000000000 --- a/numba/docs/source/proposals/np-where-override.py +++ /dev/null @@ -1,51 +0,0 @@ -import numpy as np - -from numba import types -from numba.extending import overload - -@overload(np.where) -def where(cond, x, y): - """ - Implement np.where(). - """ - # Choose implementation based on argument types. 
- if isinstance(cond, types.Array): - # Array where() => return an array of the same shape - if all(ty.layout == 'C' for ty in (cond, x, y)): - def where_impl(cond, x, y): - """ - Fast implementation for C-contiguous arrays - """ - shape = cond.shape - if x.shape != shape or y.shape != shape: - raise ValueError("all inputs should have the same shape") - res = np.empty_like(x) - cf = cond.flat - xf = x.flat - yf = y.flat - rf = res.flat - for i in range(cond.size): - rf[i] = xf[i] if cf[i] else yf[i] - return res - else: - def where_impl(cond, x, y): - """ - Generic implementation for other arrays - """ - shape = cond.shape - if x.shape != shape or y.shape != shape: - raise ValueError("all inputs should have the same shape") - res = np.empty_like(x) - for idx, c in np.ndenumerate(cond): - res[idx] = x[idx] if c else y[idx] - return res - - else: - def where_impl(cond, x, y): - """ - Scalar where() => return a 0-dim array - """ - scal = x if cond else y - return np.full_like(scal, scal) - - return where_impl diff --git a/numba/docs/source/proposals/recursion_callstack.svg b/numba/docs/source/proposals/recursion_callstack.svg deleted file mode 100644 index 7e23f30d5..000000000 --- a/numba/docs/source/proposals/recursion_callstack.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - diff --git a/numba/docs/source/proposals/type-inference.rst b/numba/docs/source/proposals/type-inference.rst deleted file mode 100644 index 90e37206c..000000000 --- a/numba/docs/source/proposals/type-inference.rst +++ /dev/null @@ -1,124 +0,0 @@ -====================== -NBEP 5: Type Inference -====================== - -:Author: Siu Kwan Lam -:Date: Sept 2016 -:Status: Draft - - -This document describes the current type inference implementation in numba. - - -Introduction -============ - -Numba uses type information to ensure that every variable in the user code can -be correctly lowered (translated into a low-level representation). 
The type of -a variable describes the set of valid operations and available attributes. -Resolving this information during compilation avoids the overhead of type -checking and dispatching at runtime. However, Python is dynamically typed and -the user does not declare variable types. Since type information is absent, -we use type inference to reconstruct the missing information. - - -Numba Type Semantic -=================== - -Type inference operates on :term:`Numba IR`, a mostly static-single-assignment (SSA) -encoding of the Python bytecode. Conceptually, all intermediate values in the -Python code are explicitly assigned to a variable in the IR. Numba enforces -that each IR variable to have one type only. A user variable (from the Python -source code) can be mapped to multiple variables in the IR. They are *versions* -of a variable. Each time a user variable is assigned to, a new version is -created. From that point, all subsequent references will use the new version. -The user variable *evolves* as the function logic updates its type. Merge -points (e.g. subsequent block to an if-else, the loop body, etc..) in the control -flow need extra care. At each merge point, a new version is implicitly created -to merge the different variable versions from the incoming paths. -The merging of the variable versions may translate into an implicit cast. - -Numba uses function overloading to emulate Python duck-typing. The type of a -function can contain multiple call signatures that accept different argument -types and yield different return types. The process to decide the best -signature for an overloaded function is called *overload resolution*. -Numba partially implements the C++ overload resolution scheme -(`ISOCPP`_ 13.3 Overload Resolution). The scheme uses a "best fit" algorithm by -ranking each argument symmetrically. The five possible rankings in increasing -order of penalty are: - -* *Exact*: the expected type is the same as the actual type. 
-* *Promotion*: the actual type can be upcast to the expected type by extending - the precision without changing the behavior. -* *Safe conversion*: the actual type can be cast to the expected type by changing - the type without losing information. -* *Unsafe conversion*: the actual type can be cast to the expected type by - changing the type or downcasting the type even if it is imprecise. -* *No match*: no valid operation can convert the actual type to the expected type. - -It is possible to have an ambiguous resolution. For example, a function with -signatures ``(int16, int32)`` and ``(int32, int16)`` can become ambiguous if -presented with the argument types ``(int32, int32)``, because demoting either -argument to ``int16`` is equally "fit". Fortunately, numba can usually resolve -such ambiguity by compiling a new version with the exact signature -``(int32, int32)``. When compilation is disabled and there are multiple -signatures with equal fit, an exception is raised. - -Type Inference -============== - -The type inference in numba has three important components---type -variable, constraint network, and typing context. - -* The *typing context* provides all the type information and typing related - operations, including the logic for type unification, and the logic for typing - of global and constant values. It defines the semantics of the language that - can be compiled by numba. - -* A *type variable* holds the type of each variable (in the Numba IR). - Conceptually, it is initialized to the universal type and, as it is re-assigned, - it stores a common type by unifying the new type with the existing type. The - common type must be able to represent values of the new type and the existing - type. Type conversion is applied as necessary and precision loss is - accepted for usability reasons. - -* The *constraint network* is a dependency graph built from the IR. Each - node represents an operation in the Numba IR and updates at least one type - variable. 
There may be cycles due to loops in user code. - -The type inference process starts by seeding the argument types. These initial -types are propagated in the constraint network, which eventually fills all the -type variables. Due to cycles in the network, the process repeats until all -type variables converge or it fails with undecidable types. - -Type unification always returns a more "general" (quoted because unsafe conversion -is allowed) type. Types will converge to the least "general" type that -can represent all possible values that the variable can hold. Since unification -will never move down the type hierarchy and there is a single top type, the -universal type---``object``, the type inference is guaranteed to converge. - -A failure in type inference can be caused by two reasons. The first reason is user -error due to incorrect use of a type. This type of error will also trigger an -exception in regular python execution. The second reason is due to the use of an -unsupported feature, but the code is otherwise valid in regular python -execution. Upon an error, the type inference will set all types to the object -type. As a result, numba will fallback to *object-mode*. - -Since functions can be overloaded, the type inference needs to decide the -type signature used at each call site. The overload resolution is applied to -all known overload versions of the callee function described in *call-templates*. -A call-template can either be concrete or abstract. A concrete call-template -defines a fixed list of all possible signatures. An abstract call-template -defines the logic to compute the accepted signature and it is used to implement -generic functions. - -Numba-compiled functions are generic functions due to their ability to compile -new versions. When it sees a new set of argument types, it triggers type -inference to validate and determine the return type. When there are nested calls -for numba-compiled functions, each call-site triggers type inference. 
-This poses a problem to recursive functions because the type inference will also -be triggered recursively. Currently, simple single recursion is supported if -the signature is user-annotated by the user, which avoids unbound recursion in -type inference that will never terminate. - -.. _ISOCPP: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4296.pdf \ No newline at end of file diff --git a/numba/docs/source/proposals/typing_recursion.rst b/numba/docs/source/proposals/typing_recursion.rst deleted file mode 100644 index 2b671903e..000000000 --- a/numba/docs/source/proposals/typing_recursion.rst +++ /dev/null @@ -1,129 +0,0 @@ -======================== -NBEP 6: Typing Recursion -======================== - -:Author: Siu Kwan Lam -:Date: Sept 2016 -:Status: Draft - -Introduction -============ - -This document proposes an enhancement to the type inference algorithm to -support recursion without explicitly annotating the function signature. -As a result, the proposal enables numba to type-infer both self-recursive and -mutual-recursive functions under some limitations. In practice, these -limitions can be easily overcome by specifying a compilation order. - - -The Current State -================= - -Recursion support in numba is currently limited to self-recursion with explicit -type annotation for the function. This limitation comes from the inability to -determine the return type of a recursive call. This is because the callee is -either the current function (for self-recursion) or a parent function -(mutual-recursion) and its type inference process has been suspended while waiting for -the function-type of its callee. This results in the formation of a cyclic -dependency. 
For example, given a function ``foo()`` that calls ``bar()``, -which in turns call ``foo()``:: - - def foo(x): - if x > 0: - return bar(x) - else: - return 1 - - def bar(x): - return foo(x - 1) - - -The type inferrence process of ``foo()`` depends on that of ``bar()``, -which depends on ``foo()``. Therefore ``foo()`` depends on itself and the type -inference algorithm cannot terminate. - - -The Solution -============ - -The proposed solution has two components: - -1. The introduction of a compile-time *callstack* that tracks the compiling functions. -2. The allowance of a partial type inference on functions by leveraging the return type - on non-recursive control-flow paths. - -The compile-time callstack stores typing information of the functions being -compiled. Like an ordinary callstack, it pushes a new record every time a -function is "called". Since this occurs at compile-time, a "call" triggers -a compilation of the callee. - -To detect recursion, the compile-time callstack is searched bottom-up -(stack grows downward) for a record that matches the callee. -As the record contains a reference to the type inference state, -the type inference process can be resumed to determine the return type. - -Recall that the type inference process cannot be resumed normally because of the cyclic -dependency of the return type. In practice, we can assume that a useful -program must have a terminating condition, a path that does not recurse. So, -the type inference process can make an initial guess for the return-type at the recursive -call by using the return-type determined by the non-recursive paths. This -allows type information to propagate on the recursive paths to generate the -final return type, which is used to refine the type information by the -subsequent iteration in the type inference process. - -The following figure illustrates the compile-time callstack when the compiler -reaches the recursive call to ``foo()`` from ``bar()``: - -.. 
image:: recursion_callstack.svg - :width: 400px - -At this time, the type inference process of ``foo()`` is suspended and that of ``bar()`` -is active. The compiler can see that the callee is already compiling by -searching the callstack. Knowing that it is a recursive call, the compiler -can resume the type-inference on ``foo()`` by ignoring the paths that contain -recursive calls. This means only the ``else`` branch is considered and we can -easily tell that ``foo()`` returns an ``int`` in this case. The compiler will -then set the initial return type of ``foo()`` and ``bar()`` to ``int``. The -subsequent type propagation can use this information to complete the type -inference of both functions, unifying the return-type of all returning paths. - - -Limitations -=========== - -For the proposed type inference algorithm to terminate, it assumes that -at least one of the control path leads to a return-statement without undertaking -a recursive call. Should this not be the case, the algorithm will raise an -exception indicating a potential runaway recursion. - -For example:: - - @jit - def first(x): - # The recursing call must have a path that is non-recursing. - if x > 0: - return second(x) - else: - return 1 - - @jit - def second(x): - return third(x) - - @jit - def third(x): - return first(x - 1) - - -The ``first()`` function must be the compiled first for the type inference algorithm to -complete successfully. Compiling any other function first will lead to a failure -in type inference. The type inference algorithm will treat it as a runaway -recursion due to the lack of a non-recursive exit in the recursive callee. - -For example, compiling ``second()`` first will move the recursive call to -``first()``. When the compiler tries to resume the type inference process of -``second()``, it will fail to find a non-recursive path. - -This is a small limitation and can be overcome easily by code restructuring or -precompiling in a specific order. 
- diff --git a/numba/docs/source/reference/aot-compilation.rst b/numba/docs/source/reference/aot-compilation.rst deleted file mode 100644 index 926993432..000000000 --- a/numba/docs/source/reference/aot-compilation.rst +++ /dev/null @@ -1,74 +0,0 @@ -Ahead-of-Time compilation -========================= - -.. currentmodule:: numba.pycc - -.. class:: CC(extension_name, source_module=None) - - An object used to generate compiled extensions from Numba-compiled - Python functions. *extension_name* is the name of the extension - to be generated. *source_module* is the Python module - containing the functions; if ``None``, it is inferred by examining - the call stack. - - :class:`CC` instances have the following attributes and methods: - - .. attribute:: name - - (read-only attribute) The name of the extension module to be generated. - - .. attribute:: output_dir - - (read-write attribute) The directory the extension module will be - written into. By default it is the directory the *source_module* is - located in. - - .. attribute:: output_file - - (read-write attribute) The name of the file the extension module will - be written to. By default this follows the Python naming convention - for the current platform. - - .. attribute:: target_cpu - - (read-write attribute) The name of the CPU model to generate code for. - This will select the appropriate instruction set extensions. By - default, a generic CPU is selected in order to produce portable code. - - Recognized names for this attribute depend on the current architecture - and LLVM version. If you have LLVM installed, ``llc -mcpu=help`` - will give you a list. Examples on x86-64 are ``"ivybridge"``, - ``"haswell"``, ``"skylake"`` or ``"broadwell"``. You can also give - the value ``"host"`` which will select the current host CPU. - - .. attribute:: verbose - - (read-write attribute) If true, print out information while - compiling the extension. False by default. - - .. 
decorator:: export(exported_name, sig) - - Mark the decorated function for compilation with the signature *sig*. - The compiled function will be exposed as *exported_name* in the - generated extension module. - - All exported names within a given :class:`CC` instance must be - distinct, otherwise an exception is raised. - - .. method:: compile() - - Compile all exported functions and generate the extension module - as specified by :attr:`output_dir` and :attr:`output_file`. - - .. method:: distutils_extension(**kwargs) - - Return a :py:class:`distutils.core.Extension` instance allowing - to integrate generation of the extension module in a conventional - ``setup.py``-driven build process. The optional *kwargs* let you - pass optional parameters to the :py:class:`~distutils.core.Extension` - constructor. - - In this mode of operation, it is not necessary to call :meth:`compile` - yourself. Also, :attr:`output_dir` and :attr:`output_file` will be - ignored. - diff --git a/numba/docs/source/reference/envvars.rst b/numba/docs/source/reference/envvars.rst deleted file mode 100644 index 3041af0f2..000000000 --- a/numba/docs/source/reference/envvars.rst +++ /dev/null @@ -1,266 +0,0 @@ -.. _numba-envvars: - -Environment variables -===================== - -Numba allows its behaviour to be changed through the use of environment -variables. Unless otherwise mentioned, those variables have integer values and -default to zero. - -For convenience, Numba also supports the use of a configuration file to persist -configuration settings. Note: To use this feature ``pyyaml`` must be installed. - -The configuration file must be named ``.numba_config.yaml`` and be present in -the directory from which the Python interpreter is invoked. The configuration -file, if present, is read for configuration settings before the environment -variables are searched. 
This means that the environment variable settings will -override the settings obtained from a configuration file (the configuration file -is for setting permanent preferences whereas the environment variables are for -ephemeral preferences). - -The format of the configuration file is a dictionary in ``YAML`` format that -maps the environment variables below (without the ``NUMBA_`` prefix) to a -desired value. For example, to permanently switch on developer mode -(``NUMBA_DEVELOPER_MODE`` environment variable) and control flow graph printing -(``NUMBA_DUMP_CFG`` environment variable), create a configuration file with the -contents:: - - developer_mode: 1 - dump_cfg: 1 - -This can be especially useful in the case of wanting to use a set color scheme -based on terminal background color. For example, if the terminal background -color is black, the ``dark_bg`` color scheme would be well suited and can be set -for permanent use by adding:: - - color_scheme: dark_bg - - -Errors and warnings display ---------------------------- - -.. envvar:: NUMBA_WARNINGS - - If set to non-zero, printout of Numba warnings is enabled, otherwise - the warnings are suppressed. The warnings can give insight into the - compilation process. - - -Debugging ---------- - -These variables influence what is printed out during compilation of -:term:`JIT functions `. - -.. envvar:: NUMBA_DEVELOPER_MODE - - If set to non-zero, developer mode produces full tracebacks and disables - help instructions. Default is zero. - -.. envvar:: NUMBA_FULL_TRACEBACKS - - If set to non-zero, enable full tracebacks when an exception occurs. - Defaults to the value set by `NUMBA_DEVELOPER_MODE`. - -.. envvar:: NUMBA_SHOW_HELP - - If not set or set to zero, show user level help information. - Defaults to the negation of the value set by `NUMBA_DEVELOPER_MODE`. - -.. envvar:: NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING - - If set to non-zero error message highlighting is disabled. 
This is useful - for running the test suite on CI systems. - -.. envvar:: NUMBA_COLOR_SCHEME - - Alters the color scheme used in error reporting (requires the ``colorama`` - package to be installed to work). Valid values are: - - - ``no_color`` No color added, just bold font weighting. - - ``dark_bg`` Suitable for terminals with a dark background. - - ``light_bg`` Suitable for terminals with a light background. - - ``blue_bg`` Suitable for terminals with a blue background. - - ``jupyter_nb`` Suitable for use in Jupyter Notebooks. - - *Default value:* ``no_color``. The type of the value is ``string``. - -.. envvar:: NUMBA_DEBUG - - If set to non-zero, print out all possible debugging information during - function compilation. Finer-grained control can be obtained using other - variables below. - -.. envvar:: NUMBA_DEBUG_FRONTEND - - If set to non-zero, print out debugging information during operation - of the compiler frontend, up to and including generation of the Numba - Intermediate Representation. - -.. envvar:: NUMBA_DEBUG_TYPEINFER - - If set to non-zero, print out debugging information about type inference. - -.. envvar:: NUMBA_DEBUG_CACHE - - If set to non-zero, print out information about operation of the - :ref:`JIT compilation cache `. - -.. envvar:: NUMBA_TRACE - - If set to non-zero, trace certain function calls (function entry and exit - events, including arguments and return values). - -.. envvar:: NUMBA_DUMP_BYTECODE - - If set to non-zero, print out the Python :py:term:`bytecode` of - compiled functions. - -.. envvar:: NUMBA_DUMP_CFG - - If set to non-zero, print out information about the Control Flow Graph - of compiled functions. - -.. envvar:: NUMBA_DUMP_IR - - If set to non-zero, print out the Numba Intermediate Representation - of compiled functions. - -.. envvar:: NUMBA_DUMP_ANNOTATION - - If set to non-zero, print out types annotations for compiled functions. - -.. 
envvar:: NUMBA_DUMP_LLVM - - Dump the unoptimized LLVM assembler source of compiled functions. - Unoptimized code is usually very verbose; therefore, - :envvar:`NUMBA_DUMP_OPTIMIZED` is recommended instead. - -.. envvar:: NUMBA_DUMP_FUNC_OPT - - Dump the LLVM assembler source after the LLVM "function optimization" - pass, but before the "module optimization" pass. This is useful mostly - when developing Numba itself, otherwise use :envvar:`NUMBA_DUMP_OPTIMIZED`. - -.. envvar:: NUMBA_DUMP_OPTIMIZED - - Dump the LLVM assembler source of compiled functions after all - optimization passes. The output includes the raw function as well as - its CPython-compatible wrapper (whose name begins with ``wrapper.``). - Note that the function is often inlined inside the wrapper, as well. - -.. envvar:: NUMBA_DEBUG_ARRAY_OPT - - Dump debugging information related to the processing associated with - the ``parallel=True`` jit decorator option. - -.. envvar:: NUMBA_DEBUG_ARRAY_OPT_RUNTIME - - Dump debugging information related to the runtime scheduler associated - with the ``parallel=True`` jit decorator option. - -.. envvar:: NUMBA_DEBUG_ARRAY_OPT_STATS - - Dump statistics about how many operators/calls are converted to - parallel for-loops and how many are fused together, which are associated - with the ``parallel=True`` jit decorator option. - -.. envvar:: NUMBA_DUMP_ASSEMBLY - - Dump the native assembler code of compiled functions. - -.. seealso:: - :ref:`numba-troubleshooting` and :ref:`architecture`. - - -Compilation options -------------------- - -.. envvar:: NUMBA_OPT - - The optimization level; this option is passed straight to LLVM. - - *Default value:* 3 - -.. envvar:: NUMBA_LOOP_VECTORIZE - - If set to non-zero, enable LLVM loop vectorization. - - *Default value:* 1 (except on 32-bit Windows) - -.. envvar:: NUMBA_ENABLE_AVX - - If set to non-zero, enable AVX optimizations in LLVM. 
This is disabled - by default on Sandy Bridge and Ivy Bridge architectures as it can sometimes - result in slower code on those platforms. - -.. envvar:: NUMBA_DISABLE_INTEL_SVML - - If set to non-zero and Intel SVML is available, the use of SVML will be - disabled. - -.. envvar:: NUMBA_COMPATIBILITY_MODE - - If set to non-zero, compilation of JIT functions will never entirely - fail, but instead generate a fallback that simply interprets the - function. This is only to be used if you are migrating a large - codebase from an old Numba version (before 0.12), and want to avoid - breaking everything at once. Otherwise, please don't use this. - -.. envvar:: NUMBA_DISABLE_JIT - - Disable JIT compilation entirely. The :func:`~numba.jit` decorator acts - as if it performs no operation, and the invocation of decorated functions - calls the original Python function instead of a compiled version. This - can be useful if you want to run the Python debugger over your code. - -.. envvar:: NUMBA_CPU_NAME and NUMBA_CPU_FEATURES - - Override CPU and CPU features detection. - By setting ``NUMBA_CPU_NAME=generic``, a generic CPU model is picked - for the CPU architecture and the feature list (``NUMBA_CPU_FEATURES``) - defaults to empty. CPU features must be listed with the format - ``+feature1,-feature2`` where ``+`` indicates enable and ``-`` indicates - disable. For example, ``+sse,+sse2,-avx,-avx2`` enables SSE and SSE2, and - disables AVX and AVX2. - - These settings are passed to LLVM for configuring the compilation target. - To get a list of available options, use the ``llc`` commandline tool - from LLVM, for example:: - - llc -march=x86 -mattr=help - - - .. tip:: To force all caching functions (``@jit(cache=True)``) to emit - portable code (portable within the same architecture and OS), - simply set ``NUMBA_CPU_NAME=generic``. - - -GPU support ------------ - -.. envvar:: NUMBA_DISABLE_CUDA - - If set to non-zero, disable CUDA support. - -.. 
envvar:: NUMBA_FORCE_CUDA_CC - - If set, force the CUDA compute capability to the given version (a - string of the type ``major.minor``), regardless of attached devices. - -.. envvar:: NUMBA_ENABLE_CUDASIM - - If set, don't compile and execute code for the GPU, but use the CUDA - Simulator instead. For debugging purposes. - -Threading Control ------------------ - -.. envvar:: NUMBA_NUM_THREADS - - If set, the number of threads in the thread pool for the parallel CPU target - will take this value. Must be greater than zero. This value is independent - of ``OMP_NUM_THREADS`` and ``MKL_NUM_THREADS``. - - *Default value:* The number of CPU cores on the system as determined at run - time, this can be accessed via ``numba.config.NUMBA_DEFAULT_NUM_THREADS``. diff --git a/numba/docs/source/reference/fpsemantics.rst b/numba/docs/source/reference/fpsemantics.rst deleted file mode 100644 index c9b607121..000000000 --- a/numba/docs/source/reference/fpsemantics.rst +++ /dev/null @@ -1,85 +0,0 @@ - -Floating-point pitfalls -======================= - -Precision and accuracy ----------------------- - -For some operations, Numba may use a different algorithm than Python or -Numpy. The results may not be bit-by-bit compatible. The difference -should generally be small and within reasonable expectations. However, -small accumulated differences might produce large differences at the end, -especially if a divergent function is involved. - -Math library implementations -'''''''''''''''''''''''''''' - -Numba supports a variety of platforms and operating systems, each of which -has its own math library implementation (referred to as ``libm`` from here -in). The majority of math functions included in ``libm`` have specific -requirements as set out by the IEEE 754 standard (like ``sin()``, ``exp()`` -etc.), but each implementation may have bugs. Thus, on some platforms -Numba has to exercise special care in order to workaround known ``libm`` -issues. 
- -Another typical problem is when an operating system's ``libm`` function -set is incomplete and needs to be supplemented by additional functions. -These are provided with reference to the IEEE 754 and C99 standards -and are often implemented in Numba in a manner similar to equivalent -CPython functions. - -In particular, math library issues are known to affect Python 2.7 builds -on Windows, since Python 2.7 requires the use of an obsolete version of -the Microsoft Visual Studio compiler. - -Linear algebra -'''''''''''''' - -Numpy forces some linear algebra operations to run in double-precision mode -even when a ``float32`` input is given. Numba will always observe -the input's precision, and invoke single-precision linear algebra routines -when all inputs are ``float32`` or ``complex64``. - -The implementations of the ``numpy.linalg`` routines in Numba only support the -floating point types that are used in the LAPACK functions that provide -the underlying core functionality. As a result only ``float32``, ``float64``, -``complex64`` and ``complex128`` types are supported. If a user has e.g. an -``int32`` type, an appropriate type conversion must be performed to a -floating point type prior to its use in these routines. The reason for this -decision is to essentially avoid having to replicate type conversion choices -made in Numpy and to also encourage the user to choose the optimal floating -point type for the operation they are undertaking. - - -Mixed-types operations -'''''''''''''''''''''' - -Numpy will most often return a ``float64`` as a result of a computation -with mixed integer and floating-point operands (a typical example is the -power operator ``**``). Numba by contrast will select the highest precision -amongst the floating-point operands, so for example ``float32 ** int32`` -will return a ``float32``, regardless of the input values. 
This makes -performance characteristics easier to predict, but you should explicitly -cast the input to ``float64`` if you need the extra precision. - - -.. _ufunc-fpu-errors: - -Warnings and errors -------------------- - -When calling a :term:`ufunc` created with :func:`~numba.vectorize`, -Numpy will determine whether an error occurred by examining the FPU -error word. It may then print out a warning or raise an exception -(such as ``RuntimeWarning: divide by zero encountered``), -depending on the current error handling settings. - -Depending on how LLVM optimized the ufunc's code, however, some spurious -warnings or errors may appear. If you get caught by this issue, we -recommend you call :func:`numpy.seterr` to change Numpy's error handling -settings, or the :class:`numpy.errstate` context manager to switch them -temporarily:: - - with np.errstate(all='ignore'): - x = my_ufunc(y) - diff --git a/numba/docs/source/reference/index.rst b/numba/docs/source/reference/index.rst deleted file mode 100644 index 15cd6876f..000000000 --- a/numba/docs/source/reference/index.rst +++ /dev/null @@ -1,16 +0,0 @@ - -Reference Manual -================ - -.. toctree:: - - types.rst - jit-compilation.rst - aot-compilation.rst - utils.rst - envvars.rst - pysupported.rst - numpysupported.rst - pysemantics.rst - fpsemantics.rst - python27-eol.rst diff --git a/numba/docs/source/reference/jit-compilation.rst b/numba/docs/source/reference/jit-compilation.rst deleted file mode 100644 index 2adcdcdba..000000000 --- a/numba/docs/source/reference/jit-compilation.rst +++ /dev/null @@ -1,478 +0,0 @@ -Just-in-Time compilation -======================== - - -JIT functions -------------- - -.. decorator:: numba.jit(signature=None, nopython=False, nogil=False, cache=False, forceobj=False, parallel=False, error_model='python', locals={}) - - Compile the decorated function on-the-fly to produce efficient machine - code. All parameters all optional. 
- - If present, the *signature* is either a single signature or a list of - signatures representing the expected :ref:`numba-types` of function - arguments and return values. Each signature can be given in several - forms: - - * A tuple of :ref:`numba-types` arguments (for example - ``(numba.int32, numba.double)``) representing the types of the - function's arguments; Numba will then infer an appropriate return - type from the arguments. - * A call signature using :ref:`numba-types`, specifying both return - type and argument types. This can be given in intuitive form - (for example ``numba.void(numba.int32, numba.double)``). - * A string representation of one of the above, for example - ``"void(int32, double)"``. All type names used in the string are assumed - to be defined in the ``numba.types`` module. - - *nopython* and *nogil* are boolean flags. *locals* is a mapping of - local variable names to :ref:`numba-types`. - - This decorator has several modes of operation: - - * If one or more signatures are given in *signature*, a specialization is - compiled for each of them. Calling the decorated function will then try - to choose the best matching signature, and raise a :class:`TypeError` if - no appropriate conversion is available for the function arguments. If - converting succeeds, the compiled machine code is executed with the - converted arguments and the return value is converted back according to - the signature. - - * If no *signature* is given, the decorated function implements - lazy compilation. Each call to the decorated function will try to - re-use an existing specialization if it exists (for example, a call - with two integer arguments may re-use a specialization for argument - types ``(numba.int64, numba.int64)``). If no suitable specialization - exists, a new specialization is compiled on-the-fly, stored for later - use, and executed with the converted arguments. 
- - If true, *nopython* forces the function to be compiled in :term:`nopython - mode`. If not possible, compilation will raise an error. - - If true, *forceobj* forces the function to be compiled in :term:`object - mode`. Since object mode is slower than nopython mode, this is mostly - useful for testing purposes. - - If true, *nogil* tries to release the :py:term:`global interpreter lock` - inside the compiled function. The GIL will only be released if Numba can - compile the function in :term:`nopython mode`, otherwise a compilation - warning will be printed. - - If true, *cache* enables a file-based cache to shorten compilation times - when the function was already compiled in a previous invocation. - The cache is maintained in the ``__pycache__`` subdirectory of - the directory containing the source file; if the current user is not - allowed to write to it, though, it falls back to a platform-specific - user-wide cache directory (such as ``$HOME/.cache/numba`` on Unix - platforms). - - If true, *parallel* enables the automatic parallelization of a number of - common Numpy constructs as well as the fusion of adjacent parallel - operations to maximize cache locality. - - The *error_model* option controls the divide-by-zero behavior. - Setting it to 'python' causes divide-by-zero to raise exception like CPython. - Setting it to 'numpy' causes divide-by-zero to set the result to *+/-inf* or - *nan*. - - Not all functions can be cached, since some functionality cannot be - always persisted to disk. When a function cannot be cached, a - warning is emitted; use :envvar:`NUMBA_WARNINGS` to see it. - - The *locals* dictionary may be used to force the :ref:`numba-types` - of particular local variables, for example if you want to force the - use of single precision floats at some point. In general, we recommend - you let Numba's compiler infer the types of local variables by itself. 
- - Here is an example with two signatures:: - - @jit(["int32(int32)", "float32(float32)"], nopython=True) - def f(x): ... - - Not putting any parentheses after the decorator is equivalent to calling - the decorator without any arguments, i.e.:: - - @jit - def f(x): ... - - is equivalent to:: - - @jit() - def f(x): ... - - The decorator returns a :class:`Dispatcher` object. - - .. note:: - If no *signature* is given, compilation errors will be raised when - the actual compilation occurs, i.e. when the function is first called - with some given argument types. - - .. note:: - Compilation can be influenced by some dedicated :ref:`numba-envvars`. - - -Generated JIT functions ------------------------ - -.. decorator:: numba.generated_jit(nopython=False, nogil=False, cache=False, forceobj=False, locals={}) - - Like the :func:`~numba.jit` decorator, but calls the decorated function at - compile-time, passing the *types* of the function's arguments. - The decorated function must return a callable which will be compiled as - the function's implementation for those types, allowing flexible kinds of - specialization. - - The :func:`~numba.generated_jit` decorator returns a :class:`Dispatcher` object. - - -Dispatcher objects ------------------- - -.. class:: Dispatcher - - The class of objects created by calling :func:`~numba.jit` or - :func:`~numba.generated_jit`. You shouldn't try to create such an object - in any other way. Calling a Dispatcher object calls the compiled - specialization for the arguments with which it is called, letting it - act as an accelerated replacement for the Python function which was compiled. - - In addition, Dispatcher objects have the following methods and attributes: - - .. attribute:: py_func - - The pure Python function which was compiled. - - .. 
method:: inspect_types(file=None, pretty=False) - - Print out a listing of the function source code annotated line-by-line - with the corresponding Numba IR, and the inferred types of the various - variables. If *file* is specified, printing is done to that file - object, otherwise to sys.stdout. If *pretty* is set to True then colored - ANSI will be produced in a terminal and HTML in a notebook. - - .. seealso:: :ref:`architecture` - - .. method:: inspect_llvm(signature=None) - - Return a dictionary keying compiled function signatures to the human - readable LLVM IR generated for the function. If the signature - keyword is specified a string corresponding to that individual - signature is returned. - - .. method:: inspect_asm(signature=None) - - Return a dictionary keying compiled function signatures to the - human-readable native assembler code for the function. If the - signature keyword is specified a string corresponding to that - individual signature is returned. - - .. method:: inspect_cfg(signature=None, show_wrapped) - - Return a dictionary keying compiled function signatures to the - control-flow graph objects for the function. If the signature keyword is - specified a string corresponding to that individual signature is returned. - - The control-flow graph objects can be stringified (``str`` or ``repr``) - to get the textual representation of the graph in DOT format. Or, use - its ``.display(filename=None, view=False)`` method to plot the graph. - The *filename* option can be set to a specific path for the rendered - output to write to. If *view* option is True, the plot is opened by - the system default application for the image format (PDF). In IPython - notebook, the returned object can be plot inlined. - - Usage:: - - @jit - def foo(): - ... - - # opens the CFG in system default application - foo.inspect_cfg(foo.signatures[0]).display(view=True) - - .. method:: recompile() - - Recompile all existing signatures. 
This can be useful for example if - a global or closure variable was frozen by your function and its value - in Python has changed. Since compiling isn't cheap, this is mainly - for testing and interactive use. - - -Vectorized functions (ufuncs and DUFuncs) ------------------------------------------ - -.. decorator:: numba.vectorize(*, signatures=[], identity=None, nopython=True, target='cpu', forceobj=False, cache=False, locals={}) - - Compile the decorated function and wrap it either as a `Numpy - ufunc`_ or a Numba :class:`~numba.DUFunc`. The optional - *nopython*, *forceobj* and *locals* arguments have the same meaning - as in :func:`numba.jit`. - - *signatures* is an optional list of signatures expressed in the - same form as in the :func:`numba.jit` *signature* argument. If - *signatures* is non-empty, then the decorator will compile the user - Python function into a Numpy ufunc. If no *signatures* are given, - then the decorator will wrap the user Python function in a - :class:`~numba.DUFunc` instance, which will compile the user - function at call time whenever Numpy can not find a matching loop - for the input arguments. *signatures* is required if *target* is - ``"parallel"``. - - *identity* is the identity (or unit) value of the function being - implemented. Possible values are 0, 1, None, and the string - ``"reorderable"``. The default is None. Both None and - ``"reorderable"`` mean the function has no identity value; - ``"reorderable"`` additionally specifies that reductions along multiple - axes can be reordered. - - If there are several *signatures*, they must be ordered from the more - specific to the least specific. Otherwise, Numpy's type-based - dispatching may not work as expected. For example, the following is - wrong:: - - @vectorize(["float64(float64)", "float32(float32)"]) - def f(x): ... - - as running it over a single-precision array will choose the ``float64`` - version of the compiled function, leading to much less efficient - execution. 
The correct invocation is:: - - @vectorize(["float32(float32)", "float64(float64)"]) - def f(x): ... - - *target* is a string for backend target; Available values are "cpu", - "parallel", and "cuda". To use a multithreaded version, change the - target to "parallel" (which requires signatures to be specified):: - - @vectorize(["float64(float64)", "float32(float32)"], target='parallel') - def f(x): ... - - For the CUDA target, use "cuda":: - - @vectorize(["float64(float64)", "float32(float32)"], target='cuda') - def f(x): ... - - The compiled function can be cached to reduce future compilation time. - It is enabled by setting *cache* to True. Only the "cpu" and "parallel" - targets support caching. - - -.. decorator:: numba.guvectorize(signatures, layout, *, identity=None, nopython=True, target='cpu', forceobj=False, cache=False, locals={}) - - Generalized version of :func:`numba.vectorize`. While - :func:`numba.vectorize` will produce a simple ufunc whose core - functionality (the function you are decorating) operates on scalar - operands and returns a scalar value, :func:`numba.guvectorize` - allows you to create a `Numpy ufunc`_ whose core function takes array - arguments of various dimensions. - - The additional argument *layout* is a string specifying, in symbolic - form, the dimensionality and size relationship of the argument types - and return types. For example, a matrix multiplication will have - a layout string of ``"(m,n),(n,p)->(m,p)"``. Its definition might - be (function body omitted):: - - @guvectorize(["void(float64[:,:], float64[:,:], float64[:,:])"], - "(m,n),(n,p)->(m,p)") - def f(a, b, result): - """Fill-in *result* matrix such as result := a * b""" - ... - - If one of the arguments should be a scalar, the corresponding layout - specification is ``()`` and the argument will really be given to - you as a zero-dimension array (you have to dereference it to get the - scalar value). 
For example, a :ref:`one-dimension moving average ` - with a parameterable window width may have a layout string of ``"(n),()->(n)"``. - - Note that any output will be given to you preallocated as an additional - function argument: your code has to fill it with the appropriate values - for the function you are implementing. - - If your function doesn't take an output array, you should omit the "arrow" - in the layout string (e.g. ``"(n),(n)"``). - - .. seealso:: - Specification of the `layout string `_ - as supported by Numpy. Note that Numpy uses the term "signature", - which we unfortunately use for something else. - - The compiled function can be cached to reduce future compilation time. - It is enabled by setting *cache* to True. Only the "cpu" and "parallel" - targets support caching. - -.. _Numpy ufunc: http://docs.scipy.org/doc/numpy/reference/ufuncs.html - -.. class:: numba.DUFunc - - The class of objects created by calling :func:`numba.vectorize` - with no signatures. - - DUFunc instances should behave similarly to Numpy - :class:`~numpy.ufunc` objects with one important difference: - call-time loop generation. When calling a ufunc, Numpy looks at - the existing loops registered for that ufunc, and will raise a - :class:`~python.TypeError` if it cannot find a loop that it cannot - safely cast the inputs to suit. When calling a DUFunc, Numba - delegates the call to Numpy. If the Numpy ufunc call fails, then - Numba attempts to build a new loop for the given input types, and - calls the ufunc again. If this second call attempt fails or a - compilation error occurs, then DUFunc passes along the exception to - the caller. - - .. seealso:: - - The ":ref:`dynamic-universal-functions`" section in the user's - guide demonstrates the call-time behavior of - :class:`~numba.DUFunc`, and discusses the impact of call order - on how Numba generates the underlying :class:`~numpy.ufunc`. - - .. 
attribute:: ufunc - - The actual Numpy :class:`~numpy.ufunc` object being built by the - :class:`~numba.DUFunc` instance. Note that the - :class:`~numba.DUFunc` object maintains several important data - structures required for proper ufunc functionality (specifically - the dynamically compiled loops). Users should not pass the - :class:`~numpy.ufunc` value around without ensuring the - underlying :class:`~numba.DUFunc` will not be garbage collected. - - .. attribute:: nin - - The number of DUFunc (ufunc) inputs. See `ufunc.nin`_. - - .. attribute:: nout - - The number of DUFunc outputs. See `ufunc.nout`_. - - .. attribute:: nargs - - The total number of possible DUFunc arguments (should be - :attr:`~numba.DUFunc.nin` + :attr:`~numba.DUFunc.nout`). - See `ufunc.nargs`_. - - .. attribute:: ntypes - - The number of input types supported by the DUFunc. See - `ufunc.ntypes`_. - - .. attribute:: types - - A list of the supported types given as strings. See - `ufunc.types`_. - - .. attribute:: identity - - The identity value when using the ufunc as a reduction. See - `ufunc.identity`_. - - .. method:: reduce(A, *, axis, dtype, out, keepdims) - - Reduces *A*\'s dimension by one by applying the DUFunc along one - axis. See `ufunc.reduce`_. - - .. method:: accumulate(A, *, axis, dtype, out) - - Accumulate the result of applying the operator to all elements. - See `ufunc.accumulate`_. - - .. method:: reduceat(A, indices, *, axis, dtype, out) - - Performs a (local) reduce with specified slices over a single - axis. See `ufunc.reduceat`_. - - .. method:: outer(A, B) - - Apply the ufunc to all pairs (*a*, *b*) with *a* in *A*, and *b* - in *B*. See `ufunc.outer`_. - - .. method:: at(A, indices, *, B) - - Performs unbuffered in place operation on operand *A* for - elements specified by *indices*. If you are using Numpy 1.7 or - earlier, this method will not be present. See `ufunc.at`_. - - -.. 
note:: - Vectorized functions can, in rare circumstances, show - :ref:`unexpected warnings or errors `. - - -.. _`ufunc.nin`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.nin.html#numpy.ufunc.nin - -.. _`ufunc.nout`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.nout.html#numpy.ufunc.nout - -.. _`ufunc.nargs`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.nargs.html#numpy.ufunc.nargs - -.. _`ufunc.ntypes`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.ntypes.html#numpy.ufunc.ntypes - -.. _`ufunc.types`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.types.html#numpy.ufunc.types - -.. _`ufunc.identity`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.identity.html#numpy.ufunc.identity - -.. _`ufunc.reduce`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.reduce.html#numpy.ufunc.reduce - -.. _`ufunc.accumulate`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.accumulate.html#numpy.ufunc.accumulate - -.. _`ufunc.reduceat`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.reduceat.html#numpy.ufunc.reduceat - -.. _`ufunc.outer`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.outer.html#numpy.ufunc.outer - -.. _`ufunc.at`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.at.html#numpy.ufunc.at - - -C callbacks ------------ - -.. decorator:: numba.cfunc(signature, nopython=False, cache=False, locals={}) - - Compile the decorated function on-the-fly to produce efficient machine - code. The compiled code is wrapped in a thin C callback that makes it - callable using the natural C ABI. - - The *signature* is a single signature representing the signature of the - C callback. It must have the same form as in :func:`~numba.jit`. - The decorator does not check that the types in the signature have - a well-defined representation in C. - - *nopython* and *cache* are boolean flags. 
*locals* is a mapping of - local variable names to :ref:`numba-types`. They all have the same - meaning as in :func:`~numba.jit`. - - The decorator returns a :class:`CFunc` object. - - .. note:: - C callbacks currently do not support :term:`object mode`. - - -.. class:: CFunc - - The class of objects created by :func:`~numba.cfunc`. :class:`CFunc` - objects expose the following attributes and methods: - - .. attribute:: address - - The address of the compiled C callback, as an integer. - - .. attribute:: cffi - - A `cffi`_ function pointer instance, to be passed as an argument to - `cffi`_-wrapped functions. The pointer's type is ``void *``, so - only minimal type checking will happen when passing it to `cffi`_. - - .. attribute:: ctypes - - A :mod:`ctypes` callback instance, as if it were created using - :func:`ctypes.CFUNCTYPE`. - - .. attribute:: native_name - - The name of the compiled C callback. - - .. method:: inspect_llvm() - - Return the human-readable LLVM IR generated for the C callback. - :attr:`native_name` is the name under which this callback is defined - in the IR. - - -.. _cffi: https://cffi.readthedocs.org/ diff --git a/numba/docs/source/reference/numpysupported.rst b/numba/docs/source/reference/numpysupported.rst deleted file mode 100644 index bab00897b..000000000 --- a/numba/docs/source/reference/numpysupported.rst +++ /dev/null @@ -1,600 +0,0 @@ - -======================== -Supported NumPy features -======================== - -One objective of Numba is having a seamless integration with `NumPy`_. -NumPy arrays provide an efficient storage method for homogeneous sets of -data. NumPy dtypes provide type information useful when compiling, and -the regular, structured storage of potentially large amounts of data -in memory provides an ideal memory layout for code generation. Numba -excels at generating code that executes on top of NumPy arrays. 
- -NumPy support in Numba comes in many forms: - -* Numba understands calls to NumPy `ufuncs`_ and is able to generate - equivalent native code for many of them. - -* NumPy arrays are directly supported in Numba. Access to Numpy arrays - is very efficient, as indexing is lowered to direct memory accesses - when possible. - -* Numba is able to generate `ufuncs`_ and `gufuncs`_. This means that it - is possible to implement ufuncs and gufuncs within Python, getting - speeds comparable to that of ufuncs/gufuncs implemented in C extension - modules using the NumPy C API. - -.. _NumPy: http://www.numpy.org/ -.. _ufuncs: http://docs.scipy.org/doc/numpy/reference/ufuncs.html -.. _gufuncs: http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html - -The following sections focus on the Numpy features supported in -:term:`nopython mode`, unless otherwise stated. - - -Scalar types -============ - -Numba supports the following Numpy scalar types: - -* **Integers**: all integers of either signedness, and any width up to 64 bits -* **Booleans** -* **Real numbers:** single-precision (32-bit) and double-precision (64-bit) reals -* **Complex numbers:** single-precision (2x32-bit) and double-precision (2x64-bit) complex numbers -* **Datetimes and timestamps:** of any unit -* **Character sequences** (but no operations are available on them) -* **Structured scalars:** structured scalars made of any of the types above and arrays of the types above - -The following scalar types and features are not supported: - -* **Arbitrary Python objects** -* **Half-precision and extended-precision** real and complex numbers -* **Nested structured scalars** the fields of structured scalars may not contain other structured scalars - -The operations supported on scalar Numpy numbers are the same as on the -equivalent built-in types such as ``int`` or ``float``. You can use -a type's constructor to convert from a different type or width. 
- -Structured scalars support attribute getting and setting, as well as -member lookup using constant strings. - -.. seealso:: - `Numpy scalars `_ - reference. - - -Array types -=========== - -`Numpy arrays `_ -of any of the scalar types above are supported, regardless of the shape -or layout. - -Array access ------------- - -Arrays support normal iteration. Full basic indexing and slicing is -supported. A subset of advanced indexing is also supported: only one -advanced index is allowed, and it has to be a one-dimensional array -(it can be combined with an arbitrary number of basic indices as well). - -.. seealso:: - `Numpy indexing `_ - reference. - -Attributes ----------- - -The following attributes of Numpy arrays are supported: - -* :attr:`~numpy.ndarray.dtype` -* :attr:`~numpy.ndarray.flags` -* :attr:`~numpy.ndarray.flat` -* :attr:`~numpy.ndarray.itemsize` -* :attr:`~numpy.ndarray.ndim` -* :attr:`~numpy.ndarray.shape` -* :attr:`~numpy.ndarray.size` -* :attr:`~numpy.ndarray.strides` -* :attr:`~numpy.ndarray.T` -* :attr:`~numpy.ndarray.real` -* :attr:`~numpy.ndarray.imag` - -The ``flags`` object -'''''''''''''''''''' - -The object returned by the :attr:`~numpy.ndarray.flags` attribute supports -the ``contiguous``, ``c_contiguous`` and ``f_contiguous`` attributes. - -The ``flat`` object -''''''''''''''''''' - -The object returned by the :attr:`~numpy.ndarray.flat` attribute supports -iteration and indexing, but be careful: indexing is very slow on -non-C-contiguous arrays. - -The ``real`` and ``imag`` attributes -'''''''''''''''''''''''''''''''''''' - -Numpy supports these attributes regardless of the dtype but Numba chooses to -limit their support to avoid potential user error. For numeric dtypes, -Numba follows Numpy's behavior. The :attr:`~numpy.ndarray.real` attribute -returns a view of the real part of the complex array and it behaves as an identity -function for other numeric dtypes. 
The :attr:`~numpy.ndarray.imag` attribute -returns a view of the imaginary part of the complex array and it returns a zero -array with the same shape and dtype for other numeric dtypes. For non-numeric -dtypes, including all structured/record dtypes, using these attributes will -result in a compile-time (`TypingError`) error. This behavior differs from -Numpy's but it is chosen to avoid the potential confusion with field names that -overlap these attributes. - -Calculation ------------ - -The following methods of Numpy arrays are supported in their basic form -(without any optional arguments): - -* :meth:`~numpy.ndarray.all` -* :meth:`~numpy.ndarray.any` -* :meth:`~numpy.ndarray.argmax` -* :meth:`~numpy.ndarray.argmin` -* :meth:`~numpy.ndarray.cumprod` -* :meth:`~numpy.ndarray.cumsum` -* :meth:`~numpy.ndarray.max` -* :meth:`~numpy.ndarray.mean` -* :meth:`~numpy.ndarray.min` -* :meth:`~numpy.ndarray.nonzero` -* :meth:`~numpy.ndarray.prod` -* :meth:`~numpy.ndarray.std` -* :meth:`~numpy.ndarray.take` -* :meth:`~numpy.ndarray.var` - -The corresponding top-level Numpy functions (such as :func:`numpy.prod`) -are similarly supported. - -Other methods -------------- - -The following methods of Numpy arrays are supported: - -* :meth:`~numpy.ndarray.argsort` (without arguments) -* :meth:`~numpy.ndarray.astype` (only the 1-argument form) -* :meth:`~numpy.ndarray.copy` (without arguments) -* :meth:`~numpy.ndarray.dot` (only the 1-argument form) -* :meth:`~numpy.ndarray.flatten` (no order argument; 'C' order only) -* :meth:`~numpy.ndarray.item` (without arguments) -* :meth:`~numpy.ndarray.itemset` (only the 1-argument form) -* :meth:`~numpy.ndarray.ravel` (no order argument; 'C' order only) -* :meth:`~numpy.ndarray.reshape` (only the 1-argument form) -* :meth:`~numpy.ndarray.sort` (without arguments) -* :meth:`~numpy.ndarray.sum` (with or without the ``axis`` argument) - - * If the ``axis`` argument is a compile-time constant, all valid values are supported. 
- An out-of-range value will result in a ``LoweringError`` at compile-time. - * If the ``axis`` argument is not a compile-time constant, only values from 0 to 3 are supported. - An out-of-range value will result in a runtime exception. - -* :meth:`~numpy.ndarray.transpose` -* :meth:`~numpy.ndarray.view` (only the 1-argument form) - - -.. warning:: - Sorting may be slightly slower than Numpy's implementation. - - -Functions -========= - -Linear algebra --------------- - -Basic linear algebra is supported on 1-D and 2-D contiguous arrays of -floating-point and complex numbers: - -* :func:`numpy.dot` -* :func:`numpy.kron` -* :func:`numpy.outer` -* :func:`numpy.trace` (only the first argument). -* :func:`numpy.vdot` -* On Python 3.5 and above, the matrix multiplication operator from - :pep:`465` (i.e. ``a @ b`` where ``a`` and ``b`` are 1-D or 2-D arrays). -* :func:`numpy.linalg.cholesky` -* :func:`numpy.linalg.cond` (only non string values in ``p``). -* :func:`numpy.linalg.det` -* :func:`numpy.linalg.eig` (only running with data that does not cause a domain - change is supported e.g. real input -> real - output, complex input -> complex output). -* :func:`numpy.linalg.eigh` (only the first argument). -* :func:`numpy.linalg.eigvals` (only running with data that does not cause a - domain change is supported e.g. real input -> real output, - complex input -> complex output). -* :func:`numpy.linalg.eigvalsh` (only the first argument). -* :func:`numpy.linalg.inv` -* :func:`numpy.linalg.lstsq` -* :func:`numpy.linalg.matrix_power` -* :func:`numpy.linalg.matrix_rank` -* :func:`numpy.linalg.norm` (only the 2 first arguments and only non string - values in ``ord``). -* :func:`numpy.linalg.pinv` -* :func:`numpy.linalg.qr` (only the first argument). -* :func:`numpy.linalg.slogdet` -* :func:`numpy.linalg.solve` -* :func:`numpy.linalg.svd` (only the 2 first arguments). - -.. note:: - The implementation of these functions needs Scipy 0.16+ to be installed. 
- -Reductions ----------- - -The following reduction functions are supported: - -* :func:`numpy.diff` (only the 2 first arguments) -* :func:`numpy.median` (only the first argument) -* :func:`numpy.nanmax` (only the first argument) -* :func:`numpy.nanmean` (only the first argument) -* :func:`numpy.nanmedian` (only the first argument) -* :func:`numpy.nanmin` (only the first argument) -* :func:`numpy.nanprod` (only the first argument) -* :func:`numpy.nanstd` (only the first argument) -* :func:`numpy.nansum` (only the first argument) -* :func:`numpy.nanvar` (only the first argument) -* :func:`numpy.percentile` (only the 2 first arguments, requires NumPy >= 1.10) -* :func:`numpy.nanpercentile` (only the 2 first arguments, requires NumPy >= 1.11) - -Other functions ---------------- - -The following top-level functions are supported: - -* :func:`numpy.arange` -* :func:`numpy.argsort` (no optional arguments) -* :func:`numpy.array` (only the 2 first arguments) -* :func:`numpy.asfortranarray` (only the first argument) -* :func:`numpy.atleast_1d` -* :func:`numpy.atleast_2d` -* :func:`numpy.atleast_3d` -* :func:`numpy.bincount` (only the 2 first arguments) -* :func:`numpy.column_stack` -* :func:`numpy.concatenate` -* :func:`numpy.convolve` (only the 2 first arguments) -* :func:`numpy.copy` (only the first argument) -* :func:`numpy.correlate` (only the 2 first arguments) -* :func:`numpy.diag` -* :func:`numpy.digitize` -* :func:`numpy.dstack` -* :func:`numpy.empty` (only the 2 first arguments) -* :func:`numpy.empty_like` (only the 2 first arguments) -* :func:`numpy.expand_dims` -* :func:`numpy.eye` -* :func:`numpy.flatten` (no order argument; 'C' order only) -* :func:`numpy.frombuffer` (only the 2 first arguments) -* :func:`numpy.full` (only the 3 first arguments) -* :func:`numpy.full_like` (only the 3 first arguments) -* :func:`numpy.histogram` (only the 3 first arguments) -* :func:`numpy.hstack` -* :func:`numpy.identity` -* :func:`numpy.linspace` (only the 3-argument form) -* 
:class:`numpy.ndenumerate` -* :class:`numpy.ndindex` -* :class:`numpy.nditer` (only the first argument) -* :func:`numpy.ones` (only the 2 first arguments) -* :func:`numpy.ones_like` (only the 2 first arguments) -* :func:`numpy.ravel` (no order argument; 'C' order only) -* :func:`numpy.reshape` (no order argument; 'C' order only) -* :func:`numpy.roots` -* :func:`numpy.round_` -* :func:`numpy.searchsorted` (only the 3 first arguments) -* :func:`numpy.sinc` -* :func:`numpy.sort` (no optional arguments) -* :func:`numpy.stack` -* :func:`numpy.take` (only the 2 first arguments) -* :func:`numpy.transpose` -* :func:`numpy.unique` (only the first argument) -* :func:`numpy.vstack` -* :func:`numpy.where` -* :func:`numpy.zeros` (only the 2 first arguments) -* :func:`numpy.zeros_like` (only the 2 first arguments) - -The following constructors are supported, both with a numeric input (to -construct a scalar) or a sequence (to construct an array): - -* :class:`numpy.bool_` -* :class:`numpy.complex64` -* :class:`numpy.complex128` -* :class:`numpy.float32` -* :class:`numpy.float64` -* :class:`numpy.int8` -* :class:`numpy.int16` -* :class:`numpy.int32` -* :class:`numpy.int64` -* :class:`numpy.intc` -* :class:`numpy.intp` -* :class:`numpy.uint8` -* :class:`numpy.uint16` -* :class:`numpy.uint32` -* :class:`numpy.uint64` -* :class:`numpy.uintc` -* :class:`numpy.uintp` - -The following machine parameter classes are supported, with all purely numerical -attributes: - -* :class:`numpy.iinfo` -* :class:`numpy.finfo` (``machar`` attribute not supported) -* :class:`numpy.MachAr` (with no arguments to the constructor) - - -Literal arrays --------------- - -.. XXX should this part of the user's guide? - -Neither Python nor Numba has actual array literals, but you can construct -arbitrary arrays by calling :func:`numpy.array` on a nested tuple:: - - a = numpy.array(((a, b, c), (d, e, f))) - -(nested lists are not yet supported by Numba) - - -Modules -======= - -.. 
_numpy-random: - -``random`` ----------- - -Numba supports top-level functions from the -`numpy.random `_ -module, but does not allow you to create individual RandomState instances. -The same algorithms are used as for :ref:`the standard -random module ` (and therefore the same notes apply), -but with an independent internal state: seeding or drawing numbers from -one generator won't affect the other. - -The following functions are supported. - -Initialization -'''''''''''''' - -* :func:`numpy.random.seed`: with an integer argument only - -Simple random data -'''''''''''''''''' - -* :func:`numpy.random.rand` -* :func:`numpy.random.randint` (only the first two arguments) -* :func:`numpy.random.randn` -* :func:`numpy.random.random` -* :func:`numpy.random.random_sample` -* :func:`numpy.random.ranf` -* :func:`numpy.random.sample` - -Permutations -'''''''''''' - -* :func:`numpy.random.choice`: the optional *p* argument (probabilities - array) is not supported -* :func:`numpy.random.permutation` -* :func:`numpy.random.shuffle`: the sequence argument must be a one-dimension - Numpy array or buffer-providing object (such as a :class:`bytearray` - or :class:`array.array`) - -Distributions -''''''''''''' - -.. warning:: The `size` argument is not supported in the following functions. 
- -* :func:`numpy.random.beta` -* :func:`numpy.random.binomial` -* :func:`numpy.random.chisquare` -* :func:`numpy.random.exponential` -* :func:`numpy.random.f` -* :func:`numpy.random.gamma` -* :func:`numpy.random.geometric` -* :func:`numpy.random.gumbel` -* :func:`numpy.random.hypergeometric` -* :func:`numpy.random.laplace` -* :func:`numpy.random.logistic` -* :func:`numpy.random.lognormal` -* :func:`numpy.random.logseries` -* :func:`numpy.random.multinomial` -* :func:`numpy.random.negative_binomial` -* :func:`numpy.random.normal` -* :func:`numpy.random.pareto` -* :func:`numpy.random.poisson` -* :func:`numpy.random.power` -* :func:`numpy.random.rayleigh` -* :func:`numpy.random.standard_cauchy` -* :func:`numpy.random.standard_exponential` -* :func:`numpy.random.standard_gamma` -* :func:`numpy.random.standard_normal` -* :func:`numpy.random.standard_t` -* :func:`numpy.random.triangular` -* :func:`numpy.random.uniform` -* :func:`numpy.random.vonmises` -* :func:`numpy.random.wald` -* :func:`numpy.random.weibull` -* :func:`numpy.random.zipf` - -.. note:: - Calling :func:`numpy.random.seed` from non-Numba code (or from - :term:`object mode` code) will seed the Numpy random generator, not the - Numba random generator. - -.. note:: - The generator is not thread-safe when :ref:`releasing the GIL `. - - Also, under Unix, if creating a child process using :func:`os.fork` or the - :mod:`multiprocessing` module, the child's random generator will inherit - the parent's state and will therefore produce the same sequence of - numbers (except when using the "forkserver" start method under Python 3.4 - and later). - - -``stride_tricks`` ------------------ - -The following function from the :mod:`numpy.lib.stride_tricks` module -is supported: - -* :func:`~numpy.lib.stride_tricks.as_strided` (the *strides* argument - is mandatory, the *subok* argument is not supported) - -.. 
_supported_ufuncs: - -Standard ufuncs -=============== - -One objective of Numba is having all the -`standard ufuncs in NumPy `_ -understood by Numba. When a supported ufunc is found when compiling a -function, Numba maps the ufunc to equivalent native code. This allows the -use of those ufuncs in Numba code that gets compiled in :term:`nopython mode`. - -Limitations ------------ - -Right now, only a selection of the standard ufuncs work in :term:`nopython mode`. -Following is a list of the different standard ufuncs that Numba is aware of, -sorted in the same way as in the NumPy documentation. - - -Math operations ---------------- - -============== ============= =============== - UFUNC MODE --------------- ------------------------------ - name object mode nopython mode -============== ============= =============== - add Yes Yes - subtract Yes Yes - multiply Yes Yes - divide Yes Yes - logaddexp Yes Yes - logaddexp2 Yes Yes - true_divide Yes Yes - floor_divide Yes Yes - negative Yes Yes - power Yes Yes - remainder Yes Yes - mod Yes Yes - fmod Yes Yes - abs Yes Yes - absolute Yes Yes - fabs Yes Yes - rint Yes Yes - sign Yes Yes - conj Yes Yes - exp Yes Yes - exp2 Yes Yes - log Yes Yes - log2 Yes Yes - log10 Yes Yes - expm1 Yes Yes - log1p Yes Yes - sqrt Yes Yes - square Yes Yes - reciprocal Yes Yes - conjugate Yes Yes -============== ============= =============== - - -Trigonometric functions ------------------------ - -============== ============= =============== - UFUNC MODE --------------- ------------------------------ - name object mode nopython mode -============== ============= =============== - sin Yes Yes - cos Yes Yes - tan Yes Yes - arcsin Yes Yes - arccos Yes Yes - arctan Yes Yes - arctan2 Yes Yes - hypot Yes Yes - sinh Yes Yes - cosh Yes Yes - tanh Yes Yes - arcsinh Yes Yes - arccosh Yes Yes - arctanh Yes Yes - deg2rad Yes Yes - rad2deg Yes Yes - degrees Yes Yes - radians Yes Yes -============== ============= =============== - - -Bit-twiddling functions 
------------------------ - -============== ============= =============== - UFUNC MODE --------------- ------------------------------ - name object mode nopython mode -============== ============= =============== - bitwise_and Yes Yes - bitwise_or Yes Yes - bitwise_xor Yes Yes - bitwise_not Yes Yes - invert Yes Yes - left_shift Yes Yes - right_shift Yes Yes -============== ============= =============== - - -Comparison functions --------------------- - -============== ============= =============== - UFUNC MODE --------------- ------------------------------ - name object mode nopython mode -============== ============= =============== - greater Yes Yes - greater_equal Yes Yes - less Yes Yes - less_equal Yes Yes - not_equal Yes Yes - equal Yes Yes - logical_and Yes Yes - logical_or Yes Yes - logical_xor Yes Yes - logical_not Yes Yes - maximum Yes Yes - minimum Yes Yes - fmax Yes Yes - fmin Yes Yes -============== ============= =============== - - -Floating functions ------------------- - -============== ============= =============== - UFUNC MODE --------------- ------------------------------ - name object mode nopython mode -============== ============= =============== - isfinite Yes Yes - isinf Yes Yes - isnan Yes Yes - signbit Yes Yes - copysign Yes Yes - nextafter Yes Yes - modf Yes No - ldexp Yes (*) Yes - frexp Yes No - floor Yes Yes - ceil Yes Yes - trunc Yes Yes - spacing Yes Yes -============== ============= =============== - -(\*) not supported on windows 32 bit diff --git a/numba/docs/source/reference/pysemantics.rst b/numba/docs/source/reference/pysemantics.rst deleted file mode 100644 index 72ee04fc2..000000000 --- a/numba/docs/source/reference/pysemantics.rst +++ /dev/null @@ -1,50 +0,0 @@ - -Deviations from Python semantics -================================ - - -Integer width -------------- - -While Python has arbitrary-sized integers, integers in Numba-compiled -functions get a fixed size through :term:`type inference` (usually, -the size of a machine 
integer). This means that arithmetic -operations can wrap around or produce undefined results or overflow. - -Type inference can be overridden by an explicit type specification, -if fine-grained control of integer width is desired. - -.. seealso:: - :ref:`Enhancement proposal 1: Changes in integer typing <nbep-1>` - - -Boolean inversion ----------------- - -Calling the bitwise complement operator (the ``~`` operator) on a Python -boolean returns an integer, while the same operator on a Numpy boolean -returns another boolean:: - - >>> ~True - -2 - >>> ~np.bool_(True) - False - -Numba follows the Numpy semantics. - - -Global and closure variables ----------------------------- - -In :term:`nopython mode`, global and closure variables are *frozen* by -Numba: a Numba-compiled function sees the value of those variables at the -time the function was compiled. Also, it is not possible to change their -values from the function. - -Numba **may or may not** copy global variables referenced inside a compiled -function. Small global arrays are copied for potential compiler optimization -with immutability assumption. However, large global arrays are not copied to -conserve memory. The definition of "small" and "large" may change. - - -.. todo:: This document needs completing. diff --git a/numba/docs/source/reference/pysupported.rst b/numba/docs/source/reference/pysupported.rst deleted file mode 100644 index 624c9fbfa..000000000 --- a/numba/docs/source/reference/pysupported.rst +++ /dev/null @@ -1,578 +0,0 @@ -.. _pysupported: - -========================= -Supported Python features -========================= - -Apart from the :ref:`pysupported-language` part below, which applies to both -:term:`object mode` and :term:`nopython mode`, this page only lists the -features supported in :term:`nopython mode`. - -.. 
_pysupported-language: - -Language -======== - -Constructs ----------- - -Numba strives to support as much of the Python language as possible, but -some language features are not available inside Numba-compiled functions. The following Python language features are not currently supported: - -* Class definition -* Exception handling (``try .. except``, ``try .. finally``) -* Context management (the ``with`` statement) -* Some comprehensions (list comprehension is supported, but not dict, set or generator comprehensions) -* Generator delegation (``yield from``) - -The ``raise`` statement is supported in several forms: - -* ``raise`` (to re-raise the current exception) -* ``raise SomeException`` -* ``raise SomeException()``: in :term:`nopython mode`, constructor - arguments must be :term:`compile-time constants ` - -Similarly, the ``assert`` statement is supported with or without an error -message. - -Functions ---------- - -Function calls -'''''''''''''' - -Numba supports function calls using positional and named arguments, as well -as arguments with default values and ``*args`` (note the argument for -``*args`` can only be a tuple, not a list). Explicit ``**kwargs`` are -not supported. - -Function calls to locally defined inner functions are supported as long as -they can be fully inlined. - -Functions as arguments -'''''''''''''''''''''' - -Functions can be passed as argument into another function. But, they cannot -be returned. For example: - -.. code-block:: python - - from numba import jit - - @jit - def add1(x): - return x + 1 - - @jit - def bar(fn, x): - return fn(x) - - @jit - def foo(x): - return bar(add1, x) - - # Passing add1 within numba compiled code. - print(foo(1)) - # Passing add1 into bar from interpreted code - print(bar(add1, 1)) - -.. note:: Numba does not handle function objects as real objects. Once a - function is assigned to a variable, the variable cannot be - re-assigned to a different function. 
- - -Inner function and closure -''''''''''''''''''''''''''' - -Numba now supports inner functions as long as they are non-recursive -and only called locally, but not passed as argument or returned as -result. The use of closure variables (variables defined in outer scopes) -within an inner function is also supported. - -Recursive calls -''''''''''''''' - -Most recursive call patterns are supported. The only restriction is that the -recursive callee must have a control-flow path that returns without recursing. -Numba is able to type-infer recursive functions without specifying the function -type signature (which is required in numba 0.28 and earlier). -Recursive calls can even call into a different overload of the function. - -.. XXX add reference to NBEP - -Generators ----------- - -Numba supports generator functions and is able to compile them in -:term:`object mode` and :term:`nopython mode`. The returned generator -can be used both from Numba-compiled code and from regular Python code. - -Coroutine features of generators are not supported (i.e. the -:meth:`generator.send`, :meth:`generator.throw`, :meth:`generator.close` -methods). - -.. _pysupported-builtin-types: - -Built-in types -============== - -int, bool ---------- - -Arithmetic operations as well as truth values are supported. - -The following attributes and methods are supported: - -* ``.conjugate()`` -* ``.real`` -* ``.imag`` - -float, complex --------------- - -Arithmetic operations as well as truth values are supported. 
- -The following attributes and methods are supported: - -* ``.conjugate()`` -* ``.real`` -* ``.imag`` - -tuple ------ - -The following operations are supported: - -* tuple construction -* tuple unpacking -* comparison between tuples -* iteration and indexing over homogeneous tuples -* addition (concatenation) between tuples -* slicing tuples with a constant slice -* the index method on tuples - -list ----- - -Creating and returning lists from JIT-compiled functions is supported, -as well as all methods and operations. Lists must be strictly homogeneous: -Numba will reject any list containing objects of different types, even if -the types are compatible (for example, ``[1, 2.5]`` is rejected as it -contains a :class:`int` and a :class:`float`). - -For example, to create a list of arrays:: - - In [1]: from numba import njit - - In [2]: import numpy as np - - In [3]: @njit - ...: def foo(x): - ...: lst = [] - ...: for i in range(x): - ...: lst.append(np.arange(i)) - ...: return lst - ...: - - In [4]: foo(4) - Out[4]: [array([], dtype=int64), array([0]), array([0, 1]), array([0, 1, 2])] - - -List Reflection -''''''''''''''' - -In nopython mode, Numba does not operate on Python objects. ``list`` are -compiled into an internal representation. Any ``list`` arguments must be -converted into this representation on the way in to nopython mode and their -contained elements must be restored in the original Python objects via a -process called :term:`reflection`. Reflection is required to maintain the same -semantics as found in regular Python code. However, the reflection process -can be expensive for large lists and it is not supported for lists that contain -reflected data types. Users cannot use list-of-list as an argument because -of this limitation. - -.. note:: - When passing a list into a JIT-compiled function, any modifications - made to the list will not be visible to the Python interpreter until - the function returns. (A limitation of the reflection process.) - -.. 
warning:: - List sorting currently uses a quicksort algorithm, which has different - performance characteristics than the algorithm used by Python.
- -bytes, bytearray, memoryview ----------------------------- - -The :class:`bytearray` type and, on Python 3, the :class:`bytes` type -support indexing, iteration and retrieving the len(). - -The :class:`memoryview` type supports indexing, slicing, iteration, -retrieving the len(), and also the following attributes: - -* :attr:`~memoryview.contiguous` -* :attr:`~memoryview.c_contiguous` -* :attr:`~memoryview.f_contiguous` -* :attr:`~memoryview.itemsize` -* :attr:`~memoryview.nbytes` -* :attr:`~memoryview.ndim` -* :attr:`~memoryview.readonly` -* :attr:`~memoryview.shape` -* :attr:`~memoryview.strides` - - -Built-in functions -================== - -The following built-in functions are supported: - -* :func:`abs` -* :class:`bool` -* :class:`complex` -* :func:`divmod` -* :func:`enumerate` -* :class:`float` -* :class:`int`: only the one-argument form -* :func:`iter`: only the one-argument form -* :func:`len` -* :func:`min` -* :func:`max` -* :func:`next`: only the one-argument form -* :func:`print`: only numbers and strings; no ``file`` or ``sep`` argument -* :class:`range`: semantics are similar to those of Python 3 even in Python 2: - a range object is returned instead of an array of values. -* :func:`round` -* :func:`sorted`: the ``key`` argument is not supported -* :func:`type`: only the one-argument form, and only on some types - (e.g. numbers and named tuples) -* :func:`zip` - - -Standard library modules -======================== - -``array`` ---------- - -Limited support for the :class:`array.array` type is provided through -the buffer protocol. Indexing, iteration and taking the len() is supported. -All type codes are supported except for ``"u"``. 
- -``cmath`` ---------- - -The following functions from the :mod:`cmath` module are supported: - -* :func:`cmath.acos` -* :func:`cmath.acosh` -* :func:`cmath.asin` -* :func:`cmath.asinh` -* :func:`cmath.atan` -* :func:`cmath.atanh` -* :func:`cmath.cos` -* :func:`cmath.cosh` -* :func:`cmath.exp` -* :func:`cmath.isfinite` -* :func:`cmath.isinf` -* :func:`cmath.isnan` -* :func:`cmath.log` -* :func:`cmath.log10` -* :func:`cmath.phase` -* :func:`cmath.polar` -* :func:`cmath.rect` -* :func:`cmath.sin` -* :func:`cmath.sinh` -* :func:`cmath.sqrt` -* :func:`cmath.tan` -* :func:`cmath.tanh` - -``collections`` ---------------- - -Named tuple classes, as returned by :func:`collections.namedtuple`, are -supported in the same way regular tuples are supported. Attribute access -and named parameters in the constructor are also supported. - -Creating a named tuple class inside Numba code is *not* supported; the class -must be created at the global level. - -``ctypes`` ----------- - -Numba is able to call ctypes-declared functions with the following argument -and return types: - -* :class:`ctypes.c_int8` -* :class:`ctypes.c_int16` -* :class:`ctypes.c_int32` -* :class:`ctypes.c_int64` -* :class:`ctypes.c_uint8` -* :class:`ctypes.c_uint16` -* :class:`ctypes.c_uint32` -* :class:`ctypes.c_uint64` -* :class:`ctypes.c_float` -* :class:`ctypes.c_double` -* :class:`ctypes.c_void_p` - -``enum`` --------- - -Both :class:`enum.Enum` and :class:`enum.IntEnum` subclasses are supported. 
- -``math`` --------- - -The following functions from the :mod:`math` module are supported: - -* :func:`math.acos` -* :func:`math.acosh` -* :func:`math.asin` -* :func:`math.asinh` -* :func:`math.atan` -* :func:`math.atan2` -* :func:`math.atanh` -* :func:`math.ceil` -* :func:`math.copysign` -* :func:`math.cos` -* :func:`math.cosh` -* :func:`math.degrees` -* :func:`math.erf` -* :func:`math.erfc` -* :func:`math.exp` -* :func:`math.expm1` -* :func:`math.fabs` -* :func:`math.floor` -* :func:`math.frexp` -* :func:`math.gamma` -* :func:`math.hypot` -* :func:`math.isfinite` -* :func:`math.isinf` -* :func:`math.isnan` -* :func:`math.ldexp` -* :func:`math.lgamma` -* :func:`math.log` -* :func:`math.log10` -* :func:`math.log1p` -* :func:`math.pow` -* :func:`math.radians` -* :func:`math.sin` -* :func:`math.sinh` -* :func:`math.sqrt` -* :func:`math.tan` -* :func:`math.tanh` -* :func:`math.trunc` - -``operator`` ------------- - -The following functions from the :mod:`operator` module are supported: - -* :func:`operator.add` -* :func:`operator.and_` -* :func:`operator.div` (Python 2 only) -* :func:`operator.eq` -* :func:`operator.floordiv` -* :func:`operator.ge` -* :func:`operator.gt` -* :func:`operator.iadd` -* :func:`operator.iand` -* :func:`operator.idiv` (Python 2 only) -* :func:`operator.ifloordiv` -* :func:`operator.ilshift` -* :func:`operator.imatmul` (Python 3.5 and above) -* :func:`operator.imod` -* :func:`operator.imul` -* :func:`operator.invert` -* :func:`operator.ior` -* :func:`operator.ipow` -* :func:`operator.irshift` -* :func:`operator.isub` -* :func:`operator.itruediv` -* :func:`operator.ixor` -* :func:`operator.le` -* :func:`operator.lshift` -* :func:`operator.lt` -* :func:`operator.matmul` (Python 3.5 and above) -* :func:`operator.mod` -* :func:`operator.mul` -* :func:`operator.ne` -* :func:`operator.neg` -* :func:`operator.not_` -* :func:`operator.or_` -* :func:`operator.pos` -* :func:`operator.pow` -* :func:`operator.rshift` -* :func:`operator.sub` -* 
:func:`operator.truediv` -* :func:`operator.xor` - -``functools`` -------------- - -The :func:`functools.reduce` function is supported but the `initializer` -argument is required. - -.. _pysupported-random: - -``random`` ----------- - -Numba supports top-level functions from the :mod:`random` module, but does -not allow you to create individual Random instances. A Mersenne-Twister -generator is used, with a dedicated internal state. It is initialized at -startup with entropy drawn from the operating system. - -* :func:`random.betavariate` -* :func:`random.expovariate` -* :func:`random.gammavariate` -* :func:`random.gauss` -* :func:`random.getrandbits`: number of bits must not be greater than 64 -* :func:`random.lognormvariate` -* :func:`random.normalvariate` -* :func:`random.paretovariate` -* :func:`random.randint` -* :func:`random.random` -* :func:`random.randrange` -* :func:`random.seed`: with an integer argument only -* :func:`random.shuffle`: the sequence argument must be a one-dimension - Numpy array or buffer-providing object (such as a :class:`bytearray` - or :class:`array.array`); the second (optional) argument is not supported -* :func:`random.uniform` -* :func:`random.triangular` -* :func:`random.vonmisesvariate` -* :func:`random.weibullvariate` - -.. note:: - Calling :func:`random.seed` from non-Numba code (or from :term:`object mode` - code) will seed the Python random generator, not the Numba random generator. - -.. note:: - Since version 0.28.0, the generator is thread-safe and fork-safe. Each - thread and each process will produce independent streams of random numbers. - -.. seealso:: - Numba also supports most additional distributions from the :ref:`Numpy - random module `. - - -Third-party modules -=================== - -.. I put this here as there's only one module (apart from Numpy), otherwise - it should be a separate page. 
- -``cffi`` --------- - -Similarly to ctypes, Numba is able to call into `cffi`_-declared external -functions, using the following C types and any derived pointer types: - -* :c:type:`char` -* :c:type:`short` -* :c:type:`int` -* :c:type:`long` -* :c:type:`long long` -* :c:type:`unsigned char` -* :c:type:`unsigned short` -* :c:type:`unsigned int` -* :c:type:`unsigned long` -* :c:type:`unsigned long long` -* :c:type:`int8_t` -* :c:type:`uint8_t` -* :c:type:`int16_t` -* :c:type:`uint16_t` -* :c:type:`int32_t` -* :c:type:`uint32_t` -* :c:type:`int64_t` -* :c:type:`uint64_t` -* :c:type:`float` -* :c:type:`double` -* :c:type:`ssize_t` -* :c:type:`size_t` -* :c:type:`void` - -The ``from_buffer()`` method of ``cffi.FFI`` and ``CompiledFFI`` objects is -supported for passing Numpy arrays and other buffer-like objects. Only -*contiguous* arguments are accepted. The argument to ``from_buffer()`` -is converted to a raw pointer of the appropriate C type (for example a -``double *`` for a ``float64`` array). - -Additional type mappings for the conversion from a buffer to the appropriate C -type may be registered with Numba. This may include struct types, though it is -only permitted to call functions that accept pointers to structs - passing a -struct by value is unsupported. For registering a mapping, use: - -.. function:: numba.cffi_support.register_type(cffi_type, numba_type) - -Out-of-line cffi modules must be registered with Numba prior to the use of any -of their functions from within Numba-compiled functions: - -.. function:: numba.cffi_support.register_module(mod) - - Register the cffi out-of-line module ``mod`` with Numba. - -Inline cffi modules require no registration. - -.. 
_cffi: https://cffi.readthedocs.org/ diff --git a/numba/docs/source/reference/python27-eol.rst b/numba/docs/source/reference/python27-eol.rst deleted file mode 100644 index 359b528af..000000000 --- a/numba/docs/source/reference/python27-eol.rst +++ /dev/null @@ -1,20 +0,0 @@ -=========================== -Python 2.7 End of Life Plan -=========================== - -As per `PEP 373 `_, Python 2.7 will cease to be supported in 2020, though `no exact date has been officially selected yet `_. Like many projects, the Numba team has to consider how to time its own end of Python 2.7 support. Given how deeply Numba must interface with the Python interpreter, supporting both Python 2 and 3 creates quite a development and testing burden. In addition, Numba (specifically via llvmlite) has to deal with some specifically tricky compiler issues on Windows, where LLVM requires Visual Studio 2015 or later, but Python 2.7 extensions must be built with Visual Studio 2008. Needless to say, the goal with this plan is to support our Python 2.7 user base (~30% of downloads as of February 2018), but also clearly signal that *now is the time to switch to Python 3 if you have not already*. - -Python 2.7 users of Numba should also be aware of `NumPy's timeline for ending Python 2.7 support `_. Due to Numba's tight coupling with NumPy, the NumPy timeline has strongly informed the Numba timeline below. - -Timeline -======== - -The end of Python 2.7 support in Numba will be staged: - -* **December 2018**: Tag and release Numba 1.x.0. Create a Python 2.7 branch based on this release. -* Critical fixes until **January 1, 2020** will be backported to the Python 2.7 branch and released as Numba 1.x.y. -* No new features will be added to the Python 2.7 branch, but we will continue to do automated testing of it with new NumPy releases. 
-* **January 1, 2019**: we will slim down the Numba master branch by removing all the Python 2.7 compatibility code and release Numba 1.(x+1).0, which will be functionally identical to Numba 1.x.0. -* **January 1, 2020**: The Numba developers will stop supporting the Python 2.7 branch. - -If there are concerns about the above timeline, please `raise an issue `_ in our issue tracker. \ No newline at end of file diff --git a/numba/docs/source/reference/types.rst b/numba/docs/source/reference/types.rst deleted file mode 100644 index 792571236..000000000 --- a/numba/docs/source/reference/types.rst +++ /dev/null @@ -1,200 +0,0 @@ -.. _numba-types: - -==================== -Types and signatures -==================== - -Rationale -========= - -As an optimizing compiler, Numba needs to decide on the type of each -variable to generate efficient machine code. Python's standard types -are not precise enough for that, so we had to develop our own fine-grained -type system. - -You will encounter Numba types mainly when trying to inspect the results -of Numba's type inference, for :ref:`debugging ` or -:ref:`educational ` purposes. However, you need to use -types explicitly if compiling code :ref:`ahead-of-time `. - - -Signatures -========== - -A signature specifies the type of a function. Exactly which kind -of signature is allowed depends on the context (:term:`AOT` or :term:`JIT` -compilation), but signatures always involve some representation of Numba -types to specifiy the concrete types for the function's arguments and, -if required, the function's return type. - -An example function signature would be the string ``"f8(i4, i4)"`` -(or the equivalent ``"float64(int32, int32)"``) which specifies a -function taking two 32-bit integers and returning a double-precision float. - - -Basic types -=========== - -The most basic types can be expressed through simple expressions. 
The -symbols below refer to attributes of the main ``numba`` module (so if -you read "boolean", it means that symbol can be accessed as ``numba.boolean``). -Many types are available both as a canonical name and a shorthand alias, -following Numpy's conventions. - -Numbers -------- - -The following table contains the elementary numeric types currently defined -by Numba and their aliases. - -=================== ========= =================================== -Type name(s) Shorthand Comments -=================== ========= =================================== -boolean b1 represented as a byte -uint8, byte u1 8-bit unsigned byte -uint16 u2 16-bit unsigned integer -uint32 u4 32-bit unsigned integer -uint64 u8 64-bit unsigned integer - -int8, char i1 8-bit signed byte -int16 i2 16-bit signed integer -int32 i4 32-bit signed integer -int64 i8 64-bit signed integer - -intc -- C int-sized integer -uintc -- C int-sized unsigned integer -intp -- pointer-sized integer -uintp -- pointer-sized unsigned integer - -float32 f4 single-precision floating-point number -float64, double f8 double-precision floating-point number - -complex64 c8 single-precision complex number -complex128 c16 double-precision complex number -=================== ========= =================================== - -Arrays ------- - -The easy way to declare array types is to subscript an elementary type -according to the number of dimensions. 
For example a 1-dimension -single-precision array:: - - >>> numba.float32[:] - array(float32, 1d, A) - -or a 3-dimension array of the same underlying type:: - - >>> numba.float32[:, :, :] - array(float32, 3d, A) - -This syntax defines array types with no particular layout (producing code -that accepts both non-contiguous and contiguous arrays), but you can -specify a particular contiguity by using the ``::1`` index either at -the beginning or the end of the index specification:: - - >>> numba.float32[::1] - array(float32, 1d, C) - >>> numba.float32[:, :, ::1] - array(float32, 3d, C) - >>> numba.float32[::1, :, :] - array(float32, 3d, F) - - -Advanced types -============== - -For more advanced declarations, you have to explicitly call helper -functions or classes provided by Numba. - -.. warning:: - The APIs documented here are not guaranteed to be stable. Unless - necessary, it is recommended to let Numba infer argument types by using - the :ref:`signature-less variant of @jit `. - -.. A word of note: I only documented those types that can be genuinely - useful to users, i.e. types that can be passed as parameters to a JIT - function. Other types such as tuple are only usable in type inference. - - -Inference ---------- - -.. function:: numba.typeof(value) - - Create a Numba type accurately describing the given Python *value*. - ``ValueError`` is raised if the value isn't supported in - :term:`nopython mode`. - - :: - - >>> numba.typeof(np.empty(3)) - array(float64, 1d, C) - >>> numba.typeof((1, 2.0)) - (int64, float64) - >>> numba.typeof([0]) - reflected list(int64) - - -Numpy scalars -------------- - -Instead of using :func:`~numba.typeof`, non-trivial scalars such as -structured types can also be constructed programmatically. - -.. 
function:: numba.from_dtype(dtype) - - Create a Numba type corresponding to the given Numpy *dtype*:: - - >>> struct_dtype = np.dtype([('row', np.float64), ('col', np.float64)]) - >>> ty = numba.from_dtype(struct_dtype) - >>> ty - Record([('row', '>> ty[:, :] - unaligned array(Record([('row', '`_. - - -Arrays ------- - -.. class:: numba.types.Array(dtype, ndim, layout) - - Create an array type. *dtype* should be a Numba type. *ndim* is the - number of dimensions of the array (a positive integer). *layout* - is a string giving the layout of the array: ``A`` means any layout, ``C`` - means C-contiguous and ``F`` means Fortran-contiguous. - - -Optional types --------------- - -.. class:: numba.optional(typ) - - Create an optional type based on the underlying Numba type *typ*. - The optional type will allow any value of either *typ* or :const:`None`. - - :: - - >>> @jit((optional(intp),)) - ... def f(x): - ... return x is not None - ... - >>> f(0) - True - >>> f(None) - False diff --git a/numba/docs/source/reference/utils.rst b/numba/docs/source/reference/utils.rst deleted file mode 100644 index 3fc866e5c..000000000 --- a/numba/docs/source/reference/utils.rst +++ /dev/null @@ -1,34 +0,0 @@ - -========= -Utilities -========= - -Dealing with pointers -===================== - -These functions can be called from pure Python as well as in -:term:`nopython mode`. - - -.. function:: numba.carray(ptr, shape, dtype=None) - - Return a Numpy array view over the data pointed to by *ptr* with the - given *shape*, in C order. If *dtype* is given, it is used as the array's - dtype, otherwise the array's dtype is inferred from *ptr*'s type. - As the returned array is a view, not a copy, writing to it will modify - the original data. - - *ptr* should be a ctypes pointer object (either a typed pointer - as created using :func:`~ctypes.POINTER`, or a :class:`~ctypes.c_void_p`). - - *shape* should be an integer or a tuple of integers. 
- - *dtype* should be a Numpy dtype or scalar class (i.e. both - ``np.dtype('int8')`` and ``np.int8`` are accepted). - - -.. function:: numba.farray(ptr, shape, dtype=None) - - Same as :func:`~numba.carray`, but the data is assumed to be laid out - in Fortran order, and the array view is constructed accordingly. - diff --git a/numba/docs/source/release-notes.rst b/numba/docs/source/release-notes.rst deleted file mode 100644 index 103366e15..000000000 --- a/numba/docs/source/release-notes.rst +++ /dev/null @@ -1,5 +0,0 @@ -====================== -Release Notes -====================== - -.. include:: ../../CHANGE_LOG diff --git a/numba/docs/source/user/cfunc.rst b/numba/docs/source/user/cfunc.rst deleted file mode 100644 index 90ca9efb6..000000000 --- a/numba/docs/source/user/cfunc.rst +++ /dev/null @@ -1,133 +0,0 @@ -.. _cfunc: - -==================================== -Creating C callbacks with ``@cfunc`` -==================================== - -Interfacing with some native libraries (for example written in C or C++) -can necessitate writing native callbacks to provide business logic to the -library. The :func:`numba.cfunc` decorator creates a compiled function -callable from foreign C code, using the signature of your choice. - - -Basic usage -=========== - -The ``@cfunc`` decorator has a similar usage to ``@jit``, but with an -important difference: passing a single signature is mandatory. -It determines the visible signature of the C callback:: - - from numba import cfunc - - @cfunc("float64(float64, float64)") - def add(x, y): - return x + y - - -The C function object exposes the address of the compiled C callback as -the :attr:`~CFunc.address` attribute, so that you can pass it to any -foreign C or C++ library. 
It also exposes a :mod:`ctypes` callback -object pointing to that callback; that object is also callable from -Python, making it easy to check the compiled code:: - - @cfunc("float64(float64, float64)") - def add(x, y): - return x + y - - print(add.ctypes(4.0, 5.0)) # prints "9.0" - - -Example -======= - -In this example, we are going to be using the ``scipy.integrate.quad`` -function. That function accepts either a regular Python callback or -a C callback wrapped in a :mod:`ctypes` callback object. - -Let's define a pure Python integrand and compile it as a -C callback:: - - >>> import numpy as np - >>> from numba import cfunc - >>> def integrand(t): - return np.exp(-t) / t**2 - ...: - >>> nb_integrand = cfunc("float64(float64)")(integrand) - -We can pass the ``nb_integrand`` object's :mod:`ctypes` callback to -``scipy.integrate.quad`` and check that the results are the same as with -the pure Python function:: - - >>> import scipy.integrate as si - >>> def do_integrate(func): - """ - Integrate the given function from 1.0 to +inf. - """ - return si.quad(func, 1, np.inf) - ...: - >>> do_integrate(integrand) - (0.14849550677592208, 3.8736750296130505e-10) - >>> do_integrate(nb_integrand.ctypes) - (0.14849550677592208, 3.8736750296130505e-10) - - -Using the compiled callback, the integration function does not invoke the -Python interpreter each time it evaluates the integrand. In our case, the -integration is made 18 times faster:: - - >>> %timeit do_integrate(integrand) - 1000 loops, best of 3: 242 µs per loop - >>> %timeit do_integrate(nb_integrand.ctypes) - 100000 loops, best of 3: 13.5 µs per loop - - -Dealing with pointers and array memory -====================================== - -A less trivial use case of C callbacks involves doing operation on some -array of data passed by the caller. As C doesn't have a high-level -abstraction similar to Numpy arrays, the C callback's signature will pass -low-level pointer and size arguments. 
Nevertheless, the Python code for -the callback will expect to exploit the power and expressiveness of Numpy -arrays. - -In the following example, the C callback is expected to operate on 2-d arrays, -with the signature ``void(double *input, double *output, int m, int n)``. -You can implement such a callback thusly:: - - from numba import cfunc, types, carray - - c_sig = types.void(types.CPointer(types.double), - types.CPointer(types.double), - types.intc, types.intc) - - @cfunc(c_sig) - def my_callback(in_, out, m, n): - in_array = carray(in_, (m, n)) - out_array = carray(out, (m, n)) - for i in range(m): - for j in range(n): - out_array[i, j] = 2 * in_array[i, j] - - -The :func:`numba.carray` function takes as input a data pointer and a shape -and returns an array view of the given shape over that data. The data is -assumed to be laid out in C order. If the data is laid out in Fortran order, -:func:`numba.farray` should be used instead. - - -Signature specification -======================= - -The explicit ``@cfunc`` signature can use any :ref:`Numba types `, -but only a subset of them make sense for a C callback. You should -generally limit yourself to scalar types (such as ``int8`` or ``float64``) -or pointers to them (for example ``types.CPointer(types.int8)``). - - -Compilation options -=================== - -A number of keyword-only arguments can be passed to the ``@cfunc`` -decorator: ``nopython`` and ``cache``. Their meaning is similar to those -in the ``@jit`` decorator. diff --git a/numba/docs/source/user/examples.rst b/numba/docs/source/user/examples.rst deleted file mode 100644 index d76ca4c66..000000000 --- a/numba/docs/source/user/examples.rst +++ /dev/null @@ -1,36 +0,0 @@ -======== -Examples -======== - - -Mandelbrot ----------- - -.. literalinclude:: /../../examples/mandel/mandel_jit.py - - -.. _example-movemean: - -Moving average --------------- - -.. 
literalinclude:: /../../examples/movemean.py - - -Multi-threading ---------------- - -The code below showcases the potential performance improvement when -using the :ref:`nogil ` feature. For example, on a 4-core machine, -I get the following results printed out:: - - numpy (1 thread) 145 ms - numba (1 thread) 128 ms - numba (4 threads) 35 ms - -.. note:: - Under Python 3, you can use the standard `concurrent.futures - `_ module - rather than spawn threads and dispatch tasks by hand. - -.. literalinclude:: /../../examples/nogil.py diff --git a/numba/docs/source/user/faq.rst b/numba/docs/source/user/faq.rst deleted file mode 100644 index 8cef1c5ed..000000000 --- a/numba/docs/source/user/faq.rst +++ /dev/null @@ -1,251 +0,0 @@ - -========================== -Frequently Asked Questions -========================== - - -Programming -=========== - -Can I pass a function as an argument to a jitted function? ----------------------------------------------------------- - -You can't, but in many cases you can use a closure to emulate it. -For example, this example:: - - @jit(nopython=True) - def f(g, x): - return g(x) + g(-x) - - result = f(my_g_function, 1) - -could be rewritten using a factory function:: - - def make_f(g): - # Note: a new f() is compiled each time make_f() is called! - @jit(nopython=True) - def f(x): - return g(x) + g(-x) - return f - - f = make_f(my_g_function) - result = f(1) - -Numba doesn't seem to care when I modify a global variable ----------------------------------------------------------- - -Numba considers global variables as compile-time constants. If you want -your jitted function to update itself when you have modified a global -variable's value, one solution is to recompile it using the -:meth:`~Dispatcher.recompile` method. This is a relatively slow operation, -though, so you may instead decide to rearchitect your code and turn the -global variable into a function argument. - -Can I debug a jitted function? 
------------------------------- - -Calling into :mod:`pdb` or other such high-level facilities is currently not -supported from Numba-compiled code. However, you can temporarily disable -compilation by setting the :envvar:`NUMBA_DISABLE_JIT` environment -variable. - -How can I create a Fortran-ordered array? ------------------------------------------ - -Numba currently doesn't support the ``order`` argument to most Numpy -functions such as :func:`numpy.empty` (because of limitations in the -:term:`type inference` algorithm). You can work around this issue by -creating a C-ordered array and then transposing it. For example:: - - a = np.empty((3, 5), order='F') - b = np.zeros(some_shape, order='F') - -can be rewritten as:: - - a = np.empty((5, 3)).T - b = np.zeros(some_shape[::-1]).T - -How can I increase integer width? ---------------------------------- - -By default, Numba will generally use machine integer width for integer -variables. On a 32-bit machine, you may sometimes need the magnitude of -64-bit integers instead. You can simply initialize relevant variables as -``np.int64`` (for example ``np.int64(0)`` instead of ``0``). It will -propagate to all computations involving those variables. - -.. _parallel_faqs: - -How can I tell if ``parallel=True`` worked? -------------------------------------------- - -Set the :ref:`environment variable ` ``NUMBA_WARNINGS`` to -non-zero and if the ``parallel=True`` transformations failed for a function -decorated as such, a warning will be displayed. - -Also, setting the :ref:`environment variable ` -``NUMBA_DEBUG_ARRAY_OPT_STATS`` will show some statistics about which -operators/calls are converted to parallel for-loops. - -Performance -=========== - -Does Numba inline functions? ----------------------------- - -Numba gives enough information to LLVM so that functions short enough -can be inlined. This only works in :term:`nopython mode`. - -Does Numba vectorize array computations (SIMD)? 
------------------------------------------------ - -Numba doesn't implement such optimizations by itself, but it lets LLVM -apply them. - -Why my loop is not vectorized? ------------------------------- - -Numba enables the loop-vectorize optimization in LLVM by default. -While it is a powerful optimization, not all loops are applicable. -Sometimes, loop-vectorization may fail due to subtle details like memory access -pattern. To see additional diagnostic information from LLVM, -add the following lines: - -.. code-block:: python - - import llvmlite.binding as llvm - llvm.set_option('', '--debug-only=loop-vectorize') - -This tells LLVM to print debug information from the **loop-vectorize** -pass to stderr. Each function entry looks like: - -.. code-block:: text - - LV: Checking a loop in "" from - LV: Loop hints: force=? width=0 unroll=0 - ... - LV: Vectorization is possible but not beneficial. - LV: Interleaving is not beneficial. - -Each function entry is separated by an empty line. The reason for rejecting -the vectorization is usually at the end of the entry. In the example above, -LLVM rejected the vectorization because doing so will not speedup the loop. -In this case, it can be due to memory access pattern. For instance, the -array being looped over may not be in contiguous layout. - -When memory access pattern is non-trivial such that it cannot determine the -access memory region, LLVM may reject with the following message: - -.. code-block:: text - - LV: Can't vectorize due to memory conflicts - -Another common reason is: - -.. code-block:: text - - LV: Not vectorizing: loop did not meet vectorization requirements. - -In this case, vectorization is rejected because the vectorized code may behave -differently. This is a case to try turning on ``fastmath=True`` to allow -fastmath instructions. - - -Does Numba automatically parallelize code? 
------------------------------------------- - -It can, in some cases: - -* Ufuncs and gufuncs with the ``target="parallel"`` option will run on multiple threads. -* The experimental ``parallel=True`` option to ``@jit`` will attempt to optimize - array operations and run them in parallel. It also adds support for ``prange()`` to - explicitly parallelize a loop. - -You can also manually run computations on multiple threads yourself and use -the ``nogil=True`` option (see :ref:`releasing the GIL `). Numba -can also target parallel execution on GPU architectures using its CUDA and HSA -backends. - - -Can Numba speed up short-running functions? -------------------------------------------- - -Not significantly. New users sometimes expect to JIT-compile such -functions:: - - def f(x, y): - return x + y - -and get a significant speedup over the Python interpreter. But there isn't -much Numba can improve here: most of the time is probably spent in CPython's -function call mechanism, rather than the function itself. As a rule of -thumb, if a function takes less than 10 µs to execute: leave it. - -The exception is that you *should* JIT-compile that function if it is called -from another jitted function. - -There is a delay when JIT-compiling a complicated function, how can I improve it? ---------------------------------------------------------------------------------- - -Try to pass ``cache=True`` to the ``@jit`` decorator. It will keep the -compiled version on disk for later use. - -A more radical alternative is :ref:`ahead-of-time compilation `. - - -Integration with other utilities -================================ - -Can I "freeze" an application which uses Numba? ------------------------------------------------ - -If you're using PyInstaller or a similar utility to freeze an application, -you may encounter issues with llvmlite. llvmlite needs a non-Python DLL -for its working, but it won't be automatically detected by freezing utilities. 
-You have to inform the freezing utility of the DLL's location: it will -usually be named ``llvmlite/binding/libllvmlite.so`` or -``llvmlite/binding/llvmlite.dll``, depending on your system. - -I get errors when running a script twice under Spyder ------------------------------------------------------ - -When you run a script in a console under Spyder, Spyder first tries to -reload existing modules. This doesn't work well with Numba, and can -produce errors like ``TypeError: No matching definition for argument type(s)``. - -There is a fix in the Spyder preferences. Open the "Preferences" window, -select "Console", then "Advanced Settings", click the "Set UMR excluded -modules" button, and add ``numba`` inside the text box that pops up. - -To see the setting take effect, be sure to restart the IPython console or -kernel. - -.. _llvm-locale-bug: - -Why does Numba complain about the current locale? -------------------------------------------------- - -If you get an error message such as the following:: - - RuntimeError: Failed at nopython (nopython mode backend) - LLVM will produce incorrect floating-point code in the current locale - -it means you have hit a LLVM bug which causes incorrect handling of -floating-point constants. This is known to happen with certain third-party -libraries such as the Qt backend to matplotlib. - -To work around the bug, you need to force back the locale to its default -value, for example:: - - import locale - locale.setlocale(locale.LC_NUMERIC, 'C') - - -Miscellaneous -============= - -How do I reference/cite/acknowledge Numba in other work? --------------------------------------------------------- -For academic use, the best option is to cite our ACM Proceedings: -`Numba: a LLVM-based Python JIT compiler. -`_ diff --git a/numba/docs/source/user/generated-jit.rst b/numba/docs/source/user/generated-jit.rst deleted file mode 100644 index 595bf9389..000000000 --- a/numba/docs/source/user/generated-jit.rst +++ /dev/null @@ -1,71 +0,0 @@ -.. 
_generated-jit: - -================================================ -Flexible specializations with ``@generated_jit`` -================================================ - - -While the :func:`~numba.jit` decorator is useful for many situations, -sometimes you want to write a function that has different implementations -depending on its input types. The :func:`~numba.generated_jit` decorator -allows the user to control the selection of a specialization at compile-time, -while fulling retaining runtime execution speed of a JIT function. - - -Example -======= - -Suppose you want to write a function which returns whether a given value -is a "missing" value according to certain conventions. For the sake of -the example, let's adopt the following definition: - -- for floating-point arguments, a missing value is a ``NaN`` -- for Numpy datetime64 and timedelta64 arguments, a missing value is a ``NaT`` -- other types don't have the concept of a missing value. - -That compile-time logic is easily implemented using the -:func:`~numba.generated_jit` decorator:: - - import numpy as np - - from numba import generated_jit, types - - @generated_jit(nopython=True) - def is_missing(x): - """ - Return True if the value is missing, False otherwise. - """ - if isinstance(x, types.Float): - return lambda x: np.isnan(x) - elif isinstance(x, (types.NPDatetime, types.NPTimedelta)): - # The corresponding Not-a-Time value - missing = x('NaT') - return lambda x: x == missing - else: - return lambda x: False - - -There are several things to note here: - -* The decorated function is called with the :ref:`Numba types ` - of the arguments, not their values. - -* The decorated function doesn't actually compute a result, it returns - a callable implementing the actual definition of the function for the - given types. - -* It is possible to pre-compute some data at compile-time (the ``missing`` - variable above) to have them reused inside the compiled implementation. 
- -* The function definitions use the same names for arguments as in the - decorated function, this is required to ensure passing arguments by - name works as expected. - - -Compilation options -=================== - -The :func:`~numba.generated_jit` decorator supports the same keyword-only -arguments as the :func:`~numba.jit` decorator, for example the ``nopython`` -and ``cache`` options. - diff --git a/numba/docs/source/user/index.rst b/numba/docs/source/user/index.rst deleted file mode 100644 index 003a78fda..000000000 --- a/numba/docs/source/user/index.rst +++ /dev/null @@ -1,20 +0,0 @@ - -User Manual -=========== - -.. toctree:: - - overview.rst - installing.rst - jit.rst - generated-jit.rst - vectorize.rst - jitclass.rst - cfunc.rst - pycc.rst - parallel.rst - stencil.rst - performance-tips.rst - troubleshoot.rst - faq.rst - examples.rst diff --git a/numba/docs/source/user/installing.rst b/numba/docs/source/user/installing.rst deleted file mode 100644 index ab83877c0..000000000 --- a/numba/docs/source/user/installing.rst +++ /dev/null @@ -1,72 +0,0 @@ - -Getting started -=============== - -Compatibility -------------- - -Numba is compatible with Python 2.7 and 3.5 or later, and Numpy versions 1.7 to 1.14. - -Our supported platforms are: - -* Linux x86 (32-bit and 64-bit) -* Windows 7 and later (32-bit and 64-bit) -* OS X 10.9 and later (64-bit) -* NVIDIA GPUs of compute capability 2.0 and later -* AMD APUs supported by the HSA 1.0 final runtime (Kaveri, Carrizo) - - -Installing using Conda ----------------------- - -The easiest way to install numba and get updates is by using Conda, -a cross-platform package manager and software distribution maintained -by Anaconda, Inc. You can either use `Anaconda -`_ to get the full stack in one download, -or `Miniconda `_ which will install -the minimum packages needed to get started. 
- -Once you have conda installed, just type:: - - $ conda install numba - -or:: - - $ conda update numba - -Installing from source ----------------------- - -We won't cover requirements in detail here, but you can get the bleeding-edge -source code from `Github `_:: - - $ git clone git://github.com/numba/numba.git - -Source archives of the latest release can be found on -`PyPI `_. - -You will need a C compiler corresponding to your Python installation, as -well as the `Numpy `_ and -`llvmlite `_ packages. See :ref:`buildenv` -for more information. - -Checking your installation --------------------------- - -You should be able to import Numba from the Python prompt:: - - $ python - Python 3.4.2 |Continuum Analytics, Inc.| (default, Oct 21 2014, 17:16:37) - [GCC 4.4.7 20120313 (Red Hat 4.4.7-1)] on linux - Type "help", "copyright", "credits" or "license" for more information. - >>> import numba - >>> numba.__version__ - '0.16.0-82-g350c9d2' - -You can also try executing the :ref:`pycc ` utility:: - - $ pycc --help - usage: pycc [-h] [-o OUTPUT] [-c | --llvm] [--linker LINKER] - [--linker-args LINKER_ARGS] [--header] [--python] [-d] - inputs [inputs ...] - diff --git a/numba/docs/source/user/jit.rst b/numba/docs/source/user/jit.rst deleted file mode 100644 index 5bd6f7b3b..000000000 --- a/numba/docs/source/user/jit.rst +++ /dev/null @@ -1,199 +0,0 @@ -.. _jit: - -=================================== -Compiling Python code with ``@jit`` -=================================== - -Numba provides several utilities for code generation, but its central -feature is the :func:`numba.jit` decorator. Using this decorator, you can mark -a function for optimization by Numba's JIT compiler. Various invocation -modes trigger differing compilation options and behaviours. - - -Basic usage -=========== - -.. 
_jit-lazy: - -Lazy compilation ----------------- - -The recommended way to use the ``@jit`` decorator is to let Numba decide -when and how to optimize:: - - from numba import jit - - @jit - def f(x, y): - # A somewhat trivial example - return x + y - -In this mode, compilation will be deferred until the first function -execution. Numba will infer the argument types at call time, and generate -optimized code based on this information. Numba will also be able to -compile separate specializations depending on the input types. For example, -calling the ``f()`` function above with integer or complex numbers will -generate different code paths:: - - >>> f(1, 2) - 3 - >>> f(1j, 2) - (2+1j) - -Eager compilation ------------------ - -You can also tell Numba the function signature you are expecting. The -function ``f()`` would now look like:: - - from numba import jit, int32 - - @jit(int32(int32, int32)) - def f(x, y): - # A somewhat trivial example - return x + y - -``int32(int32, int32)`` is the function's signature. In this case, the -corresponding specialization will be compiled by the ``@jit`` decorator, -and no other specialization will be allowed. This is useful if you want -fine-grained control over types chosen by the compiler (for example, -to use single-precision floats). - -If you omit the return type, e.g. by writing ``(int32, int32)`` instead of -``int32(int32, int32)``, Numba will try to infer it for you. Function -signatures can also be strings, and you can pass several of them as a list; -see the :func:`numba.jit` documentation for more details. - -Of course, the compiled function gives the expected results:: - - >>> f(1,2) - 3 - -and if we specified ``int32`` as return type, the higher-order bits get -discarded:: - - >>> f(2**31, 2**31 + 1) - 1 - - -Calling and inlining other functions -==================================== - -Numba-compiled functions can call other compiled functions. 
The function -calls may even be inlined in the native code, depending on optimizer -heuristics. For example:: - - @jit - def square(x): - return x ** 2 - - @jit - def hypot(x, y): - return math.sqrt(square(x) + square(y)) - -The ``@jit`` decorator *must* be added to any such library function, -otherwise Numba may generate much slower code. - - -Signature specifications -======================== - -Explicit ``@jit`` signatures can use a number of types. Here are some -common ones: - -* ``void`` is the return type of functions returning nothing (which - actually return :const:`None` when called from Python) -* ``intp`` and ``uintp`` are pointer-sized integers (signed and unsigned, - respectively) -* ``intc`` and ``uintc`` are equivalent to C ``int`` and ``unsigned int`` - integer types -* ``int8``, ``uint8``, ``int16``, ``uint16``, ``int32``, ``uint32``, - ``int64``, ``uint64`` are fixed-width integers of the corresponding bit - width (signed and unsigned) -* ``float32`` and ``float64`` are single- and double-precision floating-point - numbers, respectively -* ``complex64`` and ``complex128`` are single- and double-precision complex - numbers, respectively -* array types can be specified by indexing any numeric type, e.g. ``float32[:]`` - for a one-dimensional single-precision array or ``int8[:,:]`` for a - two-dimensional array of 8-bit integers. - - -Compilation options -=================== - -A number of keyword-only arguments can be passed to the ``@jit`` decorator. - -.. _jit-nopython: - -``nopython`` ------------- - -Numba has two compilation modes: :term:`nopython mode` and -:term:`object mode`. The former produces much faster code, but has -limitations that can force Numba to fall back to the latter. To prevent -Numba from falling back, and instead raise an error, pass ``nopython=True``. - -:: - - @jit(nopython=True) - def f(x, y): - return x + y - -.. seealso:: :ref:`numba-troubleshooting` - -.. 
_jit-nogil: - -``nogil`` ---------- - -Whenever Numba optimizes Python code to native code that only works on -native types and variables (rather than Python objects), it is not necessary -anymore to hold Python's :py:term:`global interpreter lock` (GIL). -Numba will release the GIL when entering such a compiled function if you -passed ``nogil=True``. - -:: - - @jit(nogil=True) - def f(x, y): - return x + y - -Code running with the GIL released runs concurrently with other -threads executing Python or Numba code (either the same compiled function, -or another one), allowing you to take advantage of multi-core systems. -This will not be possible if the function is compiled in :term:`object mode`. - -When using ``nogil=True``, you'll have to be wary of the usual pitfalls -of multi-threaded programming (consistency, synchronization, race conditions, -etc.). - -.. _jit-cache: - -``cache`` ---------- - -To avoid compilation times each time you invoke a Python program, -you can instruct Numba to write the result of function compilation into -a file-based cache. This is done by passing ``cache=True``:: - - @jit(cache=True) - def f(x, y): - return x + y - -.. _parallel_jit_option: - -``parallel`` ------------- - -Enables an experimental feature that automatically parallelizes (and -performs other optimizations for) those operations in the function known to -have parallel semantics. For a list of supported operations, see -:ref:`numba-parallel`. This feature is enabled by passing ``parallel=True`` and -must be used in conjunction with ``nopython=True``:: - - @jit(nopython=True, parallel=True) - def f(x, y): - return x + y - -.. seealso:: :ref:`numba-parallel` diff --git a/numba/docs/source/user/jitclass.rst b/numba/docs/source/user/jitclass.rst deleted file mode 100644 index f358f9f62..000000000 --- a/numba/docs/source/user/jitclass.rst +++ /dev/null @@ -1,101 +0,0 @@ -.. 
_jitclass: - -======================================= -Compiling python classes with @jitclass -======================================= - -.. note:: - - This is a early version of jitclass support. Not all compiling features are - exposed or implemented, yet. - - -Numba supports code generation for classes via the :func:`numba.jitclass` -decorator. A class can be marked for optimization using this decorator along -with a specification of the types of each field. We call the resulting class -object a jitclass. All methods of a jitclass are compiled into nopython -functions. The data of a jitclass instance is allocated on the heap as a -C-compatible structure so that any compiled functions can have direct access -to the underlying data, bypassing the interpreter. - - -Basic usage -=========== - -Here's an example of a jitclass:: - - import numpy as np - from numba import jitclass # import the decorator - from numba import int32, float32 # import the types - - spec = [ - ('value', int32), # a simple scalar field - ('array', float32[:]), # an array field - ] - - @jitclass(spec) - class Bag(object): - def __init__(self, value): - self.value = value - self.array = np.zeros(value, dtype=np.float32) - - @property - def size(self): - return self.array.size - - def increment(self, val): - for i in range(self.size): - self.array[i] = val - return self.array - - -(see full example at `examples/jitclass.py` from the source tree) - -In the above example, a ``spec`` is provided as a list of 2-tuples. The tuples -contain the name of the field and the numba type of the field. Alternatively, -user can use a dictionary (an ``OrderedDict`` preferrably for stable field -ordering), which maps field names to types. - -The definition of the class requires at least a ``__init__`` method for -initializing each defined fields. Uninitialized fields contains garbage data. -Methods and properties (getters and setters only) can be defined. They will be -automatically compiled. 
- - -Support operations -================== - -The following operations of jitclasses work in both the interpreter and numba -compiled functions: - -* calling the jitclass class object to construct a new instance - (e.g. ``mybag = Bag(123)``); -* read/write access to attributes and properties (e.g. ``mybag.value``); -* calling methods (e.g. ``mybag.increment(3)``); - -Using jitclasses in numba compiled function is more efficient. -Short methods can be inlined (at the discretion of LLVM inliner). -Attributes access are simply reading from a C structure. -Using jitclasses from the intpreter has the same overhead of calling any -numba compiled function from the interpreter. Arguments and return values -must be unboxed or boxed between python objects and native representation. -Values encapsulated by a jitclass does not get boxed into python object when -the jitclass instance is handed to the interpreter. It is during attribute -access to the field values that they are boxed. - - -Limitations -=========== - -* A jitclass class object is treated as a function (the constructor) inside - a numba compiled function. -* ``isinstance()`` only works in the interpreter. -* Manipulating jitclass instances in the interpreter is not optimized, yet. -* Support for jitclasses are available on CPU only. - (Note: Support for GPU devices is planned for a future release.) - - -The decorator: ``@jitclass`` -============================ - -.. autofunction:: numba.jitclass diff --git a/numba/docs/source/user/overview.rst b/numba/docs/source/user/overview.rst deleted file mode 100644 index 9b11b5a49..000000000 --- a/numba/docs/source/user/overview.rst +++ /dev/null @@ -1,34 +0,0 @@ - -Overview -======== - -Numba is a compiler for Python array and numerical functions that gives -you the power to speed up your applications with high performance -functions written directly in Python. - -Numba generates optimized machine code from pure Python code using -the `LLVM compiler infrastructure `_. 
With a few simple -annotations, array-oriented and math-heavy Python code can be -just-in-time optimized to performance similar as C, C++ and Fortran, without -having to switch languages or Python interpreters. - -Numba's main features are: - -* :ref:`on-the-fly code generation ` (at import time or runtime, at the - user's preference) -* native code generation for the CPU (default) and - :doc:`GPU hardware <../cuda/index>` -* integration with the Python scientific software stack (thanks to Numpy) - -Here is how a Numba-optimized function, taking a Numpy array as argument, -might look like:: - - @numba.jit - def sum2d(arr): - M, N = arr.shape - result = 0.0 - for i in range(M): - for j in range(N): - result += arr[i,j] - return result - diff --git a/numba/docs/source/user/parallel.rst b/numba/docs/source/user/parallel.rst deleted file mode 100644 index 87875ac03..000000000 --- a/numba/docs/source/user/parallel.rst +++ /dev/null @@ -1,141 +0,0 @@ -.. Copyright (c) 2017 Intel Corporation - SPDX-License-Identifier: BSD-2-Clause - -.. _numba-parallel: - -======================================= -Automatic parallelization with ``@jit`` -======================================= - -Setting the :ref:`parallel_jit_option` option for :func:`~numba.jit` enables -an experimental Numba feature that attempts to automatically parallelize and -perform other optimizations on (part of) a function. At the moment, this -feature only works on CPUs. - -Some operations inside a user defined function, e.g., adding a scalar value to -an array, are known to have parallel semantics. A user program may contain -many such operations and while each operation could be parallelized -individually, such an approach often has lackluster performance due to poor -cache behavior. Instead, with auto-parallelization, Numba attempts to -identify such operations in a user program, and fuse adjacent ones together, -to form one or more kernels that are automatically run in parallel. 
-The process is fully automated without modifications to the user program, -which is in contrast to Numba's :func:`~numba.vectorize` or -:func:`~numba.guvectorize` mechanism, where manual effort is required -to create parallel kernels. - -.. _numba-parallel-supported: - -Supported Operations -==================== - -In this section, we give a list of all the array operations that have -parallel semantics and for which we attempt to parallelize. - -#. All numba array operations that are supported by :ref:`case-study-array-expressions`, - which include common arithmetic functions between Numpy arrays, and between - arrays and scalars, as well as Numpy ufuncs. They are often called - `element-wise` or `point-wise` array operations: - - * unary operators: ``+`` ``-`` ``~`` - * binary operators: ``+`` ``-`` ``*`` ``/`` ``/?`` ``%`` ``|`` ``>>`` ``^`` ``<<`` ``&`` ``**`` ``//`` - * comparison operators: ``==`` ``!=`` ``<`` ``<=`` ``>`` ``>=`` - * :ref:`Numpy ufuncs ` that are supported in :term:`nopython mode`. - * User defined :class:`~numba.DUFunc` through :func:`~numba.vectorize`. - -#. Numpy reduction functions ``sum``, ``prod``, ``min``, ``max``, ``argmin``, - and ``argmax``. Also, array math functions ``mean``, ``var``, and ``std``. - -#. Numpy array creation functions ``zeros``, ``ones``, ``arange``, ``linspace``, - and several random functions (rand, randn, ranf, random_sample, sample, - random, standard_normal, chisquare, weibull, power, geometric, exponential, - poisson, rayleigh, normal, uniform, beta, binomial, f, gamma, lognormal, - laplace, randint, triangular). - -#. Numpy ``dot`` function between a matrix and a vector, or two vectors. - In all other cases, Numba's default implementation is used. - -#. Multi-dimensional arrays are also supported for the above operations - when operands have matching dimension and size. 
The full semantics of - Numpy broadcast between arrays with mixed dimensionality or size is - not supported, nor is the reduction across a selected dimension. - -#. Array assignment in which the target is an array selection using a slice - or a boolean array, and the value being assigned is either a scalar or - another selection where the slice range or bitarray are inferred to be - compatible. - -#. The ``reduce`` operator of ``functools`` is supported for specifying parallel - reductions on 1D Numpy arrays but the initial value argument is mandatory. - -.. _numba-prange: - -Explicit Parallel Loops -======================== - -Another experimental feature of this module is support for explicit parallel -loops. One can use Numba's ``prange`` instead of ``range`` to specify that a -loop can be parallelized. The user is required to make sure that the loop does -not have cross iteration dependencies except the supported reductions. - -A reductions is inferred automatically if a variable is updated by a binary -function/operator using its previous value in the loop body. The initial value -of the reduction is inferred automatically for ``+=`` and ``*=`` operators. -For other functions/operators, the reduction variable should hold the initial -value right before entering the ``prange`` loop. 
- -The example below demonstrates a parallel loop with a -reduction (``A`` is a one-dimensional Numpy array):: - - from numba import njit, prange - @njit(parallel=True) - def prange_test(A): - s = 0 - for i in prange(A.shape[0]): - s += A[i] - return s - -Examples -======== - -In this section, we give an example of how this feature helps -parallelize Logistic Regression:: - - @numba.jit(nopython=True, parallel=True) - def logistic_regression(Y, X, w, iterations): - for i in range(iterations): - w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X) - return w - -We will not discuss details of the algorithm, but instead focus on how -this program behaves with auto-parallelization: - -1. Input ``Y`` is a vector of size ``N``, ``X`` is an ``N x D`` matrix, - and ``w`` is a vector of size ``D``. - -2. The function body is an iterative loop that updates variable ``w``. - The loop body consists of a sequence of vector and matrix operations. - -3. The inner ``dot`` operation produces a vector of size ``N``, followed by a - sequence of arithmetic operations either between a scalar and vector of - size ``N``, or two vectors both of size ``N``. - -4. The outer ``dot`` produces a vector of size ``D``, followed by an inplace - array subtraction on variable ``w``. - -5. With auto-parallelization, all operations that produce array of size - ``N`` are fused together to become a single parallel kernel. This includes - the inner ``dot`` operation and all point-wise array operations following it. - -6. The outer ``dot`` operation produces a result array of different dimension, - and is not fused with the above kernel. - -Here, the only thing required to take advantage of parallel hardware is to set -the :ref:`parallel_jit_option` option for :func:`~numba.jit`, with no -modifications to the ``logistic_regression`` function itself. 
If we were to -give an equivalence parallel implementation using :func:`~numba.guvectorize`, -it would require a pervasive change that rewrites the code to extract kernel -computation that can be parallelized, which was both tedious and challenging. - - -.. seealso:: :ref:`parallel_jit_option`, :ref:`Parallel FAQs ` diff --git a/numba/docs/source/user/performance-tips.rst b/numba/docs/source/user/performance-tips.rst deleted file mode 100644 index 5e4f3c57f..000000000 --- a/numba/docs/source/user/performance-tips.rst +++ /dev/null @@ -1,230 +0,0 @@ -Performance Tips -================ - -This is a short guide to features present in Numba that can help with obtaining -the best performance from code. Two examples are used, both are entirely -contrived and exist purely for pedagogical reasons to motivate discussion. -The first is the computation of the trigonometric identity -``cos(x)^2 + sin(x)^2``, the second is a simple element wise square root of a -vector with reduction over summation. All performance numbers are indicative -only and unless otherwise stated were taken from running on an Intel ``i7-4790`` -CPU (4 hardware threads) with an input of ``np.arange(1.e7)``. - -.. note:: - A reasonably effective approach to achieving high performance code is to - profile the code running with real data and use that to guide performance - tuning. The information presented here is to demonstrate features, not to act - as canonical guidance! - -No Python mode vs Object mode ------------------------------ - -A common pattern is to decorate functions with ``@jit`` as this is the most -flexible decorator offered by Numba. ``@jit`` essentially encompasses two modes -of compilation, first it will try and compile the decorated function in no -Python mode, if this fails it will try again to compile the function using -object mode. 
Whilst the use of looplifting in object mode can enable some -performance increase, getting functions to compile under no python mode is -really the key to good performance. To make it such that only no python mode is -used and if compilation fails an exception is raised the decorators ``@njit`` -and ``@jit(nopython=True)`` can be used (the first is an alias of the -second for convenience). - -Loops ------ -Whilst NumPy has developed a strong idiom around the use of vector operations, -Numba is perfectly happy with loops too. For users familiar with C or Fortran, -writing Python in this style will work fine in Numba (after all, LLVM gets a -lot of use in compiling C lineage languages). For example:: - - @njit - def ident_np(x): - return np.cos(x) ** 2 + np.sin(x) ** 2 - - @njit - def ident_loops(x): - r = np.empty_like(x) - n = len(x) - for i in range(n): - r[i] = np.cos(x[i]) ** 2 + np.sin(x[i]) ** 2 - return r - -The above run at almost identical speeds when decorated with ``@njit``, without -the decorator the vectorized function is a couple of orders of magnitude faster. - -+-----------------+-------+----------------+ -| Function Name | @njit | Execution time | -+=================+=======+================+ -| ``ident_np`` | No | 0.581s | -+-----------------+-------+----------------+ -| ``ident_np`` | Yes | 0.659s | -+-----------------+-------+----------------+ -| ``ident_loops`` | No | 25.2s | -+-----------------+-------+----------------+ -| ``ident_loops`` | Yes | 0.670s | -+-----------------+-------+----------------+ - - -Fastmath --------- -In certain classes of applications strict IEEE 754 compliance is less -important. As a result it is possible to relax some numerical rigour with -view of gaining additional performance. The way to achieve this behaviour in -Numba is through the use of the ``fastmath`` keyword argument:: - - @njit(fastmath=False) - def do_sum(A): - acc = 0. 
- # without fastmath, this loop must accumulate in strict order - for x in A: - acc += np.sqrt(x) - return acc - - @njit(fastmath=True) - def do_sum_fast(A): - acc = 0. - # with fastmath, the reduction can be vectorized as floating point - # reassociation is permitted. - for x in A: - acc += np.sqrt(x) - return acc - - -+-----------------+-----------------+ -| Function Name | Execution time | -+=================+=================+ -| ``do_sum`` | 35.2 ms | -+-----------------+-----------------+ -| ``do_sum_fast`` | 17.8 ms | -+-----------------+-----------------+ - - -Parallel=True -------------- -If code contains operations that are parallelisable (:ref:`and supported -`) Numba can compile a version of that will run in -parallel on multiple native threads (no GIL!). This parallelisation is performed -automatically and is enabled by simply adding the ``parallel`` keyword -argument:: - - @njit(parallel=True) - def ident_parallel(A): - return np.cos(x) ** 2 + np.sin(x) ** 2 - - -Executions times are as follows: - -+--------------------+-----------------+ -| Function Name | Execution time | -+====================+=================+ -| ``ident_parallel`` | 112 ms | -+--------------------+-----------------+ - - -The execution speed of this function with ``parallel=True`` present is -approximately 5x that of the NumPy equivalent and 6x that of standard -``@njit``. - - -Numba parallel execution also has support for explicit parallel loop -declaration similar to that in OpenMP. To indicate that a loop should be -executed in parallel the ``numba.prange`` function should be used, this function -behaves like Python ``range`` and if ``parallel=True`` is not set it acts -simply as an alias of ``range``. Loops induced with ``prange`` can be used for -embarrassingly parallel computation and also reductions. 
back end to use the SVML intrinsic functions wherever possible.
The default is -to use high accuracy which is accurate to within ``1 ULP``, however if -``fastmath`` is set to ``True`` then the lower accuracy versions of the -intrinsics are used (answers to within ``4 ULP``). - - -First obtain SVML, using conda for example:: - - conda install -c numba icc_rt - -Rerunning the identity function example ``ident_np`` from above with various -combinations of options to ``@njit`` and with/without SVML yields the following -performance results (input size ``np.arange(1.e8)``). For reference, with just -NumPy the function executed in ``5.84s``: - -+-----------------------------------+--------+-------------------+ -| ``@njit`` kwargs | SVML | Execution time | -+===================================+========+===================+ -| ``None`` | No | 5.95s | -+-----------------------------------+--------+-------------------+ -| ``None`` | Yes | 2.26s | -+-----------------------------------+--------+-------------------+ -| ``fastmath=True`` | No | 5.97s | -+-----------------------------------+--------+-------------------+ -| ``fastmath=True`` | Yes | 1.8s | -+-----------------------------------+--------+-------------------+ -| ``parallel=True`` | No | 1.36s | -+-----------------------------------+--------+-------------------+ -| ``parallel=True`` | Yes | 0.624s | -+-----------------------------------+--------+-------------------+ -| ``parallel=True, fastmath=True`` | No | 1.32s | -+-----------------------------------+--------+-------------------+ -| ``parallel=True, fastmath=True`` | Yes | 0.576s | -+-----------------------------------+--------+-------------------+ - -It is evident that SVML significantly increases the performance of this -function. The impact of ``fastmath`` in the case of SVML not being present is -zero, this is expected as there is nothing in the original function that would -benefit from relaxing numerical strictness. - -Linear algebra --------------- -Numba supports most of ``numpy.linalg`` in no Python mode. 
The internal -implementation relies on a LAPACK and BLAS library to do the numerical work -and it obtains the bindings for the necessary functions from SciPy. Therefore, -to achieve good performance in ``numpy.linalg`` functions with Numba it is -necessary to use a SciPy built against a well optimised LAPACK/BLAS library. -In the case of the Anaconda distribution SciPy is built against Intel's MKL -which is highly optimised and as a result Numba makes use of this performance. diff --git a/numba/docs/source/user/pycc.rst b/numba/docs/source/user/pycc.rst deleted file mode 100644 index a8035b6df..000000000 --- a/numba/docs/source/user/pycc.rst +++ /dev/null @@ -1,137 +0,0 @@ - -============================ -Compiling code ahead of time -============================ - -.. _pycc: - -While Numba's main use case is :term:`Just-in-Time compilation`, it also -provides a facility for :term:`Ahead-of-Time compilation` (AOT). - - -Overview -======== - -Benefits --------- - -#. AOT compilation produces a compiled extension module which does not depend - on Numba: you can distribute the module on machines which do not have - Numba installed (but Numpy is required). - -#. There is no compilation overhead at runtime (but see the - ``@jit`` :ref:`cache ` option), nor any overhead of importing - Numba. - -.. seealso:: - Compiled extension modules are discussed in the - `Python packaging user guide `_. - - -Limitations ------------ - -#. AOT compilation only allows for regular functions, not :term:`ufuncs `. - -#. You have to specify function signatures explicitly. - -#. Each exported function can have only one signature (but you can export - several different signatures under different names). - -#. AOT compilation produces generic code for your CPU's architectural family - (for example "x86-64"), while JIT compilation produces code optimized - for your particular CPU model. 
- - -Usage -===== - -Standalone example ------------------- - -:: - - from numba.pycc import CC - - cc = CC('my_module') - # Uncomment the following line to print out the compilation steps - #cc.verbose = True - - @cc.export('multf', 'f8(f8, f8)') - @cc.export('multi', 'i4(i4, i4)') - def mult(a, b): - return a * b - - @cc.export('square', 'f8(f8)') - def square(a): - return a ** 2 - - if __name__ == "__main__": - cc.compile() - - -If you run this Python script, it will generate an extension module named -``my_module``. Depending on your platform, the actual filename may be -``my_module.so``, ``my_module.pyd``, ``my_module.cpython-34m.so``, etc. - -The generated module has three functions: ``multf``, ``multi`` and ``square``. -``multi`` operates on 32-bit integers (``i4``), while ``multf`` and ``square`` -operate on double-precision floats (``f8``):: - - >>> import my_module - >>> my_module.multi(3, 4) - 12 - >>> my_module.square(1.414) - 1.9993959999999997 - - -Distutils integration ---------------------- - -You can also integrate the compilation step for your extension modules -in your ``setup.py`` script, using distutils or setuptools:: - - from distutils.core import setup - - from source_module import cc - - setup(..., - ext_modules=[cc.distutils_extension()]) - - -The ``source_module`` above is the module defining the ``cc`` object. -Extensions compiled like this will be automatically included in the -build files for your Python project, so you can distribute them inside -binary packages such as wheels or Conda packages. Note that in the case of -using conda, the compilers used for AOT need to be those that are available -in the Anaconda distribution. - - -Signature syntax ----------------- - -The syntax for exported signatures is the same as in the ``@jit`` -decorator. You can read more about it in the :ref:`types ` -reference. 
- -Here is an example of exporting an implementation of the second-order -centered difference on a 1d array:: - - @cc.export('centdiff_1d', 'f8[:](f8[:], f8)') - def centdiff_1d(u, dx): - D = np.empty_like(u) - D[0] = 0 - D[-1] = 0 - for i in range(1, len(D) - 1): - D[i] = (u[i+1] - 2 * u[i] + u[i-1]) / dx**2 - return D - -.. (example from http://nbviewer.ipython.org/gist/ketch/ae87a94f4ef0793d5d52) - -You can also omit the return type, which will then be inferred by Numba:: - - @cc.export('centdiff_1d', '(f8[:], f8)') - def centdiff_1d(u, dx): - # Same code as above - ... - diff --git a/numba/docs/source/user/stencil.rst b/numba/docs/source/user/stencil.rst deleted file mode 100644 index 2a3260114..000000000 --- a/numba/docs/source/user/stencil.rst +++ /dev/null @@ -1,244 +0,0 @@ -.. Copyright (c) 2017 Intel Corporation - SPDX-License-Identifier: BSD-2-Clause - -.. _numba-stencil: - -================================ -Using the ``@stencil`` decorator -================================ - -Stencils are a common computational pattern in which array elements -are updated according to some fixed pattern called the stencil kernel. -Numba provides the ``@stencil`` decorator so that users may -easily specify a stencil kernel and Numba then generates the looping -code necessary to apply that kernel to some input array. Thus, the -stencil decorator allows clearer, more concise code and in conjunction -with :ref:`the parallel jit option ` enables higher -performance through parallelization of the stencil execution. - - -Basic usage -=========== - -An example use of the ``@stencil`` decorator:: - - from numba import stencil - - @stencil - def kernel1(a): - return 0.25 * (a[0, 1] + a[1, 0] + a[0, -1] + a[-1, 0]) - -The stencil kernel is specified by what looks like a standard Python -function definition but there are different semantics with -respect to array indexing. 
-Stencils produce an output array of the same size and shape as the -input array although depending on the kernel definition may have a -different type. -Conceptually, the stencil kernel is run once for each element in the -output array. The return value from the stencil kernel is the value -written into the output array for that particular element. - -The parameter ``a`` represents the input array over which the -kernel is applied. -Indexing into this array takes place with respect to the current element -of the output array being processed. For example, if element ``(x, y)`` -is being processed then ``a[0, 0]`` in the stencil kernel corresponds to -``a[x + 0, y + 0]`` in the input array. Similarly, ``a[-1, 1]`` in the stencil -kernel corresponds to ``a[x - 1, y + 1]`` in the input array. - -Depending on the specified kernel, the kernel may not be applicable to the -borders of the output array as this may cause the input array to be -accessed out-of-bounds. The way in which the stencil decorator handles -this situation is dependent upon which :ref:`stencil-mode` is selected. -The default mode is for the stencil decorator to set the border elements -of the output array to zero. - -To invoke a stencil on an input array, call the stencil as if it were -a regular function and pass the input array as the argument. 
For example, using -the kernel defined above:: - - >>> import numpy as np - >>> input_arr = np.arange(100).reshape((10, 10)) - array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], - [10, 11, 12, 13, 14, 15, 16, 17, 18, 19], - [20, 21, 22, 23, 24, 25, 26, 27, 28, 29], - [30, 31, 32, 33, 34, 35, 36, 37, 38, 39], - [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], - [50, 51, 52, 53, 54, 55, 56, 57, 58, 59], - [60, 61, 62, 63, 64, 65, 66, 67, 68, 69], - [70, 71, 72, 73, 74, 75, 76, 77, 78, 79], - [80, 81, 82, 83, 84, 85, 86, 87, 88, 89], - [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]]) - >>> output_arr = kernel1(input_arr) - array([[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], - [ 0., 11., 12., 13., 14., 15., 16., 17., 18., 0.], - [ 0., 21., 22., 23., 24., 25., 26., 27., 28., 0.], - [ 0., 31., 32., 33., 34., 35., 36., 37., 38., 0.], - [ 0., 41., 42., 43., 44., 45., 46., 47., 48., 0.], - [ 0., 51., 52., 53., 54., 55., 56., 57., 58., 0.], - [ 0., 61., 62., 63., 64., 65., 66., 67., 68., 0.], - [ 0., 71., 72., 73., 74., 75., 76., 77., 78., 0.], - [ 0., 81., 82., 83., 84., 85., 86., 87., 88., 0.], - [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]) - >>> input_arr.dtype - dtype('int64') - >>> output_arr.dtype - dtype('float64') - -Note that the stencil decorator has determined that the output type -of the specified stencil kernel is ``float64`` and has thus created the -output array as ``float64`` while the input array is of type ``int64``. - -Stencil Parameters -================== - -Stencil kernel definitions may take any number of arguments with -the following provisions. The first argument must be an array. -The size and shape of the output array will be the same as that of the -first argument. Additional arguments may either be scalars or -arrays. For array arguments, those arrays must be at least as large -as the first argument (array) in each dimension. Array indexing is relative for -all such input array arguments. - -.. 
_stencil-kernel-shape-inference: - -Kernel shape inference and border handling -========================================== - -In the above example and in most cases, the array indexing in the -stencil kernel will exclusively use ``Integer`` literals. -In such cases, the stencil decorator is able to analyze the stencil -kernel to determine its size. In the above example, the stencil -decorator determines that the kernel is ``3 x 3`` in shape since indices -``-1`` to ``1`` are used for both the first and second dimensions. Note that -the stencil decorator also correctly handles non-symmetric and -non-square stencil kernels. - -Based on the size of the stencil kernel, the stencil decorator is -able to compute the size of the border in the output array. If -applying the kernel to some element of input array would cause -an index to be out-of-bounds then that element belongs to the border -of the output array. In the above example, points ``-1`` and ``+1`` are -accessed in each dimension and thus the output array has a border -of size one in all dimensions. - - -Stencil decorator options -========================= - -While the stencil decorator may be augmented in the future to -provide additional mechanisms for border handling, at the moment -the stencil decorator currently supports only one option. - -.. _stencil-neighborhood: - -``neighborhood`` ----------------- - -Sometimes it may be inconvenient to write the stencil kernel -exclusively with ``Integer`` literals. For example, let us say we -would like to compute the trailing 30-day moving average of a -time series of data. One could write -``(a[-29] + a[-28] + ... + a[-1] + a[0]) / 30`` but the stencil -decorator offers a more concise form using the ``neighborhood`` -option:: - - @stencil(neighborhood = ((-29, 0),)) - def kernel2(a): - cumul = 0 - for i in range(-29, 1): - cumul += a[i] - return cumul / 30 - -The neighborhood option is a tuple of tuples. 
The outer tuple's -length is equal to the number of dimensions of the input array. -The inner tuple's lengths are always two because -each element of the outer tuple corresponds to minimum and -maximum index offsets used in the corresponding dimension. - -If a user specifies a neighborhood but the kernel accesses elements outside the -specified neighborhood, **the behavior is undefined.** - -.. _stencil-mode: - -``mode`` --------- - -The optional mode parameter controls how the border of the output array -is handled. Currently, there is only one supported value, ``"constant"``. -In ``constant`` mode, the stencil kernel is not applied in cases where -the kernel would access elements outside the valid range of the input -array. In such cases, those elements in the output array are assigned -to a constant value, as specified by the ``cval`` parameter. - -``cval`` --------- - -The optional cval parameter defaults to zero but can be set to any -desired value, which is then used for the border of the output array -if the mode parameter is set to ``constant``. The cval parameter is -ignored in all other modes. The type of the cval parameter must match -the return type of the stencil kernel. If the user wishes the output -array to be constructed from a particular type then they should ensure -that the stencil kernel returns that type. - -``standard_indexing`` ---------------------- - -By default, all array accesses in a stencil kernel are processed as -relative indices as described above. However, sometimes it may be -advantageous to pass an auxiliary array (e.g. an array of weights) -to a stencil kernel and have that array use standard Python indexing -rather than relative indexing. 
``StencilFunc`` objects contain a number of attributes but the only one of -potential interest to users is the ``neighborhood`` attribute.
_`Numpy ufunc casting rules`: http://docs.scipy.org/doc/numpy/reference/ufuncs.html#casting-rules - -An example usage is shown below:: - - >>> import numpy as np - >>> input_arr = np.arange(100).reshape((10, 10)) - >>> output_arr = np.full(input_arr.shape, 0.0) - >>> kernel1(input_arr, out=output_arr) diff --git a/numba/docs/source/user/troubleshoot.rst b/numba/docs/source/user/troubleshoot.rst deleted file mode 100644 index a2c5da1cb..000000000 --- a/numba/docs/source/user/troubleshoot.rst +++ /dev/null @@ -1,490 +0,0 @@ - -.. _numba-troubleshooting: - -======================== -Troubleshooting and tips -======================== - -.. _what-to-compile: - -What to compile -=============== - -The general recommendation is that you should only try to compile the -critical paths in your code. If you have a piece of performance-critical -computational code amongst some higher-level code, you may factor out -the performance-critical code in a separate function and compile the -separate function with Numba. Letting Numba focus on that small piece -of performance-critical code has several advantages: - -* it reduces the risk of hitting unsupported features; -* it reduces the compilation times; -* it allows you to evolve the higher-level code which is outside of the - compiled function much easier. - -.. _code-doesnt-compile: - -My code doesn't compile -======================= - -There can be various reasons why Numba cannot compile your code, and raises -an error instead. One common reason is that your code relies on an -unsupported Python feature, especially in :term:`nopython mode`. -Please see the list of :ref:`pysupported`. If you find something that -is listed there and still fails compiling, please -:ref:`report a bug `. - -When Numba tries to compile your code it first tries to work out the types of -all the variables in use, this is so it can generate a type specific -implementation of your code that can be compiled down to machine code. 
A common -reason for Numba failing to compile (especially in :term:`nopython mode`) is a -type inference failure, essentially Numba cannot work out what the type of all -the variables in your code should be. - -For example, let's consider this trivial function:: - - @jit(nopython=True) - def f(x, y): - return x + y - -If you call it with two numbers, Numba is able to infer the types properly:: - - >>> f(1, 2) - 3 - -If however you call it with a tuple and a number, Numba is unable to say -what the result of adding a tuple and number is, and therefore compilation -errors out:: - - >>> f(1, (2,)) - Traceback (most recent call last): - File "", line 1, in - File "/numba/numba/dispatcher.py", line 339, in _compile_for_args - reraise(type(e), e, None) - File "/numba/numba/six.py", line 658, in reraise - raise value.with_traceback(tb) - numba.errors.TypingError: Failed at nopython (nopython frontend) - Invalid use of + with parameters (int64, tuple(int64 x 1)) - Known signatures: - * (int64, int64) -> int64 - * (int64, uint64) -> int64 - * (uint64, int64) -> int64 - * (uint64, uint64) -> uint64 - * (float32, float32) -> float32 - * (float64, float64) -> float64 - * (complex64, complex64) -> complex64 - * (complex128, complex128) -> complex128 - * (uint16,) -> uint64 - * (uint8,) -> uint64 - * (uint64,) -> uint64 - * (uint32,) -> uint64 - * (int16,) -> int64 - * (int64,) -> int64 - * (int8,) -> int64 - * (int32,) -> int64 - * (float32,) -> float32 - * (float64,) -> float64 - * (complex64,) -> complex64 - * (complex128,) -> complex128 - * parameterized - [1] During: typing of intrinsic-call at (3) - - File "", line 3: - -The error message helps you find out what went wrong: -"Invalid use of + with parameters (int64, tuple(int64 x 1))" is to be -interpreted as "Numba encountered an addition of variables typed as integer -and 1-tuple of integer, respectively, and doesn't know about any such -operation". 
- -Note that if you allow object mode:: - - @jit - def g(x, y): - return x + y - -compilation will succeed and the compiled function will raise at runtime as -Python would do:: - - >>> g(1, (2,)) - Traceback (most recent call last): - File "", line 1, in - TypeError: unsupported operand type(s) for +: 'int' and 'tuple' - - -My code has a type unification problem -====================================== - -Another common reason for Numba not being able to compile your code is that it -cannot statically determine the return type of a function. The most likely -cause of this is the return type depending on a value that is available only at -runtime. Again, this is most often problematic when using -:term:`nopython mode`. The concept of type unification is simply trying to find -a type in which two variables could safely be represented. For example a 64 bit -float and a 64 bit complex number could both be represented in a 128 bit complex -number. - -As an example of type unification failure, this function has a return type that -is determined at runtime based on the value of `x`:: - - In [1]: from numba import jit - - In [2]: @jit(nopython=True) - ...: def f(x): - ...: if x > 10: - ...: return (1,) - ...: else: - ...: return 1 - ...: - - In [3]: f(10) - -Trying to execute this function, errors out as follows:: - - TypingError: Failed at nopython (nopython frontend) - Can't unify return type from the following types: tuple(int64 x 1), int64 - Return of: IR name '$8.2', type '(int64 x 1)', location: - File "", line 4: - def f(x): - - if x > 10: - return (1,) - ^ - Return of: IR name '$12.2', type 'int64', location: - File "", line 6: - def f(x): - - else: - return 1 - -The error message "Can't unify return type from the following types: -tuple(int64 x 1), int64" should be read as "Numba cannot find a type that -can safely represent a 1-tuple of integer and an integer". 
- - -The compiled code is too slow -============================= - -The most common reason for slowness of a compiled JIT function is that -compiling in :term:`nopython mode` has failed and the Numba compiler has -fallen back to :term:`object mode`. :term:`object mode` currently provides -little to no speedup compared to regular Python interpretation, and its -main point is to allow an internal optimization known as -:term:`loop-lifting`: this optimization will allow to compile inner -loops in :term:`nopython mode` regardless of what code surrounds those -inner loops. - -To find out if type inference succeeded on your function, you can use -the :meth:`~Dispatcher.inspect_types` method on the compiled function. - -For example, let's take the following function:: - - @jit - def f(a, b): - s = a + float(b) - return s - -When called with numbers, this function should be fast as Numba is able -to convert number types to floating-point numbers. Let's see:: - - >>> f(1, 2) - 3.0 - >>> f.inspect_types() - f (int64, int64) - -------------------------------------------------------------------------------- - # --- LINE 7 --- - - @jit - - # --- LINE 8 --- - - def f(a, b): - - # --- LINE 9 --- - # label 0 - # a.1 = a :: int64 - # del a - # b.1 = b :: int64 - # del b - # $0.2 = global(float: ) :: Function() - # $0.4 = call $0.2(b.1, ) :: (int64,) -> float64 - # del b.1 - # del $0.2 - # $0.5 = a.1 + $0.4 :: float64 - # del a.1 - # del $0.4 - # s = $0.5 :: float64 - # del $0.5 - - s = a + float(b) - - # --- LINE 10 --- - # $0.7 = cast(value=s) :: float64 - # del s - # return $0.7 - - return s - -Without trying to understand too much of the Numba intermediate representation, -it is still visible that all variables and temporary values have had their -types inferred properly: for example *a* has the type ``int64``, *$0.5* has -the type ``float64``, etc. 
- -However, if *b* is passed as a string, compilation will fall back on object -mode as the float() constructor with a string is currently not supported -by Numba:: - - >>> f(1, "2") - 3.0 - >>> f.inspect_types() - [... snip annotations for other signatures, see above ...] - ================================================================================ - f (int64, str) - -------------------------------------------------------------------------------- - # --- LINE 7 --- - - @jit - - # --- LINE 8 --- - - def f(a, b): - - # --- LINE 9 --- - # label 0 - # a.1 = a :: pyobject - # del a - # b.1 = b :: pyobject - # del b - # $0.2 = global(float: ) :: pyobject - # $0.4 = call $0.2(b.1, ) :: pyobject - # del b.1 - # del $0.2 - # $0.5 = a.1 + $0.4 :: pyobject - # del a.1 - # del $0.4 - # s = $0.5 :: pyobject - # del $0.5 - - s = a + float(b) - - # --- LINE 10 --- - # $0.7 = cast(value=s) :: pyobject - # del s - # return $0.7 - - return s - -Here we see that all variables end up typed as ``pyobject``. This means -that the function was compiled in object mode and values are passed -around as generic Python objects, without Numba trying to look into them -to reason about their raw values. This is a situation you want to avoid -when caring about the speed of your code. - -There are several ways of understanding why a function fails to -compile in nopython mode: - -* pass *nopython=True*, which will raise an error indicating what went wrong - (see above :ref:`code-doesnt-compile`); -* enable warnings by setting the :envvar:`NUMBA_WARNINGS` environment - variable; for example with the ``f()`` function above:: - - >>> f(1, 2) - 3.0 - >>> f(1, "2") - example.py:7: NumbaWarning: Function "f" failed type inference: Internal error at : - float() only support for numbers - File "example.py", line 9 - @jit - example.py:7: NumbaWarning: Function "f" was compiled in object mode without forceobj=True. 
This can be toggled by -setting the :envvar:`NUMBA_DISABLE_JIT` environment variable to ``1``.
Each assignment will store to a different variable name.
For large applications, this may cause an out-of-memory -error.
code-block:: none - - $ cuda-memcheck python chk_cuda_debug.py - ========= CUDA-MEMCHECK - ========= Invalid __global__ write of size 8 - ========= at 0x00000148 in /home/user/chk_cuda_debug.py:6:cudapy::__main__::foo$241(Array<__int64, int=1, C, mutable, aligned>) - ========= by thread (31,0,0) in block (0,0,0) - ========= Address 0x500a600f8 is out of bounds - ... - ========= - ========= Invalid __global__ write of size 8 - ========= at 0x00000148 in /home/user/chk_cuda_debug.py:6:cudapy::__main__::foo$241(Array<__int64, int=1, C, mutable, aligned>) - ========= by thread (30,0,0) in block (0,0,0) - ========= Address 0x500a600f0 is out of bounds - ... diff --git a/numba/docs/source/user/vectorize.rst b/numba/docs/source/user/vectorize.rst deleted file mode 100644 index f1baa434c..000000000 --- a/numba/docs/source/user/vectorize.rst +++ /dev/null @@ -1,297 +0,0 @@ -================================== -Creating Numpy universal functions -================================== - -.. _vectorize: - -The ``@vectorize`` decorator -============================ - -Numba's vectorize allows Python functions taking scalar input arguments to -be used as NumPy `ufuncs`_. Creating a traditional NumPy ufunc is not -not the most straightforward process and involves writing some C code. -Numba makes this easy. Using the :func:`~numba.vectorize` decorator, Numba -can compile a pure Python function into a ufunc that operates over NumPy -arrays as fast as traditional ufuncs written in C. - -.. _ufuncs: http://docs.scipy.org/doc/numpy/reference/ufuncs.html - -Using :func:`~numba.vectorize`, you write your function as operating over -input scalars, rather than arrays. Numba will generate the surrounding -loop (or *kernel*) allowing efficient iteration over the actual inputs. 
- -The :func:`~numba.vectorize` decorator has two modes of operation: - -* Eager, or decoration-time, compilation: If you pass one or more type - signatures to the decorator, you will be building a Numpy universal - function (ufunc). The rest of this subsection describes building - ufuncs using decoration-time compilation. - -* Lazy, or call-time, compilation: When not given any signatures, the - decorator will give you a Numba dynamic universal function - (:class:`~numba.DUFunc`) that dynamically compiles a new kernel when - called with a previously unsupported input type. A later - subsection, ":ref:`dynamic-universal-functions`", describes this mode in - more depth. - -As described above, if you pass a list of signatures to the -:func:`~numba.vectorize` decorator, your function will be compiled -into a Numpy ufunc. In the basic case, only one signature will be -passed:: - - from numba import vectorize, float64 - - @vectorize([float64(float64, float64)]) - def f(x, y): - return x + y - -If you pass several signatures, beware that you have to pass most specific -signatures before least specific ones (e.g., single-precision floats -before double-precision floats), otherwise type-based dispatching will not work -as expected:: - - @vectorize([int32(int32, int32), - int64(int64, int64), - float32(float32, float32), - float64(float64, float64)]) - def f(x, y): - return x + y - -The function will work as expected over the specified array types:: - - >>> a = np.arange(6) - >>> f(a, a) - array([ 0, 2, 4, 6, 8, 10]) - >>> a = np.linspace(0, 1, 6) - >>> f(a, a) - array([ 0. , 0.4, 0.8, 1.2, 1.6, 2. 
]) - -but it will fail working on other types:: - - >>> a = np.linspace(0, 1+1j, 6) - >>> f(a, a) - Traceback (most recent call last): - File "", line 1, in - TypeError: ufunc 'ufunc' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe'' - - -You might ask yourself, "why would I go through this instead of compiling -a simple iteration loop using the :ref:`@jit ` decorator?". The -answer is that NumPy ufuncs automatically get other features such as -reduction, accumulation or broadcasting. Using the example above:: - - >>> a = np.arange(12).reshape(3, 4) - >>> a - array([[ 0, 1, 2, 3], - [ 4, 5, 6, 7], - [ 8, 9, 10, 11]]) - >>> f.reduce(a, axis=0) - array([12, 15, 18, 21]) - >>> f.reduce(a, axis=1) - array([ 6, 22, 38]) - >>> f.accumulate(a) - array([[ 0, 1, 2, 3], - [ 4, 6, 8, 10], - [12, 15, 18, 21]]) - >>> f.accumulate(a, axis=1) - array([[ 0, 1, 3, 6], - [ 4, 9, 15, 22], - [ 8, 17, 27, 38]]) - -.. seealso:: - `Standard features of ufuncs `_ (NumPy documentation). - - -The :func:`~numba.vectorize` decorator supports multiple ufunc targets: - -================= =============================================================== -Target Description -================= =============================================================== -cpu Single-threaded CPU - - -parallel Multi-core CPU - - -cuda CUDA GPU - - .. NOTE:: This creates an *ufunc-like* object. - See `documentation for CUDA ufunc <../cuda/ufunc.html>`_ for detail. -================= =============================================================== - -A general guideline is to choose different targets for different data sizes -and algorithms. -The "cpu" target works well for small data sizes (approx. less than 1KB) and low -compute intensity algorithms. It has the least amount of overhead. -The "parallel" target works well for medium data sizes (approx. less than 1MB). -Threading adds a small delay. 
-The "cuda" target works well for big data sizes (approx. greater than 1MB) and -high compute intensity algorithms. Transfering memory to and from the GPU adds -significant overhead. - - -.. _guvectorize: - -The ``@guvectorize`` decorator -============================== - -While :func:`~numba.vectorize` allows you to write ufuncs that work on one -element at a time, the :func:`~numba.guvectorize` decorator takes the concept -one step further and allows you to write ufuncs that will work on an -arbitrary number of elements of input arrays, and take and return arrays of -differing dimensions. The typical example is a running median or a -convolution filter. - -Contrary to :func:`~numba.vectorize` functions, :func:`~numba.guvectorize` -functions don't return their result value: they take it as an array -argument, which must be filled in by the function. This is because the -array is actually allocated by NumPy's dispatch mechanism, which calls into -the Numba-generated code. - -Here is a very simple example:: - - @guvectorize([(int64[:], int64, int64[:])], '(n),()->(n)') - def g(x, y, res): - for i in range(x.shape[0]): - res[i] = x[i] + y - -The underlying Python function simply adds a given scalar (``y``) to all -elements of a 1-dimension array. What's more interesting is the declaration. -There are two things there: - -* the declaration of input and output *layouts*, in symbolic form: - ``(n),()->(n)`` tells NumPy that the function takes a *n*-element one-dimension - array, a scalar (symbolically denoted by the empty tuple ``()``) and - returns a *n*-element one-dimension array; - -* the list of supported concrete *signatures* as in ``@vectorize``; here we - only support ``int64`` arrays. - -.. note:: - 1D array type can also receive scalar arguments (those with shape ``()``). - In the above example, the second argument also could be declared as - ``int64[:]``. In that case, the value must be read by ``y[0]``. 
- -We can now check what the compiled ufunc does, over a simple example:: - - >>> a = np.arange(5) - >>> a - array([0, 1, 2, 3, 4]) - >>> g(a, 2) - array([2, 3, 4, 5, 6]) - -The nice thing is that NumPy will automatically dispatch over more -complicated inputs, depending on their shapes:: - - >>> a = np.arange(6).reshape(2, 3) - >>> a - array([[0, 1, 2], - [3, 4, 5]]) - >>> g(a, 10) - array([[10, 11, 12], - [13, 14, 15]]) - >>> g(a, np.array([10, 20])) - array([[10, 11, 12], - [23, 24, 25]]) - - -.. note:: - Both :func:`~numba.vectorize` and :func:`~numba.guvectorize` support - passing ``nopython=True`` :ref:`as in the @jit decorator `. - Use it to ensure the generated code does not fallback to - :term:`object mode`. - -.. _dynamic-universal-functions: - -Dynamic universal functions -=========================== - -As described above, if you do not pass any signatures to the -:func:`~numba.vectorize` decorator, your Python function will be used -to build a dynamic universal function, or :class:`~numba.DUFunc`. For -example:: - - from numba import vectorize - - @vectorize - def f(x, y): - return x * y - -The resulting :func:`f` is a :class:`~numba.DUFunc` instance that -starts with no supported input types. As you make calls to :func:`f`, -Numba generates new kernels whenever you pass a previously unsupported -input type. Given the example above, the following set of interpreter -interactions illustrate how dynamic compilation works:: - - >>> f - - >>> f.ufunc - - >>> f.ufunc.types - [] - -The example above shows that :class:`~numba.DUFunc` instances are not -ufuncs. Rather than subclass ufunc's, :class:`~numba.DUFunc` -instances work by keeping a :attr:`~numba.DUFunc.ufunc` member, and -then delegating ufunc property reads and method calls to this member -(also known as type aggregation). When we look at the initial types -supported by the ufunc, we can verify there are none. 
- -Let's try to make a call to :func:`f`:: - - >>> f(3,4) - 12 - >>> f.types # shorthand for f.ufunc.types - ['ll->l'] - -If this was a normal Numpy ufunc, we would have seen an exception -complaining that the ufunc couldn't handle the input types. When we -call :func:`f` with integer arguments, not only do we receive an -answer, but we can verify that Numba created a loop supporting C -:code:`long` integers. - -We can add additional loops by calling :func:`f` with different inputs:: - - >>> f(1.,2.) - 2.0 - >>> f.types - ['ll->l', 'dd->d'] - -We can now verify that Numba added a second loop for dealing with -floating-point inputs, :code:`"dd->d"`. - -If we mix input types to :func:`f`, we can verify that `Numpy ufunc -casting rules`_ are still in effect:: - - >>> f(1,2.) - 2.0 - >>> f.types - ['ll->l', 'dd->d'] - -.. _`Numpy ufunc casting rules`: http://docs.scipy.org/doc/numpy/reference/ufuncs.html#casting-rules - -This example demonstrates that calling :func:`f` with mixed types -caused Numpy to select the floating-point loop, and cast the integer -argument to a floating-point value. Thus, Numba did not create a -special :code:`"dl->d"` kernel. - -This :class:`~numba.DUFunc` behavior leads us to a point similar to -the warning given above in "`The @vectorize decorator`_" subsection, -but instead of signature declaration order in the decorator, call -order matters. If we had passed in floating-point arguments first, -any calls with integer arguments would be cast to double-precision -floating-point values. For example:: - - >>> @vectorize - ... def g(a, b): return a / b - ... - >>> g(2.,3.) - 0.66666666666666663 - >>> g(2,3) - 0.66666666666666663 - >>> g.types - ['dd->d'] - -If you require precise support for various type signatures, you should -specify them in the :func:`~numba.vectorize` decorator, and not rely -on dynamic compilation. 
diff --git a/numba/examples/.coveragerc b/numba/examples/.coveragerc deleted file mode 100644 index db8bf3e23..000000000 --- a/numba/examples/.coveragerc +++ /dev/null @@ -1,8 +0,0 @@ -# configuration file used by run_coverage.py -[run] -branch = True -source = . -concurrency = multiprocessing -parallel = True - -[html] diff --git a/numba/examples/__init__.py b/numba/examples/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/numba/examples/binarytree.py b/numba/examples/binarytree.py deleted file mode 100755 index 8be2919cb..000000000 --- a/numba/examples/binarytree.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -This is a more complicated jitclasses example. -Here, we implement a binarytree and iterative preorder and inorder traversal -function using a handwritten stack. -""" -from __future__ import print_function, absolute_import -import random -from collections import OrderedDict -from numba import njit -from numba import jitclass -from numba import int32, deferred_type, optional -from numba.runtime import rtsys - -node_type = deferred_type() - -spec = OrderedDict() -spec['data'] = int32 -spec['left'] = optional(node_type) -spec['right'] = optional(node_type) - - -@jitclass(spec) -class TreeNode(object): - def __init__(self, data): - self.data = data - self.left = None - self.right = None - - -node_type.define(TreeNode.class_type.instance_type) - -stack_type = deferred_type() - -spec = OrderedDict() -spec['data'] = TreeNode.class_type.instance_type -spec['next'] = optional(stack_type) - - -@jitclass(spec) -class Stack(object): - def __init__(self, data, next): - self.data = data - self.next = next - - -stack_type.define(Stack.class_type.instance_type) - - -@njit -def push(stack, data): - return Stack(data, stack) - - -@njit -def pop(stack): - return stack.next - - -@njit -def make_stack(data): - return push(None, data) - - -@njit -def list_preorder(node): - """ - Returns a list of the data by 
preorder traversing the tree - """ - out = [] - - stack = make_stack(node) - - while stack is not None: - node = stack.data - out.append(node.data) - stack = pop(stack) - - if node.right is not None: - stack = push(stack, node.right) - if node.left is not None: - stack = push(stack, node.left) - - return out - - -@njit -def list_inorder(node): - """ - Returns a list of the data by inorder traversing the tree - """ - - out = [] - - done = False - - current = node - stack = None - - while not done: - if current is not None: - stack = push(stack, current) - current = current.left - - else: - if stack is not None: - tos = stack.data - out.append(tos.data) - stack = pop(stack) - current = tos.right - else: - done = True - - return out - - -def build_random_tree(size): - """ - Create a randomly constructred tree that is fairly balanced - """ - root = TreeNode(0) - - for i in range(1, size): - cursor = root - while True: - choice = random.choice(['L', 'R']) - if choice == 'L': - if cursor.left: - cursor = cursor.left - else: - cursor.left = TreeNode(i) - break - elif choice == 'R': - if cursor.right: - cursor = cursor.right - else: - cursor.right = TreeNode(i) - break - return root - - -def build_simple_tree(): - """ - Create a simple tree - """ - node = TreeNode(1) - node.left = TreeNode(2) - node.right = TreeNode(3) - node.right.left = TreeNode(4) - node.right.right = TreeNode(5) - return node - - -def run(tree): - preorder = list_preorder(tree) - print("== Preorder == ") - print(preorder) - - inorder = list_inorder(tree) - print("== Inorder == ") - print(inorder) - - return preorder, inorder - - -def runme(): - print("== Simple Tree ==") - preorder, inorder = run(build_simple_tree()) - assert preorder == [1, 2, 3, 4, 5] - assert inorder == [2, 1, 4, 3, 5] - - print("== Big Random Tree ==") - run(build_random_tree(100)) - - -if __name__ == '__main__': - runme() - print("== Print memory allocation information == ") - print(rtsys.get_allocation_stats()) diff --git 
a/numba/examples/blackscholes/blackscholes.ods b/numba/examples/blackscholes/blackscholes.ods deleted file mode 100644 index 77908d4514240edb494aca4b6739315f39de8531..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 38825 zcmbrlb9`k@w>27f)JexqI=1bO-LY-kwr$(CZQHhOCtvz`p7%ZH-22}1$9Ff@ulA@p z$DFlLYgFx3Qewa$hyVcK000H~`n>*TtRYkY004i!Ul4$~vAMpTvz5NKm6e&XuC|@A zr3JNvg&viqwym)(m8F%wg`TCZy}7=H9hI$>jlQ;?t&zUIoz(x}{G$2y3;nCaXK7(z zY-n%u7n>~|wXL1DoxQD&whi^)e&FEXf93LTr>_F>fAG`R)zvrC|Dv(9q1LsxvH6pP zlerl%fRxy`Ke+<{{n>oI{=d^f{i^+crek1csconKuVntUWBwxgXY-#LVP$D$Z}qhw z?Eh)RfAYip57De_EDde+ZU0;I|C8r`ifCtPX=bHup>OuTMEpf2KD8rzv`TiO0E#~13$S=Z9S?w@0Lu5N0%%7Xmf(b;oFg-j1T zguI#8c&ue0^C-5OyzbxyrfbI&lxz+4+ej z=Ivy#@^pTSSEw2XjF^wXCJ-2Ftfv`=LNcJ!Glk9OY4^0V9TEk~vw-;4szLqqr?r80 zRM#e89?XsivMb%1U^l^?4tEB~hRR^99uovJ8;OjB*{O(%0x~?YVySOz;ef*lzo!I@ zgqfMCaeHmA*zfT3x!a*u4uK;A>&v3;FJU<0Z%eKDGfSuD3*p4*l$yhyl zv`A`dYN13uM8Q#78PV|BCHSkdx>d7Wc@xIW@O#nJo*i2BY-{b-gx$GtQZ*(o5)T9Vs3i}|}}D>WT! zrXFa|!4VX<%Nn=IjVbYz*cGYh-_NE{DKOo&)jBZw3vhR^Ag)+Ksls=kC(5`xJs+(d zDi$^L&o(X2o_Nxnc;@M%kNwga^>>9Wdq_wz46Iq6c`di$SDkA#&I@1=RFkN3pWCJp zx1wmLz@c>xdlnNtE3HpGJf(K$itN-hD?_AZRu1N;HJRCNBqZA6tw9X2_;S5%{ZfPD z*>3we@);|)w@-E8N7Is+bu)sCKlv18gYVH0)3f<@lmqnO!1ITQz`{S{Dfgg3bMHj|05;vgiWeelf~ zl!D{6Yda9=o3H=RIWV=l(*2yP8+1DGYHfXKqARu6(c3lP@;~qn+O6=Q2*NYn? 
z)v?6W3XOmrx$>=miAo$luKG+ZuJNqCn-XKOVptZZQ}h>WIIYUtlq)EN*Z7~UMKY6L zk`2Zl8W5=|6|k9VMZ|8C*~HYQO4aVGcZ}Ft@P245H5f1qRcdt=j01vBGKe=$5j&5q zz{+rG@Sg8#2?G32iqwfM2%*tqrzni|>R^V_6e4 zIIO+uL{9Dv>WqN1tvr z+N(+!qMUJFkNDhDh&LvSzmp}~P&}b)nLahfc&)AByESc{A5Va$3+EVWB)$otH^~Z%|5-Uj!BgQVzkbmZM^jq1z(tV}vc_8!_q^kDEjSs|GY)$)w`nm~ zdsnFa4YpPK!s$>hMJUbxw$y|W6obkVJJU41%_BZq^d%WV9PBwa7;H&7X}yrNR=+!S zsB%D*veM1RR5;`J4r{#rMr`;ZQ6oA|8u110B)s8-cCzuAo?N!}@Muc{gyi;2->$`wWtR?Wyt{1&6s6g4V-oyD&CB+=AHaMm-vqH#{>| zn;*}C{6W()ESX(z&lgEiN~gq5bWsn$nco$+$ADYLTo1NvTW@{`Z0p$ccs%LeA$;C` zV;%S;w7|D#0P^`xFHgmbi7P;Wl;dL{wBGEA4mAj**xxO{olm2J)MJ#GP+^OS#$?}x zlk535_KE5FICz9|1m?Kca`eiy-c=OkC#Fb82wv~%UHw6u>ChD>kEq5w@>K{4r# zx`AV0v}qmkWL0Fg2QU5Jh^4c4;TSCR6T&&2V9!0q?}!&(NdyHUEErLZR;SevZ5}~{ z#-JWV?M@c5CLL&f2f-231}_(4I-DqKkIKF~IR5koU6KRfXci{xTG(dE5!^poj$b&H zIPWN$n12Kz2T4+klvF4zFOxB&>-GH=&I#!HlwaC!4OVuDO1vAfiMX&TMvGtFb_!4Q zfgwP;oKmXMp@<#DV*9Y7zS<|0ODK1YgrAEXa`;!K13l6NSXC7qpHn6Xo-9yw zIk+F>m^M~mQqR3T*m~q0+>`It>DwQE&!UGnZ$a=C3GHI70+bnaDF<=MR=B*f0vg5l zI6ifq%TM7VhU_hoA^h!DeCO)mR(Q}F7a@EjMuvixNplv1ghj0X zf$V7nq%|Z}6@^avoO%%UU?m9_O1;u2e#AGQS20#zhUZ*|b=Wd!%ODq5qJpUE#~DNu zo`Qf;$S`3DG9ViIiO%L)TC}m7;pc2E0XaQ5f~WPqpLgy0SUW%ac297dR&lAyXCHAt zGviQaJ8tZaDw@JdJL!4K%Q|6ZC|n!(Ug&IgXm*L^<=>3Gk5kCu*OyD5<)B( z*S>X7(FHwXgp$X{L5j9d+KKMjZRUCeK@TnC`!vWr~fvR8(rgqK7c zj_iebH}k}GUXIXcMgHc=NusqqAtBj*F#B!6-q?;2i#mmNz$DH~6GH=#C#LO8Y(|9p zy4XQ26`6BHBFxypUSELPbYy>&oklxV8~{3${cTQ3o}a=XdK^LSCcMN$4AyHDNNQ%o{%fI;81QYX6+a^ zlEjx})rlEj^Da9hP1+g>i5*{)EAxQo?GY=Q{Bu&AGm2BHI*$&Rt#@sw4)oy~K3+80eMLtn16}v-EeqM*Kqa~6 zQpZ0uF)N;jSTBsHl{P>R+*Z)L2&sZzuyQhRuS&AD%Y&hO%xi+CQ3pC3fZCBzJUVP1 zmCwyK(hTBZt&h(Ce88&|g*<_0Q%|mkO&Y?66qGG`Nk54DXKYoP2wT>_ysbcQM+8I( zk;m35b<68#lL`k2+(Y$qdPfPcWhwCCm(2kcCp9S$o}mc&3z&NFd;@+`FhR4yz$RTKO=RSd33!bj zMNM$suYvPXE^jp;8Zqqh(igKAZ;CxJMN;83Raj;^q$#&aQeYJ)UoGBZXQ03FW2kM_ zan?vdbA7{IO*ME>4(#5d*WJ00wXW5Az+UJ%zu|&35b8dQ2B{qwekMu60=J!JakBhA5e3eh3r#j44~ugZtAV@s|{TxIb7$L(H3`Ahs*)azOX 
zHDCR=F)7bWX1#}Nkkf0DerRs_E5SnIF)wB>1~oF4!G@e!Zu24^Cblg>e1Emu2)baJ ztpO(Ux^~wbLi3_DrWFpo0>Ka(W0-?kNt4+C7n@4{b5&-(qh?Nhu-Fp!jqsHWN9Jaw z)I=<@SI&$QeL~d;u{dsnga;WEF7Nwq@l?dn5O_FIQZ`$K>0Y|F7vS_hdvekSI9sVn zTo9o8ktxZY<#A7Pd%mCzP~~RhIYbA;w4$3YXuOkW#`F>9ytH4Pb?+H7KyAa_k^?}F z4=Wp|CD>RtOExMp0X)eAZZa^^*fb!^6)-ga)3?3sq_Xzp^>bMPG3~>m)%9M!;G8&w z3I9IEgQoqAu-pg(JBFE^YI;$N6QevFtZ-eA&$SRL|7!007M1{~BEXr!hOm*O*RH z$3$P(PDbC#Oj}p~&oqH8_21A>+H-YxGt7Co%v+%}mfx73gbrNid1N5yI-AzCp!~aP zlfl6v&`e7|vl;lin?z0+z#wzHFbFO9OY2NRSCVSnZP z6#l_0EFq8|QbIZRKHlNp=<3>d=y>RUs_3Y=lD_`+er|srZ&kirz5iakVG7owJIA9F zG7V`XNjoXNs|csw6hR{Y`;POBthb}M_)^O}__XfTdJgn74PWJ$W?`jD!2pe<_Ti>a zsGsgo>ymG&tSX-PY!+oox=dcBpn$}oZ{vroa#%*7QC4#o=_bf_AEVw9L!ig9NtW?MQU9EQKlg zXg`JDx1zKcF`FEpgu|ui*dYLhezFm%)p^8bYv5A9UXYbINNi@<(+utVNn)=GDUPU}zpxww^T z>aR&r@BN4#+=T(a)hzdyb7eGm8{pQfBE(qP|g$9)vI0r0X9q zh6a(LAgqTqG+~@hRuH`)8aoq+Ffh8~&{Qo0cAsJ=+Mwg->&Y?w-V8(k>IGucvNt5z z3lCxLJ?7{>WlfvARTO3fSB)J$X!c(ECmv|WWF#3pZuXMtBL-groHS+XOdBx#tr-m-xdF$B+ za0<4#QP{&K#AWvrSPYm=!a7V{_1%B#_@q^;yCHnD2_hf69J#B zp-6U6fjyMRvD_iw^kqcpO96OkYIqQVr`)hy67bQIppWJl=v?qmeRIOPSn}nVw^s1V z+k{lieVM{cNbSJGbpyDJ?frs)Li1*X3_b5Ff8FL;0QHL2HP=2m?DECbpm3R_3nKN*w+e1~(^hI_?NP zLOL~H;pJ?E{OY+4-0Ha{+9SWZQb_BUJZhZsSdnlw!nxOV#t7GOr4FCozj6jx4XiqZ z(P|Bc;obP{rcjI3r>XV9YKC2JneGWx`nbFCqVeN4`mVOqr}Bi-nX-4T62D{Qstyl<)9R@cCrO++!ft* z(SpB|QFA|R(Rx5#>vD4{Vzu-JnvCq_UaVtB`89;1_-ZRVV|-!@TO-rrN(!$Mj#Zc> zW3Z>}JOAq-{B#3FrQm9Z%%O1eCspYoDy-|zlor!4wbx$^`i^$c{IZPL3%?jXOqo|- zwN)KN4Kzea*$AIQ4A03YNC0hoi^<*5& zgW@RO1&F?w+J^E>ILhMMu$}85A0n=k!pP{(6wG@bu1v4O#3~V;?7cwFN@s;k*v<#N6u0;32e{i4`GMh&H_K(m#m+vC zfTj12fXV{B`MI*x(qcwby9K1CWGbsc4%EAGyQJYydNn;9kAX|oP~a7kpuJAArD$Ty z?%qe${-IcDHsMD6dBO4>)i4g$@MoeV4*zF*#hS@ct`|=l*518tTQ80Pu3IcGC%Po! 
zs%K}byfzT*t5k%Hl8q(S6P5n<2_0a;=(^#?5?NP3XwV{}Gp1*BS=| zpH&XUfT(g5;3J5^3;r__6q29UuS5zO{o^SJ-R)hz7dAF#ku(-(;Wd^U67~#} zk1uQV@l_+d_ZcxkC0vSg>2nz(PBHXio7JD}9MSU9C~i9P7O`giW`>nY9zRS@5W0LNp|>^gCHRMxB%m} z)7Rr?k|R2vCdLL_LX5@zZ7wUDq?R)008 zXH_7d;mUxJ%sWzI$3dF(dz*5w=$5Nkv$~upu%$u{{FBr#s6j9Ozf3w6^S=Im`MVs6 zWVEgH6Yg*G+j^_9MDUF=e!{D44ob{h0v~2_Yp^yYWb{SVbADTCiADWF0R_Smt9{S! z9c@v6#Z(}~b^`5UPOxq*iUZpy=>x6=c&Zp)qaw@qZX~7urDi`Irg7o4>+50P5`QUFFfLC{~ac47^LSlfH@;ly&0Bs-SvSd!B@bX=j zaaRQokQ({0lNy`3GvQ_!ngISG+H);EsQwKzgOWt=uUB|3H9O zFYlnmUzwzV!U7+%KIIm7KhmV9F4>^Z-gSd`k+X0|m{)r(6au(KXgjyTZM|>5dJWkC zgOsa^i<*Qf;O4vR+HENArqQUz+y(UN@80b(V{RCnXc~?-n#de}bs= zUArk#?>-iCV^X7htNIt=va9}6p!CBq89k{$O^Rr7?J!wwU$UOPG3cZNvce68F!lYr zTn!CG;Oncs{slp)PC-7Lm-TP`xH8D(SYrT~aY}q;B#a{B%=_DS7wIFgp)pZ zGMhNqX#XR)s7+W6CS8OmT}sW*_PNK%AA@}ow*I<32SZy)MK*-2g37U%fPiy;m@H z#tw4QvS((J3oaIT{MFHHqhBD^{f-m4Mcs^sjqCf1@^#xn5f@9ba(_q84D5iA#=|xs zdDR`l}%==K|%d4LJ_3yX@n5H%1~gAdOO zp?4WQYplYdWwa8Ry=^vMyzV-e=&Z$17M|#EdRBpoart<>R0ARz{x%c{m-Zk8L{_Cq zQXAKf5=2%xODTdl1#0*k7dT>)8ySScppR^kdy*R)Sv-M0*XXD}BybOuGUDL1Hh>>R zVErV#jteEy=%qvwFYa(Eg_QKHZ!O|nNNWz0ZIBlkV(}#{g^e>-lDd$O3=x8lYoxmu ztZEe{uDtteoZm@v3bpG5C;j0sG6eKXS`Iaex3OD~j8V&{5z>p)U4cqkh>^E0APRyq zwv56voCQ|u3zg&_?~pMrszd*R*b(YXC9_K|AdqYx{bcwhjsCD*uhbj*lw33?S~d+* zx$x#Px*Tz%F4YrD%0KqqG`SuW24y_w(z?KCFEpT*V{$`j**D-{JkPK`x<&4z7;P(X zgw}i@FM+0NL!J3isaXV)SXlaND1|WzS-Emwum&wP^lDJR0;J1VuwG-OZStlQ*Rf+% zW-sXK0%Qp4V531<0UusQ6(vSPl2>s*qaFf=tyES^1|tKU4Zt`MGcCik!C3U_nUK2} zK&?EeLKusz(#c=u) zbX=$TU8TD;QUmCLy49{*3N1@vaRu(9>*}{=oOAgVH-Aa^)6+>TPYobXf=&H zvBVfRW=!(nWg^c_R4u~VbMChX9Q*_1aw4}OVY0RR1|YD%Up3_0AT^w~z~|0A;^=S+ zWv=dm^h@CH%?v_7rsC_SWfj>p4nTZpjNVJ6WT3`eI|j_ah?HC>DD1lX?@e8TebMch z3A5d0Ebp;dtPQhW?6ww|o7jH!JD179sR@F@1|J#8Uzg_wJ%ap&g78M)s5sIIc^Rk^ zH45Tu@j|8Y6Nrjnu_F163HQNLeHC#1WdD;{4@p^|eHGXdINJeES|<-eO7#b~^@bO^ zuF8ie)FD)nIR>6Z&cYz;H-wo04mNiE^OA#$WKPg=itFE#pVDVUxto2W8D-sZaFvpi5}?GW?Q_f1RLoO7s`YMlVu%ktfM@-= zVW)0&P}fh=a?Wsvoz@@Vr|aj$#y!)YJOwSozNO&KI3N7&!9Teu;)i^9KcJm{S(;Mx 
z(ZeL(P#6=|b#a_U8ih98uX(-DpBfWczf{y6%hB|PBg%*;+? z;N8X)WLox$VmAj34-*sj>%prF)M9IM9X@-3-)A)$6RdSvu@ffr5E+7HvTAedB2IA_ zh+JE*?3^L=(lKv7!Wd0q?{HU6Zqdl!NzL1mJk* zB5dt@HzM5n+Z3gfF5btHY#Q|K01qeQn&l50VC5TN;-4T0S0kAhtp7mM1^oeHO)Y{3ANb_LtQQP)Xq%7-~tlQ|nS%oxxMFb0oiUbJqII5i+fJ%uC1W8fRTGOqN=C71$(j_{IV+q2kLB0V`bBa}tQ=2I83-V5O;)KVKa%DKY-MDj zJX{iHW6ACu$?~PlEWf-Y{`R`W!ipG9J%Ad_8<*~;Ro++m(sgN+a1#zpb2i8XqScDn zr`1IP^TT~7doL9U4@koosj4mT3b8YHBpI7w`b$szV=P9?v#r+OLYV@NTq*2Ymvrn* z(4jH`BJdoFT#6{RCvi|o@%i)PoKs9J+K zRFaQJ(nxHo!$@mt)D%P1$ZAR8C0WxulISr7W+GrnVyFec$dmBcP3-c)dBvskNcZh# zIR9OBVv>v(IWZTHM{hDq3O^Z)ckP9TpFh;IEFRsfcq|%?VW^52d@vWv@2k-V6;hVb zi^xi^sTfk09QzyYS%U!m4o^dBFGH|ks2{XNW^Y3<6t6nXC0c1J)QKmWI5in>hcEU; zdawAvE@bEtuR06m+6V8Ba>h652QngH+@iMuVm7^8};?t#x_zjpYIEOprH$`)3gO3nidG&F_SiI+W*ng^kKu!T;K9$oXuF)#I(c(4 z_{r+DJ#T)KMiP8tzQ%Qk0I*m4u182Lgf4cMCyARE0O}g8R>tQM8jjkv$HweCh@3h( zN=LUAEvYkftIO?Bk=difH;1^a36vwO3&gIG+o9C|c0iG^Mu~3?EoccPTN8-C$RTr- zu5UF;DIJ>RI%PBqZ?wK-zOC9MOJLPyQ?bLTafEY+ZZi7PcdUQ(od%6*9(>K0zSA@b z|6AYn2guyJy*Anl0)(*h^rS87ScFcuczSLccve{Z-?b4v)WJ5#oxW60X z27N3uWnk3UCnx>aHQHuKRJv9f0R$GRB0IB32-SC zB^SlO8zoHR>|840)@a-Eo+B+YR${9gWC%<&U&` z8TN(!al`eN{m$v*)G3M!`w)1$1=t>9&6#0c6(XCJs)eQ&`VMD4-OzL=t0~x<3|Wpp z_u5WOE$Vc*HaMoKRGb9wd>+^z%xDzwJ1AMZu-H_Aw+c9p00=Zswp$&#To_y!QO*_)19}*)Fh4v3 z-hkv)Bz!zhK}ZTJ0{myOvHNCn`f6+Y>gk%jBm07S&FjTQEB)o|7zaJOE4wPTnTax% z+J(2uR_Hg;pKjNv3P-BYRoReRlZal1SBU7S*j!<*&f3E;+wxfQFGxsMAdh6$)c&Bt z+J?s2r5xD|P7jFK1;!JJb$iKEk*>!Sb2Ls7#^2mTuDb?}AhX)*ahq(MZNefr3t87t*^*?522sy3)ID%pIWg4f{{C!_289tAx2236&{N=PKOfj zkO1j`^kPLpnu~Y~`|cF6=xkF0gytaf&cKj|RFQ>~wWVJ}SyF~Bk2QcI`fjh3ZLgcT zZ_o8Z=fFRg3SgoruMP!%lMZ(CPu#^b#dyB@NfynWG-(}OAMWP#BYtQzrSIQm>XDtM zp%pefrxfuAKt`<@aVMs$M$3rsfyKdGEShh7V9)bz~p zEunW}6usP3N&j2bWlH}a-Zt7GF%7^Q;sdMyGJIR*iU=(;-wWqayMc@KLia~v8{VGzXZ|K?A7Lf9^QxvTs&CUC60YSdJN7@7p56L? 
zGVp6Vw|oryixdU_x9Tv4uaX$HMcUyW>;UoRu#Q8OIa25y3{$slEFgAI4ND5tl{A`V z!r&W#s^A5%kC;7touwC|JgS5YkHB_aM2Xc)Z#2KeM{5poWX9Vg8`LM{>V*EPT~eVp z(N*mYfWhI(;JlGPPS)kyGX}F=--E1NS78$}q2qeoz&$9}KK8=$MhU|pZG%)K95jP#uJj`D^AHUNpB@Gg*! zs@i!&H5SKC!AFk`&$!ag%IH_|{bQ(c}y zp9+mj*iG^PT#z~LeHYNyD(L|9EfVPeuuwxL&7%`!?*04Cw&XN5f4|? zGM5FHtsB+75T$j@PidVIS3ezBmpCq{K(@Uq-=->=?l~$E7sm5tRl_sAm5(2iWNqPp z%N8w+92SZZ?whaRkBJ|$0!$owz>_ERLF9uk{w~kfk1c`+r`QTFqJWoAsD?Kd$*)~#Yp8ZaB!g(_LBWRh^`e~%RYs0 z{?^}q{+4oO5>6YoAV*RfLz~^Mn`jBlylEY%-D&N6WH)agX5a3*lL58ODp2dx1)!*W zp8O7)nr3g(`^wg*ewYh*CQJsuHg*!Y6~dyWRFR?-FXn|YqP_kN_Vjj3BO{+KV+eeSsdI# za52`3iIlw>AL+s#kvH--#{l}V=dq`MIN;KuV>awQwV=2LL$2NKc7mQYvxPo;^xwk= zT=cso=H1}DMd}!w(ma~!Jpoq`D|6A>O52-dtc3y)2cG|i0dAL9t8grlk30SS9d|e$ zTgm6lItm+}T#Ipo%0$YHV9D&@-AhLM>LgBh+9O06djPe3h#tZAC1T$oCb6U@Ja_!w zWQb{EwjefyD?B-E0C0c@Q>{%CLV?7ky53U760e6e{!PM^`pk#3D&9VGM}qv=$v4ok z<8#Xe?sh#|bR($r(^u49=9^;ljqTHrCzOa<>ygnw9P%u< z18q-!Av+GgQ~Pr2+{Sa`7o>;9*Oxp3ZS{Sw((u`sr^_*Z8SPKdKk|t<7}5FRYw?)1 z`TsUE2Kn`5o2{L*nZE6x1!wEZrxvSBh##GWKjyNPylG=F3xn5}x~sI8ya-8LoB;%L z^m5`zqT;CCU$A0YG%@R!r|hkxe(gEJaA#n^?yt7$j-+wI#oU_;!6c%n5olDYlylIlBEXocX@DC@Ngu%p-s#LhQ2|x<6!331 zzcDmOkdn^`;TfcYk#k&`Pz8a2n<)vlLOx%X7QDT6fgIaoAQ+0%}%+(E_!p>GII* z&z0tn10v5~x3#V86r`-AnpUVQ2&}dR2o&88^9)IOS3u0D>Fl$oR+J*l(YfSpk8EFZ zAvaPWD?h0)#m_%Yf_S9n64emJU?e%bEl`+{-uO8}|8tSl)T;D?dd*yZPZtK1YXFxA z4kCGNV`Z-_5bL$9vEAAe4iVURC>1hF#=&G@HOB>vkO1Gu2WQ_~beU&Q&i}`;D|Agt z0kf|0p|0R|6W+wvN&sYP>xunTuBnI&V#k<}yh-gxz)PnPA(nZBN%i&A8%0)&$)gZ` zejT-{o4pE##5i%etSFiTg)2;`H1M=bDXO1QemPcatE9htku!3GME&XAZ(dOp8B8g1 zBA2t%wwHrbk}Ap!o$Z953(2S_*pBvw7W?-mh&XT*4>%I!v) z)^xTPRKI?Necn`%6;waP=Dfm0D21FqYx8&JA;1!I6<2Qq5Kc&+>{;*kSt)t?mmseJ z$owCW7|h=!?bXC;gN|y-QWbLfZ)wKO!;%T=%S*Eo!aoNb?74cNM1I&guc#SZ&%lWy z_Bm8MC1D&E1 zU;{P`;i}qn=%_JH{9nR$OuS`PnYwVqEUzft4zA>j@LSw3#4-ZjJz=b<65{(=6ZCHN8)m?~z|}L2ib} zc7AT2c&H8kh9mK$YB0wm(68l{MfTg7v9>0Xv_Bey=s9+-7rn+3!iElkAw!B6rcvJI zTnlybuQ>Ta)`3*n`(H*izNdJI{zqcrwNGTHcHd1aJl@+Z$%d?sxxp7MZ$Hom=Xhn? 
zv2#TN2&nxm9(7#G;(C!%w9LhWUc}|!7oV}=#WzUSn{dJ5Y27D}?l~(*I}9moG7iUP za=I0h>ud%wV<9ax4RKkT7~bQ#36AeCT=#QfE)f0r=GwtekkM$}cKlDyj=3a(9YGif z=FOXvf1Olc55qVtR;unRjWu6Tw!+N%TPd=2cPf=tR1};4mZgAC(j{1+U+0nDY0ZsP zBYj^O?7UKB7IWpfE>MoQ>j0H)G76hHleq!wNZRi0{~QZ+xrh!G?vxDZ9^E%hVJ&E2 zr*^y5{mB|}=7{WdUFj?OBQVphFkWChe`zsuMVJyKZ4Ls>Z%nIqQ%L;Xas0t9%rzFN zEKRt&On#`qmdRITwi&rN9SPrma%OWQ#WLjPrgIHWXL_}@wNaW;tPdDk3+(bIqxoC zgnOd2zhnHlA3iQQ3NL{H0E}Y)Z}-Em_2d6M>HU+I`oAyZRZ+FvZGrQ+tfaA<5?ZHD z2qXA*h?wfms+zS#X{CV{+^`(aA0MOJ+S$P;6GtMTuvflbAb`+S%|f5H%G{gnfaAf1 z^n8L;n=*AXZACe#v`v?fM(P+jpxMcpT77*xi-UdSASTl`Y~(^>wVXssiSg75bX>C` ze3EOewI7g^GM5UE)U?5H9H7u7Cxemhif=a@-@=yNLr&dIEDym4^S5kKv|{8|(q?;SQDZ|g zlqgWLv3pG7s{Psl_cVQOx*4s4>cZMH(vexJow_7Q^s8njtE8u|g*3oiu>9^kUA2VG zx)rsds-GYGjg`mPC`E=umtRj7lwrs{L~CN;zFca8AuL;5R!Eh9K>m+Q}%|KGE46#@ZD*Mge$5vhu%Mt^?t%L@D z%`wjQC`sVEHW;u@jja5NfB>g054q39AbIChf9=1@wtSJJ>e?M;=9q? zvi+jqph6)?+0Uv(Qbh$y++I>rDGLQwQQE9YM#TJ|`7{pSRt@!pQkdneA`{-;&W0am zeL1z4@oW)v`yO(cG#Gfv+)6ptdlPK5Lm^`_pW!E~;g@OU&T$=(^k8ZOwp{upe=UVf zUsmxzSpP6#{BC>F=4`(sqDYj=`psK;I)wq~=XZASe3^hrpq8NoUPJ%2xjme$nfua$ z+2RC=B6&Ftyc>Fw!NLSLf7_4qbXCMpq_nhiNYqSOmTFIz>*q?`=0<`^B+JIB>YboZ zDwraAlqnG%9I3+=FEDPzsTZ}jsWr0+B(s@&*xdXWx^Y-ErbmKNE8_x|$!v!PlFDk@ zSh#co;B0DU=-h!mf%&xg2t7ra1zMV#Rc0bGN7e~(yPf_r_;v==Z9AG30c)_J%7d)$ z6M|&;jCBdM$+;+Lu$&{uFx>!E)WU2bFuiTEx<>3@gJE7ub@6?gLY-UCoojc3!EZ{k z^s(P!uu(g$pmRO(>XF?I8&Ik6KY@E#Ta-PG9<}Ru+~k2wf0Q~`#-bGx8>deW+f!9m zf|=iVPI-__4Lp98uBB4dI1$GS?21(uo89Z7Vfd&Ll5K`j>4vaQJ8`5ujq3C8zn({0 zs&1p96s+Ss_@*kP7W85vM0~*h8wA!OP8(A~f}9i7;)G>YJ9e6nRMXDi^DPKzPa~=N z)7^L+oXjZM-Kz~R%>v7itdeuTlc@4i&0r&S-83ck%%S&Hr*^ZsKy7}Vh(fe~QZ@7} zl=&s$inrGG@u!)KOH>wOW={yRwng4mzE{c>=yi-;t)bJaky3(7f`AM{h5_g;gOL5Y zOGmNnT#%yrd9Fg}n@?Gpk#MgsgeV|I_Wm>0&?M$qBea)(ya)V1*+S(z1ebaCn(@lPEFiKd{@RbmG@@%@F6 z@i*pB{D_jOloM1UMK+L)C`pk-Xxk}7o5i=Kk;7YRfia_W0nS047qk{?r-??n9fYM1 ztHhe+oUiLDgO6z>wR(Ywx{1v~^=WTfrh<{_n-%-)rsI~AkZb&$6h4E0K!S!4lE}opslS{a7-@nW!7pE>DvTT=F1j#3%2Vh7^MTabgw@Vc{ 
zcFT{E6g*bDAd(<(AmqlL!;$$gcADETp6x7d`@AxtE(CfMau+noBrk(kNa_P8*_MxE z)om9*H}(|lN;2Xi92=@OZ4VzCI)eR7*R0R*sw5J*v0gS0^X|g$6(6lKR6%yYtSYu* zl>d4N98v7>Ge8;iV4&mwwD*=#bu8VwFa!xMA-Dz$?(XjH?(Xgm!QI^qm4Lh`8!j6vC!gLbe@t3YTgvORd!ck-;^T23it~d7>dLdg5hQ|2Ci!fzP_dLA`+tHkvI{ zEotjSgk5b9^x%>|n%%8D7waqQ`wO~9&nU?!;1@;^&=#jU{u%&4iTJ-_1PH$}f`5K0 zhsl2qPoaWyKT^a|xDzrHDrO)WXp{Tw!FAVSV+J(q6K3`v-*ct1@$FGG*fN`K`1Wv5 z*W=g4>4^`fc(H8VyP$pW33Al$hIsF<#!OG~JeWNeinHNjx5XxGyYj?CTW7xTam5oJ zr3GA<=$8e;6DH)Y5!TZwk|*F1XXMyo7u8=6%}%mAjn>_p%td1|tVU1W_G7Ub(uz0L z>IHmA9^L(*BHEVBLM+E4KlO%`qo84yrwx03aFIc; z`vd7vTn$^~;Y_=jYEcPVJC7Gxv}>vPj8V zBgKHj3EMUq6OwwobKj$obnSb@i*3{bL6#QO7*FCkK ze5MKKODXN|MpW|-@N<=(j;g?OA0guiC#_lu?Q3;C&P=5OL=3ziK-`9VLNl{O^|-^j!Xgw? zjZuArn?1o!Cge1jx-$T(vN|#ifwhZpxw7grC>j3A6HriSC=dP;bdcI3Ju04^FP`Go z=}y>q3km3YBGQ1LtaOX{(E&6;t0^(lQP4Os4W43DOLZ8MqLQzP6FPrHYijV*sFiw0 z42Ev4%QiS3A>0MRsDx}?!GLoPS!^zIEuNBc43F|wU+z{_4Wi%js+r8DnsVUqA}j!Fgr!ELbQ~q)XIe(|&?%zK#%bE|j78f1it|*IiKP={ zJI=0fRb%N+Vtq0M3)A)yI(FAC ze!m#4;~XK~(ogvC2AbutloY0`qHC-If05tLmaYiag7m1i8k>j>pH*dIM6|dDYU(51 z!0KE#l*Q@lvL#X5cgIPR^(ppffB*oZ&z<}4JN6gGPxB|^-*5QN_*-^z4v|F#qI(W? 
zrV`jef^8wrhWX^*3qmU4|REqj1{S&q%<^+{$AbNj<_z_rJdu{ zW+)OzM>YFZh6H^<@IEq%>JxF!3vPAJ+w2b|{PtlGzA?fj^3d8w!qZ7u3 z6`LP2lh;}6*W%rtNF(Q>S+IGG3*ev~J`X?-7|8TFsuw z3rZj=rZXq%6pAsWA25?!dYjfva}1a!%VCAQA4U!Uo!Q}6=sEMC#!bIcpGMX43R3;Ap#_)%8XewHIFIE})Sp>aj4fkWHtaO(FrY^Cq!% zpM%DHFtf@qSddJePDKBNgsKyH~BN#0(4AccJH(kN;&)U@r) zF#xdz3=_-oHvK~EcsGEXGqsO*k0_rzXhg)REGLnr072|?)1Ffbz^d&B=BPG2k}O@s zSD&D)bY^BFXLro;!={EbIco$FS!SVvBlY4<$k*2-81wQOcW2@(SY@srZpYH=l^xA+ z@w7m^Tgh6hOu(2&g}e*orDL_ZGU_deL5SljUFSL8yUMg@`nZ&A{4z|`hGdn4bBPkA=_~C|&)Ea0EXJ`-oC6MtV*}JC9c}9ZCFtfZ zq^BD$Jlmcrd@q}1vGoR6p$E7Sk;}wGPNX(86+|ow({~i25q>ixa}u(sX=;H&RjRn4 zMIhD?u4mi-9(3F1GiyE3BPkPe3L;fH%`@M3g4jG#u{{FIBmd6#l^&n@KI;#@e`O8` zy@DuZO`MKWyC({gV)vlCBqf|Qp&MQbG+?Wyf|~!<#4wl|e{L$6yC=*3mDTpQfXS~arZbSg|2 zz6GpK04pLPhG3Y!su#MCVaUP*tuFpT_xHhH=>GfHN<&n#pwXC(EiZJR@KazLt18r7 z)D!p`#!3k%-JO*JUSp9rec(*X+-~SpOwH)_xjBX9GSuSvQ|5zQrZ@*$&F(g@`R!Jg z5ksEnjLYFcvAz|GNadGF(fxP6%hLB7fw#QH@6v=QC_Bpz1eK|?F)P^SS=M=`9a)=# zzF6ZrS>=RAJDI5L5?jgk-C(JvVv0UwliqF^mf#!>hCUpLno3A@4&EbdBCeTjH_195 z5I@m7yMLuQTl_|WLU427f7N5qZX@n2>zGnOB+E%jZd10rgZ>F&h;qrdYIjY-SH_)$ z6wS}sI;0$_ASf-rP)&|fr2cid_CZk_ zQ(WRFq`OHQPK9Lyq$G3I#qN0*L<%WoMzdY?U@TjkF#=5S0Dv3(0P51g=h z%jqTRMd&@1U*n&Hfk7~_+K;+AtXe=FxwD>UKP%vpM$OVx{!)4YS)r;M)VgLV)V*0C zS!hl0%e)mTF`5VJI>bdrONUrG5Z{BWBk!}N zdSGOnqiQ>e62f7^-<3sevuS6i8p=)f1eUK@)Ys zemt+n+uAoC6Xs|XWif#RxnHd^V3=MSHerct6JgY+q z(p2PKUX58wPajYwI#?fX3@a!9Jl(3lv{Xm}6&D3m5Vo@uNEQ@P&&ClEbD%}Y5#d`_ zMaFS-a^y#pb~$>4SD!O3kl*-9ce||XRiJ$Iv|$VtIk&RE)}Aco;cGRjiYyFUXE5v# z87N~HX%d%|?oA!j2#@wJR4VQzHG<_9vqk}FKK*0Y5 zL|~we^FBN6W?%=telc+aaY3oG*YW8zZ$$%I#1*9~Gvcd~9&Ah;6s@diyRAwTy7DD7 zWUxZ}^&z3lQi(PoIsK4x)Bth6IH5Z@RJ_yf!w~8R8~zIyE^nmOq6`hm>SGuDfQR#V z@N~NA+8nDGVC!{4=_c?`%j?wo;0z}5ynuzY?jbultD}5OAXq4O($^5K$Cq+aP!_7gw}6vy^ngA0kC-oOCDiGY>ufV zTu+#W4hEkrl6g6Oh*u;Twc+lvlD6J`<&4^L&O35k#QxZ*e$zk~zzx?6H;QjHgD!|S z0yG(r#~kc4{`tCcJO2b{h;S`T7^^&RO@`5&kuouL?JoaPx;?}sqxVVkbj|UFYaz-r z8csa(EUW)6<$v+%_;YsbKa~GJl>dK;^3zcMhw}eFq5QPJQ+`p~jRyF(ZN-9`IJ_0h 
zxP#KA(lj-POPZJ>HSIUia52C@Jn^xb8K51ng{0}e)-p)QexRHvbkz_yt|?lXn8!@rG&L>CLmtcqho*DuqhxS3gfAWqdtqy z@2-Ze)R#HM zj$0R2tbWMcTUTw8yO!JTjpvyTfv~2jT91u6{c7F1-Lj}Od*VbFkuJP2Y7>oLiT_jL zCs2%6Vd~>iHfPS)t!)9>dY1TozDxXLFB1Ry4sP+qz6CX|hT}8iKZI8eg&trND^DPo zEKk_20$IN!WezGW=ZeMF^xUuP2+nH)KEjbB#+%V$M69QfBt8&Bd{xAi*H~tL7w4LNE@sfOL zR-g(UFI{a-3wl(xDM=BisJ4KEfVM}S8SXpyFFSQBiTeVGYkS2 z8a(SK>Sg5+DH0c&2x1pS{^6PN7Y?*@=WWQhJTrdu?~MPgp;Wje^bP2CON66^^u3~$ z4Fjo`o5Wi8N7_IlD5D5qkD z@EqqXbX=^lM}I}-du4x7ydGzm+G>91usK@P31JerTN*1P9vnRs5tFtz6?z0g`v~qDx(77>DC12Z!gWL)dU~k``KP= zKV&TgNQZY9y_&^QIMaAfDygT7^hk{ocp3$z*uBahf||XMU!o#Y;I2*TE+-jg^Ms7W(Eny7vox z&_^L79vjIv-zk3_X9AM4qUAaCT9U!`A@}0>c_tSzXC5N-r__obMab#y8g)&wLFBv? z_O8!;wZ?U0MN%fXGS8HML|UD3*>dl#^-WW;^YosyTC}Pt(;9AOCJF#TV%Z$vvp1R} zW*1`UOSmJt%@S$$2dKH51k2rYpR1IaD|P_)`f`~2P7@R?GI7;nmVhCTq|)=(4}N(w zO|?UeIYiUnY{WQY9SYs(s;=d-WLmAg3)YQ`zux45$xEz(0?|(5j#k%Os6qUiN;krU9?0j_v(A4N zzPk{$)oOYxG09PXeIm|!+=~K%BY~R{))zY|E<)^!<;PTm*W#;HtD-h_+3Fo%@mjyQ zhdxKEq??hKQGvV`8?$-hh44$14v};q;(_QGZqTLWYoTREp}M*d=}f`om;yk%dQFOW z?V(|mE+0B$k4`;n{TG5$9P|0uOv$j`sQvijhwxV%_Xmc>>68VScng~p0|mTqcQRrJ z{2g|k!;&0GV4MAy4$p#5fy%;?CdXGFO<^6uEeRZmwO+jAh?OJrDR*lN?i$MokGoo$dx3-E5(sWs58jnXm`(Re&(~7Sv(FCU4KPaxTHn^jKaq z`cYCDkUzh~YLCZ`Y2Qs^6>#Tm)QJbMCm6}nT_z1y&hvJflZE&?938Z6!!L-{XV%LO z8aUV$pdQ$Wb7`1{{yYLnXnNgN=q`7Zz4-CuDZ%abyU;qh&&~Y#|Aze2{)hbkNALgN z*ZXPzL;nBY=>2s6hWs}?lmA@4A~VZ_d`Jif+Y)5wlV9s=-WapCRHBE&1#6=*v_JO9xuzo6aGS6LcyY zbQtnLI70rC`xAc1{o{Yj{m>m~>1{O{*}pxoxAF+%gD6#Ux{sUc4y$E3H8YuP?Amocv>zW@uo- zL)^ID>uw}nUTWAW^sS-iSH4+L_nL^rn=4wlnZE56SF#G5LW%)aF!^4g)_L&}^Fdh= zR@po+3MqYyRp+SU4Cuotvb!L7@1)N3kt)N2{-(ew8xo_A?dA^c4P@{sm}?ri`(UnB z6Pv!_vtm`xyZgI@Z(gF)v@!Kb<}%jaMI1C@glk0~D~GJalXhNJt1eT$Q{^`N%pKcv zVUsW&Z$EbG7L7Ozu1@4m#CG>#bxP9LgDE0lT3*ceO-kG`yCSu{)cXdb{h|%?S6*ZE z27I^S5a5{2o8P&Ay?>2>C7yArhWEuZmi!J8(c5o^2&6|aorH`*2 zKUMK><Dc{wn-4 z^x15_5N;Ozj7-RF?Y1H7?LINp%77b?5brM~$FlBg>8`@x@(+C`w<~cX_e6d@odUUo zUkO{_sLGXt4MEf%J62QCljkqq^r%fQC!^m(-FNA|$z1jg3K?X26b2DPmG;>IGYi~S 
zl@@B9h{>R>N2b57eKeVRgL2ZeiOWN*@J0S^}iaPClD13Gr=NIZP)&TEr zS%*^gvbe|j!Qwmh_uec5{Z9Qq{-FL-pVgMX{80O)GQ|&}7tc4c7+Rf)lErsrYTK?J zXK0w9@{}z02~7Jl9XFqtvLDAygx*IFiB@a_TPuVcy(WXL3vYnCSPIv`bxZ}r`a-m! z$Nm@fcZ)C=h=}u;KzzEvQ;76@rv77j^zRFD!yOK3O7#BjNhj{*Dpx8#QB15=>D^ z7E$SlfoHupQB$YVjQd{Ly^CTcXqH9e zS?ed`m@An4q4k@*X#MI^4ILeUbOT_F^bacP%N}N+f~<1t>9l0L_g}p#kJB{WC>I=7 z+&yR9o<^?L9Uc(e7-wTV)Ey=a0OX)$#kDU1t=YkR&;%saq1@h|GYCMvdJpmGvJpfY zI1#*$D>#ivSiABhMW$(lNPj#q(ktEX3a&~)bk#|kX}k)iSLBr;LWDivD2s@_>Oo(h zMGLugOoHuxWw_0VP|$o2hvYU%y1;I)Fc0mUppCpW`q(np*hv-KeKf+Q7xGUr#i5pe zVVUoKoJzz1kI8ENM(ym~uKV?VIYmZ!5j)<^CdF=ncr7)6+;t^QU^GRJfq}-m8V>+2j=7JA#>mi^VQ^c`b2af~I}>uq-wkuR3@< z%mf1I(*(iYskkyKggnU{&}uRf&h-s&#GaY}ZtPZdzz4l#QaXo2G>Q)8JP~&WZ-#3@Di>xIQA>MnQYMxb++`b1pMw+zA@jMng8m4r_7 z_GG}550UvWkk$1u|6~9 zYoei~Vi786LHtIi`G@6!vhqY^qYLKabrAnBNTWjS!(uTBZVQ@SQLTz@$)}Z3qFB;5 zvtF;rJ7hUq(+$=Rl){_$TnU{g<;G5?Iw04z5N;n?ByyH_6m$7eFfn8t8CL>szzrHl z2p78Bu=4~=1Y`M`uu?HO!#5l|4Xm)u2z6h!Hr5sZE{z)Ha$hE_tf9;=g6&7SZOG2n zk=KAm;9_o2#MQS)vFpbY66Jl&-`Ttoob-BW*Zhj#0M!uCq&t~cas2j;b!x{5ajHb6h|3m)&_sKuqf5`v; zko^CQ2lo2DcAgCg0N~~IJrLYqi>KH)pI1=fIOyBhK8J#1$EBvA!o|_I(6!VvwlHMJ zm6sDBXTs&=fMByUFfi8DXZaZ&j{IdN2Isk`g)PgEQ|!3*HWn=3<=5OS-z)lY#L3Lq!jv7?$j;7+g_6?I(UHQDmcr7;kdm62nVItYS=_&>($oDr zpOw9h+4o#}x|Gjh<9_&}r>3B${F(Qqh5y|~z2yD1NtTwDe^>Nk!w(a`S4TrdMMwE# z|7T5xHhOwyf9L0To4$r;(3>sV+To7qy@ z{rpH_WnmbnWH0r0x&EWxE(JUveEd8?eB@y?OkOT>)El_mJah$~rUW%$5bnss!`{63 zLnSn0toAEc+N1I#*Ry8Y3}(l})MVSSG}a7+!n`a#F)m3jZ*T6Bdl8LoZU-GLEuB?u z_HSf&^i0%LMptoP^_QIt4h^FWWb6qp=2sHtd`!Ep5`BHr`Ted9wXjnj9rBXt)dS0?1%6diiRtq3T!=%yG_O3W_7&@ za3x1;+rG-v*KZi4iGZ5M(8)wT7O*~o!f3W6ARx%gi_mic5wTi+t6~bT)dZ5nQQp#t z5R>MlCotj@RU?N-L}UbzISRWWS!f+8=xA!$jO}VP50)f&nz*8!U7}O4LLF=#GxMA?QIw-ath{WJ$NR;wP4kn2G0XRKQ zyG_fM3Vm8bwvZz1N6m3AlE~@x!L^XC{JWp(MEc)B%Nl(!sH+s8P%PxwC)h5WI0&JC zsON(1==e}{hB&{?m?p^0w_=42vo{6vl{#_zf~CB_xIoTdjNY%MrjK>@qE94gU|4sP zuE%JWbQeW7-*|AaqZtF>WMlRLu2)i>o~diF{Pd)>wD7&xCoLTL^I$#=7?}aWyF$K- zMZar3Xh?oxqA2tDvc(t&^$xZVks05(8WOi%Q(ciz;4FB>m0@Lwq-977W>Zt6%WJs& 
z+vkD@O${!(ft}>!p>1&k)1arLku{XWh#sOM!MM6kThx#@AS>@YT6qg7m)gio!rUc+ zL;U=>aUwm1zdRve@Q;eE75a^Ndci}f@3+=5H3%`^I_%>PE0p25-xBMj~j*VM;9CB2uy$pT5qVZe(df z;=)YwYZN*mXi;JNGB#{Bid2rb)5+w`0Cv375GSA}<&^C0J(Sl6fL3l`&FOvS8VYOw zpg}5^BFS3*-ZSv=V8a<5_KjJvpJHf#C~}&-yPu(2*;o6qhINj#%|sCNkjD1qDy-O7 zcBrW258n|vfWkAyW&fL&ev-aE7Lf@mTl z4Qv&75Uw*%wH(zl^EJ$ADQf|c_DfajC;Etc-Ht} zn_93XHWQQ(4|NM#IwTTPfvS$gjmq~`PZJO5kD0ii=Pn-!9_?h;!m`0p;rCNN3BJyD z4%Fuzc0q?+>Tl^hFoK>{GGe87l4OY2R3By|SFu4h>9Jb&h)I^sr{+3mJGv!dWOba- za!zSF7R*P6rQ1m`+q~ton4Ea?{_?U0Sb4{B6f{e*E@V~ID|ep6*vylZjQ*oA_wyn* z_QLW6`hB*0V+|{Ti-m3YLuDZE;Cl#SXz_cV~zRv9l$!gl9cDwBG${nswv~oWl zR$R{8)_DP^J8b}>*v=vlrd0p%+_(GJV{)Ga*dL}?k#g|UAWe%ZKxnQtAY(n0nca9Z zC3Sbdpt~~4aD+^J4Zaj(!9#IhCvq5uJyD%e;Jq$jVR+SA&3SJ`j!84<5g%d@sdav` zbY`90DDpW4`J`FMRzK$+b7MM&s6(F#!Il*3aLC?g!_(CTS&FISLuJCv?n9VLcQ&>Z zE<+AjQ(2gz?k2^U6~<}5x$X1}&aA!+AGqE!3s2scG$I6@H4lei#XMrTS>=z6X-o02 z3=cAXjI=Q|pYz1(8j zEHqKk--GvDq*h#lf2so=_YY|-r*dp2OuD#OPf!%EFFxc^SpKv~J8AHdH5V8O*Vwsw zixk^pMra&LhSB5#Jj&8k?tSvs9NQbAz?LqQfTfK(F@7M6nYQEDkCV!6K(FKl?V#!8 zq@z+)D11|w(Ln7}r69T5gvJzhMB?@tp;nq|I^-(+JW^1Q2}!K(G)01v1?`c}6~7@` zbL$3>@xQV4{_Jjq^xy}!u>5){dh*f(-BfrPcXlU=4W7bP0kcsQ<37+$IIGN+UjKM}$<0p`>(Z zN!hrnqN2iVN-)*zwQ*046^or;%hJ{#=@+b9zMR7G&k*QtUl3e%J|cBd!*BbNg3V`Y zKhX#fe4wlLsCR5Jbss3O8J97BGY`8EK|P#;!IB?i>meHH&3sfkI!84v&;Jo8b`tNa zikuOI3EWEbl=fCGg^r*W>nBu)M5m>UF=-gA~J2$MB*%I3XM zs{703JMYgs6lq4t7{Z)PoTom2u5t+2&3g_IJ75y(Og>?T_vZ6f)`1kqi2MU{C2ir- zfj_i=N|^Upif?T~7lfY~)>S~UasUiMV<*5H!1}Zs9Vp><@3_4`e#3za3>7WurLOY# zZeF#yvaBBWfEb>hbNEV)KDQ0)s8Bw%Rn{sdQvs|xZ;tVCA=j%YR`bb^)H{sS4}Qb#u$9E+h3tce7lW5;1@ukc7?D zaJ5l^=>vnB2TbbN^^tF?drV3upBy|_KwE8J(a4$3-hLiHFm-CP@0&8?t*r{peOJKM zu#jQ)XjXPSFBuNfz%rd8iS7QhaePCwAf@^4L-C~>tVcUt*_}JJ_KxJe8hpaQ(LIge zhYV?Iat)5I%5ZUqox-u;q>RH+KO}?w!Fplfqo{INF-Zk&#om^%*97D{yUndxzUd56O6ialdUtvqO*B{5dhW)3Z@cI#^*$!=iG=4Q`G2!bj0 zlP#C1dQM8K1N1xeiJ3GWj>+B2_2w;2S)3rtrK=poulMwe$*RpYu=Dw~qjTht??1u= z?U3hG2Im|ReOqH`Ub2m9L=!w>g&pZ+nY)~DuPj1m(9HnRpo}nQ)Xl}f$>}|rbxTF5 zlNfQrXilp7(a>sB0j#?3%_LB 
zD^?RW8WG58j+7GGpTBQ5;`-5g-8C#LST5sBt>wlSWoRc|1rF2I;JmDgQKP;n3u5T& z3k?@L5Kb=1jl<;0J&i9lMLm|t;^DVYm!*qBC>dZ2MVFsHJmn%RK$7Lk8(!Ib9-3yi zqO-a*HZK06R=@!*2WNXVqPe{=VRKNkfTfxdlM3o6@TLov=v0HXd^p_`NK|}8C}=i2%!98ZtS)=9m^EqwTbU5o<5(XhM}4_H7Z1-vXeLJz=e`<1M`5OEGthcW>jA&H z)wZieH>MlD&Ub0AT?Z*U+{6JYogO%zJ-X*>Y!IxvD3K*8ii5A<+#ZC})8{srYyLK8 z#+x5Gbv;_|HJ+PaJmtQWR0Q-M>v*a$c;AaQ--1=I`Y9MEx^Mn#&XaUcDWk|v*Dj2; zzuc@ckFUR;#l}|o$XJ=2S-xYjVVj7BS{}z}LF3)vS+%R&#r9z*l);d;158nDkEQ7& z8anaWwrIQm0!n1PYMr4int`FB&h4K4(UpsB!10-~c$w(-pm{T>2LwK+db%H}576j)P$Mci*Ih(CvqBg7+9v zmwnS(?n$pKu zz*=Xx_JRlEg5ns8O~B44_+=0`ZYMkBWTJ1~zNW&luOFJsn)|CVvk@%BAq-QI=6yOWoP;uKvjhh#gcH=}1XvY%&x3)Xzz8U7pjSX}4hR){9I ziPK!}pz%p;ra_^tWq3zQrOr@vcB09p6(G_UUS$Oj_6=eI6)e$qfEw1p;eGlrNr-P? zTb&gCMWe6<4Nt1Z-mGcQt@*8_Pn;^rGt=%f-d+X0+;^w>MZjL)?$NeW8iG{f3iUB zF;u!htC-Nc<1B)I_NR%aTSO63W9QIC3hHvdfR!-MUN@;z=nN|D;D)HyNNOMfg%Ti* zThfn*k{bQ+Iw+}pYOPx+Co2ucC@^C@30|AS3yabWcls=GR6~gyN)eVa`av~^e&3?? zRTE3`ddj*rmJ{XjQ&&hAR^mv(Fx#x8n(Ej#crtPcmx5vyn(6IrSb0p+X$#l&dSd|3s`zXA+`oX+CE2jo zE^t%bb|u_IkQ*uRU+>bg z-+}MvNUe@?z}fBY9!;lG;-+TqR{Il0gk_U*iV3DT?@3pDYUZh$HXIFds6lm4HA#*~ zU&S~Z8j43olN-Co#cQ8 zySlmr>J}{%i;y8CqBkjGP{>tz&0w$QRc}_sH^O75(MYeot+=5cU%SPEySh}2Iu;40 zpCuSueLi;7(mJtiDZR4G;!9(i4|&$G94;>#qoi2kuB3wdt+2`W;@>05x#^Y9%;a1# z#bIB`7tV|lkwqeQt;?PYq+UDR02%jOW`Na(Q^MRp2Rf3z10oEnKTg=Qil`1^;T=bj zK=YO_(#;^u3!5hq_G6btm|OAy+1kJogbAkD?Kgg{e&=GXmL}?WhuapY>+TwDo`DB% z2&QP@>?iDl7=L#dVc$Fv6E7#K*k=1UG}q0QPe4;-!9sQHX^lM7XNVjb=T?ZzF2!@K zHFiC|fP&{^?a8}Us@Mi{OV#enBq<^q-IwPQ50d`6hRgoc$`rX8X=v?9%0L(kgmziq zf>b`^LI5-!^&zEdPifGne#A2{N*2WGVtmX{5^xi;##@q? 
zw#YS0N{Mlql>SXLH;@&|4k8ifc0%=#gy4WU{oV4Gw!D6KD)8_$FZT0=_Gsf+iZN_JKYv&k596^t>gdGn=clw6-3^X(!alefbC*nE{rYY8dp zkkb!J#r^kZ=3Qlo-M#C1Oz()LK|)>jw>I^y z1p&n6=Uw$Q$|gC~-nGD8#nh>kLYzl#U7l6tmDW`lgYwSI-ru*M-E_*a(?jglV0cJo zGZgRd@p;J8<)AjvrN57vzZ?aP%fS)TN?cByjj%TmJ|7wUn$t+cy0_U+gQ8KQHIt(d zH9;N43O1@`eqGv+Vt)x(Wy$oGxi7Ngyj;15)61n>xKqkJ(oGoCK)nluboA3HtD6&d zb#*m2H+TB_7lig{-%H9vy4!e*Q`~04j4cm&gEqNNWh1GMtKM4wFCt+GPx&qwSNZT8 z+Qg(r{RsofuOE3KcxbBoNv|zAC<@RVSqueqtaKmq

Af^Y7FLv8bI z4XLJ^3xu*MAdk!@`w%lrol1v06j8=S$=!+k-SF-7)4{8cvU|Shnb-jx@~S>EEBbTH~1UYan<$A_^|4`RE{sulLv9(=WNu3=;5p{`Cl zJF{g?TOzxNlH4sRDl)sMBjIyXhy~N@%^hUv>%;!`t$juXz42>u_{_?+cJ;$!j|%tY zR6fL~ZpP9JD;<@wNs=Xy4_~Y2N^l$1lU|eT`gL*vpz7+G6V8xK#>A2qd%9`213M!e zlZ(ZnZXVG|o#7gN+)qu%gz|*8W1ysaINum>*1vZU%4YZq$i^Kx2Qsuylt;tT-dz;j zQ_%u$V<19c2ch(U|1OnLIoWIL8wSEbl)L|7u#+voSGoQUZ8H2|)>ye+pyQnW&uK1E zU-%!JsU%Ye+ZKA-1h_rlDf&6X=j>5J;b40)2LnZ8_+_^dpQm1+Bhxzz)gLf=aULV8 za|LtYx+$!uCkItm)@!dXc7zWqedYRM0Pg1N5+tIn1nf3j8gP$5jbWN7Aj<;ijzcz@ zlOOL!+>>aun}{3}sR6vS26DE)UBkGE6X>L4Di_yauUf#>2O({<&~}=}8#t z8_?BOgVP+;}9vY+s$70(cW^Dl>JKt#G0K*D8=Oc>A;PrXwRY$QZRb!t9!nuR4f*l;cH2bW|c?63wC9!F%}}qXZUJ1i_9< zOH7lzX;9X4T~DY1_FF4;`|i*Lm2Up%Tn!M(=#?4*Zr0sev<+j&yA?KY0Vwa2O@=t(vOhzcx-nTpSl()#5^7Wl{*7ygor#X7vH2mEbG?Rcg>F#Kk<}v1R}zK zpXug@*dj;*X3VXavyb3?E$soU=bcql1BdE^6S%08Ui`K|*#`MpUvv2G>AQQ3tAPd` z7~to-1onS*A3=P+kBIZjagz&62vC}9TNoSY+uFVO9+^pCxBN^6=kNFg;myk!6jYKG zN$x7NipwjNX|?T>CBXn0EH|L5^K>V}a6h3St|RET7KVR#=q?E>?7gvD6hA?WLRv%8 zZ*IejE2WmeLk@B3&Jn^Hv#Uo{mh=WuY1q-Xf19{D1F0J|&eY(;2x4*PR=@3%7YV&1 zJ<;$}=oq8O-9nUitQhD7clnz)IvJL;0Xt|SojPiaQgE|$GFJW^DICQ#3ra%aaR{GL zwDQzntv=zy4#maLSfEfq@_@4sh&aZvmXcIj8R$;r$6CX@xtBv9OrtL4eKMDrzHK(& zms2IM#$D_4IXbh;X8HtSyx&;eFxMBsSR1VB-&6z~;3OT47Fm0fgS#hVqU=0>K}d-LzIqGr>;UC?Sww!GKF;3Y2St;>*GBTb)P4+f6T8_>e}1b=v#arl|Mm$n)EBq%hdEgY2*3hpP&Y2md|Sr z{%P93Liyc9)aS1J6UEBX%HHaERfhfFCjKiNjDMQ){7L#>;n-Lj+UVQ<)>;1w>Gu|e z{0k&IOG`5=Z3}&~|FT8D_fOZqzv7NcLmF?e#{Txka3@M%|&;S68-T(lQzd!*1m_38(T3XosF=Boe{5482 z`fmlf;C}iBmC?5{)7JfK<&EU`B@{T+KZo8=pkG7N{SHL-e8qm0`d2_Se**m)2<3Mm z0Q~<7i1tsQUtNO#4)mG)zXGEB6X;i8sxKbI|4C?!zxosZv)r#9*IxWC{gX&Iem3BD zzpH;D{OT<0#WUMKiA~@agnyhi{0Z=@Q>Yi8TK}XjpaSYKkNVXDY)M)fK~Xr_-~KJ|B3MLEg;kQ z1>t{ULABN|0RQe9{JNC)cMG6&{x1I8vpxTP3y4jALHM6oP+|59z@HZUykGvgB=Mg? 
z2yOuY@Mo!?H 0, 1.0 - ret_val, ret_val) - - # Despite the memory overhead and redundant computation, the above - # is much faster than: - - # for i in range(len(d)): - # if d[i] > 0: - # ret_val[i] = 1.0 - ret_val[i] - # return ret_val - - -def black_scholes(callResult, putResult, stockPrice, optionStrike, optionYears, - Riskfree, Volatility): - S = stockPrice - X = optionStrike - T = optionYears - R = Riskfree - V = Volatility - sqrtT = np.sqrt(T) - d1 = (np.log(S / X) + (R + 0.5 * V * V) * T) / (V * sqrtT) - d2 = d1 - V * sqrtT - cndd1 = cnd(d1) - cndd2 = cnd(d2) - - expRT = np.exp(- R * T) - callResult[:] = (S * cndd1 - X * expRT * cndd2) - putResult[:] = (X * expRT * (1.0 - cndd2) - S * (1.0 - cndd1)) - - -def randfloat(rand_var, low, high): - return (1.0 - rand_var) * low + rand_var * high - - -def main (*args): - OPT_N = 4000000 - iterations = 10 - if len(args) >= 2: - iterations = int(args[0]) - - callResult = np.zeros(OPT_N) - putResult = -np.ones(OPT_N) - stockPrice = randfloat(np.random.random(OPT_N), 5.0, 30.0) - optionStrike = randfloat(np.random.random(OPT_N), 1.0, 100.0) - optionYears = randfloat(np.random.random(OPT_N), 0.25, 10.0) - - time0 = time.time() - for i in range(iterations): - black_scholes(callResult, putResult, stockPrice, optionStrike, - optionYears, RISKFREE, VOLATILITY) - time1 = time.time() - print("Time: %f msec" % ((time1 - time0) / iterations * 1000)) - - -if __name__ == "__main__": - import sys - main(*sys.argv[1:]) diff --git a/numba/examples/blackscholes/blackscholes_cuda.py b/numba/examples/blackscholes/blackscholes_cuda.py deleted file mode 100755 index 9803b8278..000000000 --- a/numba/examples/blackscholes/blackscholes_cuda.py +++ /dev/null @@ -1,130 +0,0 @@ -#! 
/usr/bin/env python -from __future__ import print_function - -import math -import time - -import numpy as np - -from numba import cuda - -from blackscholes_numba import black_scholes, black_scholes_numba - - -RISKFREE = 0.02 -VOLATILITY = 0.30 - - -@cuda.jit(device=True) -def cnd_cuda(d): - A1 = 0.31938153 - A2 = -0.356563782 - A3 = 1.781477937 - A4 = -1.821255978 - A5 = 1.330274429 - RSQRT2PI = 0.39894228040143267793994605993438 - K = 1.0 / (1.0 + 0.2316419 * math.fabs(d)) - ret_val = (RSQRT2PI * math.exp(-0.5 * d * d) * - (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))))) - if d > 0: - ret_val = 1.0 - ret_val - return ret_val - - -# FIXME: There is a bug in how Numba computes the first line of a -# decorated function, such that it doesn't account for a decorator -# that occupies more than one line. The following decorator should -# comply with PEP 8, and not go past the 79-th column. - -@cuda.jit("(double[:], double[:], double[:], double[:], double[:], double, double)") -def black_scholes_cuda(callResult, putResult, S, X, - T, R, V): - # S = stockPrice - # X = optionStrike - # T = optionYears - # R = Riskfree - # V = Volatility - i = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x - if i >= S.shape[0]: - return - sqrtT = math.sqrt(T[i]) - d1 = (math.log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i]) / (V * sqrtT) - d2 = d1 - V * sqrtT - cndd1 = cnd_cuda(d1) - cndd2 = cnd_cuda(d2) - - expRT = math.exp((-1. 
* R) * T[i]) - callResult[i] = (S[i] * cndd1 - X[i] * expRT * cndd2) - putResult[i] = (X[i] * expRT * (1.0 - cndd2) - S[i] * (1.0 - cndd1)) - - -def randfloat(rand_var, low, high): - return (1.0 - rand_var) * low + rand_var * high - - -def main (*args): - OPT_N = 4000000 - iterations = 10 - if len(args) >= 2: - iterations = int(args[0]) - - callResultNumpy = np.zeros(OPT_N) - putResultNumpy = -np.ones(OPT_N) - stockPrice = randfloat(np.random.random(OPT_N), 5.0, 30.0) - optionStrike = randfloat(np.random.random(OPT_N), 1.0, 100.0) - optionYears = randfloat(np.random.random(OPT_N), 0.25, 10.0) - callResultNumba = np.zeros(OPT_N) - putResultNumba = -np.ones(OPT_N) - callResultCuda = np.zeros(OPT_N) - putResultCuda = -np.ones(OPT_N) - - time0 = time.time() - for i in range(iterations): - black_scholes(callResultNumpy, putResultNumpy, stockPrice, - optionStrike, optionYears, RISKFREE, VOLATILITY) - time1 = time.time() - print("Numpy Time: %f msec" % - ((1000 * (time1 - time0)) / iterations)) - - time0 = time.time() - for i in range(iterations): - black_scholes_numba(callResultNumba, putResultNumba, stockPrice, - optionStrike, optionYears, RISKFREE, VOLATILITY) - time1 = time.time() - print("Numba Time: %f msec" % - ((1000 * (time1 - time0)) / iterations)) - - time0 = time.time() - blockdim = 1024, 1 - griddim = int(math.ceil(float(OPT_N)/blockdim[0])), 1 - stream = cuda.stream() - d_callResult = cuda.to_device(callResultCuda, stream) - d_putResult = cuda.to_device(putResultCuda, stream) - d_stockPrice = cuda.to_device(stockPrice, stream) - d_optionStrike = cuda.to_device(optionStrike, stream) - d_optionYears = cuda.to_device(optionYears, stream) - time1 = time.time() - for i in range(iterations): - black_scholes_cuda[griddim, blockdim, stream]( - d_callResult, d_putResult, d_stockPrice, d_optionStrike, - d_optionYears, RISKFREE, VOLATILITY) - d_callResult.to_host(stream) - d_putResult.to_host(stream) - stream.synchronize() - time2 = time.time() - dt = (time1 - time0) * 
10 + (time2 - time1) - print("Numba / CUDA time: %f msec" % ((1000 * dt) / iterations)) - - delta = np.abs(callResultNumpy - callResultCuda) - L1norm = delta.sum() / np.abs(callResultNumpy).sum() - print("L1 norm: %E" % L1norm) - print("Max absolute error: %E" % delta.max()) - - delta = np.abs(callResultNumpy - callResultCuda) - L1norm = delta.sum() / np.abs(callResultNumpy).sum() - print("L1 norm (Numba / CUDA): %E" % L1norm) - print("Max absolute error (Numba / CUDA): %E" % delta.max()) - -if __name__ == "__main__": - import sys - main(*sys.argv[1:]) diff --git a/numba/examples/blackscholes/blackscholes_numba.py b/numba/examples/blackscholes/blackscholes_numba.py deleted file mode 100755 index fdb13753f..000000000 --- a/numba/examples/blackscholes/blackscholes_numba.py +++ /dev/null @@ -1,95 +0,0 @@ -#! /usr/bin/env python -from __future__ import print_function - -import math -import time - -import numpy as np - -from numba import jit - -from blackscholes import black_scholes - - -RISKFREE = 0.02 -VOLATILITY = 0.30 - - -@jit -def cnd_numba(d): - A1 = 0.31938153 - A2 = -0.356563782 - A3 = 1.781477937 - A4 = -1.821255978 - A5 = 1.330274429 - RSQRT2PI = 0.39894228040143267793994605993438 - K = 1.0 / (1.0 + 0.2316419 * math.fabs(d)) - ret_val = (RSQRT2PI * math.exp(-0.5 * d * d) * - (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))))) - if d > 0: - ret_val = 1.0 - ret_val - return ret_val - - -@jit -def black_scholes_numba(callResult, putResult, stockPrice, optionStrike, - optionYears, Riskfree, Volatility): - S = stockPrice - X = optionStrike - T = optionYears - R = Riskfree - V = Volatility - for i in range(len(S)): - sqrtT = math.sqrt(T[i]) - d1 = (math.log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i]) / (V * sqrtT) - d2 = d1 - V * sqrtT - cndd1 = cnd_numba(d1) - cndd2 = cnd_numba(d2) - - expRT = math.exp((-1. 
* R) * T[i]) - callResult[i] = (S[i] * cndd1 - X[i] * expRT * cndd2) - putResult[i] = (X[i] * expRT * (1.0 - cndd2) - S[i] * (1.0 - cndd1)) - - -def randfloat(rand_var, low, high): - return (1.0 - rand_var) * low + rand_var * high - - -def main (*args): - OPT_N = 4000000 - iterations = 10 - if len(args) >= 2: - iterations = int(args[0]) - - callResultNumpy = np.zeros(OPT_N) - putResultNumpy = -np.ones(OPT_N) - stockPrice = randfloat(np.random.random(OPT_N), 5.0, 30.0) - optionStrike = randfloat(np.random.random(OPT_N), 1.0, 100.0) - optionYears = randfloat(np.random.random(OPT_N), 0.25, 10.0) - callResultNumba = np.zeros(OPT_N) - putResultNumba = -np.ones(OPT_N) - - time0 = time.time() - for i in range(iterations): - black_scholes(callResultNumpy, putResultNumpy, stockPrice, - optionStrike, optionYears, RISKFREE, VOLATILITY) - time1 = time.time() - print("Numpy Time: %f msec" % - ((1000 * (time1 - time0)) / iterations)) - - time0 = time.time() - for i in range(iterations): - black_scholes_numba(callResultNumba, putResultNumba, stockPrice, - optionStrike, optionYears, RISKFREE, VOLATILITY) - time1 = time.time() - print("Numba Time: %f msec" % - ((1000 * (time1 - time0)) / iterations)) - - delta = np.abs(callResultNumpy - callResultNumba) - L1norm = delta.sum() / np.abs(callResultNumpy).sum() - print("L1 norm: %E" % L1norm) - print("Max absolute error: %E" % delta.max()) - -if __name__ == "__main__": - import sys - main(*sys.argv[1:]) diff --git a/numba/examples/blackscholes/blackscholes_pa.py b/numba/examples/blackscholes/blackscholes_pa.py deleted file mode 100644 index 2adf04219..000000000 --- a/numba/examples/blackscholes/blackscholes_pa.py +++ /dev/null @@ -1,59 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -import numba -import numpy as np -import math -import argparse -import time - -run_parallel = numba.config.NUMBA_NUM_THREADS > 1 - -@numba.vectorize(nopython=True) -def cndf2(inp): - out = 0.5 + 0.5 * 
math.erf((math.sqrt(2.0)/2.0) * inp) - return out - -@numba.njit(parallel=run_parallel,fastmath=True) -def blackscholes(sptprice, strike, rate, volatility, timev): - logterm = np.log(sptprice / strike) - powterm = 0.5 * volatility * volatility - den = volatility * np.sqrt(timev) - d1 = (((rate + powterm) * timev) + logterm) / den - d2 = d1 - den - NofXd1 = cndf2(d1) - NofXd2 = cndf2(d2) - futureValue = strike * np.exp(- rate * timev) - c1 = futureValue * NofXd2 - call = sptprice * NofXd1 - c1 - put = call - futureValue + sptprice - return put - - -def run(iterations): - sptprice = np.full((iterations,), 42.0) - initStrike = 40 + (np.arange(iterations) + 1.0) / iterations - rate = np.full((iterations,), 0.5) - volatility = np.full((iterations,), 0.2) - timev = np.full((iterations,), 0.5) - - t1 = time.time() - put = blackscholes(sptprice, initStrike, rate, volatility, timev) - t = time.time()-t1 - print("checksum: ", sum(put)) - print("SELFTIMED ", t) - -def main(): - parser = argparse.ArgumentParser(description='Black-Scholes') - parser.add_argument('--options', dest='options', type=int, default=10000000) - args = parser.parse_args() - options = args.options - - run(10) - print("options = ", options) - run(options) - -if __name__ == '__main__': - main() diff --git a/numba/examples/blackscholes/blackscholes_vec.py b/numba/examples/blackscholes/blackscholes_vec.py deleted file mode 100644 index 138419e1c..000000000 --- a/numba/examples/blackscholes/blackscholes_vec.py +++ /dev/null @@ -1,56 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -import numba -import numpy as np -import math -import argparse -import time - -@numba.vectorize(nopython=True) -def cndf2(inp): - out = 0.5 + 0.5 * math.erf((math.sqrt(2.0)/2.0) * inp) - return out - -@numba.vectorize('f8(f8,f8,f8,f8,f8)', target="parallel") -def blackscholes(sptprice, strike, rate, volatility, timev): - logterm = np.log(sptprice / strike) - powterm = 0.5 * volatility 
* volatility - den = volatility * np.sqrt(timev) - d1 = (((rate + powterm) * timev) + logterm) / den - d2 = d1 - den - NofXd1 = cndf2(d1) - NofXd2 = cndf2(d2) - futureValue = strike * np.exp(- rate * timev) - c1 = futureValue * NofXd2 - call = sptprice * NofXd1 - c1 - put = call - futureValue + sptprice - return put - -def run(iterations): - sptprice = np.full((iterations,), 42.0) - initStrike = 40 + (np.arange(iterations) + 1.0) / iterations - rate = np.full((iterations,), 0.5) - volatility = np.full((iterations,), 0.2) - timev = np.full((iterations,), 0.5) - - t1 = time.time() - put = blackscholes(sptprice, initStrike, rate, volatility, timev) - t = time.time()-t1 - print("checksum: ", sum(put)) - print("SELFTIMED ", t) - -def main(): - parser = argparse.ArgumentParser(description='Black-Scholes') - parser.add_argument('--options', dest='options', type=int, default=10000000) - args = parser.parse_args() - options = args.options - - run(10) - print("options = ", options) - run(options) - -if __name__ == '__main__': - main() diff --git a/numba/examples/blur_image.py b/numba/examples/blur_image.py deleted file mode 100755 index bd58c45b8..000000000 --- a/numba/examples/blur_image.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -from __future__ import print_function, division, absolute_import - -from scipy.misc import ascent -from numpy import ones -import numpy - -from numba.decorators import jit - - -@jit(nopython=True) -def filter2d_core(image, filt, result): - M, N = image.shape - Mf, Nf = filt.shape - Mf2 = Mf // 2 - Nf2 = Nf // 2 - for i in range(Mf2, M - Mf2): - for j in range(Nf2, N - Nf2): - num = 0 - for ii in range(Mf): - for jj in range(Nf): - num += (filt[Mf-1-ii, Nf-1-jj] * image[i-Mf2+ii,j-Nf2+jj]) - result[i, j] = num - - -@jit(nopython=True) -def filter2d(image, filt): - result = numpy.zeros_like(image) - filter2d_core(image, filt, result) - return result - - -image = ascent().astype(numpy.float64) -filter = 
ones((7,7), dtype=image.dtype) - -result = filter2d(image, filter) # warm up - -from timeit import default_timer as time - -start = time() -result = filter2d(image, filter) -duration = time() - start - -from scipy.ndimage import convolve - -start = time() -result2 = convolve(image, filter) -duration2 = time() - start - -print("Time for Numba filter = %f\nTime for scipy convolve = %f" % (duration, duration2)) - -from pylab import subplot, imshow, show, title, gray - -subplot(1,3,1) -imshow(image) -title('Original Image') -gray() -subplot(1,3,2) -imshow(result) -title('Numba Filtered Image') -gray() -subplot(1,3,3) -imshow(result2) -title('Scipy Filtered Image') -gray() - -show() diff --git a/numba/examples/bubblesort.py b/numba/examples/bubblesort.py deleted file mode 100755 index 53a7611f4..000000000 --- a/numba/examples/bubblesort.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -from __future__ import print_function, division, absolute_import - -from timeit import default_timer as timer - -import numpy as np - -from numba import jit - - -def bubblesort(X): - N = X.shape[0] - for end in range(N, 1, -1): - for i in range(end - 1): - if X[i] > X[i + 1]: - X[i], X[i + 1] = X[i + 1], X[i] - - -bubblesort_fast = jit(nopython=True)(bubblesort) - -dtype = np.int64 - -def main(): - Xtest = np.array(list(reversed(range(8))), dtype=dtype) - - print('== Test Pure-Python ==') - X0 = Xtest.copy() - bubblesort(X0) - print(X0) - - print('== Test Numba == ') - X1 = Xtest.copy() - bubblesort_fast(X1) - print(X1) - - assert all(X0 == X1) - - REP = 10 - N = 1000 - - Xorig = np.array(list(reversed(range(N))), dtype=dtype) - - t0 = timer() - for t in range(REP): - X0 = Xorig.copy() - bubblesort(X0) - tpython = (timer() - t0) / REP - - t1 = timer() - for t in range(REP): - X1 = Xorig.copy() - bubblesort_fast(X1) - tnumba = (timer() - t1) / REP - - assert all(X0 == X1) - - print('Python', tpython) - print('Numba', tnumba) - print('Speedup', tpython / 
tnumba, 'x') - - -if __name__ == '__main__': - main() diff --git a/numba/examples/cffi_example.py b/numba/examples/cffi_example.py deleted file mode 100755 index d40c1cb0f..000000000 --- a/numba/examples/cffi_example.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -from __future__ import print_function, division, absolute_import - -from math import pi - -from cffi import FFI - -from numba import jit - - -ffi = FFI() -ffi.cdef('double sin(double x);') - -# loads the entire libm namespace -libm = ffi.dlopen("m") -c_sin = libm.sin - -@jit(nopython=True) -def cffi_sin_example(x): - return c_sin(x) - -print(cffi_sin_example(pi)) diff --git a/numba/examples/compile_with_pycc.py b/numba/examples/compile_with_pycc.py deleted file mode 100755 index fb3b5ceb0..000000000 --- a/numba/examples/compile_with_pycc.py +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env python -from numba.pycc import exportmany, export - - -def mult(a, b): - return a * b - -export('multi i4(i4, i4)')(mult) -exportmany(['multf f4(f4, f4)', 'mult f8(f8, f8)'])(mult) diff --git a/numba/examples/ctypes_example.py b/numba/examples/ctypes_example.py deleted file mode 100755 index 04188ce26..000000000 --- a/numba/examples/ctypes_example.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -from __future__ import print_function, absolute_import, division - -from math import pi -import sys - -from ctypes import * - -from numba import jit - - -is_windows = sys.platform.startswith('win32') -if is_windows: - raise OSError('Example does not work on Windows platforms yet.') - - -proc = CDLL(None) - -c_sin = proc.sin -c_sin.argtypes = [c_double] -c_sin.restype = c_double - -def use_c_sin(x): - return c_sin(x) - - -ctype_wrapping = CFUNCTYPE(c_double, c_double)(use_c_sin) - -def use_ctype_wrapping(x): - return ctype_wrapping(x) - - -cfunc = jit(nopython=True)(use_c_sin) -print(cfunc(pi)) - -cfunc = jit(nopython=True)(use_ctype_wrapping) -print(cfunc(pi)) - - 
diff --git a/numba/examples/cuda_dask.py b/numba/examples/cuda_dask.py deleted file mode 100644 index 8e33348c1..000000000 --- a/numba/examples/cuda_dask.py +++ /dev/null @@ -1,106 +0,0 @@ -""" -An example that demonstrate the CUDA features with -dask using the "Bag" collections. - -The script can be configured to use multiprocessing or multithreading. -""" - -from __future__ import print_function, division, absolute_import - -import sys -import math -import numpy as np -import dask -import dask.bag as db -import dask.threaded -import dask.multiprocessing - -from numba import vectorize, cuda, float32 - - -@vectorize(['float32(float32)'], target='cuda') -def gpu_cos(x): - """ - A simple CUDA ufunc to compute the elemwise cosine - """ - return math.cos(x) - - -# maximum size blockdim of ``gpu_single_block_sum`` kernel. -# also, the shared memory size of the kernel -gpu_block_sum_max_blockdim = 512 - - -@cuda.jit -def gpu_single_block_sum(arr, out): - """ - A naive single threadblock sum reduction - """ - temp = cuda.shared.array(gpu_block_sum_max_blockdim, dtype=float32) - tid = cuda.threadIdx.x - blksz = cuda.blockDim.x - temp[tid] = 0 - # block stride loop to sum-reduce cooperatively - for i in range(tid, arr.size, blksz): - temp[tid] += arr[i] - cuda.syncthreads() - # naive intra block sum that uses a single thread - if tid == 0: - for i in range(1, blksz): - temp[0] += temp[i] - # store result - out[0] = temp[0] - - -def sum_parts(data): - """ - Driver for ``gpu_single_block_sum`` kernel - """ - arr = np.asarray(data, dtype=np.float32) - out = cuda.device_array(1, dtype=np.float32) - gpu_single_block_sum[1, gpu_block_sum_max_blockdim](arr, out) - return out.copy_to_host()[0] - - -def main(kind): - input_array = np.random.random(5000) - - getter = {'processes': dask.multiprocessing.get, - 'threads': dask.threaded.get}[kind] - - # sets the scheduler - with dask.set_options(get=getter): - - # set ``partition_size`` to ensure each partition has enough work - bag = 
db.from_sequence(input_array, partition_size=1000) - - # compute elemwise cosine on the gpu within each partition - bag_cos = bag.map_partitions( - lambda x: gpu_cos(np.asarray(x, dtype=np.float32))) - - # apply partial sum-reduce on each partition - # then, finish it on the host - got = bag_cos.reduction(sum_parts, sum).compute() - - # cross validate with numpy - expected = np.sum(np.cos(input_array)) - - print('Got: ', got) - print('Expected:', expected) - correct = np.allclose(got, expected) - print('Correct: ', correct) - sys.exit(0 if correct else 1) - - -if __name__ == '__main__': - argv = sys.argv[1:] - if len(argv) == 1: - main(argv[0]) - else: - print(''' -Usage: {name} - -Args: - scheduler: dask scheduler to use; either "processes" or "threads" - -'''.format(name=sys.argv[0])) diff --git a/numba/examples/cuda_ipc.py b/numba/examples/cuda_ipc.py deleted file mode 100644 index a78a789f0..000000000 --- a/numba/examples/cuda_ipc.py +++ /dev/null @@ -1,60 +0,0 @@ -from __future__ import absolute_import, division, print_function - -import multiprocessing as mp - -import numpy as np - -from numba import cuda - - -def parent(): - arr = np.arange(10) - darr = cuda.to_device(arr) - ipch = darr.get_ipc_handle() - - # launch child proc - mpc = mp.get_context('spawn') - queue = mpc.Queue() - childproc = mpc.Process(target=child, args=[queue]) - - childproc.start() - queue.put(ipch) - childproc.join(1) - hostarr = queue.get() - - print('original array:', arr) - # device array is modified by child process - print('device array:', darr.copy_to_host()) - print('returned host array', hostarr) - - # verify - np.testing.assert_equal(darr.copy_to_host(), arr + 1) - np.testing.assert_equal(hostarr, arr * 2) - - -@cuda.jit -def plus1(arr): - i = cuda.grid(1) - if i < arr.size: - arr[i] += 1 - - -def child(queue): - ipch = queue.get() - with ipch as darr: - # keep a copy - arr = darr.copy_to_host() - # modify host array - arr *= 2 - # modify device array directly - 
plus1[(darr.size + 64 - 1) // 64, 64](darr) - # send host array back - queue.put(arr) - - -def main(): - parent() - - -if __name__ == '__main__': - main() diff --git a/numba/examples/cuda_mpi.py b/numba/examples/cuda_mpi.py deleted file mode 100755 index 14924cf57..000000000 --- a/numba/examples/cuda_mpi.py +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python -# Demonstration of using MPI and Numba CUDA to perform parallel computation -# using GPUs in multiple nodes. This example requires MPI4py to be installed. -# -# The root process creates an input data array that is scattered to all nodes. -# Each node calls a CUDA jitted function on its portion of the input data. -# Output data is then gathered back to the master node. -# -# Notes/limitations: -# -# 1. It is generally more efficient to avoid initialising all data on the root -# node then scattering it out to all other nodes, and instead each node -# should initialise its own data, but initialisation is done on the root node -# here to keep the example simple. -# 2. If multiple GPUs are available to a single MPI process, additional code may -# need adding to ensure the correct GPU is used by each process - this will -# depend on the exact configuration of the MPI cluster. -# -# This example can be invoked with: -# -# $ mpirun -np python cuda_mpi.py -# -# where np is the number of processes (e.g. 4). For demonstrating the code, this -# does work with a single node and a single GPU, since multiple processes can -# share a single GPU. However, in a production setting, it may be more -# appropriate to provide one GPU per MPI process. 
- -from __future__ import print_function - -from mpi4py import MPI -from numba import cuda -import numpy as np - -mpi_comm = MPI.COMM_WORLD - -# Input data size -total_n = 10 - - -# Process 0 creates input data -if mpi_comm.rank == 0: - input_data = np.arange(total_n, dtype=np.int32) - print("Input:", input_data) -else: - input_data = None - - -# Compute partitioning of the input array -proc_n = [ total_n // mpi_comm.size + (total_n % mpi_comm.size > n) - for n in range(mpi_comm.size) ] -pos = 0 -pos_n = [] -for n in range(mpi_comm.size): - pos_n.append(pos) - pos += proc_n[n] - -my_n = proc_n[mpi_comm.rank] -my_offset = pos_n[mpi_comm.rank] -print('Process %d, my_n = %d' % (mpi_comm.rank, my_n)) -print('Process %d, my_offset = %d' % (mpi_comm.rank, my_offset)) - - -# Distribute input data across processes -my_input_data = np.zeros(my_n, dtype=np.int32) -mpi_comm.Scatterv([input_data, proc_n, pos_n, MPI.INT], my_input_data) -print('Process %d, my_input_data = %s' % (mpi_comm.rank, my_input_data)) - - -# Perform computation on local data - -@cuda.jit -def sqplus2(input_data, output_data): - for i in range(len(input_data)): - d = input_data[i] - output_data[i] = d * d + 2 - - -my_output_data = np.empty_like(my_input_data) -sqplus2(my_input_data, my_output_data) -print('Process %d, my_output_data = %s' % (mpi_comm.rank, my_output_data)) - - -# Bring result back to root process -if mpi_comm.rank == 0: - output_data = np.empty_like(input_data) -else: - output_data = None - -mpi_comm.Gatherv(my_output_data, [output_data, proc_n, pos_n, MPI.INT]) - -if mpi_comm.rank == 0: - print("Output:", output_data) - - -MPI.Finalize() diff --git a/numba/examples/cudajit/matmul.ods b/numba/examples/cudajit/matmul.ods deleted file mode 100644 index 869a17f0d6827807707f6458ecb61cb36409544d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42474 zcmbTe1CS-#wl-QumuzZ^4T$w~l&AOiqE000aX81V;LvWL?E008{?ez5?o%&d$Y-E58YY;7&g4D=k$ 
zY^-UWtqo~x^c>6_Xl!hatPO1poUDwj9cdhF?Tz#d9ZZdk9A*CxpI>hN>x2143D{U0 zo0&M-|K-hrf!4uM&(X<2U(cTQUv>}>5P#+JUq)X9*+1Cn85kH@8hyF3v8Od~vbX<} zgsYV$Fo3KC_@CSXfd2e^eg5z1pnpmKGaX|~8$Cy(e<$txbPH47_nq@a`zx`92d-PTP8}`Y-GRPICPasf4p4iY>Zb{)#BI2yPN-1`_5I~)ak~|suHL} zhsPn5O@!i-wMxSEL#04SvYZWS{>$vKK3sQ#is&mV?;_QtP(>; z-%EB-V-mcxiz6g?#TXV)ayixbbNTjDCuAzxq_n6m>RqEU+rNvAE3Oz--86$SoNANx z)pLr6?C!U*7BQB#a~`Pgx_aE46H5G){h32fBw2ecaBj0%D$vS$jO;7Wp|xQ$R)}Pf zT@gH8%S&QgMH9RMtLEaJX52kxcGxF_CHtnqkVE}xz@2yPg}K&epCtXj_vm2HIot4o zP(-P_x#qFDp|YK`R)L?d#Bz3UtVql7=~NAC(mY!JBTYr{=w-+y;aJSA_GUX2CD46b#&bg zc^npngMc+qgU9AgWlI+xfpQB>77Y$dLEza<-N?K!aOMqz@w8Me8T-&KRhO}vM|z&l*(p<%&~|NpUQ$lR zpb#t9m>H800wfccj!W4ClF#YK^LakvI-}*~2|W)z9&zpVNWX{2B*PKMF@WOO;AZz@ zun!qbd|CvBAi*-7@fxOpZPdCJC2)u96sCn+4t&0&YAI?#>54R+oWTXCnc;el~Ss-g&Krpw@}C<*2TU~^q|Rtxl3A+x1R~G%KwpA zZSBWM33*2zJGApu^7M77@?S=6<>#_ou<8++@nIgYS> z91Fw&#|mjH(Bev;fcx|^A!W5~YsY*(o_Co_U5>wh7P}|f*qrLNLOiiyanCuFuydW6 zx@8oVx!6Fn?{|U9VGQ#g>-t)~orxzg7F=i3*0xSL{iYnm!m^$UxfYTRHjtb%T6El} zkR%ntFfgQS)(e(m?rL(VB>s`k^dp_>NBUa?32vSvW|t6phn1)-9L3^I@Lj1#T;1i-5EaIlm=G zU;{YhtD!`*e=rn`dn&cY=MSCX817OH&g5PfIv&Kp8NZw-iTfDbu7tPkEiHds zQr0p{Y(8eiW>W^;>5LSY-M#aXvzI&S*T>B~ zn*pV336DKI5UYa7C|lb=66N^CRPme+KRIyKk2cOqRk?on+R@UxWvoS;8JC^XM-%W3 z9cPioO{g_yG-QH)eYrzL*TAtT23E#V|NVWQX)Ol!r2^9Ho{DDT-WA(JbK4?eH- zpm1~*5F{Es&8ftBE_`CHolaJL!n{;b!n9r>2|Z?09R5Cn&S5|}PfXT@pfZSkVq^5~ zl`NUzUhc8n?AISbFD0SXA7aH9rM9}C$pIaAlqwjIq;g7)$}RlEv)nI?vbnm-GT+cb za{1kq-d^MhA_4^h4_@e)RQ*PMT!x5~bi3(U;FPsP@cGpc^&lq#ARaxuaA9KQPMA2F zVUy7gq?MO$-#zyBoQWBT3x_j73Rxk6>~y2q6sVNLmkk7^Sb7j1I}jLADDr$8DwLtM z36^O^IIhfQiRH~1bAO0p$G+SDI^>a+Bd*>-WYqUIXqyc8W!|!3Uy#5npVqS3HiK|$ zb>jwp9O%tn2?e&xX{Ge;DE1@Y887S_J*9(Ij&|(X$)feJ6C> z@}K)%xU|Xeg&0!={A0K9lISUts=c?my#oRO;A%7^o_2p%D-?rI$MGZ;bu9-V=ot2u>&fpjfRJHEok&TZPv8S_|&7 zQ112gz{pOK5fM zj}SU$-8D)h9C+I$J3V-~_H;H`ym3Q1Vdy6?(rs9ZU`-Nc-Kmc5o6zkEix_x&-_hzM zRG1|R+G;xE!A$uPh(bC6#mV1@KA7i>botf>5B48DsR%kr(T`Me)1u{GEDEnUp*R+< zgV#TC!wZp5?@V8D1M%zzVo-$Pyw@HZF2*&uYMbD0$LQGk6jY@99y8I-HyFQVSZ`61 
zuj=;2HbV;7<0^nv_z%_CW86UeN!t&iWJgs74ZsPT6Io24G6!=QxXQUQOqO8~@=&sE zQdqy6;vx;u;{r5UL|%5hFg&fG7kBqKHWoPyM(D*?Xxf!sAoZHfB`XGKY4(207Ww%i z3W1geU)oNA181La-H=JWV9bbOFaoTe5&-AWLHah_Os{kQU6<#6w?&^v<}JpW1;@;f zl!IbEfw^2WQ0*8W$4!|=A+Lr-F4iOY#vRixd3AKyix-O#Dm)RF9TVD# za26FZGX7@wv#>8|#DPP@Xc2CGx!!{nkEE=gv+aT={#Z(#_x^FG{taqZ`uY%qyW)fL z9Zad8565nI{r%*^+v!mV1!3h_jEuF%$-PaqYBO7~qYG;|$>&rL#NV95i&sKe33L__qKrJ;euQ*Hs+E5CQ`!NvtLD)Op9&?o{iNXsT_fLs z{z$Saa0L=+6%dQ);h`hDG{+{woaNF>sis9z_^DFM!xyoE)=s&o?Q8k3AyhrQNOQ+o zH`c^5E^QxuT}v;frEIP!2U48?b%$DPDVe2_vsfeME2Tk?Z5&akD9nZEI7sN3a$E_TH!O%K^oXI#&Ujc<}j??-{QPRLuq z1^|vLGZ(hpVNQqAaog4@@WZ$-_al%6wdd#YYsBYquFpCi{$}Hc5rB3u$P=_-KYh*1 zk}*~#*6{F|VKtZszjBm?;yTN;EX*JqZpQbQTb=N8rG?Q`d_l$Y_qijkP}8oILq0ef z&cIi5Z>gCx3Ns!6m^3NOgN|)KF+-O#;-g|alF3e9>Fq6KJv(jmRNyL7lb&6VEg9H#ZSTm)S`0YD4E2+UN8z8TUzzSA2B6ZO-> ze@7xOYJ_HjgA|jSlTDQINQ^V66L617coPlBjKAS?)r`*zE&u}Z_AK=JYqIjshW$-48%Lr62Wc4$45ze_q-r`T3*C`emB;zwA$=+AoEXDdPSg zr+pg+nJPiQ%lIiC-IT~XC=<0&#C0T}%MuRT|3xS9i={h}F6$1^B=V2=8)i0=CmJ?z zs2Erj@B<298~(If?%+pAoyfr|~1~jnfqr9ffRcMp}L>5X88O2#MO!%zh~Z^2tJs2s=eO(6j6MDK}b0!JSOBjd%@t~T_em-YaWsO7k> z5ndGTLCiZ9ytWsT_?-oNhvZD+pa4)&sltyVt-~wO8m695r#g&37YsTc7*KT1HTa}V3 zsX15BH_FZLSt4xdhQRQ&6^UWARgP{dn`4V4QXmNUP_i{Z{s;ki*X{MaUB33H5FVt3ZcRY$rk74Ngeol4Q@w{ zVpKH*2J$Mcv0{OR9aczwu(iZomms;HIC_D#@>>;foFdF5TNF!WESctmt zpzc^8N7}+fbn-Ff!VpwXq-6+!Tr8M;z35T!a}5{-2-=+EEa0=##({fD1K>)lk+FOA zA9b}Dazi+CcC5wIRSHLo6q<8(v~w&O2=ziNL=OWuNz0M&)TXsMqc(LDp;FwG%S$?y zWfEQQn>ieZydS?uipG=}aEh{CEuB`MZJ#^EQaVu@ECmS8Wih*o>dX#RCig$`CnkiM zdEncCr1U6ThkjYC>W$UO?8#+vBP~3 zZwz?NK;DmLU;}L6q|`EHW7p@W(2T1C1IE&tJENGW1Z-Kj4pX>54#nlaAKYT$4(t(s zO#mT*61Y!2pijKxFR)3AkvsQcs2KwCrd-XxZsVI?4IwuGa&WMS2}!>>CE>*`fs(mj zjKAwIChVtsU&YvK#sapPR;$RTR-u%D0#6&j?@2$%P8VH8+2nP=j_n$>x+gIti^{>2 z9o}yqYQ^-MuZHpa!s7N#JdIB8{R{;a4f(i60f|ySaZwFo4faR;{P;uP4AC0q-BG_$`V2imoz5NIsz56w8k?0j_ z7cZFnd+s;rLkz(e^E^Mke=Ud4-hgeJ0M3$ngF$9g!~~kEyP;nXm@!|AR@VZgVUXgu z>HY3i>LH~cUJOS2VYX|`r~%lHk!&dKa3eVBQ>p!`@p>PiN;B@n3j`PsQ(pLhdF*-p 
zbq_XTz?dvM#ZYSKO-47ie~)dC?gtQ4Kam)yf-1v!p#umpoVj0~|GEtT7BCw>1>rY6 zv9on)B*er%+a5h{LMAIN39LfGZwLCl7`Io|*uhY+78QgQzOER^J5US{fQ$)cqgMGX zaNxSz?sWZfRNkhNC<*g)8fJF^E2Im5E?VMRm7UPb$u|PmNMx>qDvA8*s8Tjz`kYClAe&a?X0*!e#n@yF?+__m?MuTdx_fX#Ae(d+q z#Ql;q$mTR|cB;bP;-yZ1h0g(m2iM+aS;l~}02a(Ibu9;55oW|EuT>e|qJ)o$8-6ht z(!t3ZGVL*#%Ng1sj}H4GDXO0%WB}Cr8=cy89E*9-U6Ihhzj|ks`O!ZBbG?;Z7VZtR zxQHHe@sm>`oS^F`*Z03pcbcdf|3M&ru53}M3##~X?G-?f6s}Z;`_tKYhx)c;?AalG zdrVv27NFx%kp+F^f`^e>zdzV7;rX9HQr`6xb8pn4Pjx^;@)10-` zqxK7gM$et9(Z8sgyfPKXKyEQp(R@e>>rW3TOVl4d-%})h`b1EBOcKq@Er6IcaE9pJ^_dR?`(;hwSP z_b`q#vb8en=nfH>HGhkS=Hvln!y5H;)2i+xdev8W>jQ}GzMU(er67wI$$1D6Ya}es z(Qn-us)0YdiW^+qJVZ5{BcsUC zL=jw$UXPp_b8s#(2y&oTR(E4PWM|rHk_cnyP}nM+F!6|o$qi}%r4-ISk$*Aq5S#fq zqGIIr6R$~tRKlcp^ApjHJafC1e*G;+Zub8p--)_5_llG2P|}_jGI5uAoR}%;PgG5t z8@!}GX#O>4+^!%t=n5RktJz~GS2E#R93OcNhAvaG>_I3r`)}c=In9tzA8 z?wgE>y{4i$<31y6awfT2aA9+U zlyHA4g{YQ}0cdA|+0wEMW$XJ+Gbfz&{XwX<-*1Xn9YNHb}+g^$bm#8Mc)j9LFs5zWR1+J z8$W;8h=!{7w60B8%vK>641E#oL<+h>a6ZwXVvLu-D<<{dJ7Pa?! 
zMU(4ls)^m30}|X=Gub}$4x5-81Ae>ICj_WZokm8DE_ZMOJABw<`GLCJVd zfhLSPB8(%`fxP%=(pJIBkKLj4ti&?%#|P3axkx#drlB3RbtDw7)r7+G&4ld#NR)o` z00?#JL|XkXIa~qR;9TxAqQR)GV}EL?J&{3VZ426%o3cyy&T=-D5qFYVD7}+uXMZXK zHV%DJ>Dn-QzU753`_##U+FuTMQ~x0VY~pSC1xAF4M{VLg_-`bGlWET|dLuRteMzMj z9h)ChV?*tzQw|I{J{m#NU!6M8_l~%a4JYmj9IXzEvo7+oO%Y0{BS`Qm^i7cM^iL(Eh?6@pnd(B3B<`W&MnNMt zup6cBHo{I`t~Cpn6**f)7y_F>*M!tA@+^tCL`zuRZ0!n5a0~DX3c#_D+^g+=e=(RJ zw@%a*xSI=B9(37q!TS;>1IkXC!D%y0O2JhYmvJq{k9vy44tro=)&e+D(@%QFFfHL& z$nJD+3vRcfQ7`zOpSf?jT&*I`dy)fjEz=xvJ+Umn=&Rg`;kK$MXSqaCI|F`imqIR% znCKL7Nx^HFVb-{s_!AN~A}(D(VKlsHl-8;lUaQBX`(Li5KwVH9v^23jM%lI4WDc*% zpsj^O?(_7wzzyy5aK*LIqOPT&k9k2Jc2dwhN+G4an)$!-8??~wt+5X6ce)We=-@ZQ zVUBslI&31Qye31AzVo_%IKYqEH~=c|x%R%lZrFWdBy$@9U|o}+Me?C(hB&~K2j1H< zTZRCx6;)VleOucI9|K(KHUy>{-pJ&P!Vxi?67g!g;v%!{#9-bCoO$4!ggXF!3%@;7 zptGd=7L%L9oA#|5xeAPS_YJYJ#Z3r6fXya1*w}p4N^{MQVwV+tr6jCLIW3hh%Ry^ZKkblN(Pe_LI4pxRv5niuRwS7klqp0_5)$X~3zxbnAz9vVGGzipV-0XJ z-}`3hxhhK6sU5sC&FIS{)$Tp0Oa+~*}uXWG5s<_ViKU^UR z^R?Bm2IrlZLLtEEZYzc!mLthgyN&VL&#!4G@d$jCxn4Kfs)W4y*lY%8afhBk=Ncf- zwTKyf-xZFA?Itr8cwX6XZh7!t&%Ah?4;4=9UYAkcy;*D+WODTM6PC%~CM%FHh4$e8o1ufw|LGSThza}aa{r* z-RanR<8XUYx7Ol#HKHAs3BGV?NtKmIoqn!(zl@89ytYp!zFWS@?1<3}9o(Z$HZAz0 zMSXhOfa`Sq#1(?M_0{C=0$6|U)Wd-)V4(p)i1l3=y;fkmX zM(t1*SDAM{>h~-VX*U&jn!9K7rvw*j+HJ@FV0777>LA-vnrdTf7I8FFc(Ran`$hE$ z{0G#vJ}Lu5LIMDEIR5)}n*Z;hj^V#R9kmzla$~IoH7F7?bfKdJ6y|_B+N$gdCCn9+ z<(w`kRii&V5@^s%B&qNfLjE;)9=#u4Mkw(IL#J4`$PSTtDCrU6U^dq*SM2_ zOBY)k`qkD7Oo17&vttk}=f?Aj1{F|sbCZ6$p|-8qWH)_S3act+ZE0QNMj&$GyJbrv zHPlv>1Pq66vTijJ$$ZA^dA4rC(hM5s-uO3Bj^(|!-Gy;Xbm2xT3;|+^Si3T-hBabF>smM2OrCp$CS<_+7aAyOXES=nc?4gmt`;C2tHnCFa3grcU+T=HKS^y+rc+EK0bg3b8Re~-lnDMA zK75vlbOUG_`A7?41yw;+6Vs%G{M~EBkor~M=G2`1$;1@F3BP1+l_;Mabp3;Cl>x|bAlz4SK=Am!hzi{-yr(?$Pg zov!@N`q}tz*3niQ$tX6{3X|72+j;B{KX}?jzI$Fr7~ui?VRd`;bhxEs ze3D}_CS=JD>`ie%96Elf`k|_C{fQBnimm&&-A5*q9y-tcxsk}i6n40=t8Wb*Ke$E; zrXNdP)o+Ap%U~56W?-eHKC0!tG@6BuMZ5on0!J-WKa`EF@hbgNxuEDt;U*H$6N2*z 
z2Mv=fFbq5*BWSe(dHZiVDIHz~+57j@Lh0R1v-?w-uu>R8M1&$oC>I$9TO*~3oN$35ZeH9DoZ%%1ruZXA1a6533$}lS z(yQ&O4=(XWxc-5Fd{k4+X{$E6A@aj@@CG;2Tw(NPY?S(v$}MeT&#E9agcP3}!yz#t zPuv|0{Z(xy{_R$oF!pc73UIxgUl33~zJLn{{(^KLI*n88*@_^0-YRvXj$1DLWz|(4 z7CtO83esqUc;9~dx9QKC)G>036#oP>U@2@1V|kkVwe<{N33}mWO`A{?C#^cSMlx}^ z&5SJcgpdQVNU=US1cTbqhR81rm<@YVeh~&g^&m)bQZoLZrJx%*;EgU^X0Jrn9zJnYvSR$R%(Lm^Dz0j7(=qby<9g$gWtA_{i)^l zRh{m(o$$H{*@wG?A0*TqRz#>y)!z8E-ns@pA}*KR>6OtRp=R>wxP7~;HFtGzT@$|1 zak)LznoLLeRpm2RT^KVpX1!D4BGCCSTYA7KuPiI*sX5kdST6WK(BQWl!K@>Uogpf` zdKc0$wB3K^E_GuapGF>sBi*w_yi^d%YarS9) zFy0>2SutGZ|FY)JE?bdlsNw!i?^-;imUu{YW3k=CdfE)m`|aeUC*P?ETtnWy+Gxe? zY98#O$H2kqek&JjHefpu7g~lB4mWj*`{@Npi~H%!b2gxYf_sVMoQrLtpZn{y=mf3S3?}GQHm4pg0?#l`nFVRDXCMoDg!)h8&D9v zBF8@K&8EiKE*9%SRg|@FCkwJu;Mf#m>4JK^{%veIPi^m7Jtf_Lv5{A6$g0aCxv~*% zYNydg%Jg-C!t#}fdF^5B)Un~+mAc7GgMYP2N^ccziizd`P|FGr#&(rf!t~WhVea8> z{dquX@V-->b#?Z#0>!bTid1Sx1V{a`Z!bDlXL!!Vuibg-*nT%zwxrCXH>sDKT5N6i zz)Fqi5u1Yf0&~B$)udgrxtKZ3W3Ao6Vh{I40mj*YVrHIu25~#Pk@&*y_MkG%<741f zCy4yB8CzGW^ZNz+MYcz0cP*YrVAHPL9L8l(78`4?IeMUnE=aI#Mq^_i>e@!|7(sK= zNU&~dM+Zj)j)=Ev=*z>l+vILLI5Us$$hKN$PER*iz-@{Ux*Gbg90PLC@WlkI>VeLn z+YDDJHCaHq;9KSPVQb;KtiET`cDx6@7&9l|`hqB`GG3%|2UMp6&In_+h;NxfL6doe zCUP}(zxvZs>VDPDsVN>%QM#Ziu|-g32`NqHnVZZ=n#gslDeh90I-$<9N6^?IySYuQ z{0PWVecj_%R3LWfH*xKUHNP~CyXXb!dpGMkk~ZNo%8U~i3IRLOs>0oRmCUzcF|p1L~;%KbClARMZ{ammU%ST9Uyq`OfTx(bmkCn2_fA^MA+ zW}|_BBZjE7$6LrPB*?U%$tTuf!!O$KTVt+sip-dwl~Th^>%#!rw0T4cnXF}A55zdP zPPh-ti*8FjRutiW{_r?<8E~IS&CeyPtDg zwuA+BPA`6asLX*{?6LN1!{@*7)--I~5l&0=b?)wVVR%IZ^17HfGS=ulzA)0AiBqM> z8C2yzCIMRar{kU(K4RI>Dd4Qj#JoHW_+1n(?%n3NK#^W@gVF+C4D=ug^HK-+preI^ z-}wB&jX4?%b(deb5yb2NKW=0^*YdK&Rzb+QeO~2RHF?vI~KTxf^+?U_hC+*D5d2QdoDc8n+pIU-{HXJQO8=r{2M zL`qZSIdMS9hM;eZx5hax9WEZ;F3U}iD{EqHb+5)QhpdcUO|Q<*m(~iNw(CbH@Q#2! 
z{M7)NIVs6N?JbD9C@em;IIcmcjUj-zLlaQdR>sD>H1llE9>7hlUF#cpptwDS%9@n$ zVBZA4dzqF^pYbfYL#seh?;|Z-7})}(ga?*jKyaH^TovPkOy&OyxZeJlc~ypkhPB2J zvs|*5(VyssKbC=Pk`8sNN=56_fTDKkPS$lJBWYM&JCnj%}Qz1qacpS}wVI=b#CQ|bw4}>FLj?3XslO3-tXe2_8P9z1jF?rdlKw|;#Z;!Tw z&|IZzUwwm{tW6Y_wEs|%ii{{=In4T^JDCTYMa>;H%=(EBAB8c{#H=Pcqf+;2k|+^$fRY3b<1D4aX={pwx;ZU1S1VJk3O6AzJAB+zFi zR{hNURlT^`pHTNdHS`yPvT2@l_|$M}7Q4dHTuY_^hvsrYd;fI6*ow^8M58Nez~Y!v zEd^rV7#34=osY3 z!@y84Y3HR?)uBg1J~Nm1nx)pRPV|-AlX>8)czygkJHsdNx67S;-^`*XSMHy0)I8q` zK9KR?SnTi3eUMZd3KGx3Tj)SyMI~h<40@39=6@P}-6q)6l&<9CYGtok3I}yMnLx1|DT1S zD^<`e+TNe=|4U`8WjmFl{s&tTvvo}Ao`Mn zmo}G@6k2Y^Vg1MC=av&rG}6GYW4QeEA0x~f0a=Kp;A>^ z_LqIk{sGs2m-D@c&##gIXk4+;Cf~>i{?^lwW$J!1)%SE9v;FLzJ{tCV?}OwlxO>LD z*bHHy@}8-OcyW=DI@6sO4E5$o)IO}o;C@Ymgt|^enS;l!mkq;v&jfpA72^%Tg8zCw zV%r7Hj*4S|rv5Ho^#s|q1WeX=TdF)}YDHWg#=R^!i;_Vj2irFiXjOK^PfOEEU-x)O z2J1U}_1s2O?iunnHS#I)hJdLxb_@!P!s!`m8VDEejWUXBS+B6+)vny+R=|Ccv8}?~ zr0!HZ##zfYgKNDXqigB%95k*)HDj~q1~g6A7tI}Lvl{MhIbu;wG z&mhL9D7f_09!@!))n1H(sMhi~$8Q-z6!uSxbY|GA`%DSf{4z?hVx8_#WY4S-Qc^d( z)^?;V8uTN^(C*M;H&!^Ad*N^y>3ah>lf;kYq|1?ZaRg%7XM4mx z8T5_}#3N)L>?1cm1mhAnCE6zODu1+Rk_g0jl%Z{bz>5<|0C1qBBpBSDKnUMnr7l}W zCxtB2%=Wr*%~`0c@%Y^!8?KGvGakdIZ$1G=ycN1Dc<$rz4P-t|>sqo6d`OJd&q}tm zf>w~|Yl$yOP1&IxrquOHW_^mjad1z~+qxq*6cw3}QqIn1kaBxFBLw`O#ooU2tQwuW zv#i{?b1XjQUcX%25b#+PI)+qAP0v0YHF9tFka>Kqd2Eju>H?PgxK;Lb3hFF4T>L0J zG)nAqH0tj-Vm%7pl3;7-=1BDo(PNd|=BV%jbl!R~vEu>N2(td>9F`MchWILnYc1jd ze#K_9TaQP5G2pk-Wy=xX-yS@hBOa$FI(I7?v`Hj)Spo6{Da||RFs6UjG0VoS2vd#I z_h6IBvIXhFtqr>{AD0vvx9`Zrn>_!6S9`C5@H3?EQehM2 zPDSaBvXV!cdApLFjZ#HZ>7<6zWktAK8D_guRCX!;l$y-WnaNUX4e-xeMWYx>=yTkH zBl*Z-U9s>-YKD(}a*iE~1q3nvmq$vli^fflpZm`nWva=)Lp^=+ID>*RJMw20#%f4f zPz^NXO(d2(MKW<8K;NUIIhbxVUpYDcv_kXq&AEQLIC{G>uD)8WuF>~S zzI@TRfzUEWOm9HB#(5WkK({GnG{Q^cGhn(8#HqCDVG(BXpGs8wJcMx`!LXeaQJZFb zZr^bRPR9&=!bxXL?_gv$?)rWD&&Euz5WBCLtOFx&|Eep?e315VRQP^v02sYK1cbft z%Hs@YU%xUx*>N(zDs`sBNgX&>Mn0N9{5-Y0egdK&2{rf__SsPdzmoG?d!gfwCi8+{ zV{`CWjB8K0ekRzv(|@*Z#}s#nq2aOMn9yKw^BHX9XxBA%mkrh(ytuAn3q1ew?vY?E 
z$4+^1ba9*@7H+#Tv@mxZ9ohkH}V@ku>ku$u>SlCZi=uCkkZl8}M$9n%t{ z-|mSn5}-jAOUrgD_-XG4+Nx5JCLm3A+r7sX@)jOkr!~wzZd30DP|Q~Rk%)e`1|4UA zdzTjM>xQb>d0m_M^Iw>lIS5IA>>B`pU(o-5Ow9BL6Qd}h@>W%C;#E=@ojP_E6w8~<9K4dqYNxL zM3?3Y(f@mQi{~2H)jOf@Y0b+FMmOk+83Zs_aLA$F`qPv3TA6+30}k%}F!XCE>J!pM z!;W?~d@Px{S#CZ*(xL9TH`Rg0IPk(T30=4cDE}N=;Dait_6`4)3^6m^QkSFdz)kkX z2F&S>1q)Tg=G?5vn*e{s;XtE;CDgWNej)4;diTow_uT~UR;5%^K}$|s4T!@GH5R=%MSXky zgdsIMquG8ErV=K|9b%ZKALt84AcT0Bh zFGEL@vl`|6QumYr{Vx?A#nHc2@b>4kg+5fxH;rgEH0OTLN2S|DTN4!Zj~Z3H#ZwLn zDQ4VQ6R_{Y$adOs{AJ!0n|T99{wcxu-$w&Ve`T`HZ$FCl^DSeMfhKPy(FJOR!p8T8 zNt{5FfwfS`?O&F(LzwDGE>QGB&u|ey!;h^&dx@-qZs#M`f5gG$Rot_esPPIL@cRui z4$XfH9k~9=K~DP>Bt=71+D#yFk*DqOj~c-KzN$dkSBf8$aAdG`H{kN$eY#@ZVwQea zZHMdr0@0=4SqV)&sRBqZ(E*Oh-q4{QOgw@TPr+{hpb4)q< zK}!}X(s4}4FK+Oxi`*cZ1A+8S{>JL{u3tx-=X?LBlBG$66`-fW_-L%W4Z(YDKTTJ_ zH&LGM%Dxbp%xZ+w*C0cUm1$&ej5PQFgIETz5YSM9ns;vNS>FV+AxT_^@3V*(w{Y}t zVKs~-YjVH(uz$QLm1ucmUWfkl5{WGN#>z0LO|u(8rxiIxw$5y-gWEb4YHb86-h4GazhLvfH+`Gq=o@F7@^}w-ShrOJ^9eR;T)61RCNshH#xssA|qrF z{Un_z<>^?yGBF6&e@vP%M&NoWVZWY4{=M6O^|SNtN&%sJcWE+Y;0=btyb)9k8Vr0V02u2f<>3ydT%)F* z*E%CHnRoXYDmHk4Z0eitabsLAXBsnd-etthROVS|w(4M?KbJWMGf0f;uYQ%D!R`;! 
zWp39WNWXIsw!=sp2e9qFoqJI|96+?+^;IB3*?2lV29|>;SFx6Dju6%~gn@~URaXfu zRS%U)F;RKpoZmxWC7-O$Bsv7T-3q0Rx1kNd=xfaDm1eOK;sm&aw5z#olvg9*uAv#g z;g+(_2fP|If*(p9_qe5Gfj2SNfQx8I-EVbBeKBQ@d!E0Ti{plH*6Bv2;ER*>+|9P) za5U{_Tu&dC4_n_Ir0%v53xj~nB>Nmdq=AZgx^hTj580{R8@hCoeHSIeJ2NC3 zqm4^H+!M%>CTsf6u^btKE0EdV1;3@k#(`DT)PO1Fz+mPIfFN!0HUWbotvA6ryIjo; zQ-pzfX>+Bs7mW1-dxJu9OeZ_}oixV6>x6`FCA%}%lX$dIN6{qxoFV{`?;?gip4djF z9xr%ArJ{Q8xJw@NJR7a3#^XcQqSRw@PV~{S$TlWNs~`XItjzF{Nh#wbJ3;9z`vJM5 z_+9>-JAeNydo;A<;`t|k>rmTcy-sOINav=^1!Pj%Np@FCtp|tvU3RoY&12@!0~*mN zUL}xf(q)84^y^{vRP4hpjve|`UQ_7H7NDkvE(&yy^AZ@5YSC{mTe9TK!E*>pnyeh#^(aKV-ovBM?N&J8TdK+&v|Fs$9DQ<6e?NC3ZH0~gd#AIO zx~o=(;8WCgm^DA+SA<1o*YB!PF6TNGpT3Zv8cg84gb>SUqBpc=O zHXZG1>qZ;p_BP5vtlxJgO7D!7-0Dg@)Mne&m^aGRZItISP;QOq-D>^YjlKMXU$7Qf zG=XKc%k71+&zpb+?wl3dn#f1qD77emzxV?+JT=S{%87EvtjrosIsE=my#4ic#~ObA z{XmRZ%R=TO0<)TYBwl|Wubwgbg0pi|{r!mD&B?T&<$>Mp3ojn*DFuNulldqhjbT}5 zt&xyzc!{-1$VcdCP-`ouNPa|1h{zNCL7p2aE5FCLcx;UETpE^)YFqY9w!J8vOwAle z#y3;sgY$R2D3rGOoJQcTSY`unSadaLQT+4-8YWPC2?T{>+yZRdT;r{KI;7t;@WHGL z{U50-ZGIkAEM35m@0)4j4Gu2opBnwz_9;L7_`O$=%7IY3@nmv~j|UvnWZ#UNX{6u0 zVMI`1TbMfp0JhS>291r_$3NO{NBB$9b~CT8oUfG8G-m%+w*l{-sweq_TK(2^Y_3`g zA>(ucVij3n6rbn?!*Y*J`_5Lfa$C) z)E+JxQzo}J=tI(%g!L@j7A#mR+L0jpS}yW~k(>w9K#G=U>p|o5d*TU=ua`(0Mob4! 
zjzU>bLd*w@Hqcu{z)*!r0rW>)HJ8$F9l$@WRq`{}`H!#G>Z$Ah1|z?4CV+#Zo28M% zpZ~^KS39*{WkLSvDu$oSHT9#9w=51@U+SsWTk<6#cXtC2&NC!TB#%|Jd^~m$6|b#q zTyZ>VWfR4}+1(y;DE4${lS#W33@qT%XzK~ObRnbds`Cm7@wP(A6v|>T;HcZW%|(U+$6sLeQNdG z8LjOZ6fF#mDn(*yd?Bz8GX>2bLlN0M5TPUO&Of5NteQg3m6A8gN`5r_&M$PNwCCGn zc{&s!AkfgfH@lRqgVb18cKpc3ePZM<_BDO_4M_>RB8r2^TY}>YN zblJ9TTU}*5m1hJd6bdPEFc!~ZO-sgWscdT&F+@MWd-84 zv4=&>Y}%hcp2N3kDD8oaNSJ{M>6u9V=1Smg-3SBRdzW+fVQY|7xB2WU(iAOOuUEbG znq*%i&)UJtO%`soR+2R3-d*fqm~*cwO&MN_4Qv8$T8kvE$jU%`=e}cvV!iIHEaERt z;|@?2N6s@FNLTMj70ly?3Gy`0v+RoVr%3IgiG2_KWkI4~{xb>_WD#)5$^2s~^|6>E z!WC4Tg3hUuH&`@Ci^BA|X~9v~IFCwtpKXl*taQP`1~eVy0-l~kU~1pOC2d?vh(ZCu zDA<4PE>7^;{$hB-170>oMqEa4SN0&plUjm5P&PoLnrsk@7>bI{lZ1#_vu+JL!y-)u zV;rs3)AkJ0!vz3V^1JQcvTMVcq*k0+{dDK0>o?iw#2FYK@i>qL5tcm30IqRSU?wX} z140n(d^jO3(2t!gy{w%d%=RAMz>Jo72=?Om92H9q=V}JnY-?}Y$fsc#ocJg(>4qVW zU_Fp#8LcI9y5nR4=+D#qSWp%@-sO^OwJl7tmh~DquU7?+R3?^ZVDsD=gZS!!s(_Re zR_K#NmdFptrl218FDCgL)mDc+9utz@`N>XM%>(WvAU*TyG|qBu98NP%3Xb7@_i;fm zlzX;NxUgK~>KWQp%T%6uN~3&z&-dfSWXz)I^jGz@ilE2Ptv~ptZJXMIA*85Qm~gUu3IZ z;}M0^T%t|h)K%Pq$h2DM1Qo-&lsW5v&d6%j9-J&jy&8cX+r5+bym)UkGfH=wS1uZ{ z49D&TPo3*ZKPK6?2qIS|LF(p|m49-r3k8hC2rk+}K>xt#HgkT-W;fYn-s7BPHZ7ap zs!-NUGYXdkZT?x0fI=tZd;BQjnf0psT$;rl+)rq!2jUVDiNSXz*f`;wQsU1IaFSrz zye3U>Z{IMCCc8MUwXZ(bdO^DiHv7X~3#+F)m6rZzsg0}>4N|HR$pZ7afWppd0lYTl z`@&%NRmvi?m%#OaMv`+ExKfKb%#5|tjb~G;e(&<WZs6|{Fv72ZlF1TD9jU;=j zJW`&!@Z5Qs&R1^lp{J~?^@<~1>G@@|+C z2L`4yn7PMvQ*;bJBZXHpCG$h#jl6u{@f6u5<4jty?dq4%#D%h*+~K;Nil*9gqBYt) z2|EgDx));&EU4oLY4V;QbYBls#CYd?JfF6sZP-}IPJ{YiBlnW<>N^5_)d;duPSZVq zo_lnewiNDj4PsI*s&trw4r`S)j&vrson+N<6-D%u)>G-k88s@aU%?q)-#U)RYb42X z@X?^6bpVu8bs0zn$C#AcVdQ3`w&W92M^lb>Z&v=SOU#5!=OF73h-@!eI zFcM;dUq2-o{o2Z{Ln_uWbAk?#5%yzBV!Uattjp+nxc#UO8q~j!r)wpdMYjEkR~@`>t|O zbaoN|a}YB1Lbx+-XH?-#r}cTBjf%S-qqrc}X&-cEXD`G z%vgAT&~9g_8jm{E0#8AG%e)GYcO>OP$LU{I;~+5OjI%_AbviloaB(c6s)(qu&(^Jp zSDmRv-IOY7j6B*d9|{o=2}&z0`~*rGqAZs+Yp)WD-hRVTxQkB2!M;IFaoYHBjJVPh z7_@Tz^^_=3@_xOH=-Zg?B{bWxf6m6Gx)aeI0oZr|kVW#~r@07t;)9&KPw8b^rke%D 
zQ8?;Vl{dp9`v^Jh#v&nJJxC%$=*GrD`-5#4F-aLX&sHsSOOjkeA!`Nsn9Z!iXv?|1 z!a37UDgNk*BEHq-PLZL*f+1Z-qkaEpn_`!#gQPiR@P1**Q(9r~YoT>Sp3Nl_R!q&d z@SGZuNN7UKKuO-H23E0DK0h#)-e4-8MVY?kc*xVUyHBXVNw>xV`=Pz6L9JHkC_u%t zmKbwNcA<+~balI2czPWa zu#IeLLPSwS43&Huv@bc9*Q`ROt51z7%k15-FC4;0>jd<523q2CveS#=<94=JHI=3% z{h-1L&3S+cyIJ>03^xU;<7ESx`yN3#tx(`|qH|Q`T6XSQJXcCo;%3xc`?%{3T9{F7 zs!mYwt~}}#(Jezwf)`fyF$AJSjL@01H{-016GW47pfeZs_yIJSY#Pt>So6d&J0$f* z(~W};)!w7Dcr2XZI&l^0B{&cfd^Zpk33t(b*FQq0%AfDmO4h$SuXJv54Qz@$TOaUg zQ0`w9UD|6*`r31tgVPk^>lst z`NWeWsU@2*?3AxyGjx>`IqLZEazVN+DTlWJUBP<@TH>X6@)ck?_+sZ>Il5F`H1Ou# z^>#9*TDLF(LPa0G?Y z()ZDQI1ZVTpQevBHj1Tqv)a*)j%vz;K1G3hlLH!!!Xt(-v)OODt%2P6J-Vy{ftyC{D-cXPc)HdJze>GL5RZt1I81R9N)jDklU9VD-IAujvlwh-PZYvl{py z12o+wL-10K6jsv`g$6Q3JbAM=74>^cC6lRmO|%;uD-Tz2>><3pN+XKhvDhiYO0;W7 zT77LOEzW?DJxq`v;+M%t5dR-jGi=>GqSZ#4)kh+c&(s@U?b8jaKpLrWtW(j6M%rE| zpg|{EY@i{hL?D*@wV~GPZP9Fe_onluA(gur%FW(Zb}Bgr0GL7lRZet3=RA|(CTLpw ztu?@X(TRK5J@Bp0Uv)Z@D)zWYU7=QX&L)6mE6%pZnv)SYi}11NBHL4qRQsmYE`spO za4>3s;rae{;EB{(+#8gkTI+$$&rPd72za!olfTX_-}FN8KVbhnXhf2asF8mam~^7~ z?;bRe|NTMZe-U_;%4&251A_OZ8iDGoxV3l%8^v&!@~Y3b*A7Q(;DvxFYjP6b&DvI! 
z%Xo+l9;fKTvHsqV{gt`tU@PK8p1xzSLBtegM#52{x5r7V8)6?$pXG{tgv3MXX{X*I znaIxhHQ{f`WS3c?Pn9OsVTfcYh1+D!ENZkV#FRM&PWa``PooPnyzUcUS>KECxNKYT zvoAwmE!RZsn!J1NC>0Fe`f-18})O@8a3xZwvJ0mM>Vx^~TGR^B0I{D7c ztAv+XAhVE**)Hb_kwm&q=wrFg3HgZBljA>5=Z_Jk_*@3GN9J`p6WwMCP^T+iPkRg^ znykGenrLF>8qL*0FQ%Qk6fLQmiSGh0BeD(NQ11^h+q@ty8)~#Cx9$itbeTA)3?$$H93CFdH&PfW@KdNv zWXC1*3MbROxW7tR9H0R|O~;yw&{Q38ez=0A=(nZjx`|n&W+Bo|=xdEZ)71*M@xvC6 z>(7q-p0L;Xm4IW^=y?c9OonieJRz&pR66WYK$BR=*+{IekszpXFj#m{*MJ(dv1P5e zr>zk-c0urYIGaHTcxMnfb(8b>1pj9l+N7lQiGGzhZ3X}DmLcN5FT<}|t_FX828xOw zwGCuI5P5uu+B1a?6rw_f4x1BcUS>HRsUz>Js*PjxpQMk8kEbecYd9QR+ZAiNq{h{r zVjW4(oT4BD#4;#iGLen8`?<}$j1Hg1o0a&Dtje64g|BkzR^OZzHbaWjBFi9|z1$!~ zZHa+CwQ8D%)>@{X(tP8%ktv6h(@T%pulh8f?nfoHf3XFWl8Zo5i=)fwov;L8se`n+ zpC}5NE6dx@xuNQ$%ErZ9#GIm%78{&PeO-FzdVG@dsZ7-C2#gF& zxG(?O1Qk!YmsGl}%A!!iS!~cvY9Y8UA6D%kG9hndYE_d=$u7$v&Y-DIl{pT2P!Xf&6-2?y)>=!z|T`*J0x4fLBHY6(at8LShMxQr`zqp zzqfxt#3H>dd<8r6{`IArOJQ?IYO+$@gKmhA=E!LUi^^dm14Y_)kb- zF9Cj+mX4F!hv+FE@7KGB)>2U44U3!8Y%~{O>EwnWj)Nk&qqGI6=+g00@9zR2H*UOE zn`C6Y`AES%1N7_f;ejRuwcSvB=VIdLBhnby#tb5RO}`Xr6>}>m;tM6q=#8 zor$kME|5=h8EG~Y1SF4y!w&5`3t>ZW8Mnf_T@0loqvT12JH}4~F}E9X5a)uhJ-d*Z zyU?qcX&E;nTWQO?CMwOaXAOyNSwiQk_;Db@lHLnuYu6-h>FN8&rwEs^vS=$E>Zz58 z7BlGt72TeNFE%Jd+w{p&&NJP>u!}6c{b2RwYZ0MxHww+F4{L%uGNS#d2GXSsLnGhe^zQOP!4(XsMM@U>&p- zX`^gC!US2j#>P&;7InnNH)e8+&V~|YffMB}vuu8`#NI!plI5(*tHH4e$$`MW2)%|4 zG4j%yI)lL$F!m;ucLo_|;iJ{LpPAA@dWD>Na*RIqUjQrKm`CWwnY?5W-(??tamDSy zrZIDAfvIWhI=vey7PENIo?W!@cpi}#zNA9ghaF;zOvD^_6Wmb$BqGch!>#)&>9u?} zA2p$pBC8V7(L9mVb4?cPs3oT%1lFnheu>M6D@1D{=j4HVkHbF_&b*R5R8USX8~N-b zLLWyR{SDl$o#MyfOAIpT#TdyZg4UHzSx#pD<)+a`F0{_==V%#cKPdgJ+m?!SUE^Vt z$G!0Lto!}I{+=qmu9N6(v(*53hWGBub@>RJp+d2buI8R@hemS{c{cv5f8Fu6EO^n2 zvT6fj2oX7i0=kh-4Vi3uv7;idW_43QwK1+9!+f-;yJ>swU`V zN-;vUAn(KrBJJO2?AC51ji5PJ3GZ*T5g@M*X$fH+ePB9OK3O#z^gi)u+Lz&b>N4J+ z`(C9mTn3IU%di-{BCOVd1AuxS-!I~JOZ;oSHX#+}qS`T_G8w`qjVOTDjIS--#yIs% zGNDU_TKwaM*#J~a`rYI);2Qdd_!ohsk0Zokv(%G@@YI*NA3ALWG;?s;c&&n{1)yo7 
z9}L#yrrm?+vPi^XsF4Su-=z?-un|2eQPvZN7svUdGkkH>I&s)Hoqj`L9*D^zfu)Se z$sT;}_73`foZj9$uRx^7R3~K5hXQ@}1XEeiKk6bIWRCWxZ@ZlG7&Q?$dXq9n>VH0? zoKdwjF17l_F<;$V3YSq3+o?=}OAv(^r)e4u$82MsajeamKHdt^uh0>>Er$ii?Dj2fxHazkbcEujlpwtfaD57vqVDn9ZJH;?yq?M` z_{xRKqjl+~WBc^;=R4mU`k$wW$3&4LkS|K$_@7e3pF;S5`|18e3ICyl|4Wp>ME@U3 z_&-7k%>RWFwEjg2_73>>f}Nw{iYWc(jlam;LT&tgOh{G^sz^>KIGbKi?ZhRSiiX1L z&~+4@VM)l$LYOnnOgW5e*f(=-tnY>0MKm1RMTXzlb*(X~%^NB0Wy@uUnIX?MWg>7zc5X! z{0gPnVi-z*n5DZ4CN8H2iY^+$bxP9Jd1-!~)QjgGz79_78nd3tFL!XbhCwILUN5tA zP-N0|i(a0fPxVWlU2gh$d*;~&Z8_SuYr!=FbPPXJ++E!(8cO~U?(0!Rl0&Kv_(JMo?=@8D+sQpmK*!RP&zY8o>jPF z{1lFvXxes=AJWoZb6V~kuUgjGbX(fePNu0DBWaC=h$~Ktm%Sz=9(hM`q2`YS=;WO< zY8v^P0P)835K5Pc1}@H-q%tC!U(AxK)0#k9w}Hy##(kL85!RT>!bcKRFWjZVwi<3+ zs}{hHFzAb6J7+^fWrW>^P8X|ow#lDr_B*W1`rik(3g0iLQF9u1HfCt7JlcOmCxPrR zujV}@3W@zd4_*1`RQf0bS2giWJR13;{hLT<<0m#1ZTwj*l9AkOB8sSB9gL${M16QA zIl{EW2;z(LIAk?!e9C>RPTU3505_Pd+w01&emdeZe>tKwt=vPdUVJxEW=gB(H%s$< zs~I`Us@$5)C>Z|u%I=)Va|WE%*Gw|tO?#pfYXAIRiTt&d-pAw)0oc@(zaLQD&X z%w4y3242V^m(7o3qxf~l27cN)lM!ZBpe@^FQFrdjKoo_fYd&JmdCvR7JvqX#781+! 
zyUelS@(+VI@FI$-h-G5lu7|slsNy)_uARS8V4U*Pp|mh3IE%qdzy*6c-h93w-h%5K zr+@0@7NZZQk7)_PUF}E9&NA z`>v*KkhP>_{++O5G?MH_xT6*;1Y2r$p?qQsNUxCEJ2$7MQoGsJLq?U|lyDY0F%2nR zG%-_ByL1rVo(7X$6HhN(-V}z8bc!g7>G!!5uR{cORL{0~=$Mp}Y*1+T8Dv@9v(S;< z%FnoN%dQVv=apr-yxEBl=k0gS8?9aC+Qj#i+QB4CKmkCrLEGCQXG68sP9slQc3Yt# zR$g|DkXSoU#xYiG(?}bn0S;wCZ6N_7?kkmwKE}Ai`$Uil`B2~_2Aq?tvD~t#iw9h7p#oIYknoP1aoRkPuU9%ek{J5cq~(% z=Z*$wCdX5?Vvn1cr)$1M&4+iynRLfe^;Y2Ha^LB(Rpbabjn`!pa-umFtl zxJZz=n=*ce550+3V29*3!Q^mAe7*n>XwpDn9tzz2>W+m5Ji_z>JUDlSULe5>EpS9v z-^x)#&!u035ai$D58{IWW{b024Yuu97wa>~`p)r<@#f5j?Gf@4@Jvu40%&z%-I^Y> zDrSr!l4lj(Xfm3(Q}KZk`uI`%x*?!r3i@nm{vMv<67a%V*!#9vM+{so63vpvdevEH zQot1j6mCZHGPVw-B@c5od-pXCpCdZjCguun3T)JRIl|ez-<)u9p!V2P0pS%9>bu(G zxnG_3`V|nrt92=Y!`Z`Wh^z;^eQ&?)>W4$S@&;Enq#Dn+=@2{#yR?wRqF?vY^bfe~ zu5S#2y!Nk&j{t6E2@qR2IIR&$CX10q@E0-^j&YZ#CarQBP`cM>Hthx&LAq-dxWFS` zzp&}+kHOz^c?EYv`JQ@UcsR@v8W8!8f5xwx#EDPVlxf_<5RrtMFjv zH%nKiuRfXA-~YlDC5~(3s4rYWVEylY^!zuc2J?S#Z zl!L<@rY7F@0PX%0AB8Q_se>=vwfz+zMfMdR#jaAf{U<&uYNE5ur7W$EtA2ThwVqBV zPKi}b{YH0aXYe`Q>Z`ai>J!J;et?q8HEj;{cdE47{jK}TU#Ph14u&kBuI(FYDz&wh zyVvG;ItdY3!8;sHmX>DrGuNo#J(krhwGhOL8Wx1e8nkwzR6L7(!xb5c_;q4LrtM&A zPa4XjCF|D0Jm=LdWXnd3*x0PMChqHBNtWx%DdYBy-AD;TZQzFQ4xg=q0Ye8T@g3;Z zk|4CEc1jMXH<`B+%9OcC4Qz|GHj|1*QLh}sJ}3=Df<|&=%gAH#6T3XvPFPoj&>S!~ zBnB&hdR+M(L|f|NYm6z!^F`eoz8~>t5+PouFHH3Uw{^cgE-?uM?OjT`NX2_Z<~dEd zyY&SdGAEk6lWI?o>B?wNWeblfs8e?}V_#z32arhz@##*sS?~-iOxwh3B_NHpSe3wC zB-Jz2aP4*h9I3)`9WPTQjxaPJZNWIM4GScw@7Urs9~~FmY$|?-h6^Fh^p#3UdAJpX zD=KBoeQ8tSmc6>Z>;D9ot}soU?r1UuccfgOhWZw|u&^G*T6X{=a+IE*NW#)PMu8y< zLE$gcH4ajA(RyMWvoPL=W|o+>pBO1aU5DJV6gUfY#es-Qx-aQUr!IMASMzPcNwH#{ zMY_zXcz&K(CynYile=h-l;*sDRbQTJxv??)qDa%1TO(6K@&B^ zM3YPXy}4MuTF%FJcJ|Rv>Ld5@KW+`@6?{CM=X%vOtr7LnRT<|>l34zFTuV7l=foxM zIiD<{PIk@)9+E=duBwmz|?vdlYN&F3G0;vnG^O=>%`Sy*0l6OWBbQHVTCrYCKEOUAF_Q7ewQ1@ z#w%yqcv zZCYt-LV5<55rlv5WO(@V^f8tHXr$~p2Fg;>Vc_B5UJcGX^cBa0Qyc^JAYkG2_b z&(Qt;w#HHBcK{35KeUqNB8{ctNM&EG%m1t1c13D!S1p0cH8oN=rF(C^4xU%%$4rV9NziGE1l1uPj}BbpD#>VO56go(=Iy# 
zaYI+#t)-_cx1A}%=Ht(G_J4RK6<6Fs+Bh`&A&0eL@klj52$9!e?{Ro!Aw5G7wD|e7 zzH!hp%NMl7V}89F3TQYEsEGxivU}_rDgClYuX-*BlHMCDF~vjIa}+P;%d3IQ)%n;2 zQ$Ea*Qc<;*qy^xNTmDiHnGvtg(_;@;nKMVil*zu$NW)haj+N{r!_abs8IT%+L$>rPcs_30@ahe-G`L4x58`i!3={iL-1N;5FeCPpQTkkf)4AK8odtO_R9bybbZLylSL7pV*kH;K^{?tzjYgXZ{5~t{gA-bd|D2uNEHomB z;3~5k$ZdweHANi6+l2?qY%|gJLL-VjQN~)qH`vb<-}3O* zq(;O*+%%6bkDF6q-Dqru_>{ zI&F9UfC)O?&Pi95oXb!7@f^dI?oF7Glb>wo=1=@@mmhxxafjQdYM6Xs2ob~o5JOo0 zgCYOHkpKG_!tx&s`TvO_e+y8@kEzJ=00RL0*}gsi0RHwu@$i2ItP!}HI68f4kMj~S z(lHPbnAjTG8C%$z@e-*hi_&rs@$*6P*qNGI7@2VWEk{oKr$510S6e5pe;nc^a&fff z`b)T+%htfg#EHwM%R$4LLT?VmIJpPtko+kZbLJ3G67 z4fN+4{&9(a4ada5z(W6z{l7;tb2K)#{+Io~UBH(uGrbwTv4OJzt*eEJ8!-{VKj-n^ zCY*_g@9)|Da~ga+f1Qlcm)iRuIe6Ny&L(_J42(>)jEuAlY|4x*Tuf|SENnb~y(PH* zs~wNA5!c@xm}wcl{#gHXVCLXrWZ|Lz``|xDFfq1p{<_Ju#{ZOx=aW}v5@%tRVPsSK z=WG9eEdFapBM+mmyjMOprvI1uKimIdY-VEn4-I)fxvzPY`)d{bHIK}6EOd+{Dz+Bx z1poDj-j`4;vb23WpfuBLt6t2YbSc=zyGDPw>3*rH&M`)ThmBu3t=JEU#4VYAazv36E=T7qg`Y!QF>TIrcGBz7qKSjUX%}VP zZ7X&uoSHWN`uZv*8dnyvR>eSDDpYH^_|02o zpHmNINj%m--VqRp8s!#59N2%bJ0w)4LLZ*Vf_#nUhOxGNpw1iV`^ZPf^HO>HQ{c<1 zEezP$%MaW_=WDU6&{IRZ>)$(=bTb$W>|=@h7>-w?Cn9BS}-9l!webp<;D`y8fhzaX=ZfcNE4QUl0=K0 zh@dHe3CPo&Kh(3vl`y+FyI{ekk_YxkJfpd{(fymq%5fD=6*36-`oYT{?kYgP zAMg%AOr|mjS-Pa!B0@X*o(Z-mA*2T#?*jC&D9ZEX zCFC0^XLTP;cqSsaEy=|z$E>JMlcdzK&Q%?kR-e6g0+slsPqvMW1R*fVcR!utpl%o9 zVTKo^$WR*bHV~n(Ug(6&xBq@-j|XP0G(>B*2C186#-=j4h@FI>|o~Dp_+(16|R%qQ+o+j?wZj z$i`1zB#`rn>yxL~DEaX+fjf?oLmSy4tktZgjx^a2M`!ATzlB2{E9|@eLjOIRhAYq@ za7opX%&eR}Bh@X%-Ewmo3w`DepdY5xYb$)ywivKW?qBL=uT`g0!XuIXhE4Kge0CE< zl_l-)alBM$#>MY_+b6sYL9d~Uv4UV;Rp#;7|8Z=M(v5vD^OHou4ouozc)Y)XibwtD z&29uP0-*)TgL!>$3=ztFw|P!deAPt#6m;QDu7u%9Z*kCqwmF-_f)dkOH1V=QheVch6I*$!t{=quWpvqt9=JtQ6(E zR$ZZ63Q#>JK8g!|xvDxJ`OC@3ZxTmiH!(E_EG&v^cB-dawC5`cO=mq2z{qXh-(oQY z$xr#rdt!V(zwaR@X3S5cE!>XmJu3zB=8GyyE}zzb?67cMN(;YPq~kH^iZXwXe$lC**ZVX_s2IAB@+oz&n>V&-$C{1O@6S(toK;ruJi+{?eX4nR%G6vp z_T&YpR}5hP@S_S8+bY+&o#1_F8bDinpwA|k2{qr2bcVeFxU)vToNi(66%6s5q~}qE 
z-5?5l?D~VIJPsJB${XR>HKrGB6rE*nBvUO3r@iIdW9H|PKsR?Y1g#2e>-dhfD$fK_G#xJ3Q`gljncu-x?mNs=25U`*AI<%dA zY$X!l2O1|B>1a2=>vXAh5X*1cMcet%?gxAH3qN;Mm!QJ^2a<`%*AO>r*C6Eyp49E? z4yWWQKdDG-<0<=7uDr6%>dkr+QYo9vqsFsHbf+YN4-teA!z5=j}sHi}G3PK7yx*S9P*viO;b ziau)TZQrn<3zLI;B+kT0lR!lAUH9C1+4r zLpb`_hM1GgFm8z2H!5hPz=z92G9A9&uc0&PT7@;8cVl!vIEu1CE{5T-xJ!M#`S8Ox zfQt*)dPjq01L^Bi(5U(=sis2Wy|M$MY+9zMm@m;e+k=O9&swiPF<4~COGOk2BUs2G3glDZ zpL-i*)C{wco*lyS(B=d!vqVOmReQX=ioX;HkDl%X?E7LO-dwNXE-so2xko>C504`Aiq~M~@R`N+&gO}g<}?Khlq>ds9VeAg1jIZY zfm~xZ_ZilYll)ju3bCH3Oc^&W-+Ho)Na{q(}%RtwA`Db-8^`}y<1^pC#bRu zk5ymFIU(u%Mc$XmFx&OC-OH6ywK@3M$HW%0k2riDZ%*pRg#W&Ycj|7MpH6RZG3ebY*XgDW(zHXOO@%4_p?rkr2HIBk+o z!=cvjsig)0mC;AO;dkrq?rydeFSsjpDu=Finh)i>@Je=us;c4rOUpy(p0tK~t{im! zOP0Ekzz~dZ0k^vR(VUufm1BdQUCB{vr-Y|tM>=x`+qXLBawfhfCqpr@L-gXB;hnuc zZ13OuGH`+cU5&mB(W;fO!=OL-MQJaps|=wNCIP@^cz}=hkxkc={Eg`w?at2mH&s7g z8I+$8rUUOX@(*hI-@ofgrn{JZx?YZEe`XxkJ~he1DYnaCCpYe3xMxFmBp^h|NP} zK88=NkXzg#ZHkzH$g)0jWkJ#m!#+A`Jprx|@Q!cKaHyIyGf?#~$v|*D!XM2*UB}gR z{KlWuo|>wAhi{G@R&ZkZ%&Lb#u-IT#RzT+S;KlLV2+|7L-aSjbeGn0`<-BzyfvR-d zK0I^iXAKs^B7mdtlh56+(2RF;`@+0}Z`a%;-NWa7xi3nb_F*uzym1WjLCooE1}pQ$ z?`GbX+r(iM%w3MIdmGPtIfLtfn}#?S;Go%bHpiaUS+W)H zki6&oaD%v2(#Z2A51CtJrj}e@ZwzWhIUAyHT=h%on&*&8*c8w8R$s{-xE&!Js}0$( zX?d&0dcOuz3$AT<8&N7}vvQN*QzZ78)%05vSX}|NK07tW0$gol-X%A_^({RjMOQ=x z8~NMgL(}4Ckahq$-Xr$4Eqh7!H|@?;4m6Fz-4}@4epOWQ{*jXdxw+)e>tP-+RPQ0@M81^_S}USw7QP~)F){6F(3I1qreaEM}6Gs9KLv@+GImcCx5tK>>~OhXg_09WK-?e z;5+Bc4l!pG-#Ej@R(~I;8^@hZyTNHK8>`fwA0{Y^F$!Nhx5L%`Y zas{!G|$9UF3@?B3U!wc7(H zmmhFDbP-6e)Z{HD`aTxv5)1#}?Ft|Nw?F62640wGuqoB#E30UA6BgX5Tj2GmP=zD; zETy){?ohoNH;zb!+<9$a)eQ|Abyda9%~k!{&e7F@UM*I}Q4y6sSEZ@D^YQA$L`}%hI2Z00i=cBtqO3(D#%zOl=J@W!7t%V; zuOP;p??D_aGm$M>H#~xBLV}dR4{wPgA8*t5V?Ay6HW#0vr0-r&ov?Y)mFl1gJ>(go z@ZSewte~NwrWozcdzbT!3dblu7B|K@Gs-}DP0`(biOQUAEu8Z8h`0j31e3jTw2GF^ zeZlRHXYiTdOhiI7y#5a>{Tk*cU;uZSr!xc{(RL?i7QF>8&o4k@IE|1|Cima`O=mXWNAOK z6pYYnwmcwW_@@XsCL0mL&Yd*{9L*7R4uQ&IMNo7ca0YHYz0kVFbL1Ug0f{9Kv_d1@UF 
zqjUYX(}^1&^48xyVlVB39iOFG0?xkGy=+!8sm}OnC{_zU;-P9=&H1`WT2O!jk+lf2 zxfUn7(=kXS_QJk7cbz;{?7AD+3=YQXJl-`wQP7!U5l6l_;Cy`~&SU?x70pIdMb^|F zyPAnt*Ul#0&1lCN-(|rnwC60AZX1Z=eoxA_{*jb-l6` z#q#2`$}e?_exSnsK6xQ0P9^&WEa$6r1e+J&Pb|pWdM8cY@KlNr2mZ)FLaLkI@AmC_ zY+)hzfgkCd8;xho_iF)ss~_UYyDP6uP;aB0oiLImNKKNoc!{xq zMSGX9K}Tms&xUsnbbi5810nUun*QoNP!(R+_4FIiz#8Jc3DDCB|CGWyaO4ZldtV1ti9g$NK-WY6>6#kd!-EhUQt%Znks;EyK8 zOJ>caEpILB7Qd14U*4ggWdx5#q)Y68>KiCdKLCLr6F z4tWoG8wfj!Ak1j4jZK)^wOQ=g8c}smE~LN(8z5@xIxQ+uXZXk4!yYzFekN_7(hr5( z2&yXd5(CUt_Ty4Rv%Sj%Bu`ZE&>55PIKwyP%{JD)7K$+%(E$au7&*|U4>r%i*L+B<13z+d%ADczmy7kb_9`pl34^zuNl(W<_@#Kt)%!wKDM zR>bqE3-l`5v$%Q^-J|Tx<%_(hN~Hu&g?2n0Z)Q)-8=q^#47bJ$t_U?%C({d_HGqXXpDppYuLKrODDJz42{#&*&X`{7sO8%ec;B zbQRl#QAql7%zk7lGxdXyI{)O-#I9Q>!Ztt>rMG@H?j1~kgaybnt>m5P7VWCp{Vb;J z5hmX&^8R*tUjNIPf%slGs9X~IFkc>p!L!6lyrk}HDo=xF1`66)atJor(*34L#92dA z@#|5w)#Tc6p&r-6Orw((T5lD#nI+9-oVZNXBaH0i^6@#9=Q8!&Km$pzT@HFY3{faoaS0Jf zXQ?MUJviB7Hq#p+ud!+9z(FH^D#hom{o}P4w&z93s2c33t7uLb_=HQT!jJ`z@3m!! zs4bHlZ96Bp|Ab4wv0`k*mFX)h9+rHjmyMH>D=SvgYgm%;YU@XKkwbB>b!J@sZ5+qd zXm^#fhr)$q^@uKJ_Tn>#m$!}|Ki`d&YmFFMS#22l#1Q*tNk=F@FcJP-%g!%p)TJ_( zDbHTV*J`#Q9Rh4;UIRQAzn?Aw+)u}&{n+yrKZG{N4tnFew~7HO0K|_0wdTHeKMLqY zx@eRSb_2=<&ecL8yq0b^_nyNI-P-jkS_Jo=_R=1sS7j}dJoV}`WkhcP9u z@J>N{PfUT>M^l74N#?xDXaM<0`XsrdTRMn#nWEkO70F`e#-$S{b1fxiiZKVtjm4gD zKtMrvRgUs4halfpG~;@nupo-pEt*hz$c}0Nvn{%Tn9iAi_FLB6zisDEV&!BUq#I86 zidKvttvkf;d=qaepEkAh+npKjOabraoq+?-Kf1f5kmWYbhm&A4Q6zo^vSI#D>Fl=nE2-C)QU^4f`_2c^^K9NGI+7Y2= z!V-w#@S2bBLt2Ltf03Y>hlQ)@Q9@CX}LzwQis> z#=I|+fkBR9U}-9dZe9F zvWT}>XbaXXkLZ<^dhN!lnLUg>OHnfUYyB&Qsr@c!g{QUj>1OS|bZlRnXj0Umu0RGR zwBv@`0FNX;9i++?YI%6 zfYd+v$x<;0|5QJJIQdgd5KVkZLvDwsal5i?nKcYg`5*-MBrbm#Kb&hE5HzyF$zoXm z;hRSbhKbc`xp;a)d+Y?H1l~n7kxFy6wW{wNnWguK#MP&cFnG36Wwu`u!IArt+ud}% zCJ)Fg<)uDVI(2q;qI}^-onAF*%=Gb6mP5p&%u4fBRpqDM?`$BJbyI|x1RiO+%RBhJ z?6%yQ{8>l?F<9=y)vm$_Ot|@ssv>NBx%pBY5qabKo!On@71x`sb_@)_kjCW{?v3nAP-HN|tXG&4-VGNdw`oeL-BoS!_Ju3h zIHeeEf_w_okK1*A*llysC8)%KmVlE@^o``q)%DUz$K0cyNEAL8Y{K`PP}L65R%v0y 
zx9XaGeU(VKU?JFMcnb=3&BS!Z49S>1aCOqP5$n{l#YXF=>KAT#b-?6I(n_k-;VaI3 zG?wCu$4Te6_(*988wu@vWEPOL{ibR$)*aQBe*w1~I%_A}oc_;EILq7W1(WPOYUikg zQ|N0Xj!*dIY^{ZOj+U2{bv2&VEsb@It|$})qewwq+sxzEA_E%p_0Pg07)Z7od9ak>^Vr9;zn$f@t;cN(|nvnuU3RR0ib zjyy$vR|06t3;}->Hs-v1t9StY7$D7|F%NAN<+UwV1Ngq#Pj==5XYuv~rgDR@Xl^U; zr)0ntecSZWb<~ZQ}n*pJlA4U1s@#i5PIB;{2C}4v6 z=X^N~TfjR7{=K1mtc*zZQ_rd@<1oN&fMBxlk8mLNc3KoO)#{yXv z{=Y+9h_hzi72p+OA&xKRh%Xw?QCSO-I6aD6faHh4cwvz~XsxD%9%B1=E*4lsOn%*1;yyGM%Xp7>X zKwG#$;B9(1K}UgK>f#=0a)ZD^?VOvQ_h? z!)(q0PBmO^0$6K|y#{Q)=>H2M%nMj>1HhVEY)P{D(y$K%U}vqc|K+&N!D1Sm5y0G$(>?2-w1<4=MxUOy#`+Za`v*XBNk7h z;--Lyee8?H`J$e}dFjY)2_8DIhlTScG= n or y >= n: - return - - C[y, x] = 0 - for i in range(n): - C[y, x] += A[y, i] * B[i, x] - - -A = np.array(np.random.random((n, n)), dtype=np.float32) -B = np.array(np.random.random((n, n)), dtype=np.float32) -C = np.empty_like(A) - -print("N = %d x %d" % (n, n)) - -s = time() -stream = cuda.stream() -with stream.auto_synchronize(): - dA = cuda.to_device(A, stream) - dB = cuda.to_device(B, stream) - dC = cuda.to_device(C, stream) - cu_square_matrix_mul[(bpg, bpg), (tpb, tpb), stream](dA, dB, dC) - dC.to_host(stream) - -e = time() -tcuda = e - s - -# Host compute -Amat = np.matrix(A) -Bmat = np.matrix(B) - -s = time() -Cans = Amat * Bmat -e = time() -tcpu = e - s - -# Check result -assert np.allclose(C, Cans) -#relerr = lambda got, gold: abs(got - gold)/gold -#for y in range(n): -# for x in range(n): -# err = relerr(C[y, x], Cans[y, x]) -# assert err < 1e-5, (x, y, err) - -print('cpu: %f' % tcpu) -print('cuda: %f' % tcuda) -print('cuda speedup: %.2fx' % (tcpu / tcuda)) - diff --git a/numba/examples/cudajit/matmul_benchmark.txt b/numba/examples/cudajit/matmul_benchmark.txt deleted file mode 100644 index c8344c3ff..000000000 --- a/numba/examples/cudajit/matmul_benchmark.txt +++ /dev/null @@ -1,13 +0,0 @@ -MatrixMul 1000 x 1000 - -lincuda - -cpu: 0.281652 -cuda: 0.217016 -cuda speedup: 1.30x - -osx 10.8 - -cpu: 0.017879 -cuda: 1.362290 -cuda 
speedup: 0.01x diff --git a/numba/examples/cudajit/matmul_smem.py b/numba/examples/cudajit/matmul_smem.py deleted file mode 100755 index bfb2d3c40..000000000 --- a/numba/examples/cudajit/matmul_smem.py +++ /dev/null @@ -1,86 +0,0 @@ -#! /usr/bin/env python -from __future__ import print_function - -from timeit import default_timer as time - -import numpy as np - -from numba import cuda, float32 - - -bpg = 50 -tpb = 32 -n = bpg * tpb - - -@cuda.jit('(float32[:,:], float32[:,:], float32[:,:])') -def cu_square_matrix_mul(A, B, C): - sA = cuda.shared.array(shape=(tpb, tpb), dtype=float32) - sB = cuda.shared.array(shape=(tpb, tpb), dtype=float32) - - tx = cuda.threadIdx.x - ty = cuda.threadIdx.y - bx = cuda.blockIdx.x - by = cuda.blockIdx.y - bw = cuda.blockDim.x - bh = cuda.blockDim.y - - x = tx + bx * bw - y = ty + by * bh - - acc = 0. - for i in range(bpg): - if x < n and y < n: - sA[ty, tx] = A[y, tx + i * tpb] - sB[ty, tx] = B[ty + i * tpb, x] - - cuda.syncthreads() - - if x < n and y < n: - for j in range(tpb): - acc += sA[ty, j] * sB[j, tx] - - cuda.syncthreads() - - if x < n and y < n: - C[y, x] = acc - -A = np.array(np.random.random((n, n)), dtype=np.float32) -B = np.array(np.random.random((n, n)), dtype=np.float32) -C = np.empty_like(A) - -print("N = %d x %d" % (n, n)) - -s = time() -stream = cuda.stream() -with stream.auto_synchronize(): - dA = cuda.to_device(A, stream) - dB = cuda.to_device(B, stream) - dC = cuda.to_device(C, stream) - cu_square_matrix_mul[(bpg, bpg), (tpb, tpb), stream](dA, dB, dC) - dC.to_host(stream) - -e = time() -tcuda = e - s - -# Host compute -Amat = np.matrix(A) -Bmat = np.matrix(B) - -s = time() -Cans = Amat * Bmat -e = time() -tcpu = e - s - -print('cpu: %f' % tcpu) -print('cuda: %f' % tcuda) -print('cuda speedup: %.2fx' % (tcpu / tcuda)) - -# Check result -assert np.allclose(C, Cans) -#relerr = lambda got, gold: abs(got - gold)/gold -#for y in range(n): -# for x in range(n): -# err = relerr(C[y, x], Cans[y, x]) -# assert err < 
1e-5, (x, y, err) - diff --git a/numba/examples/cudajit/sum.py b/numba/examples/cudajit/sum.py deleted file mode 100755 index 58ca582d9..000000000 --- a/numba/examples/cudajit/sum.py +++ /dev/null @@ -1,30 +0,0 @@ -#! /usr/bin/env python -from __future__ import print_function - -from timeit import default_timer as time - -import numpy as np - -from numba import cuda - - -@cuda.jit('(f4[:], f4[:], f4[:])') -def cuda_sum(a, b, c): - i = cuda.grid(1) - c[i] = a[i] + b[i] - -griddim = 50, 1 -blockdim = 32, 1, 1 -N = griddim[0] * blockdim[0] -print("N", N) -cuda_sum_configured = cuda_sum.configure(griddim, blockdim) -a = np.array(np.random.random(N), dtype=np.float32) -b = np.array(np.random.random(N), dtype=np.float32) -c = np.empty_like(a) - -ts = time() -cuda_sum_configured(a, b, c) -te = time() -print(te - ts) -assert (a + b == c).all() -#print c diff --git a/numba/examples/fbcorr.py b/numba/examples/fbcorr.py deleted file mode 100755 index 68f819cf5..000000000 --- a/numba/examples/fbcorr.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -This file demonstrates a filterbank correlation loop. 
-""" -from __future__ import print_function, division, absolute_import - -import numpy as np - -from numba import jit - - -@jit(nopython=True) -def fbcorr(imgs, filters, output): - n_imgs, n_rows, n_cols, n_channels = imgs.shape - n_filters, height, width, n_ch2 = filters.shape - - for ii in range(n_imgs): - for rr in range(n_rows - height + 1): - for cc in range(n_cols - width + 1): - for hh in range(height): - for ww in range(width): - for jj in range(n_channels): - for ff in range(n_filters): - imgval = imgs[ii, rr + hh, cc + ww, jj] - filterval = filters[ff, hh, ww, jj] - output[ii, ff, rr, cc] += imgval * filterval - -def main (): - imgs = np.random.randn(10, 16, 16, 3) - filt = np.random.randn(6, 5, 5, 3) - output = np.zeros((10, 6, 15, 15)) - - import time - t0 = time.time() - fbcorr(imgs, filt, output) - print(time.time() - t0) - -if __name__ == "__main__": - main() diff --git a/numba/examples/gaussian-blur/gaussian-blur-pa.py b/numba/examples/gaussian-blur/gaussian-blur-pa.py deleted file mode 100644 index 44761882e..000000000 --- a/numba/examples/gaussian-blur/gaussian-blur-pa.py +++ /dev/null @@ -1,95 +0,0 @@ -#! /usr/bin/env python - -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from __future__ import print_function - -import sys -import time -import os - -import numpy as np - -from numba import njit, stencil -try: - from PIL import Image -except ImportError: - raise RuntimeError("Pillow is needed to run this example. 
Try 'conda install pillow'") - -@stencil() -def gaussian_blur(a): - return (a[-2,-2] * 0.003 + a[-1,-2] * 0.0133 + a[0,-2] * 0.0219 + a[1,-2] * 0.0133 + a[2,-2] * 0.0030 + - a[-2,-1] * 0.0133 + a[-1,-1] * 0.0596 + a[0,-1] * 0.0983 + a[1,-1] * 0.0596 + a[2,-1] * 0.0133 + - a[-2, 0] * 0.0219 + a[-1, 0] * 0.0983 + a[0, 0] * 0.1621 + a[1, 0] * 0.0983 + a[2, 0] * 0.0219 + - a[-2, 1] * 0.0133 + a[-1, 1] * 0.0596 + a[0, 1] * 0.0983 + a[1, 1] * 0.0596 + a[2, 1] * 0.0133 + - a[-2, 2] * 0.003 + a[-1, 2] * 0.0133 + a[0, 2] * 0.0219 + a[1, 2] * 0.0133 + a[2, 2] * 0.0030) - -@stencil() -def gaussian_blur_3d(a): - return (a[-2,-2,0] * 0.003 + a[-1,-2,0] * 0.0133 + a[0,-2,0] * 0.0219 + a[1,-2,0] * 0.0133 + a[2,-2,0] * 0.0030 + - a[-2,-1,0] * 0.0133 + a[-1,-1,0] * 0.0596 + a[0,-1,0] * 0.0983 + a[1,-1,0] * 0.0596 + a[2,-1,0] * 0.0133 + - a[-2, 0,0] * 0.0219 + a[-1, 0,0] * 0.0983 + a[0, 0,0] * 0.1621 + a[1, 0,0] * 0.0983 + a[2, 0,0] * 0.0219 + - a[-2, 1,0] * 0.0133 + a[-1, 1,0] * 0.0596 + a[0, 1,0] * 0.0983 + a[1, 1,0] * 0.0596 + a[2, 1,0] * 0.0133 + - a[-2, 2,0] * 0.003 + a[-1, 2,0] * 0.0133 + a[0, 2,0] * 0.0219 + a[1, 2,0] * 0.0133 + a[2, 2,0] * 0.0030) - -@njit(parallel=True) -def run_gaussian_blur(input_arr, iterations): - output_arr = input_arr.copy() - for i in range(iterations): - gaussian_blur(input_arr, out=output_arr) - input_arr, output_arr = output_arr, input_arr - - return input_arr - -@njit(parallel=True) -def run_gaussian_blur_3d(input_arr, iterations): - output_arr = input_arr.copy() - for i in range(iterations): - gaussian_blur_3d(input_arr, out=output_arr) - input_arr, output_arr = output_arr, input_arr - - return input_arr - -def main (*args): - iterations = 60 - - if len(args) > 0: - input_file = args[0] - else: - raise ValueError("A jpeg file must be provided as the first command line parameter.") - - if len(args) > 1: - iterations = int(args[1]) - - parts = os.path.splitext(input_file) - new_file_name = parts[0] + "-blur" + parts[1] - - input_img = 
Image.open(input_file) - input_arr = np.array(input_img) - assert(input_arr.ndim == 2 or input_arr.ndim == 3) - tstart = time.time() - if input_arr.ndim == 2: - output_arr = run_gaussian_blur(input_arr, 1).astype(input_arr.dtype) - else: - output_arr = run_gaussian_blur_3d(input_arr, 1).astype(input_arr.dtype) - htime = time.time() - tstart - print("SELFPRIMED ", htime) - - tstart = time.time() - if input_arr.ndim == 2: - output_arr = run_gaussian_blur(input_arr, iterations).astype(input_arr.dtype) - else: - output_arr = run_gaussian_blur_3d(input_arr, iterations).astype(input_arr.dtype) - htime = time.time() - tstart - print("SELFTIMED ", htime) - - new_img = Image.fromarray(output_arr, mode=input_img.mode) - new_img.format = input_img.format - new_img.save(new_file_name) - input_img.close() - -if __name__ == "__main__": - main(*sys.argv[1:]) diff --git a/numba/examples/gaussian-blur/gaussian-blur.py b/numba/examples/gaussian-blur/gaussian-blur.py deleted file mode 100644 index d60ce60cf..000000000 --- a/numba/examples/gaussian-blur/gaussian-blur.py +++ /dev/null @@ -1,121 +0,0 @@ -#! /usr/bin/env python - -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from __future__ import print_function - -import sys -import time -import os - -import numpy as np - -from numba import jit, stencil -try: - from PIL import Image -except ImportError: - raise RuntimeError("Pillow is needed to run this example. 
Try 'conda install pillow'") - -@stencil() -def gaussian_blur(a): - return (a[-2,-2] * 0.003 + a[-1,-2] * 0.0133 + a[0,-2] * 0.0219 + a[1,-2] * 0.0133 + a[2,-2] * 0.0030 + - a[-2,-1] * 0.0133 + a[-1,-1] * 0.0596 + a[0,-1] * 0.0983 + a[1,-1] * 0.0596 + a[2,-1] * 0.0133 + - a[-2, 0] * 0.0219 + a[-1, 0] * 0.0983 + a[0, 0] * 0.1621 + a[1, 0] * 0.0983 + a[2, 0] * 0.0219 + - a[-2, 1] * 0.0133 + a[-1, 1] * 0.0596 + a[0, 1] * 0.0983 + a[1, 1] * 0.0596 + a[2, 1] * 0.0133 + - a[-2, 2] * 0.003 + a[-1, 2] * 0.0133 + a[0, 2] * 0.0219 + a[1, 2] * 0.0133 + a[2, 2] * 0.0030) - -def gaussian_blur_std(a, res): - ashape = a.shape - for i in range(2,ashape[0]-2): - for j in range(2,ashape[1]-2): - res[i,j] = (a[i-2,j-2] * 0.003 + a[i-1,j-2] * 0.0133 + a[i,j-2] * 0.0219 + a[i+1,j-2] * 0.0133 + a[i+2,j-2] * 0.0030 + - a[i-2,j-1] * 0.0133 + a[i-1,j-1] * 0.0596 + a[i,j-1] * 0.0983 + a[i+1,j-1] * 0.0596 + a[i+2,j-1] * 0.0133 + - a[i-2,j+0] * 0.0219 + a[i-1,j+0] * 0.0983 + a[i,j+0] * 0.1621 + a[i+1,j+0] * 0.0983 + a[i+2,j+0] * 0.0219 + - a[i-2,j+1] * 0.0133 + a[i-1,j+1] * 0.0596 + a[i,j+1] * 0.0983 + a[i+1,j+1] * 0.0596 + a[i+2,j+1] * 0.0133 + - a[i-2,j+2] * 0.003 + a[i-1,j+2] * 0.0133 + a[i,j+2] * 0.0219 + a[i+1,j+2] * 0.0133 + a[i+2,j+2] * 0.0030) - return res - -@stencil() -def gaussian_blur_3d(a): - return (a[-2,-2,0] * 0.003 + a[-1,-2,0] * 0.0133 + a[0,-2,0] * 0.0219 + a[1,-2,0] * 0.0133 + a[2,-2,0] * 0.0030 + - a[-2,-1,0] * 0.0133 + a[-1,-1,0] * 0.0596 + a[0,-1,0] * 0.0983 + a[1,-1,0] * 0.0596 + a[2,-1,0] * 0.0133 + - a[-2, 0,0] * 0.0219 + a[-1, 0,0] * 0.0983 + a[0, 0,0] * 0.1621 + a[1, 0,0] * 0.0983 + a[2, 0,0] * 0.0219 + - a[-2, 1,0] * 0.0133 + a[-1, 1,0] * 0.0596 + a[0, 1,0] * 0.0983 + a[1, 1,0] * 0.0596 + a[2, 1,0] * 0.0133 + - a[-2, 2,0] * 0.003 + a[-1, 2,0] * 0.0133 + a[0, 2,0] * 0.0219 + a[1, 2,0] * 0.0133 + a[2, 2,0] * 0.0030) - -def gaussian_blur_std_3d(a, res): - ashape = a.shape - for i in range(2,ashape[0]-2): - for j in range(2,ashape[1]-2): - for k in range(ashape[2]): - 
res[i,j,k] = (a[i-2,j-2,k] * 0.003 + a[i-1,j-2,k] * 0.0133 + a[i,j-2,k] * 0.0219 + a[i+1,j-2,k] * 0.0133 + a[i+2,j-2,k] * 0.0030 + - a[i-2,j-1,k] * 0.0133 + a[i-1,j-1,k] * 0.0596 + a[i,j-1,k] * 0.0983 + a[i+1,j-1,k] * 0.0596 + a[i+2,j-1,k] * 0.0133 + - a[i-2,j+0,k] * 0.0219 + a[i-1,j+0,k] * 0.0983 + a[i,j+0,k] * 0.1621 + a[i+1,j+0,k] * 0.0983 + a[i+2,j+0,k] * 0.0219 + - a[i-2,j+1,k] * 0.0133 + a[i-1,j+1,k] * 0.0596 + a[i,j+1,k] * 0.0983 + a[i+1,j+1,k] * 0.0596 + a[i+2,j+1,k] * 0.0133 + - a[i-2,j+2,k] * 0.003 + a[i-1,j+2,k] * 0.0133 + a[i,j+2,k] * 0.0219 + a[i+1,j+2,k] * 0.0133 + a[i+2,j+2,k] * 0.0030) - return res -def main (*args): - iterations = 10 - - if len(args) > 0: - input_file = args[0] - else: - raise ValueError("A jpeg file must be provided as the first command line parameter.") - - if len(args) > 1: - iterations = int(args[1]) - - parts = os.path.splitext(input_file) - new_file_name = parts[0] + "-blur" + parts[1] - - input_img = Image.open(input_file) - input_arr = np.array(input_img) - - assert(input_arr.ndim == 2 or input_arr.ndim == 3) - - tstart = time.time() - if input_arr.ndim == 2: - output_arr = gaussian_blur(input_arr) - else: - output_arr = gaussian_blur_3d(input_arr) - - htime = time.time() - tstart - print("SELFPRIMED ", htime) - - tstart = time.time() - for i in range(iterations): - if input_arr.ndim == 2: - output_arr = gaussian_blur(input_arr) - else: - output_arr = gaussian_blur_3d(input_arr) - input_arr, output_arr = output_arr, input_arr - htime = time.time() - tstart - print("SELFTIMED ", htime) - - output_arr = input_arr.astype(np.uint8) - - new_img = Image.fromarray(output_arr, mode=input_img.mode) - new_img.format = input_img.format - new_img.save(new_file_name) - input_img.close() - - input_img = Image.open(input_file) - input_arr = np.array(input_img) - - tstart = time.time() - output_arr = input_arr.copy() - for i in range(iterations): - if input_arr.ndim == 2: - gaussian_blur_std(input_arr, output_arr) - else: - 
gaussian_blur_std_3d(input_arr, output_arr) - input_arr, output_arr = output_arr, input_arr - htime = time.time() - tstart - print("Standard Python time", htime) - input_img.close() - -if __name__ == "__main__": - main(*sys.argv[1:]) diff --git a/numba/examples/harris/harris.py b/numba/examples/harris/harris.py deleted file mode 100644 index f5ad4b107..000000000 --- a/numba/examples/harris/harris.py +++ /dev/null @@ -1,73 +0,0 @@ -#! /usr/bin/env python - -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from __future__ import print_function - -import sys -import time -import os - -import numpy as np - -from numba import njit, stencil -try: - from PIL import Image -except ImportError: - raise RuntimeError("Pillow is needed to run this example. Try 'conda install pillow'") - -@stencil() -def xsten(a): - return ((a[-1,-1] * -1.0) + (a[-1,0] * -2.0) + (a[-1,1] * -1.0) + a[1,-1] + (a[1,0] * 2.0) + a[1,1]) / 12.0 - -@stencil() -def ysten(a): - return ((a[-1,-1] * -1.0) + (a[0,-1] * -2.0) + (a[1,-1] * -1.0) + a[-1,1] + (a[0,1] * 2.0) + a[1,1]) / 12.0 - -@stencil() -def harris_common(a): - return (a[-1,-1] + a[-1,0] + a[-1,1] + a[0,-1] + a[0,0] + a[0,1] + a[1,-1] + a[1,0] + a[1,1]) - -@njit() -def harris(Iin): - Ix = xsten(Iin) - Iy = ysten(Iin) - Ixx = Ix * Ix - Iyy = Iy * Iy - Ixy = Ix * Iy - Sxx = harris_common(Ixx) - Syy = harris_common(Iyy) - Sxy = harris_common(Ixy) - det = (Sxx * Syy) - (Sxy * Sxy) - trace = Sxx + Syy - return det - (0.04 * trace * trace) - -def main (*args): - iterations = 10 - - if len(args) > 0: - input_file = args[0] - else: - raise ValueError("A jpeg file must be provided as the first command line parameter.") - - parts = os.path.splitext(input_file) - new_file_name = parts[0] + "-corners" + parts[1] - - input_img = Image.open(input_file).convert('L') - input_arr = np.array(input_img) - - tstart = time.time() - for i in range(iterations): - output_arr = harris(input_arr) - htime = time.time() - tstart - 
print("SELFTIMED ", htime) - - new_img = Image.fromarray(output_arr.astype(np.uint8), mode=input_img.mode) - new_img.format = input_img.format - new_img.save(new_file_name) - -if __name__ == "__main__": - main(*sys.argv[1:]) diff --git a/numba/examples/harris/harris_numba.py b/numba/examples/harris/harris_numba.py deleted file mode 100644 index 3c13e98c4..000000000 --- a/numba/examples/harris/harris_numba.py +++ /dev/null @@ -1,88 +0,0 @@ -#! /usr/bin/env python - -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from __future__ import print_function - -import sys -import time -import os - -import numpy as np - -from numba import njit, stencil -try: - from PIL import Image -except ImportError: - raise RuntimeError("Pillow is needed to run this example. Try 'conda install pillow'") - -@njit -def xsten(a): - ret = np.zeros_like(a) - ashape = a.shape - for i in range(1,ashape[0]-1): - for j in range(1,ashape[1]-1): - ret[i,j] = ((a[i-1,j-1] * -1.0) + (a[i-1,j] * -2.0) + (a[i-1,j+1] * -1.0) + a[i+1,j-1] + (a[i+1,j] * 2.0) + a[i+1,j+1]) / 12.0 - return ret - -@njit -def ysten(a): - ret = np.zeros_like(a) - ashape = a.shape - for i in range(1,ashape[0]-1): - for j in range(1,ashape[1]-1): - ret[i,j] = ((a[i-1,j-1] * -1.0) + (a[i,j-1] * -2.0) + (a[i+1,j-1] * -1.0) + a[i-1,j+1] + (a[i,j+1] * 2.0) + a[i+1,j+1]) / 12.0 - return ret - -@njit -def harris_common(a): - ret = np.zeros_like(a) - ashape = a.shape - for i in range(1,ashape[0]-1): - for j in range(1,ashape[1]-1): - ret[i,j] = (a[i-1,j-1] + a[i-1,j] + a[i-1,j+1] + a[i,j-1] + a[i,j] + a[i,j+1] + a[i+1,j-1] + a[i+1,j] + a[i+1,j+1]) - return ret - -@njit -def harris(Iin): - Ix = xsten(Iin) - Iy = ysten(Iin) - Ixx = Ix * Ix - Iyy = Iy * Iy - Ixy = Ix * Iy - Sxx = harris_common(Ixx) - Syy = harris_common(Iyy) - Sxy = harris_common(Ixy) - det = (Sxx * Syy) - (Sxy * Sxy) - trace = Sxx + Syy - return det - (0.04 * trace * trace) - -def main (*args): - iterations = 10 - - if len(args) > 
0: - input_file = args[0] - else: - raise ValueError("A jpeg file must be provided as the first command line parameter.") - - parts = os.path.splitext(input_file) - new_file_name = parts[0] + "-corners" + parts[1] - - input_img = Image.open(input_file).convert('L') - input_arr = np.array(input_img) - - tstart = time.time() - for i in range(iterations): - output_arr = harris(input_arr) - htime = time.time() - tstart - print("SELFTIMED ", htime) - - new_img = Image.fromarray(output_arr.astype(np.uint8), mode=input_img.mode) - new_img.format = input_img.format - new_img.save(new_file_name) - -if __name__ == "__main__": - main(*sys.argv[1:]) diff --git a/numba/examples/harris/harris_pa.py b/numba/examples/harris/harris_pa.py deleted file mode 100644 index 0f3cf34a4..000000000 --- a/numba/examples/harris/harris_pa.py +++ /dev/null @@ -1,86 +0,0 @@ -#! /usr/bin/env python - -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from __future__ import print_function - -import sys -import time -import os - -import numpy as np - -from numba import njit, stencil -try: - from PIL import Image -except ImportError: - raise RuntimeError("Pillow is needed to run this example. 
Try 'conda install pillow'") - -@stencil() -def xsten(a): - return ((a[-1,-1] * -1.0) + (a[-1,0] * -2.0) + (a[-1,1] * -1.0) + a[1,-1] + (a[1,0] * 2.0) + a[1,1]) / 12.0 - -@stencil() -def ysten(a): - return ((a[-1,-1] * -1.0) + (a[0,-1] * -2.0) + (a[1,-1] * -1.0) + a[-1,1] + (a[0,1] * 2.0) + a[1,1]) / 12.0 - -@stencil() -def harris_common(a): - return (a[-1,-1] + a[-1,0] + a[-1,1] + a[0,-1] + a[0,0] + a[0,1] + a[1,-1] + a[1,0] + a[1,1]) - -@njit(parallel=True) -def harris(Iin,Ixout,Iyout,Sxxout,Syyout,Sxyout): - Ix = xsten(Iin,out=Ixout) - Iy = ysten(Iin,out=Iyout) - Ixx = Ix * Ix - Iyy = Iy * Iy - Ixy = Ix * Iy - Sxx = harris_common(Ixx,out=Sxxout) - Syy = harris_common(Iyy,out=Syyout) - Sxy = harris_common(Ixy,out=Sxyout) - det = (Sxx * Syy) - (Sxy * Sxy) - trace = Sxx + Syy - return det - (0.04 * trace * trace) - -def main (*args): - iterations = 10 - - if len(args) > 0: - input_file = args[0] - else: - raise ValueError("A jpeg file must be provided as the first command line parameter.") - - parts = os.path.splitext(input_file) - new_file_name = parts[0] + "-corners" + parts[1] - - input_img = Image.open(input_file).convert('L') - input_arr = np.array(input_img) - - Ixout = np.empty_like(input_arr,dtype=float) - Iyout = np.empty_like(input_arr,dtype=float) - Sxxout = np.empty_like(input_arr,dtype=float) - Syyout = np.empty_like(input_arr,dtype=float) - Sxyout = np.empty_like(input_arr,dtype=float) - - output_arr = harris(input_arr,Ixout,Iyout,Sxxout,Syyout,Sxyout).astype(np.uint8) - - tstart = time.time() - output_arr = harris(input_arr,Ixout,Iyout,Sxxout,Syyout,Sxyout) - htime = time.time() - tstart - print("SELFPRIMED ", htime) - - tstart = time.time() - for i in range(iterations): - output_arr = harris(input_arr,Ixout,Iyout,Sxxout,Syyout,Sxyout) - htime = time.time() - tstart - print("SELFTIMED ", htime) - - new_img = Image.fromarray(output_arr.astype(np.uint8), mode=input_img.mode) - new_img.format = input_img.format - new_img.save(new_file_name) - -if 
__name__ == "__main__": - main(*sys.argv[1:]) diff --git a/numba/examples/harris/harris_py.py b/numba/examples/harris/harris_py.py deleted file mode 100644 index 1e6e28dfa..000000000 --- a/numba/examples/harris/harris_py.py +++ /dev/null @@ -1,84 +0,0 @@ -#! /usr/bin/env python - -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from __future__ import print_function - -import sys -import time -import os - -import numpy as np - -from numba import njit, stencil -try: - from PIL import Image -except ImportError: - raise RuntimeError("Pillow is needed to run this example. Try 'conda install pillow'") - -def xsten(a): - ret = np.zeros_like(a) - ashape = a.shape - for i in range(1,ashape[0]-1): - for j in range(1,ashape[1]-1): - ret[i,j] = ((a[i-1,j-1] * -1.0) + (a[i-1,j] * -2.0) + (a[i-1,j+1] * -1.0) + a[i+1,j-1] + (a[i+1,j] * 2.0) + a[i+1,j+1]) / 12.0 - return ret - -def ysten(a): - ret = np.zeros_like(a) - ashape = a.shape - for i in range(1,ashape[0]-1): - for j in range(1,ashape[1]-1): - ret[i,j] = ((a[i-1,j-1] * -1.0) + (a[i,j-1] * -2.0) + (a[i+1,j-1] * -1.0) + a[i-1,j+1] + (a[i,j+1] * 2.0) + a[i+1,j+1]) / 12.0 - return ret - -def harris_common(a): - ret = np.zeros_like(a) - ashape = a.shape - for i in range(1,ashape[0]-1): - for j in range(1,ashape[1]-1): - ret[i,j] = (a[i-1,j-1] + a[i-1,j] + a[i-1,j+1] + a[i,j-1] + a[i,j] + a[i,j+1] + a[i+1,j-1] + a[i+1,j] + a[i+1,j+1]) - return ret - -def harris(Iin): - Ix = xsten(Iin) - Iy = ysten(Iin) - Ixx = Ix * Ix - Iyy = Iy * Iy - Ixy = Ix * Iy - Sxx = harris_common(Ixx) - Syy = harris_common(Iyy) - Sxy = harris_common(Ixy) - det = (Sxx * Syy) - (Sxy * Sxy) - trace = Sxx + Syy - return det - (0.04 * trace * trace) - -def main (*args): - iterations = 1 - - if len(args) > 0: - input_file = args[0] - else: - raise ValueError("A jpeg file must be provided as the first command line parameter.") - - parts = os.path.splitext(input_file) - new_file_name = parts[0] + "-corners" + parts[1] - - 
input_img = Image.open(input_file).convert('L') - input_arr = np.array(input_img) - - tstart = time.time() - for i in range(iterations): - output_arr = harris(input_arr) - htime = time.time() - tstart - print("SELFTIMED ", htime) - - new_img = Image.fromarray(output_arr.astype(np.uint8), mode=input_img.mode) - new_img.format = input_img.format - new_img.save(new_file_name) - -if __name__ == "__main__": - main(*sys.argv[1:]) diff --git a/numba/examples/jitclass.py b/numba/examples/jitclass.py deleted file mode 100755 index 51d2be21d..000000000 --- a/numba/examples/jitclass.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -A simple jitclass example. -""" - -import numpy as np -from numba import jitclass # import the decorator -from numba import int32, float32 # import the types - -spec = [ - ('value', int32), # a simple scalar field - ('array', float32[:]), # an array field -] - - -@jitclass(spec) -class Bag(object): - def __init__(self, value): - self.value = value - self.array = np.zeros(value, dtype=np.float32) - - @property - def size(self): - return self.array.size - - def increment(self, val): - for i in range(self.size): - self.array[i] += val - return self.array - - -mybag = Bag(21) -print('isinstance(mybag, Bag)', isinstance(mybag, Bag)) -print('mybag.value', mybag.value) -print('mybag.array', mybag.array) -print('mybag.size', mybag.size) -print('mybag.increment(3)', mybag.increment(3)) -print('mybag.increment(6)', mybag.increment(6)) diff --git a/numba/examples/juliaset/juliaset.py b/numba/examples/juliaset/juliaset.py deleted file mode 100644 index 670869b49..000000000 --- a/numba/examples/juliaset/juliaset.py +++ /dev/null @@ -1,77 +0,0 @@ -#! 
/usr/bin/env python - -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from __future__ import print_function - -import sys -import time -import os - -import numpy as np -import numpy.matlib - -# This code was ported from a MATLAB implementation available at -# http://www.albertostrumia.it/Fractals/FractalMatlab/Julia/juliaSH.m. -# Original MATLAB implementation is copyright (c) the author. - -def iterate(col, Z, c): - for k in range(col): - Z = Z*Z + c - return np.exp(-np.abs(Z)) - -def juliaset(iters): - col = 128 # color depth - m = 5000 # image size - cx = 0 # center X - cy = 0 # center Y - l = 1.5 # span - zoomAmount = 0.6 - - # The c constant. - c = -.745429 + .11308j - - for zoom in range(iters): - # `x` and `y` are two 1000-element arrays representing the x - # and y axes: [-1.5, -1.497, ..., 0, ..., 1.497, 1.5] on the - # first iteration of this loop. - x = np.linspace(cx-l, cx+l, m) - y = np.linspace(cy-l, cy+l, m) - - # `X` and `Y` are two arrays containing, respectively, the x- - # and y-coordinates of each point on a 1000x1000 grid. - (X, Y) = np.meshgrid(x, y) - - # Let `Z` represent the complex plane: a 1000x1000 array of - # numbers each with a real and a complex part. - Z = X + Y*1j - - # Iterate the Julia set computation (squaring each element of - # Z and adding c) for `col` steps. - W = iterate(col, Z, c) - - # Mask out the NaN values (overflow). - minval = np.nanmin(W) - W[np.isnan(W)] = minval - minval/10 - print("checksum W = ", W.sum()) - - # Zoom into the next frame, shrinking the distance that `x` - # and `y` will cover. 
- l = l * zoomAmount - -def main (*args): - tstart = time.time() - iterate(1, np.empty((1,2), dtype=complex), complex(0.0)) - htime = time.time() - tstart - print("SELFPRIMED ", htime) - - tstart = time.time() - juliaset(10) - htime = time.time() - tstart - print("SELFTIMED ", htime) - -if __name__ == "__main__": - main(*sys.argv[1:]) diff --git a/numba/examples/juliaset/juliaset_numba.py b/numba/examples/juliaset/juliaset_numba.py deleted file mode 100644 index 052415517..000000000 --- a/numba/examples/juliaset/juliaset_numba.py +++ /dev/null @@ -1,79 +0,0 @@ -#! /usr/bin/env python - -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from __future__ import print_function - -import sys -import time -import os - -import numpy as np -import numpy.matlib -import numba - -# This code was ported from a MATLAB implementation available at -# http://www.albertostrumia.it/Fractals/FractalMatlab/Julia/juliaSH.m. -# Original MATLAB implementation is copyright (c) the author. - -@numba.njit() -def iterate(col, Z, c): - for k in range(col): - Z = Z*Z + c - return np.exp(-np.abs(Z)) - -def juliaset(iters): - col = 128 # color depth - m = 5000 # image size - cx = 0 # center X - cy = 0 # center Y - l = 1.5 # span - zoomAmount = 0.6 - - # The c constant. - c = -.745429 + .11308j - - for zoom in range(iters): - # `x` and `y` are two 1000-element arrays representing the x - # and y axes: [-1.5, -1.497, ..., 0, ..., 1.497, 1.5] on the - # first iteration of this loop. - x = np.linspace(cx-l, cx+l, m) - y = np.linspace(cy-l, cy+l, m) - - # `X` and `Y` are two arrays containing, respectively, the x- - # and y-coordinates of each point on a 1000x1000 grid. - (X, Y) = np.meshgrid(x, y) - - # Let `Z` represent the complex plane: a 1000x1000 array of - # numbers each with a real and a complex part. - Z = X + Y*1j - - # Iterate the Julia set computation (squaring each element of - # Z and adding c) for `col` steps. 
- W = iterate(col, Z, c) - - # Mask out the NaN values (overflow). - minval = np.nanmin(W) - W[np.isnan(W)] = minval - minval/10 - print("checksum W = ", W.sum()) - - # Zoom into the next frame, shrinking the distance that `x` - # and `y` will cover. - l = l * zoomAmount - -def main (*args): - tstart = time.time() - iterate(1, np.empty((1,2), dtype=complex), complex(0.0)) - htime = time.time() - tstart - print("SELFPRIMED ", htime) - - tstart = time.time() - juliaset(10) - htime = time.time() - tstart - print("SELFTIMED ", htime) - -if __name__ == "__main__": - main(*sys.argv[1:]) diff --git a/numba/examples/juliaset/juliaset_pa.py b/numba/examples/juliaset/juliaset_pa.py deleted file mode 100644 index 1fff121fa..000000000 --- a/numba/examples/juliaset/juliaset_pa.py +++ /dev/null @@ -1,79 +0,0 @@ -#! /usr/bin/env python - -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from __future__ import print_function - -import sys -import time -import os - -import numpy as np -import numpy.matlib -import numba - -# This code was ported from a MATLAB implementation available at -# http://www.albertostrumia.it/Fractals/FractalMatlab/Julia/juliaSH.m. -# Original MATLAB implementation is copyright (c) the author. - -@numba.njit(parallel=True) -def iterate(col, Z, c): - for k in range(col): - Z = Z*Z + c - return np.exp(-np.abs(Z)) - -def juliaset(iters): - col = 128 # color depth - m = 5000 # image size - cx = 0 # center X - cy = 0 # center Y - l = 1.5 # span - zoomAmount = 0.6 - - # The c constant. - c = -.745429 + .11308j - - for zoom in range(iters): - # `x` and `y` are two 1000-element arrays representing the x - # and y axes: [-1.5, -1.497, ..., 0, ..., 1.497, 1.5] on the - # first iteration of this loop. - x = np.linspace(cx-l, cx+l, m) - y = np.linspace(cy-l, cy+l, m) - - # `X` and `Y` are two arrays containing, respectively, the x- - # and y-coordinates of each point on a 1000x1000 grid. 
- (X, Y) = np.meshgrid(x, y) - - # Let `Z` represent the complex plane: a 1000x1000 array of - # numbers each with a real and a complex part. - Z = X + Y*1j - - # Iterate the Julia set computation (squaring each element of - # Z and adding c) for `col` steps. - W = iterate(col, Z, c) - - # Mask out the NaN values (overflow). - minval = np.nanmin(W) - W[np.isnan(W)] = minval - minval/10 - print("checksum W = ", W.sum()) - - # Zoom into the next frame, shrinking the distance that `x` - # and `y` will cover. - l = l * zoomAmount - -def main (*args): - tstart = time.time() - iterate(1, np.empty((1,2), dtype=complex), complex(0.0)) - htime = time.time() - tstart - print("SELFPRIMED ", htime) - - tstart = time.time() - juliaset(10) - htime = time.time() - tstart - print("SELFTIMED ", htime) - -if __name__ == "__main__": - main(*sys.argv[1:]) diff --git a/numba/examples/k-means/k-means.py b/numba/examples/k-means/k-means.py deleted file mode 100644 index 099b962a0..000000000 --- a/numba/examples/k-means/k-means.py +++ /dev/null @@ -1,50 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from numba import njit -import numpy as np -from math import sqrt -import argparse -import time - -def kmeans(A, numCenter, numIter, N, D, init_centroids): - centroids = init_centroids - - for l in range(numIter): - dist = np.array([[sqrt(np.sum((A[i,:]-centroids[j,:])**2)) - for j in range(numCenter)] for i in range(N)]) - labels = np.array([dist[i,:].argmin() for i in range(N)]) - - centroids = np.array([[np.sum(A[labels==i, j])/np.sum(labels==i) - for j in range(D)] for i in range(numCenter)]) - - return centroids - -def main(): - parser = argparse.ArgumentParser(description='K-Means') - parser.add_argument('--size', dest='size', type=int, default=1000000) - parser.add_argument('--features', dest='features', type=int, default=10) - parser.add_argument('--centers', dest='centers', type=int, default=5) - parser.add_argument('--iterations', 
dest='iterations', type=int, default=20) - args = parser.parse_args() - size = args.size - features = args.features - centers = args.centers - iterations = args.iterations - - np.random.seed(0) - init_centroids = np.random.ranf((centers, features)) - kmeans(np.random.ranf((3000, features)), centers, 1, 3000, features, init_centroids) - print("size:", size) - A = np.random.ranf((size, features)) - - t1 = time.time() - res = kmeans(A, centers, iterations, size, features, init_centroids) - t = time.time()-t1 - print("checksum:", res.sum()) - print("SELFTIMED:", t) - -if __name__ == '__main__': - main() diff --git a/numba/examples/k-means/k-means_numba.py b/numba/examples/k-means/k-means_numba.py deleted file mode 100644 index b4c22cd9d..000000000 --- a/numba/examples/k-means/k-means_numba.py +++ /dev/null @@ -1,51 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from numba import njit -import numpy as np -from math import sqrt -import argparse -import time - -@njit -def kmeans(A, numCenter, numIter, N, D, init_centroids): - centroids = init_centroids - - for l in range(numIter): - dist = np.array([[sqrt(np.sum((A[i,:]-centroids[j,:])**2)) - for j in range(numCenter)] for i in range(N)]) - labels = np.array([dist[i,:].argmin() for i in range(N)]) - - centroids = np.array([[np.sum(A[labels==i, j])/np.sum(labels==i) - for j in range(D)] for i in range(numCenter)]) - - return centroids - -def main(): - parser = argparse.ArgumentParser(description='K-Means') - parser.add_argument('--size', dest='size', type=int, default=1000000) - parser.add_argument('--features', dest='features', type=int, default=10) - parser.add_argument('--centers', dest='centers', type=int, default=5) - parser.add_argument('--iterations', dest='iterations', type=int, default=20) - args = parser.parse_args() - size = args.size - features = args.features - centers = args.centers - iterations = args.iterations - - np.random.seed(0) - init_centroids = 
np.random.ranf((centers, features)) - kmeans(np.random.ranf((3000, features)), centers, 1, 3000, features, init_centroids) - print("size:", size) - A = np.random.ranf((size, features)) - - t1 = time.time() - res = kmeans(A, centers, iterations, size, features, init_centroids) - t = time.time()-t1 - print("checksum:", res.sum()) - print("SELFTIMED:", t) - -if __name__ == '__main__': - main() diff --git a/numba/examples/k-means/k-means_pa.py b/numba/examples/k-means/k-means_pa.py deleted file mode 100644 index c278338cb..000000000 --- a/numba/examples/k-means/k-means_pa.py +++ /dev/null @@ -1,51 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from numba import njit -import numpy as np -from math import sqrt -import argparse -import time - -@njit(parallel=True) -def kmeans(A, numCenter, numIter, N, D, init_centroids): - centroids = init_centroids - - for l in range(numIter): - dist = np.array([[sqrt(np.sum((A[i,:]-centroids[j,:])**2)) - for j in range(numCenter)] for i in range(N)]) - labels = np.array([dist[i,:].argmin() for i in range(N)]) - - centroids = np.array([[np.sum(A[labels==i, j])/np.sum(labels==i) - for j in range(D)] for i in range(numCenter)]) - - return centroids - -def main(): - parser = argparse.ArgumentParser(description='K-Means') - parser.add_argument('--size', dest='size', type=int, default=1000000) - parser.add_argument('--features', dest='features', type=int, default=10) - parser.add_argument('--centers', dest='centers', type=int, default=5) - parser.add_argument('--iterations', dest='iterations', type=int, default=20) - args = parser.parse_args() - size = args.size - features = args.features - centers = args.centers - iterations = args.iterations - - np.random.seed(0) - init_centroids = np.random.ranf((centers, features)) - kmeans(np.random.ranf((3000, features)), centers, 1, 3000, features, init_centroids) - print("size:", size) - A = np.random.ranf((size, features)) - - t1 = time.time() - res 
= kmeans(A, centers, iterations, size, features, init_centroids) - t = time.time()-t1 - print("checksum:", res.sum()) - print("SELFTIMED:", t) - -if __name__ == '__main__': - main() diff --git a/numba/examples/kernel-density-estimation/kernel_density_estimation.py b/numba/examples/kernel-density-estimation/kernel_density_estimation.py deleted file mode 100644 index a6c224135..000000000 --- a/numba/examples/kernel-density-estimation/kernel_density_estimation.py +++ /dev/null @@ -1,44 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from numba import njit, prange -import numpy as np -import argparse -import time - -def kde(X): - b = 0.5 - points = np.array([-1.0, 2.0, 5.0]) - N = points.shape[0] - n = X.shape[0] - exps = 0 - for i in prange(n): - p = X[i] - d = (-(p-points)**2)/(2*b**2) - m = np.min(d) - exps += m-np.log(b*N)+np.log(np.sum(np.exp(d-m))) - return exps - -def main(): - parser = argparse.ArgumentParser(description='Kernel-Density') - parser.add_argument('--size', dest='size', type=int, default=10000000) - parser.add_argument('--iterations', dest='iterations', type=int, default=20) - args = parser.parse_args() - size = args.size - iterations = args.iterations - - np.random.seed(0) - kde(np.random.ranf(10)) - print("size:", size) - X = np.random.ranf(size) - t1 = time.time() - for _ in range(iterations): - res = kde(X) - t = time.time()-t1 - print("checksum:", res) - print("SELFTIMED:", t) - -if __name__ == '__main__': - main() diff --git a/numba/examples/kernel-density-estimation/kernel_density_estimation_numba.py b/numba/examples/kernel-density-estimation/kernel_density_estimation_numba.py deleted file mode 100644 index 9d5c0e6c3..000000000 --- a/numba/examples/kernel-density-estimation/kernel_density_estimation_numba.py +++ /dev/null @@ -1,45 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from numba import njit, prange -import numpy as np -import 
argparse -import time - -@njit() -def kde(X): - b = 0.5 - points = np.array([-1.0, 2.0, 5.0]) - N = points.shape[0] - n = X.shape[0] - exps = 0 - for i in prange(n): - p = X[i] - d = (-(p-points)**2)/(2*b**2) - m = np.min(d) - exps += m-np.log(b*N)+np.log(np.sum(np.exp(d-m))) - return exps - -def main(): - parser = argparse.ArgumentParser(description='Kernel-Density') - parser.add_argument('--size', dest='size', type=int, default=10000000) - parser.add_argument('--iterations', dest='iterations', type=int, default=20) - args = parser.parse_args() - size = args.size - iterations = args.iterations - - np.random.seed(0) - kde(np.random.ranf(10)) - print("size:", size) - X = np.random.ranf(size) - t1 = time.time() - for _ in range(iterations): - res = kde(X) - t = time.time()-t1 - print("checksum:", res) - print("SELFTIMED:", t) - -if __name__ == '__main__': - main() diff --git a/numba/examples/kernel-density-estimation/kernel_density_estimation_pa.py b/numba/examples/kernel-density-estimation/kernel_density_estimation_pa.py deleted file mode 100644 index 576f6d395..000000000 --- a/numba/examples/kernel-density-estimation/kernel_density_estimation_pa.py +++ /dev/null @@ -1,45 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from numba import njit, prange -import numpy as np -import argparse -import time - -@njit(parallel=True) -def kde(X): - b = 0.5 - points = np.array([-1.0, 2.0, 5.0]) - N = points.shape[0] - n = X.shape[0] - exps = 0 - for i in prange(n): - p = X[i] - d = (-(p-points)**2)/(2*b**2) - m = np.min(d) - exps += m-np.log(b*N)+np.log(np.sum(np.exp(d-m))) - return exps - -def main(): - parser = argparse.ArgumentParser(description='Kernel-Density') - parser.add_argument('--size', dest='size', type=int, default=10000000) - parser.add_argument('--iterations', dest='iterations', type=int, default=20) - args = parser.parse_args() - size = args.size - iterations = args.iterations - - np.random.seed(0) - 
kde(np.random.ranf(10)) - print("size:", size) - X = np.random.ranf(size) - t1 = time.time() - for _ in range(iterations): - res = kde(X) - t = time.time()-t1 - print("checksum:", res) - print("SELFTIMED:", t) - -if __name__ == '__main__': - main() diff --git a/numba/examples/laplace2d/laplace2d-benchmark.txt b/numba/examples/laplace2d/laplace2d-benchmark.txt deleted file mode 100644 index d220f2e60..000000000 --- a/numba/examples/laplace2d/laplace2d-benchmark.txt +++ /dev/null @@ -1,29 +0,0 @@ - - -1st iteration - -C: too fast to measure -pure-python: 68 s -numba: 2 s -numba-cuda: 0.29 s 0.22 s -numba-cuda-smem: 0.47 s 0.23 s - - - -100 iteration -a -C: 5 s -pure-python: impossible to wait that long -numba: 215 s -numba-cuda: 18 s 13 s -numba-cuda-smem: 17 s 13 s - - - -Total 1000 iteration: - -C: 51 s -pure-python: impossible to wait that long -numba: impossible to wait that long -numba-cuda: 181 s 133 s -numba-cuda-smem: 170 s 137 s diff --git a/numba/examples/laplace2d/laplace2d-numba-cuda-improve.py b/numba/examples/laplace2d/laplace2d-numba-cuda-improve.py deleted file mode 100755 index d30b3d5ba..000000000 --- a/numba/examples/laplace2d/laplace2d-numba-cuda-improve.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env python - -''' -Speed on OS X 10.8 650M 1024GB GPU: 186s -''' -from __future__ import print_function - -import time - -import numpy as np - -from numba import cuda, f8 - - -# NOTE: CUDA kernel does not return any value - -tpb = 16 - -@cuda.jit(device=True, inline=True) -def get_max(a, b): - if a > b : return a - else: return b - -@cuda.jit("(f8[:,:], f8[:,:], f8[:,:])") -def jacobi_relax_core(A, Anew, error): - err_sm = cuda.shared.array((tpb, tpb), dtype=f8) - - ty = cuda.threadIdx.x - tx = cuda.threadIdx.y - bx = cuda.blockIdx.x - by = cuda.blockIdx.y - - n = A.shape[0] - m = A.shape[1] - - i, j = cuda.grid(2) - - err_sm[ty, tx] = 0 - if j >= 1 and j < n - 1 and i >= 1 and i < m - 1: - Anew[j, i] = 0.25 * ( A[j, i + 1] + A[j, i - 1] \ - + A[j - 
1, i] + A[j + 1, i]) - err_sm[ty, tx] = Anew[j, i] - A[j, i] - - cuda.syncthreads() - - # max-reduce err_sm vertically - t = tpb // 2 - while t > 0: - if ty < t: - err_sm[ty, tx] = get_max(err_sm[ty, tx], err_sm[ty + t, tx]) - t //= 2 - cuda.syncthreads() - - # max-reduce err_sm horizontally - t = tpb // 2 - while t > 0: - if tx < t and ty == 0: - err_sm[ty, tx] = get_max(err_sm[ty, tx], err_sm[ty, tx + t]) - t //= 2 - cuda.syncthreads() - - - if tx == 0 and ty == 0: - error[by, bx] = err_sm[0, 0] - -def main(): - NN = 512 - NM = 512 - - A = np.zeros((NN, NM), dtype=np.float64) - Anew = np.zeros((NN, NM), dtype=np.float64) - - n = NN - m = NM - iter_max = 1000 - - tol = 1.0e-6 - error = 1.0 - - for j in range(n): - A[j, 0] = 1.0 - Anew[j, 0] = 1.0 - - print("Jacobi relaxation Calculation: %d x %d mesh" % (n, m)) - - timer = time.time() - iter = 0 - - blockdim = (tpb, tpb) - griddim = (NN//blockdim[0], NM//blockdim[1]) - - error_grid = np.zeros(griddim) - - stream = cuda.stream() - - dA = cuda.to_device(A, stream) # to device and don't come back - dAnew = cuda.to_device(Anew, stream) # to device and don't come back - derror_grid = cuda.to_device(error_grid, stream) - - while error > tol and iter < iter_max: - assert error_grid.dtype == np.float64 - - jacobi_relax_core[griddim, blockdim, stream](dA, dAnew, derror_grid) - - derror_grid.to_host(stream) - - - # error_grid is available on host - stream.synchronize() - - error = np.abs(error_grid).max() - - # swap dA and dAnew - tmp = dA - dA = dAnew - dAnew = tmp - - if iter % 100 == 0: - print("%5d, %0.6f (elapsed: %f s)" % (iter, error, time.time()-timer)) - - iter += 1 - - runtime = time.time() - timer - print(" total: %f s" % runtime) - -if __name__ == '__main__': - main() diff --git a/numba/examples/laplace2d/laplace2d-numba-cuda-smem.py b/numba/examples/laplace2d/laplace2d-numba-cuda-smem.py deleted file mode 100755 index 99d3ad004..000000000 --- a/numba/examples/laplace2d/laplace2d-numba-cuda-smem.py +++ /dev/null 
@@ -1,113 +0,0 @@ -#!/usr/bin/env python - -''' -Speed on OS X 10.8 650M 1024GB GPU: 186s -''' -from __future__ import print_function - -import time - -import numpy as np - -from numba import cuda, f8 - - -# NOTE: CUDA kernel does not return any value - -@cuda.jit("(f8[:,:], f8[:,:], f8[:,:])") -def jacobi_relax_core(A, Anew, error): - smem = cuda.shared.array(shape=(32 + 2, 32 + 2), dtype=f8) - n = A.shape[0] - m = A.shape[1] - - tx = cuda.threadIdx.x - ty = cuda.threadIdx.y - - j = ty + cuda.blockIdx.y * cuda.blockDim.y - i = tx + cuda.blockIdx.x * cuda.blockDim.x - - sy = ty + 1 - sx = tx + 1 - - smem[sy, sx] = A[j, i] - if tx == 0 and i >= 1: - smem[sy, 0] = A[j, i - 1] - - if ty == 0 and j < m - 1: - smem[0, sx] = A[j - 1, i] - - if tx == 31 and j >= 1: - smem[sy, 33] = A[j, i + 1] - - if ty == 31 and j < n - 1: - smem[33, sx] = A[j + 1, i] - - cuda.syncthreads() # ensure smem is visible by all threads in the block - - if j >= 1 and j < n - 1 and i >= 1 and i < m - 1: - Anew[j, i] = 0.25 * ( smem[sy, sx + 1] + smem[sy, sx - 1] \ - + smem[sy - 1, sx] + smem[sy + 1, sx]) - error[j, i] = Anew[j, i] - A[j, i] - -def main(): - NN = 512 - NM = 512 - - A = np.zeros((NN, NM), dtype=np.float64) - Anew = np.zeros((NN, NM), dtype=np.float64) - - n = NN - m = NM - iter_max = 1000 - - tol = 1.0e-6 - error = 1.0 - - for j in range(n): - A[j, 0] = 1.0 - Anew[j, 0] = 1.0 - - print("Jacobi relaxation Calculation: %d x %d mesh" % (n, m)) - - timer = time.time() - iter = 0 - - blockdim = (32, 32) - griddim = (NN//blockdim[0], NM//blockdim[1]) - - error_grid = np.zeros_like(A) - - stream = cuda.stream() - - dA = cuda.to_device(A, stream) # to device and don't come back - dAnew = cuda.to_device(Anew, stream) # to device and don't come back - derror_grid = cuda.to_device(error_grid, stream) - - while error > tol and iter < iter_max: - assert error_grid.dtype == np.float64 - - jacobi_relax_core[griddim, blockdim, stream](dA, dAnew, derror_grid) - - derror_grid.to_host(stream) - - - 
# error_grid is available on host - stream.synchronize() - - error = np.abs(error_grid).max() - - # swap dA and dAnew - tmp = dA - dA = dAnew - dAnew = tmp - - if iter % 100 == 0: - print("%5d, %0.6f (elapsed: %f s)" % (iter, error, time.time()-timer)) - - iter += 1 - - runtime = time.time() - timer - print(" total: %f s" % runtime) - -if __name__ == '__main__': - main() diff --git a/numba/examples/laplace2d/laplace2d-numba-cuda.py b/numba/examples/laplace2d/laplace2d-numba-cuda.py deleted file mode 100755 index 32f43fa99..000000000 --- a/numba/examples/laplace2d/laplace2d-numba-cuda.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python - -''' -Speed on OS X 10.8 650M 1024GB GPU: 186s -''' -from __future__ import print_function - -import time - -import numpy as np - -from numba import cuda - - -# NOTE: CUDA kernel does not return any value - -@cuda.jit("(f8[:,:], f8[:,:], f8[:,:])") -def jacobi_relax_core(A, Anew, error): - n = A.shape[0] - m = A.shape[1] - - j = cuda.threadIdx.y + cuda.blockIdx.y * cuda.blockDim.y - i = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x - if j >= 1 and j < n - 1 and i >= 1 and i < m - 1: - Anew[j, i] = 0.25 * ( A[j, i + 1] + A[j, i - 1] \ - + A[j - 1, i] + A[j + 1, i]) - error[j, i] = Anew[j, i] - A[j, i] - -def main(): - NN = 512 - NM = 512 - - A = np.zeros((NN, NM), dtype=np.float64) - Anew = np.zeros((NN, NM), dtype=np.float64) - - n = NN - m = NM - iter_max = 1000 - - tol = 1.0e-6 - error = 1.0 - - for j in range(n): - A[j, 0] = 1.0 - Anew[j, 0] = 1.0 - - print("Jacobi relaxation Calculation: %d x %d mesh" % (n, m)) - - timer = time.time() - iter = 0 - - blockdim = (32, 32) - griddim = (NN//blockdim[0], NM//blockdim[1]) - - error_grid = np.zeros_like(A) - - stream = cuda.stream() - - dA = cuda.to_device(A, stream) # to device and don't come back - dAnew = cuda.to_device(Anew, stream) # to device and don't come back - derror_grid = cuda.to_device(error_grid, stream) - - while error > tol and iter < iter_max: - assert 
error_grid.dtype == np.float64 - - jacobi_relax_core[griddim, blockdim, stream](dA, dAnew, derror_grid) - - derror_grid.to_host(stream) - - - # error_grid is available on host - stream.synchronize() - - error = np.abs(error_grid).max() - - # swap dA and dAnew - tmp = dA - dA = dAnew - dAnew = tmp - - if iter % 100 == 0: - print("%5d, %0.6f (elapsed: %f s)" % (iter, error, time.time()-timer)) - - iter += 1 - - runtime = time.time() - timer - print(" total: %f s" % runtime) - -if __name__ == '__main__': - main() diff --git a/numba/examples/laplace2d/laplace2d-numba.py b/numba/examples/laplace2d/laplace2d-numba.py deleted file mode 100755 index b3be83f2c..000000000 --- a/numba/examples/laplace2d/laplace2d-numba.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function - -import time - -import numpy as np - -from numba import jit - - -@jit -def jacobi_relax_core(A, Anew): - error = 0.0 - n = A.shape[0] - m = A.shape[1] - for j in range(1, n - 1): - for i in range(1, m - 1): - Anew[j, i] = 0.25 * ( A[j, i + 1] + A[j, i - 1] \ - + A[j - 1, i] + A[j + 1, i]) - error = max(error, abs(Anew[j, i] - A[j, i])) - return error - - -def main(): - NN = 512 - NM = 512 - - A = np.zeros((NN, NM), dtype=np.float64) - Anew = np.zeros((NN, NM), dtype=np.float64) - - n = NN - m = NM - iter_max = 1000 - - tol = 1.0e-6 - error = 1.0 - - for j in range(n): - A[j, 0] = 1.0 - Anew[j, 0] = 1.0 - - print("Jacobi relaxation Calculation: %d x %d mesh" % (n, m)) - - timer = time.time() - iter = 0 - - while error > tol and iter < iter_max: - error = jacobi_relax_core(A, Anew) - - # swap A and Anew - tmp = A - A = Anew - Anew = tmp - - if iter % 100 == 0: - print("%5d, %0.6f (elapsed: %f s)" % (iter, error, time.time()-timer)) - - iter += 1 - - runtime = time.time() - timer - print(" total: %f s" % runtime) - -if __name__ == '__main__': - main() diff --git a/numba/examples/laplace2d/laplace2d-pa.py b/numba/examples/laplace2d/laplace2d-pa.py deleted file mode 
100755 index d07de3f15..000000000 --- a/numba/examples/laplace2d/laplace2d-pa.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function - -import time - -import numpy as np - -from numba import jit, prange, stencil - -@stencil -def jacobi_kernel(A): - return 0.25 * (A[0,1] + A[0,-1] + A[-1,0] + A[1,0]) - -@jit(parallel=True) -def jacobi_relax_core(A, Anew): - error = 0.0 - n = A.shape[0] - m = A.shape[1] - Anew = jacobi_kernel(A) - error = np.max(np.abs(Anew - A)) - return error - - -def main(): - NN = 3000 - NM = 3000 - - A = np.zeros((NN, NM), dtype=np.float64) - Anew = np.zeros((NN, NM), dtype=np.float64) - - n = NN - m = NM - iter_max = 1000 - - tol = 1.0e-6 - error = 1.0 - - for j in range(n): - A[j, 0] = 1.0 - Anew[j, 0] = 1.0 - - print("Jacobi relaxation Calculation: %d x %d mesh" % (n, m)) - - timer = time.time() - iter = 0 - - while error > tol and iter < iter_max: - error = jacobi_relax_core(A, Anew) - - # swap A and Anew - tmp = A - A = Anew - Anew = tmp - - if iter % 100 == 0: - print("%5d, %0.6f (elapsed: %f s)" % (iter, error, time.time()-timer)) - - iter += 1 - - runtime = time.time() - timer - print(" total: %f s" % runtime) - -if __name__ == '__main__': - main() diff --git a/numba/examples/laplace2d/laplace2d.ods b/numba/examples/laplace2d/laplace2d.ods deleted file mode 100644 index 590150bdec4a21b6f7475996335a3f9796660004..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 33896 zcmbTeW0+)3*6&@_W!tvxE_T_~W!tv8Y}>Z0%eHOXwvD&?o|!Xq=DE-F;mzyH%$tJeaMdxH?Kx?gQZ)#6#ZDVL>YF+9PRaV?dbkt2M!MYmoNXv=xc-QZ|rpS^$jfyzhtcK==2@! 
z?EX06Vrc;c@KXZpkM98A{=9rW{@?DPe|`Jk?ig8E>pB?z%b9--%wIzPeEGXh*jU>* z+I)?J<3Bs`cXrr+)0&N)wXvO{{eN`-?=1hWMF(qZ3maW4LyP~V#lP#{A1D7k*nijB z-&y@>?SHqz`F^N(frW-6(`SkcYJchc#@Q8LDdT<+;Y+i zutq<)F2I|MO1N6nONnG7Sw!u0U9^n*9+&v=yr+kqv!qENk<9n|cdvE{(l`_3%&53& zJ=B3_Ch;9?Pd9kZiHZ8l{$;|5$vFJ5np^hdfMr*n?=lCvZQ+hqfjo3KGqkE7?3Os~ z;1&ZDy+Li31}F^(Y)z(F_6SSi+|qup4Ka!;#R^O>#HLu!1Zm&nfXzHu;ciSo`g@r} zy*k-jM{l29JH5QTE;r=a#H?SJ9^XB;)d6u8w-3S`)ci4{UbdMPQVc!Lc0L%-sK@*o zo)lD&LY3EAvS295~MkraQ8vh++g;tz!b{whbOX`!?GAe%X(FO2SompSyZz z6m~3Y$(KbLJTKh!fZA+WY;L@zkR58cofadxd2lR=kylf8nVGiIWHGm!fCWO7hE{jh zbxE8NXN)^moTx=m5cWDa=5KyW&xiq!US>9@4`66m|#U_&)zwugcIy# zyr{6zjPYj3y~P1DRX~%4W(QZ+!%N-MSLs2wrmSmU$wx5>4rFa$Ma4lV_G(G)Hy%wc%wr0wKrM7dHfk5M^Nb7ZzMPN(?*o7+ zEl;9I?7qya&b8bV&$=R~h1A(v293mq!XA6J<}m_FC6up7)QTCwEZ6GHty%oSzSJ;#uDcuZIa*pV~r3`85kj(EkTqP;6Ih=J~; zdot#?IyKGU2~F0mms%M=LLSplGbqcN1sX+?$;e^|;|+5@=Dk2WQCJeU$rVs)IXMID zn92VkCIu3~*M0-FlwS-xkD zW)@Z}o;_K(u{w?^i@9xtMy*KZ{?rZu#L-eOpmur51|!bHBDIo@o#iC)!>FWWyA0HW{zjv&~bTDrwN3e9v z*=nVr4(29qnM>U)iGJIC(?n{o(iur-emu%f$paGrcVbo#L_hr~7+h$HT(r)ha%EMX z`B5>MR3D~sTw&Vd-a`6=Ww>$==2+1@L8k>gWDdOI^cn5o&W1!t7wo;hQ zZE}#RHrihH_%==_#~hFB^k$1GSU7wsle{+AZ~7W1^ak<=IGv?O!tI|wrg(XA+Jc~a ze|%d|vVMAA#C`mEY470jz@fuy+Kc#mDAIzc^?upl7C#b8UKRsDT<2-0#ZLDc<>4d! zy+9}-60xeZr}~!qERJ%ksC%&UbN4VZ%-Nyv`FJZ&6639%`UU7$@0GP>i=XDr2rO+e z1EvX>xHM25zk%!7f|0`CcO2t^L4Td5To<3AiACLYoA_oet0nRFIuO z&nqiG({GLiJ+~^j*ncluNTNEw%CHKSkhxd$U1l_(c^ey zoi1&v__z=$OKycxn%@K?RMeFy#YHfinzIoP5{ND(7gaptEA83!eF-PP8zn8F^{`U7YbsT;f=t zDO}=>Snpt)u9}0^=MvunBd4S07j@YeI=_Zyu-cy}ZqDfh%p858aRbO(2$!DHI#bl7 zNWi9C+orq|Xz&G^?`I(RVeW9X1m38j$*Cr>pMfP=>cY4+Z80Z{au{gSopZM8_VZBC zF*H@C<Y=1FiaKfglDyC+oH7^(gV+ z$l|f0qFqhT6SS&Q&K;wcSp`bm>r2(dx~V8OWCve^4&wsebm7`nQ+WfULEBD}-I=t0 z!@P#~v^ZHr^M~-^0?o$a)ZV&{Z9BD7KTyP~gwgVw1hS>;sPKAN54F6&Il2_Mrfb*? 
zyVniaVBaZChwbT9xux3(@k%JzVW=l-9qBBP5#g+fKoVVzxLFXo8%uC)#7b|$pq-DR z`4k@rkO{dX21%rU{^4Z8L`wj7Mj)?@zat^oEaM@K6*QbXx(*!S0?y0&3SmY;5e!^> zf(O&;{His+{q*?GNh2&nAc;W{J{XXM6h3I0Y$#z{?U4Qn1Un9}l1o|Bi- zSdd(HLTJZ}U6xB}5oc;y4o16|Qt&>ZqMhJ{#^6ZXYI4+EJQGT)tbzh1a@3rezM9cD zsRD#_8==O5e5%VXS@>imUTI+S&5M1+G1E3yBu<1ZB%|LQdoL$WGMzcty0>KxukCif zll~&%vn~H{zBAFPmvXBLwu$v2JCwUvPg@Z%hbiMeban60*_6GR-Tu{UF`DX8&ROor zE6Hj(qh$wc!m8(bh5~&biX8cF=1CthybQK5UPD+h{vL7Mch$yn;1d!`tgfTQ6Q3F- zF!vNkaF6^Efullp1*259#SeVOYxZD8ajC@2o;PTj`RF|x`qeM-Q7ZnGbY#8J#X5J5 zP1b6ac~h`?Gn72(B#!-;cV1JZVe{OFWQBkx?L3&BLlRQ6HD8K^ED_PunH?G*+W<*D z&)F{y$BIC%s-Q8AV^^MQ@Buj+(=%5enV@?20tC2{d5DZ(PrkZ+;D+hJ!ta@x+2vFL zw6j5WXYfIGnr03u_uU#vgqnqL`%APzcG}I7Xw4mqO)mghnz0F1p%M+f-D?sdjx2jV z+y*l3oy0kK`cd?oOs^02@%4C}W_#>I&jPO1$PCh zu*pF&rcJ&wL9>eq!dwQdWL9H}8S3JZO*-G?XGerZh!$x4A8!z~gey+7;*hVu)gj4W zf6MIveJI?uM1?;?$@_UVPZ5M5)w*|I?E07Al}n1>$^e<*ly;W#H9IA^D}}lI@ycif zp%bYM`4QoU2h>J*6Q^{scH7%}lBQ2z4h2y<051BFJG_f4ecCD9v!+^4(||9XS`+wK z?q?SBuIHn{9-@78JP1Rs0IRO57qd5{X|zg97H{rA%cKl<LF@;A`xUFruGL)}}E zo*A7qKhvv*&;T-%htS~fS*qA#E=zy&jalm7&r@VENOhmx54r#4>NcV@2m!il=2oAI zK6w@`MY1WRkrT3{yJHKPN4U^!0eEMcdGBg{I31m#MxK7n41wEC2yDtre8nt*{RQtl z!S3!+G(wLQ4&%~}oih&OJrxCmem(qhS;CMOiBLr^>}l1hJo+33^a*msN!8RT+fmFg z#Bt1H|BMf=szZ^;0UdAf=G_oh*t>ahU%NI(W4qmqTG(4i_Id;DX5Emr+TKKHqdnZp zbMuEETqPUL4OQY(2WIwWJgVvuns5IxF9NmnNrX-g9t3L9i};mUQRM3@T)Y(BH;G-z zimxM9onQH_pvu!=u}bLk&I^y{!|r)=y%&3_H=j0k3e26~UASa%3F+d7BAP0~h1^05 zsG)qeQdFCIgv?v!17>UX7jTn1$de*BkOg%2L?%FC1eViQgGicZzBy}w>$0B%jD;MtpX%;-zR9NisFNH z`q;0dC$Z3^B7IzSqEfm~B1_ZYV}TS|?kcusNor39eFDbQTGsY~nSK<@a8XJj5O`f2 zHxdP}m50tGG^iY@5AQhOfuoW=@>fl<#rBu|B5EQK?VoHjtaqdJ6foYsWW8@NV(`FH zRna~ds+dH`9ir1xzZ1F*-9(^=J7WOqMBIZApRYYjW~CKBBXzYLZF~7%Q`H*h>!nHDuG;UCw4#W68IaBXjF}ISCZ=$|?W$vtX_L2B+ z7bF8+j=4q7$3%qLRr*C~8T4zQ%0T)G1VgdZv`;51+$1DrWa+`ki>ChC3&(+d4HW7d z1Ju3gp`+gK?H}KxvC?xy-|bpP4@}X+nF4B@+P+pB_lxWpG;_&;pbC0JZ|`1z1-F*9 zPt>pw000~_{uSK%yO4l&&*KYLC(;|LRa7LPngc0?jJ??4Ck8W7RZYzJf~ou 
zTaC^Us@S!Ga)1OnZkGBa{##ZHTYBN$R%g60gvhc~@ZbsFcr>;U><}S@@szTn`Pp|g zijHM!Q-q3&^TM)eq9h?|CE_bHr9#ABh?oR7r6=k3c$~Y-8SAY^7iUsVBB{+xj}vRR z9)}Da#)lJ6nM+#@(;om10NUTGfX_ysoxF!50P0oV`}e!xOb$gEx^`WDAuNnfcw#-C zSR`@#8~;|Z%0TczYFZ3*6O!aZC@TnL z1<>lrMHmQr6>hwPgND7p47psoFL*leAu#(y$h#+8b*er8wp6Du1y}~$EuE-fox#R2 zit4I|Z#!JyihsUik|nuPYqo`7bDqK`I0H=4xRDd`G_`)SBkwdmF1yPU1Y-D@Qt48U zon3~Qec48oEbMHxiJ4_S?I$UOWP#Ox z{*8TVzz_*lRb0}^;)ZKmSt+Fr_yBv{hdpQ;kUTKP)rZ{2CKkK-3Y<@I?Hj~mLrIU( z4Dmj9w3h2?${1jSB{2rHa#7dnal2hEMF6@G54BBzV{9V?L#TUg*L-fo?a+B}(;Z=Whp^hJ0W>aDtq&jVV%&WSn$VM zY%ecC&qk>vrcE=LZErqsOO6(%!b(eXZWhlW3L=AF@;af7H0qeBSEzvXMw$TR1~mFo zb<9?_B%)1T)Ta$_@3GNVL&1#b5?K!48sPb|VyCHiPI!YNpKk-;#_N3RKR(45z|%g* zm81SsCaF14>1(qv`jYl>n<40r8pWO?@2)K*MKoJL+Xa}$pXLgq7vCKw|HvWVwBT8O z4?rGDREjLr)5ajb(y;ie#kv3f=ajVet*OW_Ub41ZT^UPx4jvP zQMneuTMzdZm`d}Uid`rN(|;Ui^#9cGW!o{~=7YQ7?L^;-GxL5g15~T}n9KlpR5VtY zI3f*0KIg=5k+hpc^;g)qlA^eb$5EX*#p`UKmGOMv#YXZcU5gWKacHb7<59M8Nqu<)uyBz98jhg#O^Am5qMs=93&NcuCVo|*P)YJyEv z(X#8VKAE8R&up%+eYUdSyJ$Ar4ToKWb2-mD`Xe?DPsyb=L>_1SIGxWkQZAsQS&Qm} zXbdU_Zm{l>IH`0zh0r2FQ6-RUGlOdVcup@sUeG`}92PiugK~eP(yr(}CF(a=tI|=s z@|`Cr+wgy?3<&b`hYTB1I}85arE(K^!m`E0KeF9i z@ASiert#qJJgyhQM@5{#4}Pj0fZ1J$arh;ZQcQySH;Qox97xH141#9PzjwqKyVbs%^RScWEG#Q1xPE<0*t4 zN5hj3%#14W4m4blPULW21EAm?vjd4F+9RwG3T!qR{6PB z;|j|0h9ja%1aA8W;%Y^(&;Y6JZ4me!Gwox{&G@R=Fw) zYXb4YVx9M$4Wou~GOW{4-cY^TlxPOAre>P={Zutmts=_We>E39o-I{k?^Q{z=s>AmHd^-Qyc909IlHrU~P3;N%q%1^Tb>*lxDDSHumLb}l z8I7Rk6|l&RQFl8RzEEJU!SKTs2$$SU^#rJ`Wg7wDv0xY+Pg(m_JBbM7<7EqmIY``q|JVIy3|n zD~5SD>8S6qeiad~ewC~Wy-7KU-%V16B*Ril7J~{M^^#6G2o^O|VO00TTX?scZufV6 z8kIpB^l@~*>8}Da-EPdU0)16_4`|_ACQvj;D5Wn+%I8;?LiDqdQS|&zTp`rA;u!^~ z2?aG&NjyJIS2RD~0m)M*;;REl@82YuDp!h^)GpYgp9DSaE-2%6c)t2g!aDFyy#z~9 ztK3y+B6NaHsTn^8R3f;udZ>8%?m?ZLabCT=03YNA@RYqUxSgP+W(V9Rk11Oir8+t8 z{i|Ky3(=??t<|ZaODM(T$ro1Q2mVZ1`>M;CIE?GmW82B{*m ze|?7KUHH@>-%8ee^nAN_QN0r-$qE=YK+{_8C;Pv0uG)TZ-*6vu4;W}4d{Cm0=)@-u&)0m~*p^4RP!{zB z*)@R}aYKH6`GJ9+$RG*f|47LAB1RDC>4+}ki;&EB+^FkbVI1Y)+{K#Bbd>(N`$FbF 
z74+^Ee#%X53%dAPy*t|U0bOt|FmPHuY2K8$6I|l=0b=>GA?2aNb+FrHKlVgmTE6l% zHq$5fthkD^=2GFMc#!_cig)_YMt9Gi2d{3)kJf>*=z}$B<1%4e2<`K$T@x@ME)KGu zo_vz!F0h=NAv0P7_cR9YNN0}}cR|j!?=YC3#Y5*EL8oj}PMxK>8}a$6n2uSCQ<%5k zPxM$Wr0Or|*V6PHFQo1(q&z`Cd9P)jfGeDJf-_8j=zP8M8kw$o!4L5ec-qjvaR7Z% zu40YQ9=$1?UBd6Lo9uQHYUK4yP zjnj`4>07^*KHGvSnEQTsHo2ua$;s=&AN~su%`B=5Vol$+&(1Hm&&0joj)ULRglL|O zHetW$&%dL=lFnjJ5C8x@e*gaglJPHqR7B=k1@o9qnj=!u3-L+7_%H0GWSXbDWT7V88bYf4v=L#h~NW1)|=stKG+w*_u~gYI3dG= z{YA!V%EX1!C9S=2<#L9>!PrZ-DHy-POg)1FU>P6k&4#!C_) z7b8?Q4bGO-T0-Efu&~x(J>jTn<+iN7QHqE{506G!Pd#%Xe#Ch}NaRg;rk~s9z&Lc? z70&J*HoLcZisU@F%L5jcyZCKaBe{5XQEiF2qJ+8I>%e_?JysSt&+^KT{Vc{Kyc$Cs zb!l%Dlynd&en=;!!~@%RB8!@p0GY<5-ln+UZM+8KYe4No4LnxC1`v!^)#HfWkqJ9{ zKuV(#47*BcU_$rPlk8bARm5mIJ96G#%qdKTJDD5U8m`b=chI-l+uOQ!O<<8b1IQD4 z9*Zq3mfh93=Z2&!1Brkd(%v|V8f>}70=l_3#6}Y{+&dSS+hf7+Y!Cg*3r(@mHqbrZ)Ir;SSL9u=`^zgTv6w|~E<*}` zi8sm;|F(Zr6{^UIZ_^B+-8-$(d@9)dBaL)3f@i;!kUwP3ul3A?+Zv1PDm6sq)n%w0 zOIQivx-av1_NPsP%D3slKO&@i3A~H-f7JckZh}^vNGNVnqm@4d{}Kbb*?9yh`0_HH z1P%?1<2hO6b%T$3Gz?IP9`cpb&)ip!>D12M?-eYr%Hj zAKl$yXi|MA1D$lC7^!_tkI~6k-nRNyR6x}sG-z9lpzPPIeOJ{BsBv~dVCo!5`mf=j z{4o32Bg#-?=TJ?wBS`T2H&YrR45sXqdXh@bi40CH!P@S}gl*{EnbfR}r$0k)fcuiU zn3eS;F(X6K;}k0l!^i2Z&K&$Eg|0g6>|4PL$YuD#hXw3O2J*LygBeu!)drTbZe5CE zzDGIBelTpVd3v*X$Yy$@#|0gThl};fBIwtQ)Q4AF5fS9{?U@2Mv+7t`3sd~JC$(bF zeDJu06xv7o@BKtPo-&sY*QXVmgR2Alc!ODoVPgb%;0KL(U;7Ojlw`$rZI}1XK_)JWbBya&b5jpcSe@--~C`1IgoaB zRdVhb{3Gxo9}@CE1@gZH20i`~;7v*XuYvdyi2U^>F!T?By#HHZ?RO=v#RO>az^@a# zr=+^#6>;Q(sace>y)XvM$1_zt-4_WZPmQN zPb0x7X`*9EY{u{tA3spa2g^D@dfxR1L|i`aOacFPrY0J*C5pNSZuK%<7gIigNFTPQ zLRST4QCYXWTxoNJk_>D!?_}uK)@ZDUD`FfNiOt|u`5_pI*LRZJ%4si4R%wRs} z=rdAu-Ji_AC#Xb96qBVf5u!F9?;S_14ez*>zi_w;=gkj$ijkd8A5ZrU-%YP)Bxk%w zgV=bbT1gk0Uo%tVqz+W}vm|@2RQAsoUw}>&&6r#sA&A-2kiyYY?&7|cetG+lU=Oz- ze6M3E_v=cD;&BT%s{;4#RZt0I-0vg*ZfncnqI*D2liS4@@=*uB6bwl0xH}^*{8cBU z0vDSSp%Y5*b}RO)UM}G-k4kwN*lTDr*lF&sDq0F|m$I%VIMkDqA6_|6hGeN=r*SiW zey#%^ReZ{D8^edV7QrXof%$j%?+{R~SC<-^-0bO8o@rqR-2?fzTgFQ=+M&R-lYl0p 
zHhq4F8Mj+n@t^oTG%l!`_}_KlCK9G#)3$_`B1gxoK$yS@HM%14jmHL4;kGayZ99VI z*9(-m0L_PY;5f5-f;9serhOMPd!HQ+*zZZ15nFa&go-(s?56#Pgj8d~YKYMG1!+ej zG~&^k$$9l+($06jKeTo9wGWocN;QmOa3{l^@!wu_x0SF^jgbc=Z%!6rR${Wm`d^C> z)m6}oKu!Isn+Er(67MSp@Q2r(Qf3Nn!?27e8IixJkeZxc#)C8Gq==IT00Ch z--QMz33$3w{d=krSmzkp8_N`cZkqy`o~sS*F_e1#VM|fMC|UU_2k(*6mM(%qRNvS% zr;Llp@#+lAC0Q4_!&G510B~*8pfs?%cJeDMml1Kc?O9Fpcd2PHq7PXn;*pDCPAbGnZ4sI3I!1(~Ptn?v7-?2v4iqPb^(X zf@0=%#(R|Cj9_x&nN@5RVMYXT13XQJB%ZTw(t~bza2> z|E}nY`4#!QUXMroXH_(Pm_L-|K%kwr+w<7|F9kBz<*zi!TaAIZ z!fskIz4bw4BW9b&h=pERgAp=6Ls9(cwO(;Bth+*~_lAci$4+u&ggbiYI{O}3ybCCC4n3)|u+Co&;kzyVPprv-ihQ651pW!cN^RYbKV*O$^r$5Yw z_{D5^Z8oF>FA=c(IQ#G;>j4MX+qU#_|C6P5h29P%*mU81-;0;HCjKW(B{Hy%Kj{>~ zJ(-h_x*uHgE^!FDe$$i0O{HQWg%J@7AEsPn9Qa7>j}sQhwa$I>XmEAM>JIy#mRrZev-;|>1MTVmBl2v9Jv07hp}QM)OkwcNPO0Aj6zZy9^^B11a2i0(x9v9^rsN zf;onMylNKJF2;i?-DFz;^5G;2>A+r2Got>CjY8koX@f>TobeROkgNA&W0c97p#K@_ zzgbK>&uR=%CGvdk;O0@|XRYTKi_K%4ooH-sOsy>}SLfz%o214k0;zZbjyOE1B}$sO z*sTUyY1MTLT=M0Vs4(SV&0#hdud4}~kp*JnllERmAE-v-e z@kw`O`Ws!Dho8@3#(xcHDeSaMQEmaotP3-QWkl1ZAL_!OXup#Y({~T4_uQw=pt1??-mi z$HqC$rIO@;>kfB2B_z64)C#D^gnhlGddT&JJ>rG1jan`=so(H}uENtK0OpF_Vb_Vs zV&E(Q9Ib|G2&1-oAF$jOY4LPvjm6^glG=NG**({gMEDFcJfzTlPnM5m(1a}~ri!*= z+C<-IFSmS1XgMa#ju35Mn06#gBOb50_zNYMp|hbOE1ZPH(|>8!&u?QQeTXfrpU`0^Tk-=b>~oVk`xn< zHB%S%Zc3WCm<-B1WwbCN{pBrRWMnk2UAFbYlgZN}5y>@=W1p0dN{2zSA)5sKdFO;* zj%eJPFVG3hzJ0hwN!1($r;s{mVE;AEwZ9Dsgyl#Kd`bX(trR^W@XOYI)2cFJWMkjA zN8!zg)cmML=tTwECJGL(NunM5=5|H9J=m`HrrsqC=URRSSwZ&G5w4*XxgNXc-qpf_ zrXe#5+H2t6aY1@X$suDzrN~3;*a(x|6clc;tkw~F5eJkXQhB79lVQrKW;+Z|samyG z4YlgI{Em2@F;HY7n>#}4LI~%4hzU*oC8i!Vheos4DSSzzNMtFJ0o#r#mrl=aI_i}0 z4S-z*LR#<+(Kw6wjvyV&^FwzIbVsb0%A%5{0o-A>N*jNqz8?TB`YNjc5~h`o8@87< zr+g4ID}LtH7@*YwsZP&#)v7HfaAJMn9=mic>fCbB98Dj{*--Adxa>2i5#VrV%aBMU zbaxXaw3Pn?><>qgH0t$zekByh+Wv1G1^M-FtG$D(g`xeQB!qR#^0HFaaw3$5rf}cp03v@HaE`Y z4=h5WY+6>#rDuyLW-OX4SiiB5{jKVZEtHrdZwy5Z-@{bPvRyT> zAD{z@5`e2KXieI_MR-MNAmBBP%cUqk3YtXE@J)x%wO;4A9gWm~T;jWnk*dsAu2 zxZK(>9Guz 
zujXLSeb`o5ingS~=3v~x`*>y#)XoX92~iqs($yPx1z%gWz%COM58BiP1Wc%YeuJWR zBoHOuaRWTu{F5cd`@O}1%8tOknXTbX1R<2O zwo1Y0hn$%gEFQq5`TQP6m5RfXJ8I#TvAukgnoNf=5&MmdUYL;y{)SNgo2bXjmJ=Fb zFk3o|+6DlXF3QDiVJby|5>hAko#AQS)o8^GHV9t5XtH1Ewu9%MJ%JW0b+BvilB&Gc83%E9y{OS+TgEtibl- zWhlnG{E)?TGzF1LiHm0~nDJ{95m5QNNKWYM6t^ZfpXVtW(tSoxv4Ul9y}boqhE)3t zEjuAstF~VbQ&mLzhMu!^rjVK9nZTvlSI4+LZp>H_+J>S&rhkgQp2;ijMD{1x%B79( zECu1UD^tkpFBWesbA5DS){8}Oqn=$Km>wWPsu#{O28POXQ<31xXm1z)hN}A5_*S)sGqbYgj+Jx)kcvGz+e_1 z$IbJ`q_YTUL?6b85XR@kJm##wkPwD9T;NE&)>m9dNVV8%0T&~9k=q~Z5a+b)Pc74C zyNJTb@4U|fK7DlE9H76?t=5W~CKiZA<8O3hSXb`ZhLoyOB6aa7Ed4ljfP zuz^FWT2v=8MrE=NOen-UipQ?tP95*FM|*AT9Hbx1SaIeVS8_X-L9|wC_;gv%7smLL z3G1X;gqHA~2LqJu(Awl6dF`U@YCOtkkG*D76=0wfwlsj?dT?SH+DxoSE#F81nwUqz%fq~bVr2BwP9RvQ z^77)ss{KNTBi1>7$`-(KwA5c7f@V8M8Thx=$@=DTZ9H6R!#Tv_k9uwWa*-UwdAS!c zNG+MDLxtB4cPdZ*+S*K`GAsOPLn)*eenZYmYH8X2RQT7omZIPV>E8rMA{pyB&4hU# zs1DXF;3*Oq8c%Oqh|gDU`xWmJlbttrqQm?^i=FH!x;l6oG>2@XI$CkKc3G^Qmk08Z zi@>SoH@~V)zMU_1?zwH))VcL;3RQjr{y87!L>5jgfdT-G;{I>v!>^pee_nlr&p`Jd zsa^YO>(;C6h+dnWJy$!pB(j^aF$UF5s}6Xn8#pUK4+$|QY06@Aa&{J*+Fk;3c=3c( z;&dHG)ilw8=@=7NY+1;^Zf7Ybm1O*t<53#TR|QW|C=e58^qgBi7L(kF)IJ>^dtp|s zO!-+UBw#rx%!qV_U_LAOT8ddif3@;l>Gj$Ol}Y*7%UPZn^oq;X$zf%<5jmD*l=2vd z^^?@nYR1X8E2vz-8eH$%jmN4cNptYfqM)_`l+tz>N(9I#8d+duXC^fj5>ZKG&~$86 zJUVJxQJ=bSrHU*XxEn9qVynllV5B>^@B0|XQb|$ABIK0USj;x4ZKTh+lz8|mvz4&# zMfuYOf-XL#D3=wj?*zN7XnK<_>vyoT65md>TeYmUTa}I;G;L^Ki5YMMeC0PhEe(E~ zOgceq11d(89_?RLSZm?ZFK1E&F9uFH_zv!xsAj%4sQN%u#IlX)#yFa}MSTo!{ETeU zWvt$%8%>5#sJR7&UkddtvS0|uK0%A?r7&2|8Uww6EN*px#D4fv}$B_&04i76Wt<|Ix8reh5`Q#)J z$i~{+ z-o7Y;rSr^VCH4pN!wD6VD7xERc?KxI3-rxBqs0@#kG5Ne5`PH!r{w2;0wy%5FJw$O zt$Jhk_LJlu;b3sE>?vzZc)X6%^mu{bR4xPVGJ$4142mefX;NL#jMpSKt{$m~;eXJA zB98-p7drzKssx&QCFLu#{}^J-<4Lj+O?o^H6b~cj%Mti?`8x>ur0wJp=wkJ!!1oyiE|Hky zCo!Jf^AHn799!KnXR=(;bap_Tw(~dgRYD7?g}Qo(EeSUXnuBF5Mge~|I2SUsd$ST7bXS19HxC^WUe+!OaI z&aZjOlQbn}Wuz7)M(j+ls>#i&0YHWl=zN>4&sd1A4R%|6s8hg9Cy-N{z}R0enk||a zp~gF&*t9A)dK{QhsC!{_sYW{%*FK_2NH|?UUkuk&W*T5^=lflNgsEyrX`9323=nUy 
zqOe?roHK^bC7?r|YT*Bv_)<)>K@S>a1lycgm=3PNdkIlud3&fc`(de+unocknZJA0N1S*+;OWMYWEe*f8@y#!OkVftokc3D0} z!!gdli&bQb_dH90k>X*N-jC?|P!J4V7u<#%pk#gBc7JGkIyqa~YkC@U3`74gF3Mqf z5?;*qx&5)b`FZWi6X%p(7pD!`LQ1Q!mgpEq`=P7(YJDxpvLdXaVRh_Yz|YrcrUyAe06@hkm4%hrF&L{LAs1) zC2UGmSHeyn_>a&n`zO6gn@XPEO4>)G4aXdHbF8N+Ohvg?GnW#Cy2Fa+pUWlD)lFJ( z-VB7H@)~nc@Bwd$pevq}a?9+x5RU-*U!tn|$ zp3>mSYc;q}k#%sJY!bs~oC4=YV8akrx5VNms}W5;>a*GDBb)inC)?q?R~&OjUh>Te zOK-7P6$}D~w!=67IBJf( zSR>eXV}BIK*w0aM(+ruPPsajf2qhw`34jBe!1-B`*xZ4&Y`X=f2MY<7!kO(3clff{ z%)BNZrdQpPYEQ&lmBX--6UODWwFbB!YKRfwc|^b)jN`13$TIS1b5YL-*+PxUs_8)U z$ZrgzzZl{>1S6Z>>MYuv=q9k&hhUe}+FNyWdVwRSb-Zyc&9Zi~(LGMK@z8o@Qqlq1 zSa$u1-SPUBB@PDea@8qB__gN#Mg6}q2GV~r#=jTe5sIskY4q?Om&*7`uc8*BWvt}G z9SWgvjZdQsGu$o{4R2?`~#{p^*9jV_b6!;Zq5vh2J zn-=&x@ODO4SVf9YTcjG-$u)Bw8dnJ}Gk|9y7Bd~sWy1-z?9s-u9pZ8ks3yllPUnvi zBzPPLG)Cq%+vA;Q@=&JBUQfGq!x}6+!WyWfW$I0ogD$4+JLJqL8;S1xF2gc)-%#!k zF%9cclkV2jItj+r3(kr2RX(qBzArO3 zBv_uj>Yug|)y;*|@c)=V?FR3?B0=kJE!xuLt@wm>{Yh^Ghe08Xi!9O$nr1mp%@avg zvl%|v?sk=(P7jC@{5Fho2>*#`VU6MWiu;I5Dybf)j!2k0%}p)pGLn8Y463#@J_m_= zjP$U%cL?CK}{Ws|HhVu+@6MN@Yn_Z=izJ`0pOi(_|#3-;}hJU!_YD*rBC?l zHWJNX|Jz~sbtV44QTOY98M=QWKoPN{R(|yGLXYo|dqz-x0+c9F!E-{5%gm=EHDrCi zt78~^Ch4MLV<}5p>JG=&c10R4scX@*=q)Bj?X6PoemFfm6FVWK_|DL9%v5=~zFxfb+XUyW@aMNb;t~kx5 z4JoJcDKv$Ycl=S%WaoH#CnyG3Y%6KvEsTun#Qat{H&l^OUcZ=)kX2CBWQl#LqebU% zoIm#nOJ4;69k_OTm0RVe@x3pF^V1kSLf2RH%0st|Y$aCDHA=Q^i1Zy1=KIBOD%jqt z-pMNB6(I+E=2AE-s>}LXJaR5#PPOTAiLyHA*&^oQt;IfQtDCJ2Yx0i2G8kkU(;P7Y z0RSYv2KOJg=lHt}hW}VYF#g|bh|{Ru287Rz&K{fSs9!&=FssZJc@2Z!GR5U4&F^JB16|wPFZ$ zDQKJ?PrW`Kx`P*Zx!(38EgP8$(&9;i(vj<9s&WIpl<_qcu!P#R@m#8Pn{ww1yEsZ% z9cy$+DAYof)m``{b0d^ySux)I|O$K5?q6Y1a}MW8k_*Z zB}j1j4#_V!B=6O)^Y$}WO4OaCSypx7}&O)pK){MT!gGPT0CNTV+iS>2`7(_JmOlgezBN^ zH{2s=DOSKAHY83kA1A3*q!3r_Hg&}Il%O!qmg*pi`58b&+b@kUUA2BJPwA8`PW{|| z8qunxBP5j{dAutoKi|DhLAbbB%G@QtKWfyg_0F(DRI|}rh>9q zIme-0F5Sxy999{rSb2h>a5zPUT&00YDG<_Cq>oBnLifLr2p?bZ^(3dl4~uLBJ+^kC z6z-xT(g>Gm$qvhR_Te|J{!ue~zDHOA($|ae)OY1*=SjwD1M^mnr6fo`7+jMX4qGIZ 
zE=9M15OOcb_cjf5s~Al291i8CdzKok#7JU8|^Eu3^AE5zV3zvkh6U zDFOdn?tw+LIQx?KRpr0ep%h4AbPZ~gWa)z*q5USM!L0}Xv>8X9R*LhP^=%V*p2$YD zb(5LTw#oLu9`v)UDhx8dU&kNt?;?za(lur&q!E$gpCyN$Xgv{;ZP6&E8 zS{rmTTP@@EMB2D##8_GnV$PF9*cEZr1tSYNuaqi>gMUOqsDj|RD?_gg?N;YzjyZFR zLi?c4KCqG3zCO96=2o}P(sN^(USM}zn9Ia;y~h!rzn?9^k7dg5Mu}9mwCV=r5Yw53 zN+A=|^9Af*D{VwztBRR(mTr0w@qKQuH!lt6>{6vOP@=^JqI^s=mER@1R6x(neSIhS zX`ge1gDa;0bsrqIXc#MNR1Ks;MyczT@(-uqLYlBRmZQ_ZiwMxl;;$_46ZTD2Mks*`I@>ZlindTsfcb z>z4Y9C&f{@y&g1t9jAWvW-6>H)~Gd>sK6A)>{(sA#eh!0RZ^SubLh2K7W4cO0`#xE*!RkC70T0iEk)Vc7nySj zUcG7Kr+~esIFQ1BNZv!$!LPMJ4j;DCe>NLc2+3V)od*>>(|dCJDRSY{)9Ooe$@<8< zHX%J6NalT{!v;VCf$g`uv@ssFZx)4KqTz>41f@;GcAVRGhJy>_3(6}ZayX$B z$6$uLVtfUjaJDuHav8W5(KM9}Sao_x^LFdL_@JGjb>RJFZ9yUdILm_CD}>nFD^Dqx z!8GsLlA~lxjbc?7;TL*C2ES(IHr8w+w&BWJKe65ulEw^(Vi0Ku0kR1VUyqUqe>w}q zv~n+M*X-k<%3UG;^Dvwop=A?5C7p0|400RiT*$Q;r|PN zVE&yyEZVHGA^Wrf`NNo~SYC?}31!%l)%CkR+;=>>I#{7Kvx!2uq&?2}SJ+`!S*U7q zHFoU6E`FB1DW5%WjX6-yHbNAV$}@^6utscVJ^_6YLk1b|Jb^xl!wUQe+uP1XKI;qT z0sRIsSI=HkZ7jwS>I)ahakqN8$@y>m>)6ub%GUOAWA+Lkv9(8=66dxS($tgZC`%j~ zRGW;a^Qqn6Cc#Q9`O&y(me&LKQTU@If=KBhlrWZ$^E(QeSZi={GSKq$fldg@f!2lc zb0sCqA5Ms#b$yf16k(;Qb67Yoo*()1SXGcUSn}o7%=Az+Xl+EO-%s-Pm82!$zZa!K z4+&OWS5qD+T+|oh-lq#BTSBSC#AZEVXqJA@-7K9mV4OFn(<^4W2wr(-cVF4#-N6jByQ&CH| zW&oSaaNxjlUPaBlet{IoA3{k|fcyamP8=V0#8v+k$RF^B*9hO67x0G;iR&6pN#ago zNzFIxxTb-OD)2+bx5O(7^z@2DGq;6eU`wfI*Tzm-(|dT$Xe+JD+EECgV>?+;9$G6Z zn7hZ%X%*KHJ1v*v|8PQJ4Omb+6C}UGEdgKJIRoSXMi>(P1%tAWo1Yf8j>9qmB>J1Q~e4>dutmg=ZIO^cm9x} zqEg{#^cH4d9Sf3PO^Lh)IYhlUF4m=v?){h1XL@9mvxiy8T*6b^mbC~)jBz?d{f)%= zg~2zlo9VSK)^ay0wri|hx}N=?3vLg_Vtr=9JO;+AzU;FBly?SN!@dbSC(yEA}(l*rcwl#|0rd8Aq+hC<}UFSh0rv+|fs zl@&M7glY!rw=f~*_o)Nc8xzQ$aQ5k2qcpQ%E;%x!4mG&U0NoJRsgVjD;-)kv#H7(I zxL5JVxPBHMnU$`n*!)I5`J;{ykxHJ=-FYNB9mOI=Uv($jNy?N?>W3K^+D+ko(Z-* zy<@;`b{|cV+Lo`zY<4PxFK4Grz%;kuF_hg*yyuF!dv5+xG^ux!g2{@0Sfq+4QAJ5q z^VQ{n5NbaPxd4<67yW7BMsx<4jdAK;5#a@)N@jY~;j+Q)3Y@}8X5wUEY{X_CS1(oe z5qfrU=~+Unq_U1{dJ{`{GPhCRJi=P|koV`=qqoCsdUBfgZq~6*M@EZCcy9Iw=+#m# 
z?63*nM&`nv=zX#|ngU~Girw!Wx&rbC`mHnP2mXL2-e>q$qo{|Ha-jQK3M>BY-2;Cx zPk7)DVMyHS`!K};o-|@fbe?zK{C!hp>LT_7eUSM4 zF@qdPA9RYFRBOBIj4}0wFfALrts=w)h$Uno`=&OZLX*B>t8M{$AzpGm_ChQ(1p5x) znA3_uB--6p6;^d@MEkXx`5+v}v4=VW`4!p6 z$jFb_1JuMc@dR$yC+1E7U%W|R^W`~EAm*AcbECwJT$Ra9nq>zD>oPLDA&BR^IZvQ5 zB*Y7>dXqc$^o4{$0Ub-}{enUB5B5L`WDjhW(~}U-X$N#Pa(Q0v_EU2VlpMe)$x?(v zxM!l{qQ#OPwGXTT*#rISP#}AdP(#nGI68be?l{NQ7`?z2gvT`W#gyD-A5B-2y!dXG zL{cn|n~Y&=YIzV!lOZ=Y5Z3Jw?|l=__+)|NDIm0OXd3TZXq_I*l`3VlHh1aRQ!5W4 z-#4-9y*3)3MxvCf#-z^~bynYEh3o12ZC4&5@ll$1Pr7%_0{amB@xvs|t{Qsb3R}uoQY6(?(!0*t zKzY(B_ZNN|P!=c8X|$tO-i5Rn>60UP5+Ke^N#SX;*}8t#TzmnA=LZT)G#{nea}=;N z$N0r)BE+SACig@N$wUXr4s*8riG`|`C?b>Kh{J8~3HS`Cp#||I^SBd@trU41^t?1C zAWK@X`!!R^z4*3M`C3(^ChfSrQ}t;u;d@4Vo_Wz&41{qm5-sZHXZE&R7ulP^a_hU> zxbiK+?iGqkbk8}$v~3R%^VpA(>}63QYp{jd37Uz=r^YRa`E1icIC-7Z`Q4!sY+|(! z0n#BH;~Sv$4eIX1GjQB>IL{`F_7NYjX`3<0)S%p2Yy_*HjstxZ;iuYtW^VI2pTC5c zC#|$OfbwDbzp~ncN9HEcGJDTz=c`pDyUxkW0$b)RlAuE7+NAKbwt?N|#HjWb2S^;E z+knIYKlt$O`x{@)-4j?0AaSt!cZmb@e~80>91s8ZjtA!d5QqPzIy%|{=;>WtT1^yx=ovXVIqAP2CHki-LxX?ivvsn!{GQ9ufF3B6{zLtV zk&co6XWoYv{=1EO$osfSHa0f@s_3B)Kf3t6Iwl4N7WyCiKWj3vH#D^TSN=yG0On<; zH=#GwbJU}CHZyV|CL;J}93Krh6A|yv(S2wJFZcJx7y#wwAN1&HfrX8DnHU(EXc-x4 z8Q2sVSpW>|09Hos?_UYP@A+2<_J8= zw1)p^)$__KGQMQtlwx5}_y_yH z4@&!H4vuC91m8!{QBU8}h}OWy$=Z>Jh?(d|B2ZJ`@@ErHs5u`ex@@M(erZu zbBX~!PRY-`f7SIp`R}g12aS0}Qzt8ZYdted2YSb!n{>9;CU2BYSkko`lc2SmgZFRk(Bk3<9?k^h4Nrr(L^B{L;?ldGP83d9M-UMpW#Fdq5ZUyiGWR9TiIk!k^HSmdN3HDjK1&*`}3^^3UU_`*={ zoCL^2%%JkPH!q7W6X2`gv#?4`_oak&>A=aK9roCkECzZ}D7gD~-*W6|W_I3SRf7lI zO4njQLsWlmdTOJ?fC+lW;}y|cmE?YNaZ)C*6u4HBb&zcK^cplODyr)w-rc47nB!TZ zVB!1YV-yHzh&WLi>Jw{H&ImrIemGTofZ{8=iq(WYbhRrqOUqnn{7BmoWV2z#*u8Phw5=v#o0$Da3<^`pT*#hfi zoh&X!Y=w?Lq{78kg+^m|wkvCPZXuai7ZOYnGDt)_Qhj}~;TZj~a2E0t39;>yA&3Gu zb+YjG>rI(?>$Y4n$q&-n3*anhOb+1X){))Y%c+`u=dsCh#jT(Lw7C%V2tEmL9DMz+ z3$!+Lur7k^x|mJ2*TQqvIz-G)uNI{$s?q~dR?~CHp<4ly&RLV@w~{otNosBHAzq(TS9B+wY^9H4>xKH?X8g;yqrdT)R@Ul#;NN%?S0UCFx zCJ5YNgv{0p%Hbq-Y%V(soy9j-SzX%i{Xrn*qsHj1)k&Vr;nk&i=@jL==94&Ja!P8H 
z+w`Uf450@s&A=WR6K>5wBfx&EQc(yq?$F`$oOSOXzSniFehbzz#qu26{kpMqTrBkM zh}FJRfV*+fmxlG=fgqJ9oZ>vVWrnAsU3-QHTmf-B!a=_5-FC^+5~XP-DK6UvLZ~v& zKMHIib|U7~Av0UN8lzdy!`0e)@`|l6S88T4w>!l@$~4d5qoXuGk;RA&N3H}3FCj`5 zUaei4lK+YUhN(2zyey;>B|v@od8Lg%Np;V(FJuzJCt>^a+`%-ekKnznZw-Z$9igvP zB8V{6Qo=b7V`VxpiP%{5GqqLHv4<~G^|tAcR9~*+vSz^!`Oten5R*l!yXBIR#@h_~ zecaTf@-s0ily=((@qQZ;!s_t?j+vGUkIZ}qQ%|_;8@5t%a6hpp!)G2+GNwox8$Bwe zVho$EwvA|82+Wcubc-|&^@tCt%ulcr`9Y0kr(^<(Hx>24DRt4Esg&_`>8+0~oy+zH zq7{5gd6KaMC*|uDC5uI0c-b@4n(NTYx=QP1QhFdL_j{L}MYxO~rsW)36$kUSrYbp` zL@M@Jlx0fSXFp+`Q+lgtxgYIx>(j+k7wk&@NqatQZ@Y5r8Fc}!QO2nDaP4*J>?O+4 zyOdq+ZRZe5BmLGc90jTuYzlV=&_&ZaXA%a{kqn^dJPGB)v%Larjr!uX#M{9RT!^JE z)^ix7!CWHmT&9;o7}b6)}UznI(X+6++d-NGE8eMvw=L3g}<-YrqqnhT|%w@bjb zU_w%meq9A%WRQohnSf>e29+%Nve5bhCf34qkmZe~(8o^)FYG^WESPxs^eKDavteGP z=Og1Uk=9n}PcGo06GB-v@ey)a@ubXSEL7ak1bxYso0{zCP-%D$>vWZk3KV26T$r~9 zxqzOU*DH$VH+tPoROtVjbwjyjPtYn13Eamhmlu3$&UcVGxs^^b%TUS@Rr~<=W{m2| zMwC`utx zkxfwN`1(A&t*n;RIpfXd>-46fnDZBGqdh{_^W%GZ&n5FPZWb;_9PieI3l83nLCWaEN+#0lxVar?jWLp9Pid1mUA7 zVcnaSyfi+SxLho%37QSHxz{tFR$1jOIdLbiu#Kz}(LMLmBdL*K!m{rgqm}@OeS2wibTa_;SeB3C7Cyu1n(6j)<#{qP zqvv+1cvG`PH-!B@(sqIR3iCKhOH`eH8uDsiHeA@!$M^O_(@mbruvL1#s-y44zNsYT zxx`%}Yf@Q$LLzVRsp1$m(xN!yd|>!s^ni$#SK2yehLNQQ^G>(iO$JvrvS68ipppcU z{G3`+ykylWo^3RN{KAVA3SikON|_Z0;P*D(#`Vt(a^Bo5kSqw2fFsOftKDw4p>e8b zqq2+FEmye8UEpN68lLq%6Q$No(J$8coGH~H$4*FT_%Wb5#MozM-?y$*x_o83eqen~>>>mq;b3=7V0alYA5|>5*SqCc-ec^Ob{#^SCGYQwrjrDShZM zH8Jg4tiAk$C z@Kc@<+q>MDhk`N>zYn%tgtzIkCTNY}HJJ;$%MFE10Gk}CvI&=#Af-uiS#UN2Nyzu0 zwWcXK)n2eHA++$fz3;k_DCApxQge0}Q4`zB=t7?l_6Bw)D1Ls<+-noMB+^#~@>_la zxY!>4!9&S`^AzXrYcx;n7+$M@6sb4fjY76WhmwTZo zp&WP$!>PRHEx3h>MTIqM4!r=DdDS$$vdAJ#=kdqT*h$$J5@}(6I3Bym#mL+@2TH9A zvgqH?4``j;)V`oAZO88E?Qh_e83~pGU@9n{yDuJMhdoB+Q}8z-4xWTH0vS`=NUF=R3$l6 z*?x4Oze-LlM>q4<3%^9Ayr)bUag)~Ef^Riu*nI^m)pZ=%1(EtrgNv@g!Rjh$LGxt` zjG2r>40IisX}8Qk%4tCLlp(Uy%cJfE5O8ER>_LZ+>RXh0wb0mSXatMUm>*ZfAxqC9 zX?-~4$v^v;RMni2&`6fv_ 
zut&@1)t&U+GNH8Z-7|;N^q5gA5Bl8?zTt|Fy0Kj?^-yw7IO8O-_5h54l`GABwLoq^BdNkxb=e!ftj3*INzt}KEoE3vE%%YvS ziQZvvkUkx}11w8NM6GLD5oRk5Ue6Ox{>GGZ$GMjSSlIhjGaI{c|c9b5d$78)&fc(oSYW{ocI^@j09v5_r|@ zT4L#&pV4Bsa@3kBeE)gs6hE1Hn#(fjYwJ1P%mzFvH$7}~$(+m6W`tK;`Zekwqt8Gu zsBDau-53HHL@XR4Z5{oVQ!JW0uIz}KV&eK|)q*v09g;T-tG&mh=6%pZdMbvW*c#a5 ziSTmIJGYKB-zM;C?XySFU&}mo+cfZx7Qw+=r>sqf$K+tECEMz(_4ZZ< zKcgCZMwUKr3816b^dmeD+{WL{6SHA&d6Lr7JV>?t(KY>@;lR3E)Tx=qTH{GFJvG01 zTUv!@4+cktlA)_6s$ZFr7{{C3rjn{V+;f5|w-4=yJN6#RY0&6ctr2nn?a!HkUa?wk zGVMm*Oq^Wa*{L32Je=nuO+4XnL%RS(jRj%+0$0`e-ewgNGE}>k{?1HgN2sy^nvh{d zxjko;THyeg*V1XjXpV~#BThscdow(Q3MC&PY$iV)4ld~7ajd0Y%0XF=AXjkTkK6Eb zD8e*e!6Tg#Qz3U%qrPIsQ^6YyJaR)op#E2!J)PN-ZU#rrN-je7kKe)y+{>0Oxyj$7sySq zke@dQnPLHLKvGzcc=H?9=3MNf=WgocS^HM8{?(_9zC7tDg)zPBAb<9WTB z-eN%~lc6C~%Sym!oHh^=+Xn~pRegC#!&%^>ZGgk5JA*)N6aQAv@J&vb&h-=Sy7aOYr0TJ$8B{surS=OY9;!eQ}N3-OjMI#ATgbqB-{kJFzX)(7HTsl4zjCQ_@oBgQqjtcf8O|g68M^1>oohU5C zbqST?Ya6bNuMF`k4XOO6J=V+?c4$0K0~P)3qh~67pTQXuWM(QTzsqTq@RuO5=KS39 zd1M2$#s7KT)SdijedKBSspyOt0Pu3Re<@_+sFa8?Gwarstvk*S7v)amDtC_e z9A4MhX&a&319UAqi`!~~@vdgCvaB^ij~w4<-{QYlCzJJBfJXwpfkzT5); zwG3%cg?IwaGc{gSt*o!Wus}m;Pn||d0=)0CW)4v zD6sSeqHep_B|hL<#h*q7E!g?+fL|`t_n)~tS}?_7)48s(S>*`JSEEKZjO<^0+M?Xf zKLb5`dpB-*H!qY1wrB2HR>r=2wFRXAK4Km1Nh&_AQiMiIX@E zokO!l7h%-^-!%z4Y+ThFdfyCt;83-Wg7$5|t}{myhFqcdld`6xt_dm#1MS=8n`6)Q zqVnm2{41`R7uM9}5%r~SVbvPc;g1QR^FeqzTxtd~AXm#)`toPwEH%dk;2oWamyf0J zQfe}4Rh~v0(dtBK666L}jEGFwGgO4qDd*Yy4*65jfQe*;Ill?ZxTGI*AdCuAzK%1# zET1hoq^f3aD=rikMs^ayk49se1y3YReMR5Us3McZ;K60OY}e9fpOVpK`?3OEqv38t z{#}+c=iO>*S=@I;S_Yj$CH&B=Ez_AU2Pl8l7kuu8nn+1I z4z#D;^Gq=g>$3GRMCK2b8ATslIy3jRw=<TV6jyH=i9uJ`K9^6v9q z2hBdw)CRgxJ?H@|L5<%OVF241WB0E^PlZ`4Ha9k^M}z=(jrI2dp8_wT1Y{NX z`TNUfc;L%t2P4Oam;2wzvczjvd6H}c;KqAch4j|g23|AKo`sKvW ziM`%3Q$2HGdDxCEmzM+d$_`qynv^2Hwl@iVbG$DJN|~lEKuCVu7TMo1;ZPUClX_$C zz**jCHeOWZva?N{0*$k#S-mVunOu}&!+CRWZ%|sLR>NzZ8Bp<-YQDJy_E{z>DOd_) z@%w`#yOH5Ljk3sCuJUd5gK5P18J7`u*!P)iN=RfaW>$FQ1qd`{<7)D%JZ?i&aUOII 
zGNf;qFkuN$B1-pj)J5H6?TzYtHqRtt*g==uBvbhBLY|n*K`ib&N{>7ZZUqDApr$VE zzB%;04rpO7g)PqAT0HajA};&vzI>->S^{CbWS-iq8P+^s%bF2RrqP`>MW~=w*#?$1 zCgBueJBe1jrPgg;eHQ9uNgapT;?c9iW34`|Do53x$36_OB8+oqJWBEXJ=SPK>h&K1HZZ1|$?%P@4jtj-WS z((5_;0aS`-_`dg$iIiJ>5JN5;2n6Is#D*01qww*tJC4`niw7a8v>hMno`3B}i=8{= zSU1qr0);sR(GwEuCr&km&(ayFs2mr>aNV^HlXQ>a!@!nXwL_M&mCW<^%t|XNg}+d@ zegb9iyqmzONg{1s+KOnn6T`Vbq{HKgB};-zV#o%%>C;6+aeO->e(~w_f)4chDc2-H zPU=vbu_?_`R^iRgIoND~Y79x6O+7(fl8@htpcqVRWPOFc{AfRW?D`O=M`i(bKmI|E zF9k(rskC_P#xl4Gh^#^=NN-Q!SGBKIMwio zrUFs+a${Edt7@Zr$Kz&xl^v428WD;;pF&ML zOpK`YNSnLQxkr^*LJM135MowG;`g*#hVFdnD`{VGL^waXCFd>U9jY%L0CF+0i|cLS zEqK9AMqlxi8pG}hso!0h4ZdWC5nHlr(d%CZwkYCjm z7q}`myv{tLrdG%Gikk`u}t=2aJm03_v8{7SO5b70%8vgX#DqE2paGfA|fi3Y-nmZ5;Mp=bsm#GJln?NSmGze3Yo)CYSDrW zxEE%>({YTKmNK@ZY{y%vJe+5f&I(C$74>a_Dni@Wt*9zh;5B;U^WxB;pw32aHA6V` zUNIr6*7HQ=oK}4N14UnESezquzh7<@dt+w9u^1yE?Z*vFCG+upJSd_2NzDPSyiD!X zT$P6(h-+0Uur%Pp!#to~m#hRR7%Ir$gP#LW@gw^g3jNPgkFtO~#OeNJionf3ql-Td zcK)OG#|s~SMl62_cmB&FfSZ5f{3ZDL|J1k#m+LQE18)8a`YZJL?*sFz6v!W4!~zcG zpD2$*q5mD{AsG5Ell$**ehZEMcckA3J^a5xdK@bK?>N7YPv?Jw^DA`vzl(DD-=O>o zrT%x6hhXZzj2?Jh{5i`1CA9hvu%B1c;}BsF!PS46B@oQuSE%)$SIp0Xk3&N|L=gLB zZNTOB|3f^nKYKK#%qJeh1nBeoy`m#QZ1FW8tC)xxQaU$M{&h z@6U3N#RwjjB>ysAj-L(qUF7Ibgvaa1A4Cg&SWJf4^Ndk;Ef|5g0A zMIQft53to9A^dN8kfZSk;LjfXZGPFWQ+TcUYqr^+t$s{se!oF!>Hd8N&7UZbpJ^V* u&o7&E{+$Z_S@QAk<-;@2FT?ft%M*~S1SC)$3'.format(__file__)) - iter_max = 1000 - - NN = 3000 - NM = 3000 - - A = np.zeros((NN, NM), dtype=np.float64) - Anew = np.zeros((NN, NM), dtype=np.float64) - - n = NN - m = NM - - tol = 1.0e-6 - error = 1.0 - - for j in range(n): - A[j, 0] = 1.0 - Anew[j, 0] = 1.0 - - print("Jacobi relaxation Calculation: %d x %d mesh" % (n, m)) - - timer = time.time() - iter = 0 - - while error > tol and iter < iter_max: - error = jacobi_relax_core(A, Anew) - - # swap A and Anew - tmp = A - A = Anew - Anew = tmp - - if iter % 100 == 0: - print("%5d, %0.6f (elapsed: %f s)" % (iter, error, time.time()-timer)) - - iter += 1 - - runtime 
= time.time() - timer - print(" total: %f s" % runtime) - -if __name__ == '__main__': - main() diff --git a/numba/examples/linear_regression/linear_regression.py b/numba/examples/linear_regression/linear_regression.py deleted file mode 100644 index 953386be7..000000000 --- a/numba/examples/linear_regression/linear_regression.py +++ /dev/null @@ -1,40 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -import numba -import numpy as np -import argparse -import time - - -def linear_regression(Y, X, w, iterations, alphaN): - for i in range(iterations): - w -= alphaN * np.dot(X.T, np.dot(X,w)-Y) - return w - -def main(): - parser = argparse.ArgumentParser(description='Linear Regression.') - parser.add_argument('--samples', dest='samples', type=int, default=200000) - parser.add_argument('--features', dest='features', type=int, default=10) - parser.add_argument('--functions', dest='functions', type=int, default=4) - parser.add_argument('--iterations', dest='iterations', type=int, default=20) - args = parser.parse_args() - N = args.samples - D = args.features - p = args.functions - iterations = args.iterations - alphaN = 0.01/N - w = np.zeros((D,p)) - np.random.seed(0) - points = np.random.random((N,D)) - labels = np.random.random((N,p)) - t1 = time.time() - w = linear_regression(labels, points, w, iterations, alphaN) - selftimed = time.time()-t1 - print("SELFTIMED ", selftimed) - print("checksum: ", np.sum(w)) - -if __name__ == '__main__': - main() diff --git a/numba/examples/linear_regression/linear_regression_numba.py b/numba/examples/linear_regression/linear_regression_numba.py deleted file mode 100644 index 790ae308e..000000000 --- a/numba/examples/linear_regression/linear_regression_numba.py +++ /dev/null @@ -1,40 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -import numba -import numpy as np -import argparse -import time - -@numba.njit() -def linear_regression(Y, X, w, 
iterations, alphaN): - for i in range(iterations): - w -= alphaN * np.dot(X.T, np.dot(X,w)-Y) - return w - -def main(): - parser = argparse.ArgumentParser(description='Linear Regression.') - parser.add_argument('--samples', dest='samples', type=int, default=200000) - parser.add_argument('--features', dest='features', type=int, default=10) - parser.add_argument('--functions', dest='functions', type=int, default=4) - parser.add_argument('--iterations', dest='iterations', type=int, default=20) - args = parser.parse_args() - N = args.samples - D = args.features - p = args.functions - iterations = args.iterations - alphaN = 0.01/N - w = np.zeros((D,p)) - np.random.seed(0) - points = np.random.random((N,D)) - labels = np.random.random((N,p)) - t1 = time.time() - w = linear_regression(labels, points, w, iterations, alphaN) - selftimed = time.time()-t1 - print("SELFTIMED ", selftimed) - print("checksum: ", np.sum(w)) - -if __name__ == '__main__': - main() diff --git a/numba/examples/linear_regression/linear_regression_pa.py b/numba/examples/linear_regression/linear_regression_pa.py deleted file mode 100644 index daaa6b2b3..000000000 --- a/numba/examples/linear_regression/linear_regression_pa.py +++ /dev/null @@ -1,42 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -import numba -import numpy as np -import argparse -import time - -run_parallel = numba.config.NUMBA_NUM_THREADS > 1 - -@numba.njit(parallel=run_parallel) -def linear_regression(Y, X, w, iterations, alphaN): - for i in range(iterations): - w -= alphaN * np.dot(X.T, np.dot(X,w)-Y) - return w - -def main(): - parser = argparse.ArgumentParser(description='Linear Regression.') - parser.add_argument('--samples', dest='samples', type=int, default=200000) - parser.add_argument('--features', dest='features', type=int, default=10) - parser.add_argument('--functions', dest='functions', type=int, default=4) - parser.add_argument('--iterations', dest='iterations', type=int, 
default=20) - args = parser.parse_args() - N = args.samples - D = args.features - p = args.functions - iterations = args.iterations - alphaN = 0.01/N - w = np.zeros((D,p)) - np.random.seed(0) - points = np.random.random((N,D)) - labels = np.random.random((N,p)) - t1 = time.time() - w = linear_regression(labels, points, w, iterations, alphaN) - selftimed = time.time()-t1 - print("SELFTIMED ", selftimed) - print("checksum: ", np.sum(w)) - -if __name__ == '__main__': - main() diff --git a/numba/examples/linkedlist.py b/numba/examples/linkedlist.py deleted file mode 100755 index 68ec017a5..000000000 --- a/numba/examples/linkedlist.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -This example demonstrates jitclasses and deferred types for writing a -singly-linked-list. -""" -from __future__ import print_function, absolute_import -from collections import OrderedDict -import numpy as np -from numba import njit -from numba import jitclass -from numba import int32, deferred_type, optional -from numba.runtime import rtsys - -node_type = deferred_type() - -spec = OrderedDict() -spec['data'] = int32 -spec['next'] = optional(node_type) - - -@jitclass(spec) -class LinkedNode(object): - def __init__(self, data, next): - self.data = data - self.next = next - - def prepend(self, data): - return LinkedNode(data, self) - - -@njit -def make_linked_node(data): - return LinkedNode(data, None) - - -node_type.define(LinkedNode.class_type.instance_type) - - -@njit -def fill_array(arr): - """ - Fills the array with n, n - 1, n - 2 and so on - First we populate a linked list with values 1 ... n - Then, we traverse the the linked list in reverse and put the value - into the array from the index. 
- """ - head = make_linked_node(0) - for i in range(1, arr.size): - head = head.prepend(i) - - c = 0 - while head is not None: - arr[c] = head.data - head = head.next - c += 1 - - -def runme(): - arr = np.zeros(10, dtype=np.int32) - fill_array(arr) - print("== Result ==") - print(arr) - # Check answer - np.testing.assert_equal(arr, np.arange(arr.size, dtype=arr.dtype)[::-1]) - - -if __name__ == '__main__': - runme() - print("== Print memory allocation information == ") - print(rtsys.get_allocation_stats()) diff --git a/numba/examples/logistic-regression/logistic_regression.py b/numba/examples/logistic-regression/logistic_regression.py deleted file mode 100644 index 8f7f571cc..000000000 --- a/numba/examples/logistic-regression/logistic_regression.py +++ /dev/null @@ -1,52 +0,0 @@ -import numba -import numpy as np -import argparse -import time - -run_parallel = numba.config.NUMBA_NUM_THREADS > 1 - - -@numba.njit(parallel=run_parallel, fastmath=True) -def logistic_regression(Y, X, w, iterations): - for i in range(iterations): - w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X) - return w - - -def main(): - parser = argparse.ArgumentParser(description='Logistic Regression.') - parser.add_argument('--dimension', dest='dimension', type=int, default=10) - parser.add_argument('--points', dest='points', type=int, default=20000000) - parser.add_argument('--iterations', dest='iterations', - type=int, default=30) - args = parser.parse_args() - - np.random.seed(0) - D = 3 - N = 4 - iterations = 10 - points = np.ones((N, D)) - labels = np.ones(N) - w = 2.0 * np.ones(D) - 1.3 - t1 = time.time() - w = logistic_regression(labels, points, w, iterations) - compiletime = time.time() - t1 - print("SELFPRIMED ", compiletime) - print("checksum ", w) - - D = args.dimension - N = args.points - iterations = args.iterations - print("D=", D, " N=", N, " iterations=", iterations) - points = np.random.random((N, D)) - labels = np.random.random(N) - w = 2.0 * np.ones(D) - 1.3 
- t2 = time.time() - w = logistic_regression(labels, points, w, iterations) - selftimed = time.time() - t2 - print("SELFTIMED ", selftimed) - print("checksum: ", np.sum(w)) - - -if __name__ == '__main__': - main() diff --git a/numba/examples/mandel/mandel_jit.py b/numba/examples/mandel/mandel_jit.py deleted file mode 100755 index 05aa4e295..000000000 --- a/numba/examples/mandel/mandel_jit.py +++ /dev/null @@ -1,53 +0,0 @@ -#! /usr/bin/env python -# -*- coding: utf-8 -*- -from __future__ import print_function, division, absolute_import - -from timeit import default_timer as timer -from matplotlib.pylab import imshow, jet, show, ion -import numpy as np - -from numba import jit - - -@jit -def mandel(x, y, max_iters): - """ - Given the real and imaginary parts of a complex number, - determine if it is a candidate for membership in the Mandelbrot - set given a fixed number of iterations. - """ - i = 0 - c = complex(x,y) - z = 0.0j - for i in range(max_iters): - z = z*z + c - if (z.real*z.real + z.imag*z.imag) >= 4: - return i - - return 255 - -@jit -def create_fractal(min_x, max_x, min_y, max_y, image, iters): - height = image.shape[0] - width = image.shape[1] - - pixel_size_x = (max_x - min_x) / width - pixel_size_y = (max_y - min_y) / height - for x in range(width): - real = min_x + x * pixel_size_x - for y in range(height): - imag = min_y + y * pixel_size_y - color = mandel(real, imag, iters) - image[y, x] = color - - return image - -image = np.zeros((500 * 2, 750 * 2), dtype=np.uint8) -s = timer() -create_fractal(-2.0, 1.0, -1.0, 1.0, image, 20) -e = timer() -print(e - s) -imshow(image) -#jet() -#ion() -show() diff --git a/numba/examples/mandel/mandel_vectorize.py b/numba/examples/mandel/mandel_vectorize.py deleted file mode 100755 index 686b518ff..000000000 --- a/numba/examples/mandel/mandel_vectorize.py +++ /dev/null @@ -1,55 +0,0 @@ -#! 
/usr/bin/env python -# -*- coding: utf-8 -*- -from __future__ import print_function, division, absolute_import - - -from timeit import default_timer as timer - -from matplotlib.pylab import imshow, jet, show, ion - -from numba import vectorize -import numpy as np - -sig = 'uint8(uint32, f4, f4, f4, f4, uint32, uint32, uint32)' - -@vectorize([sig], target='cuda') -def mandel(tid, min_x, max_x, min_y, max_y, width, height, iters): - pixel_size_x = (max_x - min_x) / width - pixel_size_y = (max_y - min_y) / height - - x = tid % width - y = tid / width - - real = min_x + x * pixel_size_x - imag = min_y + y * pixel_size_y - - c = complex(real, imag) - z = 0.0j - - for i in range(iters): - z = z * z + c - if (z.real * z.real + z.imag * z.imag) >= 4: - return i - return 255 - -def create_fractal(min_x, max_x, min_y, max_y, width, height, iters): - tids = np.arange(width * height, dtype=np.uint32) - return mandel(tids, np.float32(min_x), np.float32(max_x), np.float32(min_y), - np.float32(max_y), np.uint32(height), np.uint32(width), - np.uint32(iters)) - -def main(): - width = 500 * 10 - height = 750 * 10 - ts = timer() - pixels = create_fractal(-2.0, 1.0, -1.0, 1.0, width, height, 20) - te = timer() - print('time: %f' % (te - ts)) - image = pixels.reshape(width, height) - #print(image) - imshow(image) - show() - - -if __name__ == '__main__': - main() diff --git a/numba/examples/mergesort.py b/numba/examples/mergesort.py deleted file mode 100755 index 0f51c69f3..000000000 --- a/numba/examples/mergesort.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" -An inplace and an out-of-place implementation of recursive mergesort. -This is not an efficient sort implementation. -The purpose is to demonstrate recursion support. 
-""" -from __future__ import print_function, division, absolute_import - -from timeit import default_timer as timer - -import numpy as np - -from numba import njit - - -@njit -def mergesort_inplace(arr): - "Inplace mergesort" - assert arr.ndim == 1 - - if arr.size > 2: - mid = arr.size // 2 - first = arr[:mid] - second = arr[mid:] - mergesort_inplace(first) - mergesort_inplace(second) - - left = 0 - right = mid - while left < mid and right < arr.size: - if arr[left] <= arr[right]: - left += 1 - else: - temp = arr[right] - right += 1 - # copy left array to the right by one - for i in range(mid, left, -1): - arr[i] = arr[i - 1] - arr[left] = temp - left += 1 - mid += 1 - elif arr.size == 2: - a, b = arr - arr[0], arr[1] = ((a, b) if a <= b else (b, a)) - return arr - - -@njit -def mergesort(arr): - "mergesort" - assert arr.ndim == 1 - - if arr.size > 2: - mid = arr.size // 2 - first = mergesort(arr[:mid].copy()) - second = mergesort(arr[mid:].copy()) - - left = right = 0 - writeidx = 0 - while left < first.size and right < second.size: - if first[left] <= second[right]: - arr[writeidx] = first[left] - left += 1 - else: - arr[writeidx] = second[right] - right += 1 - writeidx += 1 - - while left < first.size: - arr[writeidx] = first[left] - writeidx += 1 - left += 1 - - while right < second.size: - arr[writeidx] = second[right] - writeidx += 1 - right += 1 - - elif arr.size == 2: - a, b = arr - arr[0], arr[1] = ((a, b) if a <= b else (b, a)) - return arr - - -def run(mergesort): - print(('Running %s' % mergesort.py_func.__name__).center(80, '=')) - # Small case (warmup) - print("Warmup") - arr = np.random.random(6) - expect = arr.copy() - expect.sort() - print("unsorted", arr) - res = mergesort(arr) - print(" sorted", res) - # Test correstness - assert np.all(expect == res) - print() - # Large case - nelem = 10**3 - print("Sorting %d float64" % nelem) - arr = np.random.random(nelem) - expect = arr.copy() - - # Run pure python version - ts = timer() - 
mergesort.py_func(arr.copy()) - te = timer() - print('python took %.3fms' % (1000 * (te - ts))) - - # Run numpy version - ts = timer() - expect.sort() - te = timer() - print('numpy took %.3fms' % (1000 * (te - ts))) - - # Run numba version - ts = timer() - res = mergesort(arr) - te = timer() - print('numba took %.3fms' % (1000 * (te - ts))) - # Test correstness - assert np.all(expect == res) - - -def main(): - run(mergesort) - run(mergesort_inplace) - -if __name__ == '__main__': - main() diff --git a/numba/examples/movemean.py b/numba/examples/movemean.py deleted file mode 100755 index 245f11999..000000000 --- a/numba/examples/movemean.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python -""" -A moving average function using @guvectorize. -""" - -import numpy as np - -from numba import guvectorize - -@guvectorize(['void(float64[:], intp[:], float64[:])'], '(n),()->(n)') -def move_mean(a, window_arr, out): - window_width = window_arr[0] - asum = 0.0 - count = 0 - for i in range(window_width): - asum += a[i] - count += 1 - out[i] = asum / count - for i in range(window_width, len(a)): - asum += a[i] - a[i - window_width] - out[i] = asum / count - -arr = np.arange(20, dtype=np.float64).reshape(2, 10) -print(arr) -print(move_mean(arr, 3)) diff --git a/numba/examples/nbody/nbody.py b/numba/examples/nbody/nbody.py deleted file mode 100755 index 817b354f1..000000000 --- a/numba/examples/nbody/nbody.py +++ /dev/null @@ -1,130 +0,0 @@ -#! /usr/bin/env python - -import numpy as np -import time - - -flopsPerInteraction = 30 -SOFTENING_SQUARED = 0.01 - - -def normalize (vector): - dist = np.sqrt((vector * vector).sum()) - if dist > 1e-6: - vector /= dist - return dist - - -def randomize_bodies(pos, vel, cluster_scale, velocity_scale, n): - np.random.seed(42) - scale = cluster_scale - vscale = scale * velocity_scale - inner = 2.5 * scale - outer = 4.0 * scale - - i = 0 - while i < n: - point = np.random.random(3) / 2. 
- length = normalize(point) - if length > 1.: - continue - pos[i,:3] = point * ((inner + (outer - inner)) * np.random.random(3)) - pos[i,3] = 1.0 - axis = np.array((0., 0., 1.)) - normalize(axis) - if (1 - ((point * axis).sum())) < 1e-6: - axis[0] = point[1] - axis[1] = point[0] - normalize(axis) - vv = np.cross(pos[i,:3], axis) - vel[i] = vscale * vv - print("%d: %s, %s" % (i, pos[i], vel[i])) - i += 1 - - -def check_correctness(pin, pout, v, f, dt, n, integrate_0, integrate_1): - pin_ref = np.zeros_like(pin) - pout_ref = np.zeros_like(pout) - v_ref = np.zeros_like(v) - f_ref = np.zeros_like(f) - randomize_bodies(pin_ref, v_ref, 1.54, 8.0, n) - integrate_0(pout_ref, pin_ref, v_ref, f_ref, dt, n) - integrate_1(pout, pin, v, f, dt, n) - - errt = 0 - errmax = 0 - - errs = np.fabs(pout_ref - pout).reshape(4 * n) - errt = errs.sum() - errmax = errs.max() - - print("Maximum error: %0.4f -- Total error: %0.4f" % (errmax, errt)) - - -def body_body_interaction(force, pos_mass0, pos_mass1): - r = pos_mass1[:3] - pos_mass0 - dist_sqr = (r * r).sum() - dist_sqr += SOFTENING_SQUARED - inv_dist = np.sqrt(dist_sqr) - inv_dist_cube = inv_dist * inv_dist * inv_dist - s = pos_mass1[3] * inv_dist_cube - force += r * s - - -def integrate(position_out, position_in, velocity, force, delta_time, n): - for i in range(n): - p = position_in[i][:3] - f = np.zeros(3) - for j in range(i): - body_body_interaction(f, p, position_in[j]) - inv_mass = position_in[i,3] - v = velocity[i] - v += f * inv_mass * delta_time - p += v * delta_time - position_out[i,:3] = p - position_out[i,3] = inv_mass - velocity[i] = v - - -def compute_perf_stats(milliseconds, iterations, n): - interactionsPerSecond = float(n * n) - interactionsPerSecond *= 1e-9 * iterations * 1000 / milliseconds - return interactionsPerSecond * flopsPerInteraction; - - -def main(*args): - n = 128 - iterations = 10 - dt = 0.01667 - - if len(args) > 0: - n = int(args[0]) - if len(args) > 1: - iterations = int(args[1]) - - pin = 
np.zeros((n, 4)) - pout = np.zeros((n, 4)) - v = np.zeros((n, 3)) - f = np.zeros((n, 3)) - - randomize_bodies(pin, v, 1.54, 8.0, n) - - check_correctness(pin, pout, v, f, dt, n, integrate, integrate) - - time0 = time.time() - for i in range(iterations): - integrate(pout, pin, v, f, dt, n) - t = pout - pout = pin - pin = t - time1 = time.time() - ms = (time1 - time0)*1000 - gf = compute_perf_stats(ms, iterations, n) - - print("%d n-body iterations" % iterations) - print("%f ms: %f GFLOP/s" % (ms, gf)) - - -if __name__ == "__main__": - import sys - main(*sys.argv[1:]) diff --git a/numba/examples/nbody/nbody_modified_by_MarkHarris.py b/numba/examples/nbody/nbody_modified_by_MarkHarris.py deleted file mode 100755 index 49e95e116..000000000 --- a/numba/examples/nbody/nbody_modified_by_MarkHarris.py +++ /dev/null @@ -1,129 +0,0 @@ -#! /usr/bin/env python - -# Maximum error: 0.0000 -- Total error: 0.0000 -# 10 n-body iterations -# 3265.094042 ms: 0.001505 GFLOP/s - -import numpy as np -import time -from numba import * - -flopsPerInteraction = 30 -SOFTENING_SQUARED = 0.01 - - -def normalize (vector): - dist = np.sqrt((vector * vector).sum()) - if dist > 1e-6: - vector /= dist - return dist - - -def randomize_bodies(pos, vel, cluster_scale, velocity_scale, n): - np.random.seed(42) - scale = cluster_scale - vscale = scale * velocity_scale - inner = 2.5 * scale - outer = 4.0 * scale - - i = 0 - while i < n: - point = np.random.random(3) / 2. 
- length = normalize(point) - if length > 1.: - continue - pos[i,:3] = point * ((inner + (outer - inner)) * np.random.random(3)) - pos[i,3] = 1.0 - axis = np.array((0., 0., 1.)) - normalize(axis) - if (1 - ((point * axis).sum())) < 1e-6: - axis[0] = point[1] - axis[1] = point[0] - normalize(axis) - vv = np.cross(pos[i,:3], axis) - vel[i,:3] = vscale * vv - vel[i,3] = 1.0 - #print("%d: %s, %s" % (i, pos[i], vel[i])) - i += 1 - - -def check_correctness(pin, pout, v, dt, n, integrate_0, integrate_1): - pin_ref = np.zeros_like(pin) - pout_ref = np.zeros_like(pout) - v_ref = np.zeros_like(v) - randomize_bodies(pin_ref, v_ref, 1.54, 8.0, n) - integrate_0(pout_ref, pin, np.copy(v), dt, n) - integrate_1(pout, pin, np.copy(v), dt, n) - - errt = 0 - errmax = 0 - - errs = np.fabs(pout_ref - pout).reshape(4 * n) - errt = errs.sum() - errmax = errs.max() - - print("Maximum error: %0.4f -- Total error: %0.4f" % (errmax, errt)) - -def check_overflow(x): - return np.isnan(np.sum(x)) - -def body_body_interaction(force, pos_mass0, pos_mass1): - r = pos_mass1[:3] - pos_mass0[:3] - dist_sqr = (r * r).sum() - dist_sqr += SOFTENING_SQUARED - inv_dist = 1.0 / np.sqrt(dist_sqr) - inv_dist_cube = inv_dist * inv_dist * inv_dist - s = pos_mass1[3] * inv_dist_cube - force += r * s - -def integrate(position_out, position_in, velocity, delta_time, n): - for i in range(n): - p = position_in[i] - f = np.zeros(3) - for j in range(n): - body_body_interaction(f, p, position_in[j]) - inv_mass = velocity[i,3] - v = velocity[i,:3] + f * inv_mass * delta_time - position_out[i,:3] = p[:3] + v * delta_time - position_out[i,3] = position_in[i,3] - velocity[i,:3] = v - -def compute_perf_stats(milliseconds, iterations, n): - interactionsPerSecond = float(n * n) - interactionsPerSecond *= 1e-9 * iterations * 1000 / milliseconds - return interactionsPerSecond * flopsPerInteraction; - - -def main(*args): - n = 128 - iterations = 10 - dt = 0.01667 - - if len(args) > 0: - n = int(args[0]) - if len(args) > 1: - 
iterations = int(args[1]) - - pin = np.zeros((n, 4)) - pout = np.zeros((n, 4)) - v = np.zeros((n, 4)) - - randomize_bodies(pin, v, 1.54, 8.0, n) - - check_correctness(pin, pout, v, dt, n, integrate, integrate) - - time0 = time.time() - for i in range(iterations): - integrate(pout, pin, v, dt, n) - pin, pout = pout, pin - time1 = time.time() - ms = (time1 - time0)*1000 - gf = compute_perf_stats(ms, iterations, n) - - print("%d n-body iterations" % iterations) - print("%f ms: %f GFLOP/s" % (ms, gf)) - - -if __name__ == "__main__": - import sys - main(*sys.argv[1:]) diff --git a/numba/examples/nogil.py b/numba/examples/nogil.py deleted file mode 100755 index 5393050e5..000000000 --- a/numba/examples/nogil.py +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function, division, absolute_import - -import math -import threading -from timeit import repeat - -import numpy as np -from numba import jit - -nthreads = 4 -size = 10**6 - -def func_np(a, b): - """ - Control function using Numpy. - """ - return np.exp(2.1 * a + 3.2 * b) - -@jit('void(double[:], double[:], double[:])', nopython=True, nogil=True) -def inner_func_nb(result, a, b): - """ - Function under test. - """ - for i in range(len(result)): - result[i] = math.exp(2.1 * a[i] + 3.2 * b[i]) - -def timefunc(correct, s, func, *args, **kwargs): - """ - Benchmark *func* and print out its runtime. - """ - print(s.ljust(20), end=" ") - # Make sure the function is compiled before we start the benchmark - res = func(*args, **kwargs) - if correct is not None: - assert np.allclose(res, correct), (res, correct) - # time it - print('{:>5.0f} ms'.format(min(repeat(lambda: func(*args, **kwargs), - number=5, repeat=2)) * 1000)) - return res - -def make_singlethread(inner_func): - """ - Run the given function inside a single thread. 
- """ - def func(*args): - length = len(args[0]) - result = np.empty(length, dtype=np.float64) - inner_func(result, *args) - return result - return func - -def make_multithread(inner_func, numthreads): - """ - Run the given function inside *numthreads* threads, splitting its - arguments into equal-sized chunks. - """ - def func_mt(*args): - length = len(args[0]) - result = np.empty(length, dtype=np.float64) - args = (result,) + args - chunklen = (length + numthreads - 1) // numthreads - # Create argument tuples for each input chunk - chunks = [[arg[i * chunklen:(i + 1) * chunklen] for arg in args] - for i in range(numthreads)] - # Spawn one thread per chunk - threads = [threading.Thread(target=inner_func, args=chunk) - for chunk in chunks] - for thread in threads: - thread.start() - for thread in threads: - thread.join() - return result - return func_mt - - -func_nb = make_singlethread(inner_func_nb) -func_nb_mt = make_multithread(inner_func_nb, nthreads) - -a = np.random.rand(size) -b = np.random.rand(size) - -correct = timefunc(None, "numpy (1 thread)", func_np, a, b) -timefunc(correct, "numba (1 thread)", func_nb, a, b) -timefunc(correct, "numba (%d threads)" % nthreads, func_nb_mt, a, b) diff --git a/numba/examples/notebooks/LinearRegr.ipynb b/numba/examples/notebooks/LinearRegr.ipynb deleted file mode 100644 index 2a1463f94..000000000 --- a/numba/examples/notebooks/LinearRegr.ipynb +++ /dev/null @@ -1,200 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Linear Regression with Gradient Descent Algorithm\n", - "\n", - "This notebook demonstrates the implementation of linear regression with gradient descent algorithm. 
\n", - "\n", - "Consider the following implementation of the gradient descent loop with NumPy arrays based upon [1]:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%pylab inline" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def gradient_descent_numpy(X, Y, theta, alpha, num_iters):\n", - " m = Y.shape[0]\n", - "\n", - " theta_x = 0.0\n", - " theta_y = 0.0\n", - "\n", - " for i in range(num_iters):\n", - " predict = theta_x + theta_y * X\n", - " err_x = (predict - Y)\n", - " err_y = (predict - Y) * X\n", - " theta_x = theta_x - alpha * (1.0 / m) * err_x.sum()\n", - " theta_y = theta_y - alpha * (1.0 / m) * err_y.sum()\n", - "\n", - " theta[0] = theta_x\n", - " theta[1] = theta_y" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To speedup this implementation with Numba, we need to add the `@jit` decorator to annotate the function signature. Then, we need to expand the NumPy array expressions into a loop. 
The resulting code is shown below:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from numba import autojit, jit, f8, int32, void\n", - "\n", - "@jit(void(f8[:], f8[:], f8[:], f8, int32))\n", - "def gradient_descent_numba(X, Y, theta, alpha, num_iters):\n", - " m = Y.shape[0]\n", - "\n", - " theta_x = 0.0\n", - " theta_y = 0.0\n", - "\n", - " for i in range(num_iters):\n", - " err_acc_x = 0.0\n", - " err_acc_y = 0.0\n", - " for j in range(X.shape[0]):\n", - " predict = theta_x + theta_y * X[j]\n", - " err_acc_x += predict - Y[j]\n", - " err_acc_y += (predict - Y[j]) * X[j]\n", - " theta_x = theta_x - alpha * (1.0 / m) * err_acc_x\n", - " theta_y = theta_y - alpha * (1.0 / m) * err_acc_y\n", - "\n", - " theta[0] = theta_x\n", - " theta[1] = theta_y" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The rest of the code generates some artificial data to test our linear regression algorithm." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pylab\n", - "from timeit import default_timer as timer\n", - "\n", - "def populate_data(N, slope, intercept, stdev=10.0):\n", - " noise = stdev*np.random.randn(N)\n", - " X = np.arange(N, dtype=np.float64)\n", - " Y = noise + (slope * X + intercept)\n", - " return X, Y\n", - "\n", - "def run(gradient_descent, X, Y, iterations=10000, alpha=1e-6):\n", - " theta = np.empty(2, dtype=X.dtype)\n", - "\n", - " ts = timer()\n", - " gradient_descent(X, Y, theta, alpha, iterations)\n", - " te = timer()\n", - "\n", - " timing = te - ts\n", - "\n", - " print(\"x-offset = {} slope = {}\".format(*theta))\n", - " print(\"time elapsed: {} s\".format(timing))\n", - "\n", - " return theta, timing\n", - "\n", - "\n", - "def plot(X, theta, c='r'):\n", - " result = theta[0] + theta[1] * X\n", - " pylab.plot(X, result, c=c, linewidth=2)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will a benchmark with 50 elements to compare the pure python version against the numba version." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "N = 10\n", - "X, Y = populate_data(N, 3, 10)\n", - "pylab.scatter(X, Y, marker='o', c='b')\n", - "pylab.title('Linear Regression')\n", - "\n", - "print('NumPy'.center(30, '-'))\n", - "theta_python, time_python = run(gradient_descent_numpy, X, Y)\n", - "\n", - "print('Numba'.center(30, '-'))\n", - "theta_numba, time_numba = run(gradient_descent_numba, X, Y)\n", - "\n", - "# make sure all method yields the same result\n", - "assert np.allclose(theta_python, theta_numba)\n", - "\n", - "print('Summary'.center(30, '='))\n", - "print('Numba speedup %.1fx' % (time_python / time_numba))\n", - "\n", - "plot(X, theta_numba, c='r')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## References\n", - "\n", - "[1] http://aimotion.blogspot.com/2011/10/machine-learning-with-python-linear.html" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.4.3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/numba/examples/notebooks/LinearRegr.py b/numba/examples/notebooks/LinearRegr.py deleted file mode 100644 index 65c70f348..000000000 --- a/numba/examples/notebooks/LinearRegr.py +++ /dev/null @@ -1,126 +0,0 @@ -# -*- coding: utf-8 -*- -# 3.0 - -# - -# # Linear Regression with Gradient Descent Algorithm -# -# This notebook demonstrates the implementation of linear regression with gradient descent algorithm. 
-# -# Consider the following implementation of the gradient descent loop with NumPy arrays based upon [1]: - -# - -def gradient_descent_numpy(X, Y, theta, alpha, num_iters): - m = Y.shape[0] - - theta_x = 0.0 - theta_y = 0.0 - - for i in range(num_iters): - predict = theta_x + theta_y * X - err_x = (predict - Y) - err_y = (predict - Y) * X - theta_x = theta_x - alpha * (1.0 / m) * err_x.sum() - theta_y = theta_y - alpha * (1.0 / m) * err_y.sum() - - theta[0] = theta_x - theta[1] = theta_y - -# - -# To speedup this implementation with Numba, we need to add the `@jit` decorator to annotate the function signature. Then, we need to expand the NumPy array expressions into a loop. The resulting code is shown below: - -# - -from numba import autojit, jit, f8, int32, void - -@jit(void(f8[:], f8[:], f8[:], f8, int32)) -def gradient_descent_numba(X, Y, theta, alpha, num_iters): - m = Y.shape[0] - - theta_x = 0.0 - theta_y = 0.0 - - for i in range(num_iters): - err_acc_x = 0.0 - err_acc_y = 0.0 - for j in range(X.shape[0]): - predict = theta_x + theta_y * X[j] - err_acc_x += predict - Y[j] - err_acc_y += (predict - Y[j]) * X[j] - theta_x = theta_x - alpha * (1.0 / m) * err_acc_x - theta_y = theta_y - alpha * (1.0 / m) * err_acc_y - - theta[0] = theta_x - theta[1] = theta_y - -# - -# The rest of the code generates some artificial data to test our linear regression algorithm. 
- -# - -import numpy as np -import pylab -from timeit import default_timer as timer - -def populate_data(N): - noise = np.random.random(N).astype(np.float64) - X = np.arange(N, dtype=np.float64) - slope = 3 - Y = noise * (slope * X) - return X, Y - -def run(gradient_descent, X, Y, iterations=1000, alpha=1e-6): - theta = np.empty(2, dtype=X.dtype) - - ts = timer() - gradient_descent(X, Y, theta, alpha, iterations) - te = timer() - - timing = te - ts - - print "x-offset = {} slope = {}".format(*theta) - print "time elapsed: {} s".format(timing) - - return theta, timing - - -def plot(X, theta, c='r'): - result = theta[0] + theta[1] * X - pylab.plot(X, result, c=c, linewidth=2) - -# - -# We will a benchmark with 50 elements to compare the pure python version against the numba version. - -# - -N = 50 -X, Y = populate_data(N) -pylab.scatter(X, Y, marker='o', c='b') -pylab.title('Linear Regression') - -print 'Python'.center(30, '-') -theta_python, time_python = run(gradient_descent_numpy, X, Y) - -print 'Numba'.center(30, '-') -theta_numba, time_numba = run(gradient_descent_numba, X, Y) - -# make sure all method yields the same result -assert np.allclose(theta_python, theta_numba) - -print 'Summary'.center(30, '=') -print 'Numba speedup %.1fx' % (time_python / time_numba) - -plot(X, theta_numba, c='r') -pylab.show() - -# - -# -# ## References -# -# [1] http://aimotion.blogspot.com/2011/10/machine-learning-with-python-linear.html - diff --git a/numba/examples/notebooks/Using Numba.ipynb b/numba/examples/notebooks/Using Numba.ipynb deleted file mode 100644 index ba1f63e6b..000000000 --- a/numba/examples/notebooks/Using Numba.ipynb +++ /dev/null @@ -1,220 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "I always enjoy showing people how much easier Numba makes it to speed up their NumPy-based technical codes. 
With Numba, you usually can just write the code with loops and then add a decorator to your function and get speed-ups equivalent to having written the code in another compiled language (like C or Fortran). \n", - "\n", - "Tonight when I saw this question on Stack Exchange: http://scicomp.stackexchange.com/questions/5473/how-to-express-this-complicated-expression-using-numpy-slices it looked like a perfect opportunity to test Numba again.\n", - "\n", - "So, I copied the looped_ver code from Nat Wilson (modified it slightly to make x[0] = 0) and then decorated it to let Numba compile the code. The result continues to impress me about the code that Mark Florisson, Jon Riehl, and Siu Kwan Lam have put together. Here is the equation that is being solved:\n", - "\n", - "$$\\displaystyle x_i = \\sum_{j=0}^{i-1} k_{i-j} a_{i-j} a_{j}$$\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Populating the interactive namespace from numpy and matplotlib\n" - ] - } - ], - "source": [ - "%pylab inline" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "from numba import jit\n", - "\n", - "def looped_ver(k, a):\n", - " x = np.empty_like(a)\n", - " x[0] = 0.0\n", - " for i in range(1, x.size):\n", - " sm = 0.0\n", - " for j in range(0, i):\n", - " sm += k[i-j,j] * a[i-j] * a[j]\n", - " x[i] = sm\n", - " return x\n", - "\n", - "typed_ver = jit('f8[:](f8[:,:],f8[:])')(looped_ver)\n", - "auto_ver = jit(looped_ver)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import time\n", - "import numpy as np\n", - "repeat = 3\n", - "\n", - "def getbest(func, *args):\n", - " import time\n", - " best = 1e12\n", - " for i in range(repeat):\n", - " start = 
time.time()\n", - " func(*args)\n", - " current = time.time() - start\n", - " if current < best:\n", - " best = current\n", - " return best\n", - " \n", - "\n", - "def timeit(N):\n", - " res = {'looped':[], 'auto':[], 'typed':[]}\n", - " for n in N:\n", - " k = np.random.rand(n,n)\n", - " a = np.random.rand(n)\n", - " for version in ['looped', 'auto', 'typed']:\n", - " func = eval('%s_ver' % version)\n", - " res[version].append(getbest(func, k, a))\n", - " return res" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "N = [100,200,500,1000,5000]\n", - "res = timeit(N)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY8AAAEPCAYAAAC6Kkg/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl4VOXZx/HvHZYk7AQwhC1h30WZgIgswa2IVVEURUFU\nbK3VllqXVrTKq1ar7WtttbYuqCAur1jcClpEDG4gZtj3JWEHZd+3MPf7x5lsECQDZ+bMcn+ua67M\nOXPmzD3PRfLjOcvziKpijDHGhCLJ6wKMMcbEHgsPY4wxIbPwMMYYEzILD2OMMSGz8DDGGBMyCw9j\njDEhi7rwEJExIvK9iMz3uhZjjDHli7rwAF4FfuJ1EcYYY04s6sJDVb8CdnhdhzHGmBOLuvAwxhgT\n/Sw8jDHGhKyy1wWcChGxAbmMMeYUqKq4sZ9o7XlI8HFCqmoPVR5++GHPa4iWh7WFtYW1xTGPQADd\ntAmdOhX9299c/SMddT0PEXkTyAHqicha4GFVfdXbqowxJsr98AMsWnT8A6BjR+fhoqgLD1W93usa\njDEmam3dWn5IFBaWhETHjnD11c7P9HSQ4IGcf/3LtTKiLjxMaHJycrwuIWpYW5SwtigRs22xY0f5\nIXHgQNmQGDjQ+ZmRURISESCqsXfuWUQ0Fus2xpjj7NpVfkjs2QMdOpQNio4doUmTUw4JEUFdOmEe\nV+GRlZXFmjVrPKgofmVmZrJ69WqvyzAm9u3ZA4sXlw2IhQth505o3/74kGjaFJLcvabJwuME4RFs\nGA8qil/WpsaEaN++40Ni0SLYsgXatSsbEJ06QWam6yFxIhYeFh4RY21qzAns3w9Llx4fEps2Qdu2\nx/ckmjeHSpU8LdnCw8IjYqxNTcI7eLD8kNiwAVq1Or4n0aIFVI7Oa5EsPCw8Isba1CSMQ4dg+fLj\nz0msXQstWx7fk2jVCqpU8brqkFh4WHicklWrVtG6dWsCgUCF32NtauLOkSPHh8SiRVBQAFlZZXsR\nHTtC69ZQtarXVbvCzfCIzr5VnKlZsyYSvLRu3759JCcnU6l
SJUSEF154gSFDhkSsFongdeDGeKqw\nEFauLOlBFIVEfr5zJVNRSAwaBA895JynSE72uuqYYT2PCGvRogVjxoyhX79+Ef/sVatW0aZNG44e\nPVrh98RCm5oEd/QorFp1fE9ixQpo1KhsL6JjRyckUlO9rtoTbvY8onVgxLhVNGBZkY0bN1K9enV2\n795dvG7WrFlkZGQQCAQYM2YMffv25Y477qBOnTp07NiR3Nzc4m137drFLbfcQqNGjWjWrBkPP/xw\n8WuBQIC77rqL+vXr06pVKz755JOIfEdjwiIQcELigw/g8cfhhhvgrLOgZk34yU9gzBjnXopLLoFX\nX4Vt25ztP/yw7PYJGhxus8NWHmvUqBG9e/dmwoQJjBgxAoDx48dzww03kBS89vubb75h8ODBbNu2\njXfeeYerrrqK1atXU6tWLYYOHUpWVhYFBQXs3r2bAQMGkJWVxc0338zzzz/P1KlTWbBgASkpKQwc\nONDLr2pMxQQCsGbN8T2JpUuhfv2SHsRFF8FvfuPcYFejhtdVJx7Phww+hYdT9vFOtL7kdXcepyMr\nK0s/++yzMuveeOMN7du3r6qqFhYWaoMGDXTu3Lmqqvryyy9rs2bNymzftWtXffvtt3XDhg2ampqq\nhw8fLn7t9ddf14svvlhVVfv06aNjxowpfm3y5MmalJQUUr0na1NjTlkgoLp6teqkSapPPaU6fLhq\ndrZq9eqqjRurXnyx6l13qb78suqMGaq7dnldccwL/j678nc4oXoe0Xro/sorr+TOO+9k/fr1zJ07\nl/T0dLp06VL8epMmTcpsn5mZycaNG1mzZg2HDh0iPT0dKPmPQPPmzQHnkFjTpk3LvM+YiFN17ok4\ntiexeLHTYyjqSZx3Hvz85854TnXqeF21OYmECo9olZqayqBBgxg/fjxz585l2LBhZV5fv359meW1\na9fSqFEjmjZtSvXq1dm+fXu5+83IyGDdunXFyzbulwkrVefu6vJCIjm5JCS6dYObbnKep6V5XbU5\nRRYeUWLYsGHceuutbN68maeffrrMa5s2beL555/ntttuY8KECeTn59O/f39q165N3759ufvuuxk9\nejQ1atQgPz+fjRs30rt3bwYPHswzzzxD//79SU5O5qmnnvLo25m4onriiYeSkkqubDr7bBg61Hle\nv77XVRuXWXhE2Inus+jTpw+FhYX06NGDRo0alXmtZ8+eLFq0iLS0NBo3bszEiROpXbs24Jxc/93v\nfkeHDh3Yu3cvLVq04P777wfg9ttvZ9WqVXTu3Jm6devy29/+li+++CK8X9DEly1byg+JQKDs3daD\nBzs/zzgjonNKGO/YfR5RpG/fvowYMYIbb7yxeN2YMWN44403mDZtmic1xXqbmgravv34gFi4EA4f\nPn5Yjo4doWFDC4kYZHeYx6GZM2eyaNEirrnmGq9LMfFs587yexL79pWdeOiyy5yfjRpZSJhyWXhE\ngaFDhzJ58mSee+45Uu0GJuOG3btL5pQoPTTHrl1lQ6J//5KJhywkTAjssJX5UdamUW7v3vInHtq2\nrWTiodJDczRrFrGJh0z0sVF1LTwixto0SuzfD0uWlO1FLFrkXPVU3sRDWVmeTzxkoo+Fh4VHxFib\nRtiBA+VPPLRxozM0eOleRMeOzsRDFhKmguI6PESkP/AMzqCNY1T1yXK2sfCIEGvTMDl0qPyQWL/+\nxBMPRensdCZ2xG14iEgSsBy4ANgIfAdcp6pLj9nOwiNCrE1P0+HD5U88tGaNM6f1sSHRunXMzU5n\nYkc8h0cP4GFVvSS4/HucgbyePGY7C48IsTatoCNHnPkjjg2J/HzIzDw+JNq0sYmHTMTF830ejYF1\npZbXA909qiVm1axZkwULFpCVlcXtt99OkyZNeOCBB7wuKz4UFpY/8dDKldCkSUk4XHklPPigczI7\nJcXrqo1xXbSFR4WNHj2
6+HlOTg45OTme1RKKnJwc5s+fz/fff0+VCh6eSEpKYuXKlbRo0aJC2+/Z\ns6f4+T//+c/i59OnT2fo0KFlBks0J3D0qNNrODYkli+HjIySkLj0UrjvPuey2GrVvK7amDJyc3PL\nTB7npmg8bDVaVfsHl+PqsNWaNWto2bIlderU4YUXXmDQoEEVel+lSpVYsWJFhcPjRHJzc7nxxhtZ\nu3Zthd8T7W162gIBWL36+GE5li1zxmk69nBT+/ZQvbrXVRtzSuJ5GtrvgFYikikiVYHrgA89rsk1\n48aN49xzz+Wmm27itddeK17fr18/XnnlleLlsWPH0rt3b8AZ70pVOfPMM6lVqxYTJkwA4KWXXqJ1\n69bUr1+fgQMHsmnTpuL3JyUlkZ+fD8DNN9/MQw89xP79+xkwYAAbN26kZs2a1KpVi82bN0fgW0eJ\nopCYNAmeegqGD4fsbGcK07594R//cAYB7NcPXngBvv++/O0tOIwBouywlaoeFZE7gSmUXKq7xOOy\nXDNu3DjuueceunXrRo8ePdiyZQsNGjQod9ui0XenT59OUlISCxYsKJ7kadq0aYwaNYqpU6fSoUMH\n7r77bq677jqmT59e5r2lVatWjY8//phhw4aF1POIOarO5a7H9iSWLIFatUp6EL17wy9+4QzVERyh\n2BhTcVEVHgCq+gnQNhz7lv9xZ+wefTj0wzhfffUVa9euZfDgwdStW5dWrVrx5ptvMnLkyIp9ZqlD\nR2+++SYjRowonm3wiSeeoG7duqxdu5ZmzZrF92GmIqrOjXPlTTyUmloSEj16wIgRTkjUret11cbE\njagLj3A6lT/6bhk3bhwXX3wxdYN/wIYMGcLYsWMrHB6lbdy4EZ/PV7xcvXp16tWrx4YNG2jWrJlr\nNUcFVecQ0rED/C1e7NwPURQSPh/ceKPzvF49r6s2Ju4lVHh45eDBg7zzzjsEAgEyMjIAOHToELt2\n7WL+/PlUr16d/fv3F29/snMRjRo1KjOl7L59+9i2bdtxc50f60QTUUWNE81OByUh0aULXH+98/wE\nh/yMMeFn4REB7733HpUrV2bevHllLs8dPHgw48aN4+yzz2bixImMGDGCDRs2MGbMGBo2bFi8XcOG\nDcnPzy++2mrIkCFcf/31XH/99bRt25ZRo0bRo0cPmjZt+qN1pKens23bNnbv3k2tWrXC82UrYuvW\n8kOisLDslU1XX+38TE+34cKNiTIWHhEwbtw4brnlFho3blxm/R133MHIkSNZuHAhs2bNomHDhpx5\n5pkMHTqUqVOnFm83evRobrzxRg4ePMiLL77I1VdfzaOPPspVV13Fzp076dmzJ2+//fZJ62jbti1D\nhgyhRYsWBAIBFi9eXCakXLdjR/khceBA2ZAYOND5mZFhIWFMjIiq+zwqKlbv84gEVaVSpUqsXbv2\npIexKqJCbbprV/khsWdP2YmHih5NmlhIGOOBeB6exJymBQsWkJqaGp4exZ49x088tHCh08MoHRIX\nX1wyO51NPGRMXLLwiCMTJ07ktttu46mnnqKym8N3DxjgBMWWLSWz03XsCHfcUTLxkIWEMQnFDluZ\nHyUi6PvvOyHRvLlNPGRMDIvbIdkrysIjcqxNjYkf8Ty2lTHGmBhg4WGMMSZkFh7GGGNCZuFhjDEm\nZBYeMaZ58+ZMmzbN6zKMMQnOwiNCsrKyqFatGrVq1SIjI4Obb765zGCI5SmayMkYY6KNhUeEiAiT\nJk1i9+7dzJ49m7y8PB599FGvyzLGmFNi4RFBRfdLZGRkcMkll/Dss8/SrVu3Mtv89a9/ZeDAgbz0\n0ku88cYbPPXUU9SqVYsrrriieJs5c+bQpUsX6taty5AhQzh8+HDxayebnvaFF16gTZs2pKWlceed\nd4b5Gxtj4pWFhwfWrVvH5MmTGTlyJAUFBSxbtqz4tddff52bbrqJn/3sZ9xwww3cd9997
N69mw8+\n+KB4mwkTJjBlyhQKCgqYN29e8XzoRdPTvvvuu2zatIlmzZpx3XXXlfnsSZMm4ff7mTdvHu+88w5T\npkyJyHc2xsSXxAoPEXcep2jgwIGkpaXRp08f+vXrx0MPPcTgwYMZP348AIsWLWLNmjVceumlP7qf\nkSNHkp6eTp06dbjsssuYO3cuUHZ62ipVqvDEE08wY8aMMnOW33///dSsWZOmTZvSr1+/4vcaY0wo\nEis8VN15nKIPPviA7du3U1BQwLPPPktycjLDhw/nzTffBGD8+PEMHjy4zIRR5UlPTy9+Xq1aNfbu\n3Qs409NmZmYWv1Z6etqTvdcYY0Jho+pGUHljRJ1zzjlUrVqVL7/8kjfffJO33nqr+LVQp4091elp\njTEmVInV84hSw4YN484776Rq1ar07NmzeH16ejr5+fkV3s+QIUN49dVXmT9/PocOHarw9LTGGBMq\nC48I+bFexLBhw1i4cCHDhg0rs37EiBEsWrSItLQ0rrrqqpPu54ILLiienrZx48YUFBSUmZ722PeG\n2rMxxpgiFR6SXUSuAT5R1T0i8iDQFXhMVWe7UojI1cBooD3Q7cf2G29Dsh88eJD09HRmz55Ny5Yt\nvS6njFhtU2MSnSrkFwT4z4wVfLbYz/ytfta8+LQn09D+QVUniEgv4ELgz8A/gXPcKARYAFwJvODS\n/mLG888/T7du3aIuOIwxsUEVVuUHmDRjBZ8t8TNvi5+N6ufoGbNJ0XpkJfvom+1j3IvufWYo4XE0\n+PNS4EVVnSQij7lViKouA5AEO5bSvHlzAN5//32PKzHGxAJVWLmqJCjmb/GzET9Hz5hDqqaRlewj\np7uPAWc9wIUdulKvWr3i9477+SjX6gglPDaIyAvARcCTIpKMnTM5bQUFBV6XYIyJUoEArFgZYNLM\nFUwrCgrxEygVFP3O8XHJWaO4sIOvTFCEWyjhMRjoD/xFVXeKSAZwbygfJiKfAumlVwEKPKCqH4Wy\nr9GjRxc/z8nJIScnJ5S3G2NMVAkEYPmKAJNnruSzJXnM3+pnE34C6U5QNE/1ccG5TlBc0L5iQZGb\nm0tubm5Y6j3pCXMR+e2Pva6qT7takMjnwN2JdMI8mlmbGuO+QACWLS8JigVb/WySYFCQRvNkH92a\n+Liki6/CQVERbs5hXpGeR83gz7ZAN+DD4PJlwCw3iihHQp33MMbEr6NHYekyJyimLfWzYGsem5OK\ngqIuzatnc1E7H5d0GUW/dl2pX62+1yVXSCiX6n4BXKqqe4LLNYFJqtrHlUJEBgLPAvWBncBcVb3k\nBNuW2/PIysoqc4e1OX2ZmZmsXr3a6zKMiQlHj8KSpU5QfL7Uz4JtfjYl5aHBoGiRkk33pj4GnOWj\nb5vIB4WbPY9QwmMZcKaqHgouJwPzVbWtG4WE4kThERGqUHQz3yuvQNWq3tRhjPHU0aOweEmAj78t\n6lH42VwpD9LnkEJdWqT46N4027OgKE+kD1sVGQfMEpH3gssDgdfcKCKmvPgizJ8PM2dacBiTIAoL\nYdFiJyg+X+pn4XY/m5P80HC2ExS1ffTv6OPSs0fRp3V0BEW4VbjnASAiXYHewcUvVHVOWKo6eR3e\n9Dxmz4af/AS++graRrzDZYyJgMJCWLgoGBTL/Czc5uf7Sn5oOIdUqRPsUfi49OxsereKraDw5LBV\nNPEkPHbuBJ8PHn8crr02sp9tjAmLI0dKgiJ3mXOO4ofKpYIi1cc5TX1ccpaPPq18MRUU5fHqnEcy\nMAjIotThLlV9xI1CQhHx8FCFq66Cxo3hueci97nGGNccPgwLFgb4ZNYqPl+Wx6Lt5QfFgLN99G4Z\n+0FRHq/OeXwA7AL8wCE3PjxmPPMMrF8PpUaoNcZEr8OHYf4CJyhyl/lZuD2PLZX9kDGHVKlNywbZ\nXObzMeCs39OrRXwGRbiF0vNYqKqdwlxPhUS05/HNN
3Dllc4J8uA4VMaY6HHoUNmgWLTjmKColk2P\nZs7lsecleFB41fP4RkQ6q+oCNz44JmzdCtddBy+/bMFhTBQ4dAjmzgvw31mryF3uZ9EOP1ur5DlB\nkVSbluk+ruiezYAuv6dn8640qN7A65LjVig9j8VAK6AA57CVAKqqZ4avvBPWEv6eRyAAAwZAly7w\n5JPh/SxjzHEOHjw+KLZVLQmKVtV89Gjm45KzsumZZUFREV6dMM8sb72qRvyW7oiEx2OPwZQpMG0a\nVLap3o0JpwMHSoJi+oqioPBDxuySoMj0MeCsbM7NtKA4VZ5dqisiXSi5z+NLVZ3nRhGhCnt4TJsG\nN9wAeXnOFVbGGNfs3w9z5gb473er+CIYFNuT/Uij2aRIbVpV93FuM+fy2HMzfRYULvKq5zES+Bkw\nMbjqSpxJoZ51o5BQhDU8Nm1y7ucYNw4uvDA8n2FMgti/H2bPKQmKxTv8bE9xgiK1KCgynaDo0cyC\nIty8Co/5wLmqui+4XB2YEVfnPAoL4YILnMdDD7m/f2Pi2L59MGeO8t/vVjF9RR6Ld/rZcWxQZDnD\njFtQeMOrq62EkqloCT6Pr6HT//AHSEmBBx/0uhJjotrevTB7thMUX670s3hnHjtSg0GRVIvWmdlc\n28dH/y73WVDEqVDC41Xg22MGRnzF/ZI8MmkSjB/vjF+VZLPrGlNkz55gUOQ5h56W7MxjZ7VSQZHl\n47qsbPp3uY9zmlpQJIpTGRixV3Dxy7gZGHHNGujeHSZOhPPOc2+/xsSY3budoJgSDIrFu/zsqpZX\nEhQ1fPTMyqb/mT4Lihjk1TmPscBIVd0ZXK4L/K+q3uJGIaFwNTwOH4bevWHwYLj7bnf2aUwM2LUL\n/H4nKL5c5WfJTj+7qh8bFD4u6ZJNtyZdOaP6GV6XbE6TV+ExR1XPPtm6SHA1PEaOdHoe770HEl+n\ncIwpsnNnqaBY6WfJLj+7a/iRRn5Sk2rRpqYTFD/p7Aw3bkERn7w6YZ4kInVVdUewiLQQ3x99JkyA\n//wH/H4LDhM3duwo26NYusvPnhp+aDSb1KSatGnjY1hzHxd3vofuTSwozKkJ5Y///wIzRGRCcPka\n4I/ulxQhy5fDHXfAxx9DnTpeV2PMKdm+HfLylCn+VXy9yulR7K0ZvOGuUk3aWlCYMAn1hHkH4Pzg\n4jRVXRyWqk5ex+kdtjpwAHr0gNtvh1/8wr3CjAmjbducoPjUn8+Xq/JYusvPvlpFQVGDtjWzOa+5\nj4s6+SwoTLlsJsHTDY9bb3VufX3jDTtcZaLS1q0lQfHVKj9Ld+exr7YfySgKCh+9WmRzUScf3Rpb\nUJiK8eSch4gIcAPQQlUfEZFmQENVneVGIREzdix8/TV8950Fh4kKP/zgBMXU2SVBsb9OqaDo4OPm\nFtlc1OkeCwoTNUK52uqfQAA4X1XbBy/VnaKq3cJZ4AlqObWex8KF0K8f5OZCx46u12XMyXz/fTAo\n/Pl8le9n6W4/B+vmOaPHVqpB21rOhEUXdcy2oDCu8+pqq3NUtauIzAFQ1R0iUtWNIgBE5CngMpy5\nQlYBN6vqbrf2z549cPXV8L//a8FhImLz5rJBsWyPn4N1/ZDhd4Kis49bW/i4sOPdZDfykV4j3euS\njamwUHoe3wI9ge+CIdIAp+fhyn0eInIhzkn4gIj8CWeiqftPsG1oPQ9VuP56qFEDXnrJjXKNKWPj\nRicoPpuTz9f5fpbt9nMwzQ+N/KQm1aBtbR+9Wvi4sIPPgsJ4xquex9+B94AzROSPwNWAayMIqurU\nUoszgUFu7ZsvvnDGrJo717VdmsSkWhIUU2c7QbF8j5/D9f1ow9mkVq5Ouy4+ftbSxwXtf2tBYeJW\nqJfqtgMuCC5+pqpLw1KUyIfA26r65gleD63nceut0LYt3HuvSxWaRKAKGzaUBMU3Bc6hpyP1/WjG\nbFIrVaNd7Wx6t
/RxfjvrUZjo59XVVtcAn6jqP0TkD8DjIvKYqs4OYR+fAqV/uwRQ4AFV/Si4zQPA\nkRMFR5HRo0cXP8/JySEnJ6f8DQ8edAY8nD+/omWaBKQK69Y5QTFtTgFfF+SxfI+fIw2cqVBTKlWj\nfddsftHSx/ntrEdhYkNubi65ublh2XdIk0Gp6pki0gt4FPgL8JCqnuNaMSI34cxWeL6qHvqR7Sre\n85g4EZ57zpla1hicoFi7NniOYnYB3xT4Wb4vj6NnOIeeUipVo31tH71bZdOvrfUoTPzw6pxH0URQ\nlwIvqeokEXnMjSIARKQ/cC/Q58eCI2Tjx8PQoa7tzsQWVWfcy+KgWO1n+d48AunBoKhSjfbdffyy\npY9+ba1HYUxFhdLz+A+wAbgI6AocAGapahdXChFZAVQFtgVXzVTVX55g24r1PHbsgKws57+ZtWu7\nUaaJYqpQUOAExedzgz2KvX4CDfNKehR1fPRp5aNf22x8GRYUJrF4NSR7NaA/sEBVV4hIBtBZVae4\nUUgoKhweL78Mn3wC774b/qJMRKlCfn7JOYpvVvtZsc8PDf0EGvpJrewERe+WPnLa+PA18tGwRkOv\nyzbGUza2VUXDIycHfvMbGDgw7DWZ8FGFVatKgmJGUVA08hNILwmKPq189G1tQWHMiVh4VCQ81q6F\ns892LspPTo5MYea0BQKwcmVJUMxc42flfmfSoqPpflIqp9KxTjZ9WjthYUFhTMV5dcI8trz1Fgwa\nZMERxQIBWLEieI5izmpmrMlj5X4/SY39BNJnk1wjhY69shnZykefVr+xoDAmioRyziMZ567vLEqF\njqo+EpbKfryWk/c8zjzTuUS3T5/IFGV+VCDgzL9VOihWHfCT1MRP4IzZpFROoUNdH31bOzfdWVAY\n4z6veh4fALsAP87ghdFr/nxn0uZevbyuJCEdPQrLlhVd9bSamWv95B/Io1ITP0fPmE1KrRQ69PHx\nm9Y++rS0HoUxsSiUnsdCVe0U5noq5KQ9j9/9DpKS4IknIldUgjp6FJYuLRUUa/zkH8yjcjM/Rxs4\nPYqOaT76tPbRu0W2BYUxHvLqUt0XgWdVdYEbH3w6fjQ8AgHIzHTmJu8UFVkXNwoLjwmKtX4KDvip\nnJnnBEWVFDrWda54Oq+FD1+Gj4yaGV6XbYwJ8uqwVS/gJhEpwDlsJTjDpp/pRiGu+eILSEuz4DhN\nhYWweLETFNPnBYPioJ8qmX4Kz/CTkpZCx5Y+rmrt47wWIy0ojEkwoYTHJWGrwk1vvGHDkYToyJGS\noMidu5qZ6/ysPuSnalFQ1EuhU2sfg1r76Nn81xYUxpg4u8/j4EFo3BjmzYMmTSJfWAw4cgQWLSoJ\nim/X+Vl92E/VLD+FDfykVE6mU1o2fdv46Jllh56MiScRPWwlIl+pai8R2YMzfHrxSziHrWq5UYgr\nJk92LtG14ADg8GFn2va8PGX6/DV8uzaPNUf8JGc5Q42nnJFMp3Y+rmmTTc8s61EYYyouvnoegwbB\ngAEwYkTki/LYoUOlgmLeGr5dl8faI36SmweDonIyner5yGmTzbmZ1qMwJhHZHeYn0rKlEyAJYvp0\n5V9vr+HbdX7WFeaR3MJPYX0/yQ2T6dzRx7WtfdajMMaERXz1PBLI+5P2Mfj9y0ltupjO9ZxzFD2a\nOXdmN6rZyOvyjDFRyHoeCe7DT/Yx+P2fclH3LD669VOSJMnrkowxCSbkvzoiUl1EKoWjGHNyH0/d\nz9X/vowLumXy4YiXLTiMMZ446V8eEUkSketFZJKI/AAsBTaJyGIR+bOItAp/mQbg09z9XPF/P+V8\nX1P+c+sYKiVZhhtjvHHScx4iMh2YijMw4kJVDQTXpwH9gOuB91R1fJhrLV1Twp3zmPblfvqPu4y+\nZzfmk1+8asFhjAlZRMe2EpEqqnrkdLdxU6KFxxcz9nPhmMvpc3Yj/nu7BYcx5tS
4GR4nPWxVFAri\nGCoiDwWXm4lI99LbGPd9PesAF465gl5dMiw4jDFRI5Szrc8D5wJDgst7gH+4XpEpNtN/gPNfvJye\nZ6bz6R2vWXAYY6JGKJfqnqOqXUVkDoCq7hCRqmGqK+F9N/cAff95Bd07ncFnd4614DDGRJVQeh5H\ngpfoKoCINAACYakqwc1ZcJBezw0ku2N9Pv+1BYcxJvqEEh5/B94DzhCRPwJfAY+7VYiIPCIi80Rk\njoh8IiKEsjGVAAARhklEQVQJOd3c/MUHOfdvA+naPo3pI8dROcnu4zTGRJ+QhicRkXbABTgj6n6m\nqktcK0SkhqruDT7/FdBBVW8/wbZxebXVomUH8f35Srq0rc3Xd4+34DDGuMqz4UlUdSnOTYKuKwqO\noOok2CGxpSsPkv3nq+jcppYFhzEm6lX4L5SIZAMPAJnB97k+Da2IPAbcCOzEuQExISzPP0TXPw2i\nQ6sazLjnDQsOY0zUC+Wv1BvAvcACTrFXICKfAumlV+GcgH9AVT9S1QeBB0Xkd8CvgNEn2tfo0SUv\n5eTkkJOTcyoleW7VmkOc9cRVtG1RjZn3WnAYY9yTm5tLbm5uWPZd4XMeRTMKhqWK4z+rKTBZVTuf\n4PW4OOdRsO4QnR4ZRMvMFPz3v0WVSlW8LskYE8e8OufxsIi8DHwGHCpaqaoT3ShERFqp6srg4kDA\ntZPx0WjNhkN0fvRqWjSz4DDGxJ5QwuNmoB1QhZLDVgq4Eh7An0SkTXDfa4BfuLTfqLN+02E6/c81\nZDauyuxRFhzGmNgTymGrZaraNsz1VEgsH7ba+P1h2j10DY0yKrHgD/9nwWGMiZiIDoxYyjci0sGN\nD01Um7ccpv3Dg2mYnsS8B9+24DDGxKxQeh5LgJZAAc45D9cv1a2oWOx5/LDtMG0fvJZ69ZRFo98h\nubINC2aMiSyvTpj3d+MDE9G2HUdo9+B1pKUFWDR6ggWHMSbmhTQ8SbSIpZ7Hjl1HaHX/tdSsU8iy\nR9614DDGeCaiPY+i+ztEZA/BEXWLXsI5bFXLjULi0c7dR2gz6jqq1zrCktEWHMaY+GE9jzDZtecI\nre8fQnL1gyx/9N+kVk32uiRjTILz5GorEXmyIusM7Nl3hLajbqBqtYMse8SCwxgTf0K5VPeictZd\n4lYh8WLfgULa3H8DSSn7WPbIv6mWbMFhjIk/FTnncTvwS6CFiMwv9VJN4OtwFRaL9h8spM3vb0Cr\n7mXFoxOpnmLBYYyJTyc95yEitYG6wBPA70u9tEdVt4exth+rKerOeRw4VEib3w/lcNIuVj72HjVT\nU7wuyRhjynDznEdFwuOkf6krso2boi08Dh4upM3vh3GQHax49H1qV7fgMMZEn0ifMP9cRH4lIs2O\nKaKqiJwvImOB4W4UE4sOHSmk3f03sl+3s/wRCw5jTGKoSM8jBbgFuAFojjPLXypO8EwBnlfVOWGu\n89iaoqLncbiwkHb3D2fXkS2seOQD0mqlel2SMcacUEQPWx3zwVWA+sABVd3pRgGnIhrC40jhUdqP\nGs72Qz+w4pEPqFfbgsMYE908C49SBTTFmU72e1Vd50YhIX6+p+FxNBCgw6ib+GH/Jpb/z4c0qGvB\nYYyJfl4NjFj04bcBycBeoI6IHFXVv7lRTKy48i9/ZsOBlawaPdWCwxiTkEIOD2CVqk4tWhCRfi7W\nE/Ve+XQG/9n2NF/cmkd6WjWvyzHGGE+cSnjsFpG/4Jw03wVMdrek6LV68w5umzKEe9u/RK/OTb0u\nxxhjPBPyOQ8RycAZUbfouFk/VR3vdmEnqSHi5zwCAaXpPYOoX6UZ8558JqKfbYwxbvD0nAfQDee+\njnk4AdIGiGh4eOG6p59nh65h8R/e8roUY4zxXMjhoaofisi3qvo9gIic4X5Z0eWNaXN4d+toPh0+\ng9o1bLwqY4wJZVTdYkXBEXz+g3vlRJ/1W/Z
w86Rr+VWrv3PBWa28LscYY6JCSOEhIvcds9xURMaJ\nSFzObx4IKOc9/ktaVenD324d4nU5xhgTNULtedQTkf+ISIfg8m+Bx4CGbhUkIneLSEBE0tza56ka\n/tex/JA0m68e+LvXpRhjTFQJNTxmqepPgaLjN5nASmCLG8WISBOcSafWuLG/0/Hv6Ut4Y8u9TBzy\nDmk17X4OY4wpLdQT5l1FJB2oKSKLcMIjFajuUj1/Be4FPnRpf6dk87YDXP/etfys/RNckt3Ry1KM\nMSYqhdrz+BuwHvg/nOAYAPwa2HG6hYjI5cA6VV1wuvs6Hapw3iN30SylI//6+QgvSzHGmKgVas9j\nC86ouvcB81V1Gs4MgxUiIp/iDKhYvApQ4EFgFGXnSf/RG1lGjx5d/DwnJ4ecnJyKlvGjfv7MBNZX\nncq6+2cj4sq9NMYY44nc3Fxyc3PDsu9Qh2S/B9gA/AA0Aeqp6tOnXYRIJ2AqsB8nNJoEP6d7eZcC\nh+sO84++yueKj3rw7pWTuapHtuv7N8YYL3l5h/kKVf2gVCHXulGEqi6k1BVbIlIAdFXV0z4cVlFb\ndxzmmv+7jqHtR1lwGGPMSYQaHq1EpDuwFWgKtHC/JMA5lBWxY0aq0OvhUTSskcHY20dG6mONMSZm\nhRoe/wLuwRnfaiHwuesVAaoarlAq151/n0R+6gTy77XzHMYYUxEnDQ8R+SXOQIj7ilYFf54TXJ8R\nntIi478z1vPPDSMYf8W7NEmr53U5xhgTEyrS81gO9FLVI8e+EOvDkmzfWciVr9/ANR1+xfXn9fK6\nHGOMiRmnNIe519y42koVOv/6YXbU+Jq1f/wvlZIquVSdMcZEJ6/n84gL9zz3Ocuqv8Ty38624DDG\nmBCd0pDsse7zWT/wzNph/Kv/azRv4NqYjsYYkzASLjx27Q7w0zHDubTpjYzIudjrcowxJiYl1DkP\nVeh655/ZWPt9Njw2ncpJCXvUzhiTgOycxyka9fxMFtb6C4tGfmfBYYwxpyFhDlt9lbeDp/KH8NcL\nXqBNejOvyzHGmJiWEIet9u1TGo68mnM7NWbKb2xWQGNMYrLDViF6dPznaP3FfHTnm16XYowxcSEh\nDluNnfca12T9guTKyV6XYowxcSHuw2NJ/h6+r/Mhjwy+3utSjDEmbsR9ePzhrXdpFuhL07QGXpdi\njDFxI67DQxU+3jiWn59zk9elGGNMXInr8PjwywIO1lrE3Zdd6nUpxhgTV+I6PJ6YNI7slCEkV67q\ndSnGGBNX4vZS3UOHA3x3ZCwTB0zwuhRjjIk7cdvzePrdr0ipVJ3Ls7t6XYoxxsSduA2PF2e9xiUZ\nw21OcmOMCYO4PGy1/od9rE59j8mD/+h1KcYYE5fisufx0Jvv0fBIT9o3yfC6FGOMiUtxGR4T81/j\nxjOHe12GMcbEragJDxF5WETWi8js4KP/qezni3lr2V19Dg9ec7nbJRpjjAmKtnMeT6vq06ezg9ET\nX6eTDKZmaopbNRljjDlGtIXHaV0aFQgoX+4Zy8uXjnOrHmOMMeWImsNWQXeKyFwReVlEaof65hcm\nzyBJkhjW75xw1GaMMSYooj0PEfkUSC+9ClDgAeB54BFVVRF5DHgaGHGifY0ePbr4eU5ODjk5Ofz9\ni7GcnzacpCS7t8MYY3Jzc8nNzQ3LvqNyGloRyQQ+UtUzT/B6udPQNrzrcj66/Xm6tWkS7hKNMSbm\nuDkNbdSEh4g0VNXNwed3Ad1UtdwZnEKdw9wYY0z8zmH+lIicBQSA1cBt3pZjjDHmRKKm5xEK63kY\nY0zo3Ox5RNvVVsYYY2KAhYcxxpiQWXgYY4wJmYWHMcaYkFl4GGOMCZmFhzHGmJBZeBhjjAmZhYcx\nxpiQWXgYY4wJmYWHMcaYkFl4GGOMCZmFhzHGmJBZeBhjjAmZhYcxxpiQWXgYY4wJmYWHMcaYkFl4\nGGOMCZm
FhzHGmJBZeBhjjAmZhYcxxpiQWXgYY4wJmYWHMcaYkEVVeIjIr0RkiYgsEJE/eV2PMcaY\n8kVNeIhIDnAZ0FlVOwN/8bai2JCbm+t1CVHD2qKEtUUJa4vwiJrwAG4H/qSqhQCqutXjemKC/WKU\nsLYoYW1RwtoiPKIpPNoAfURkpoh8LiLZXhdkjDGmfJUj+WEi8imQXnoVoMCDwVrqqmoPEekGvAO0\niGR9xhhjKkZU1esaABCRycCTqjo9uLwSOEdVt5WzbXQUbYwxMUZVxY39RLTncRLvA+cD00WkDVCl\nvOAA9768McaYUxNN4fEq8IqILAAOATd6XI8xxpgTiJrDVsYYY2JHNF1tdVIi0l9ElorIchH5ndf1\nhIOIjBGR70Vkfql1dUVkiogsE5H/ikjtUq/dLyIrgjdXXlxqfVcRmR9sq2ci/T3cICJNRGSaiCwK\n3jj66+D6hGsPEUkWkW9FZE6wLR4Ork+4tgAQkSQRmS0iHwaXE7IdAERktYjMC/7bmBVcF/72UNWY\neOAE3UogE6gCzAXaeV1XGL5nL+AsYH6pdU8C9wWf/w7nfhiADsAcnMOPWcH2KepNfgt0Cz6fDPzE\n6+92Cm3REDgr+LwGsAxol8DtUS34sxIwE+iewG1xFzAe+DC4nJDtEKw9H+dK1dLrwt4esdTz6A6s\nUNU1qnoEeBu4wuOaXKeqXwE7jll9BTA2+HwsMDD4/HLgbVUtVNXVwAqgu4g0BGqq6nfB7caVek/M\nUNXNqjo3+HwvsARoQuK2x/7g02ScX34lAdtCRJoAA4CXS61OuHYoRTj+KFLY2yOWwqMxsK7U8vrg\nukRwhqp+D84fVOCM4Ppj22RDcF1jnPYpEvNtJSJZOD2ymUB6IrZH8FDNHGAz8GnwFz0R2+KvwL04\n4VkkEduhiAKfish3InJrcF3Y2yOarrYyFZdQVzmISA3gXWCkqu4t5z6fhGgPVQ0AZ4tILeA9EenI\n8d89rttCRC4FvlfVucHx8E4krtvhGOep6iYRaQBMEZFlRODfRSz1PDYAzUotNwmuSwTfi0g6QLB7\n+UNw/QagaantitrkROtjjohUxgmO11X1g+DqhG0PAFXdDeQC/Um8tjgPuFxE8oG3gPNF5HVgc4K1\nQzFV3RT8uQXnfrnuRODfRSyFx3dAKxHJFJGqwHXAhx7XFC4SfBT5ELgp+Hw48EGp9deJSFURaQ60\nAmYFu6m7RKS7iAjOPTMfEJteARar6t9KrUu49hCR+kVXzIhIKnARzjmghGoLVR2lqs1UtQXO34Bp\nqjoM+IgEaociIlIt2DNHRKoDFwMLiMS/C6+vFAjxqoL+OFfcrAB+73U9YfqObwIbcW6UXAvcDNQF\npga/+xSgTqnt78e5YmIJcHGp9b7gP6IVwN+8/l6n2BbnAUdxrqybA8wO/htIS7T2ADoHv/9cYD7w\nQHB9wrVFqe/Rl5KrrRKyHYDmpX4/FhT9XYxEe9hNgsYYY0IWS4etjDHGRAkLD2OMMSGz8DDGGBMy\nCw9jjDEhs/AwxhgTMgsPY4wxIbPwMMYYEzILD2MAEXlARBYG50WYLSLdgutfFJF2Lux/pIgMDT5/\nTUTWi0iV4HI9ESkIPq8vIh+f7ucZE242MKJJeCLSA2eI77NUtVBE0oCqAKr6cxf2Xwm4BTg7uEqB\nwuC6F0qtQ1W3ishGETlXVWec7mcbEy7W8zAGMoCtqloIoKrb1RnrBxH5PDjD2mXBmdpmizOb5arg\n6z4RyQ0Oh/1x0WB0xzgf8KszKm6RZ4C7RKS838EPgKGufkNjXGbhYYwz9k+zYCj8Q0T6HLuBqn6k\nqmeraldgHvDn4Ii/fwcGqWo34FXg8XL2fx7gP2bdWuArYFg52+cBvU/96xgTfhYeJuGp6j6gK/Bz\nYAvwtojcWN62InIfsF9V/wW0BTrhTMQzB3gAaFTO2zKC+z3Wn3AmNUqi7
CjKPwTfY0zUsnMexgDq\njBD6BfCFiCzAGZJ6XOltRORCYBAlvQIBFqrqeSfZ/QEgpZzPXCkic4HBlJ2sJyX4HmOilvU8TMIT\nkTYi0qrUqrOANcdskwk8B1yjqoeDq5cBDYIn3BGRyiLSoZyPWIIzb0J5HgfuOWZdG2BhaN/CmMiy\nnocxUAN4NjjZUiHOXAdFV1kV9QiG48yR8H5wspwNqvpTEbkG+HvwvZVwToQvPmb/HwOvl1ou7mWo\n6mIRmY0TWEX6AZNc+WbGhInN52FMBIjIv4H7VHVVBbbNBa5Q1V1hL8yYU2ThYUwEiEhrIF1VvzrJ\ndvWBnqoar1Msmzhh4WGMMSZkdsLcGGNMyCw8jDHGhMzCwxhjTMgsPIwxxoTMwsMYY0zI/h8e+H7x\nHMH7HAAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot(N, log10(res['typed']), N, log10(res['auto']), N, log10(res['looped']))\n", - "legend(['Typed', 'Autojit', 'Python'], loc='upper left')\n", - "ylabel(r'$\\log_{10}$(time) in seconds')\n", - "xlabel('Size (N)')" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[327.2894736842105,\n", - " 346.6474820143885,\n", - " 327.77878787878785,\n", - " 120.89588528678304,\n", - " 65.31371303785903]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[res['looped'][i]/res['auto'][i] for i in range(len(N))]" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.29.0.dev+17.g1c27118.dirty\n" - ] - } - ], - "source": [ - "import numba\n", - "print(numba.__version__)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This was run on a Macbook Pro. 
Running `sysctl -n machdep.cpu.brand_string` resulted in:\n", - "\n", - " Intel(R) Core(TM) i7-3720QM CPU @ 2.60GHz" - ] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python [default]", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.1" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/numba/examples/notebooks/j0 in Numba.ipynb b/numba/examples/notebooks/j0 in Numba.ipynb deleted file mode 100644 index 42d4ff613..000000000 --- a/numba/examples/notebooks/j0 in Numba.ipynb +++ /dev/null @@ -1,321 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "I have always wanted to write a ufunc function in Python. With Numba, you can --- and it will be fast." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Populating the interactive namespace from numpy and matplotlib\n" - ] - } - ], - "source": [ - "%pylab inline" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "from numba import jit\n", - "import math" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Define some polynomial evaluation tools." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "@jit('f8(f8,f8[:])', nopython=True)\n", - "def polevl(x, coef):\n", - " N = len(coef)\n", - " ans = coef[0]\n", - " i = 1\n", - " while i < N:\n", - " ans = ans * x + coef[i]\n", - " i += 1\n", - " return ans\n", - "\n", - "@jit('f8(f8,f8[:])', nopython=True)\n", - "def p1evl(x, coef):\n", - " N = len(coef)\n", - " ans = x + coef[0]\n", - " i = 1\n", - " while i < N:\n", - " ans = ans * x + coef[i]\n", - " i += 1\n", - " return ans \n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Define some constants!" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "PP = np.array([\n", - " 7.96936729297347051624E-4,\n", - " 8.28352392107440799803E-2,\n", - " 1.23953371646414299388E0,\n", - " 5.44725003058768775090E0,\n", - " 8.74716500199817011941E0,\n", - " 5.30324038235394892183E0,\n", - " 9.99999999999999997821E-1], 'd')\n", - "\n", - "PQ = np.array([\n", - " 9.24408810558863637013E-4,\n", - " 8.56288474354474431428E-2,\n", - " 1.25352743901058953537E0,\n", - " 5.47097740330417105182E0,\n", - " 8.76190883237069594232E0,\n", - " 5.30605288235394617618E0,\n", - " 1.00000000000000000218E0], 'd')\n", - " \n", - "DR1 = 5.783185962946784521175995758455807035071\n", - "DR2 = 30.47126234366208639907816317502275584842\n", - "\n", - "RP = np.array([\n", - "-4.79443220978201773821E9,\n", - " 1.95617491946556577543E12,\n", - "-2.49248344360967716204E14,\n", - " 9.70862251047306323952E15], 'd')\n", - "\n", - "RQ = np.array([\n", - " # 1.00000000000000000000E0,\n", - " 4.99563147152651017219E2,\n", - " 1.73785401676374683123E5,\n", - " 4.84409658339962045305E7,\n", - " 1.11855537045356834862E10,\n", - " 2.11277520115489217587E12,\n", - " 3.10518229857422583814E14,\n", - " 3.18121955943204943306E16,\n", - " 1.71086294081043136091E18], 'd')\n", 
- "\n", - "QP = np.array([\n", - "-1.13663838898469149931E-2,\n", - "-1.28252718670509318512E0,\n", - "-1.95539544257735972385E1,\n", - "-9.32060152123768231369E1,\n", - "-1.77681167980488050595E2,\n", - "-1.47077505154951170175E2,\n", - "-5.14105326766599330220E1,\n", - "-6.05014350600728481186E0], 'd')\n", - "\n", - "QQ = np.array([\n", - " # 1.00000000000000000000E0,\n", - " 6.43178256118178023184E1,\n", - " 8.56430025976980587198E2,\n", - " 3.88240183605401609683E3,\n", - " 7.24046774195652478189E3,\n", - " 5.93072701187316984827E3,\n", - " 2.06209331660327847417E3,\n", - " 2.42005740240291393179E2], 'd')\n", - "\n", - "NPY_PI_4 = .78539816339744830962\n", - "SQ2OPI = .79788456080286535587989\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now for the function itself" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "@jit('f8(f8)')\n", - "def j0(x):\n", - " if (x < 0):\n", - " x = -x\n", - "\n", - " if (x <= 5.0):\n", - " z = x * x\n", - " if (x < 1.0e-5):\n", - " return (1.0 - z / 4.0)\n", - " p = (z-DR1) * (z-DR2)\n", - " p = p * polevl(z, RP) / polevl(z, RQ)\n", - " return p\n", - " \n", - " w = 5.0 / x\n", - " q = 25.0 / (x*x)\n", - " p = polevl(q, PP) / polevl(q, PQ)\n", - " q = polevl(q, QP) / p1evl(q, QQ)\n", - " xn = x - NPY_PI_4\n", - " p = p*math.cos(xn) - w * q * math.sin(xn)\n", - " return p * SQ2OPI / math.sqrt(x)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from numba import vectorize\n", - "import scipy.special as ss\n", - "\n", - "vj0 = vectorize(['f8(f8)'])(j0.py_func)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "x = np.linspace(-10,10,1000)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": false - }, - 
"outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "100 loops, best of 3: 4.47 ms per loop\n" - ] - } - ], - "source": [ - "%timeit vj0(x)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The slowest run took 11.78 times longer than the fastest. This could mean that an intermediate result is being cached.\n", - "10000 loops, best of 3: 28.4 µs per loop\n" - ] - } - ], - "source": [ - "%timeit ss.j0(x)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[,\n", - " ]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEACAYAAABbMHZzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xd0VFXXx/HvTg+9g4AiHQWlhypElCYCinQQQSmKIBYU\neBQJKgpIsYIVRFRAmiACUkNVEnoPAanSpEsJKXPePwisvBjSptwp+7NWljPDyTm/J8/Mzs25954j\nxhiUUkr5Fj+rAyillHI9Lf5KKeWDtPgrpZQP0uKvlFI+SIu/Ukr5IC3+SinlgxxS/EXkWxE5JSLb\n02jziYjEishWEaniiHGVUkpljaOO/CcDTe/0jyLSHChtjCkL9AG+cNC4SimlssAhxd8YsxY4n0aT\n1sD3yW03ALlFpLAjxlZKKZV5rprzLwYcTfH87+TXlFJKWUBP+CqllA8KcNE4fwN3p3hePPm1/xAR\nXWxIKaUyyRgjmWnvyCN/Sf5KzXygG4CI1AYuGGNO3akjY4x+OeBr2LBhlmfwxK+kJBv3v/E8OQfU\nY+fBU//5eW7a9zfZXq5B5cEvkZRkszyvp37p+9NxX1nhqEs9fwLWA+VE5IiI9BCRPiLSO7mYLwQO\nish+4EugryPGVcoZmrw3gsPxm9j95kIq3lvoP/9erWxRdg9eSkxcJB3HTbAgoVL2c8i0jzGmcwba\n9HPEWEo508fzVrHyymdE991M8YK57tiuROE8/PbMHB6dVofJS2rSo0mYC1MqZT894evFwsPDrY7g\nUc5dusbANT0Y+uC3VCtb9D//fvvPs1GV0rxY6mP6Lu7J1bgEF6X0Hvr+tJZkdb7IWUTEuFsm5RvC\nIyLYf2kXx8bNzPD32GyGQq81J6xAIxa++YYT0yl1ZyKCsfCEr1IeKzrmGKvjPmVmz7GZ+j4/P2FG\nt89Y/O9oYo+ddVI6pRxPi79SwHOTR1Ld/1nq3H9Ppr/3kaplqGBrS5eJo5yQTCnn0OKvfN7GfX+z\nU35ics+BWe7j+55D2Wj7hl2HTjswmVLOo8Vf+byek0dTTXpQqWTWl5uqUa4Y5ZPa0m/KRAcmU8p5\ntPgrn3bk9EW28z1f9XjN7r5GtXmZVVcncOFynAOSKeVcWvyV
T3t1yvcUu9441Us7M6tV7fspEF+d\nlyf96IBkSjmXFn/ls2w2w68nJjCw4YsO6/OVOi/z88HPHNafUs6ixV/5rLFzVyD4079lA4f1+fpT\njxIfcI5pkVsc1qdSzqDFX/msieu/o0WR3vj5ZeremDQF+PtRP3sPPlg8yWF9KuUMWvyVTzp57jIH\ng37lnfYdHd73iHY92Mk0PfGr3JoWf+WThs/4hYJx9VJdtdNe9SqWIG9cVYZNm+fwvpVyFC3+yifN\n3PcDHSp0dVr/T5bpwszd053Wv1L20uKvfM72v05yLmQDwzq2dtoYQ9s9wYnQFRz755LTxlDKHlr8\nlc+JmDWDkgmtKJA7m9PGKFE4D4WuNeC9mTr1o9yTFn/lc1Ycn0vnym2dPk6bch2YGzvD6eMolRVa\n/JVP2XPkHy6GbuW1Jxs7faw327bidOhqDp+64PSxlMosLf7Kp4z+5VeKXW9MnhwhTh+reMFcFLz2\nEOPm/+70sZTKLEdt4N5MRPaKyD4RGZTKv+cSkfkislVEdohId0eMq1RmLTw4l9blnnTZeI3vacW8\nvfNdNp5SGWV38RcRP+AzoClQEegkIhVua/YisMsYUwV4GBgrIg7ZPF6pjDp57jKnQ1fxxpOPuWzM\nV1o8zpGgRbrHr3I7jjjyDwNijTGHjTEJwHTg9mvoDJAz+XFO4KwxJtEBYyuVYR/OXUz+q3UoUTiP\ny8asUa4YoXGl+WLRWpeNqVRGOKL4FwOOpnh+LPm1lD4D7heR48A2YIADxlUqU37du5CHi7V0+bi1\n8rTkx+hfXT6uUmlx1dRLU2CLMaaRiJQGlorIg8aYy6k1joiIuPU4PDyc8PBwl4RU3stmMxyQxXzR\n+E2Xj90nvBVd57fFZhvr0EXklO+KjIwkMjLSrj7EGGNfByK1gQhjTLPk54MBY4wZlaLNAuADY8y6\n5OfLgUHGmI2p9GfszaTU7Wat2U7nX54ifmysy8e22QxBg+7h1/bLaF6zvMvHV95PRDDGZOrIwhHT\nPtFAGREpISJBQEfg9ssbDgOPJocsDJQD/nLA2EplyKTVi6kQ2NSSsf38hFI0YVLkUkvGVyo1dhd/\nY0wS0A9YAuwCphtj9ohIHxHpndzsPaCuiGwHlgJvGGPO2Tu2Uhn1xz+LeaJSM8vGb16uCWuOL7Fs\nfKVuZ/e0j6PptI9ytONn/6XYuKKceO0ERfLlsCRDzNEzVJhYmn+H/kOO0CBLMijvZdW0j1Ju7fOF\nK8l7pZZlhR+g/N0FyHatLJOX/mlZBqVS0uKvvN68nYupW9i6KZ+bKudowoyNOvWj3IMWf+X19iUu\npXv9JlbHoH31Jmz9V4u/cg9a/JVX27DnKImBF3iibiWro/Bs4zpcCd1L7LGzVkdRSou/8m6TV0ZS\nND6cAH/r3+q5sgdT8NpDTFi83OooSmnxV95txcGV1C/2sNUxbqlT+BGWxkZaHUMpLf7Kux00K+lc\nL9zqGLe0q9mQ2IRIq2MopcVfea+1Ow9h87/G42H3WR3llvYNqhAffJxdh05bHUX5OC3+ymtNWrmS\nYgnhbrWYWlCgPwXj6jFp+Wqroygfp8Vfea1Vh1fy0N3uM99/U82C4TrvryynxV95JZvNcFhW0u0h\n9yv+bWs0ZF/8KqtjKB+nxV95pZXbDmDERuNqZa2O8h+dwqtxPeQwMUfPWB1F+TAt/sorfb86knuS\nHnar+f6bQoICyH+tLt8u03l/ZR0t/sorrT26hnp3N7A6xh3VyB/Okn069aOso8VfeaUjrKVdWD2r\nY9xRm+oNiYnT4q+so8VfeZ3tf50kKfA8LWq5z/X9t+v6cA3ish3gwHHd00hZQ4u/8jpTI9dRMK6u\nW6zncyfZQgLJc6UmP0Tq+v7KGu776VAqi5bHrqVKfved8rmpUu56LN273uoYykdp8VdeZ1/cOh5/\n0P2Lf+MKddl1aZ3VMZSP
ckjxF5FmIrJXRPaJyKA7tAkXkS0islNEVjpiXKVud/r8Fa5k20WX8JpW\nR0nXM43qcCH7Rq7GJVgdRfkgu4u/iPgBnwFNgYpAJxGpcFub3MDnwOPGmEpAO3vHVSo1P0RGkePq\ng+TLFWp1lHSVKJyH4GslmLNuu9VRlA9yxJF/GBBrjDlsjEkApgOtb2vTGZhtjPkbwBijtzYqp1i4\nYx0VstW3OkaGlQyoy9xNOvWjXM8Rxb8YcDTF82PJr6VUDsgnIitFJFpEnnbAuEr9x7bz63i0nPvP\n999Uv0Q9ok/qSV/legEuHKca0AjIDvwhIn8YY/an1jgiIuLW4/DwcMLDw10QUXm6+IQkzoT8QdeG\nU6yOkmHta9dl8qG3rI6hPExkZCSRkZF29SHGGPs6EKkNRBhjmiU/HwwYY8yoFG0GASHGmOHJz78B\nFhljZqfSn7E3k/JNs9Zsp/Mv7YgfG2N1lAyz2QwBQwrzR/dN1LrvbqvjKA8lIhhjMrWQlSOmfaKB\nMiJSQkSCgI7A/NvazAPqi4i/iGQDagF7HDC2UrfMjlrHvX6eM+UD4OcnFI6vy09rdepHuZbdxd8Y\nkwT0A5YAu4Dpxpg9ItJHRHont9kL/A5sB/4EvjLG7LZ3bKVSij6xgdrF6lgdI9OqFqhL5AE96atc\nyyFz/saYxUD521778rbnY4AxjhhPqdQctUXRourLVsfItJZV6jFw2QCrYygfo3f4Kq9w5PRF4kOP\n0LJWRaujZFqnhtW5mm0Pp89fsTqK8iFa/JVXmLl2EzmvVCFbSKDVUTItT44Qsl99gBlrNlkdRfkQ\nLf7KKyzdHUXZbGFWx8iyMiG1WLxzg9UxlA/R4q+8wvazUdS713OLf90SYWw7o8VfuY4Wf+UVTgVE\n0aaW5xb/p2rV4qS/Fn/lOlr8lcfbHHsc4x9HgwdKWh0lyx6uXBqb/zU2xx63OoryEVr8lceb9Uc0\n+a/XxM8vUzc4uhU/P6HA9TBmrNOjf+UaWvyVx1u1P4r7c3vulM9NFfPUYtV+Lf7KNbT4K4+391IU\nD5f1/OL/SPlaxFyOsjqG8hFa/JVHS0yycS40mnb13H/nrvR0ahDGhWwbiU9IsjqK8gFa/JVHW7o5\nloCEvFS8t5DVUexWumg+Aq8X4bcoXfNQOZ8Wf+XR5kVHcZfN86d8bipOLeZt0nl/5Xxa/JVH++NI\nNJULek/xr1YkjA3HtPgr59PirzzagetRNKno+fP9N7WoXItDCVr8lfNp8Vce6/K1eK5k20G7+tWs\njuIwT9WrTFy2/Zw8d9nqKMrLafFXHmvOuu2EXCtNkXw5rI7iMLmyB5P96gPMXLvZ6ijKy2nxVx5r\n4bYo7vH3nvn+m3SFT+UKWvyVx9p4Ioqaxbyv+NcrUUtX+FROp8VfeayjtihaVvW+4v9U7Vqc9Nc7\nfZVzOaT4i0gzEdkrIvtEZFAa7WqKSIKItHHEuMp3Hfvnksdu25ie8AdLYfO/wtYDJ6yOoryY3cVf\nRPyAz4CmQEWgk4hUuEO7kcDv9o6p1M9rN5HzSmWP3LYxPX5+Qv7rYcxcH211FOXFHHHkHwbEGmMO\nG2MSgOlA61Ta9QdmAacdMKbycUt3R1Em1PumfG66P3cYq2J16kc5jyOKfzHgaIrnx5Jfu0VEigJP\nGGMmAp676LpyG56+bWN6Hi4bxp5LWvyV8wS4aJyPgJTnAtL8BRAREXHrcXh4OOHh4U4JpTzXKf8o\nnqw12uoYTtOuXk3e2RlNYpKNAH+9LkP9f5GRkURGRtrVhxhj7OtApDYQYYxplvx8MGCMMaNStPnr\n5kOgAHAF6G2MmZ9Kf8beTMq7bY49To1vHyDx/TMevXtXegIHlmRBx99pWqOc1VGUmxMRjDGZ+jA4\n4pAiGigjIiVEJAjoCPy/om6MKZX8VZIb8/59Uyv8SmXEjW0bw7y68APcZQvjl2id+lHOYX
fxN8Yk\nAf2AJcAuYLoxZo+I9BGR3ql9i71jKt/mLds2pqdywTDWH9bir5zDIXP+xpjFQPnbXvvyDm2fdcSY\nynftvRTNizVesjqG0zWtFMbgFTOtjqG8lJ5JUh4lMcnG+dBo2nvBto3paV+/Gley7eDytXiroygv\npMVfeZTlW/bjl5CbSiULWx3F6QrlzU7I1TLMWbfd6ijKC2nxVx7lFy/btjE9JQLCWLhN5/2V42nx\nVx5l/eEor9q2MT1hxcLYeEKLv3I8Lf7Koxy4HkXTir5T/B+vFsZRmxZ/5Xha/JXH8MZtG9PTqnZF\n4kOPcOT0RaujKC+jxV95jDnrthN8rZRXbduYnpCgAHJdqcrMtZusjqK8jBZ/5TEWbYv2ym0b01M2\nWxhLd+vUj3IsVy3s5hPi4hNZFL2XY2fPcU+B/DxcuQy5sgdbHctrbDwRRVix2lbHcLl6JcOYuXu6\n1TG8yoXLcSzfGsvxc+cpUagAzaqXJyjQ3+pYLqXF3wEWRcfwys9jiAmYSWB8YUKSCnLd7yzxv56g\nVEJrJnR8UxfncoCjtijeqer9d/berl2dWnwW+6rVMbzC3HU7eWXO+xwO/o3guOIE2fIS53+KxHln\nqWjrxKedXyO8cimrY7qETvvYIS4+kYYRw2gxuz535SjOlp57iB8bw6WP1nJ93B629dpLmTwVaD6r\nLg0jhhGfkGR1ZI917J9LXA89ROvalayO4nJ17y+BkQQ27vvb6ige62pcAjXffJ2n5jfigQLV2fvC\nAeLG7eLSR2uJHxvLH922kTs4L42mhdH0vQ9ITLJZHdnp7F7S2dE8ZUnngyfOU2NUewQ/FvedRI1y\nxe7YdvtfJ2n4SWf8JYjdb8+mUN7sLkzqHcbNXUnEqre49NE6q6NYovArj9O98nOM6v6k1VE8zpHT\nF3ng/ZYES07WvDqF8ncXuGPbtTsP0eKbboRIbra89RNF8+d0YdKss2pJZ59z8MR5Ko5+lOIh93H8\nw4VpFn6AB0sV4cToJeT1v4syw5ty+vwVFyX1Ht6+bWN6KuYJY/UBPembWYdPXeC+Dx6hREhljo/5\nNc3CD1C/0r38/cEycgcUpMK7zTl57rKLkrqeFv9MOnnuMg+Mbkr5kIfY8v7HGT5JFBIUwJ5R31Iw\noDSV3+miU0CZtP1sFHVKeP9ibnfSqHwYMf9q8c+Mq3EJVPugHaWDa7P1/U8yvCNajtAgdo/8hrsC\nK1Dhnce5cDnOyUmtocU/ExKTbNR4rztF/O9n04jxmd5MJMDfj23vfs01c4H6EUOclNI7nfKPpk0t\n3z3y71C/JuezbfSJuWhHCRs2AH8JIuqdj7L0Wd018ity+BWgZsQL2GzuPxWdWVr8M6HZiPe5aP5m\n8/Avs7yLVI7QIKIHzmZz/DRGzPjdwQm909YDJ7AFXObhyqWtjmKZssXzE3C9IIs3xlgdxSO8Pmk2\nMYlL2Py/aYQEZe2ixgB/PzYPncKxpC20HfOJgxNaT4t/Bn216A9WXvmMVX1n233tftni+RlVZwpv\nb3qWPUf+cVBC7zVzfTT54rx/28b0FCOMeRt16ic9G/f9zdiYvnzR5EeKF8xlV1+F8mbn9x5z+eXc\ne8xYtdVBCd2DFv8MOHnuMi8ue5rXKkygWtmiDunztTaNqBrQkRYf6/Xb6VkVG8X9uXx3yuemqoXC\n+POIFv/0tJz4Ig1CX+C5prUc0l+DB0vSp8R4us3rzLlL1xzSpzvQ4p8BjUYO5F55iNE92ji03wUD\nh3PEbzXj5q50aL/eZs+lKMLLavFv9kAYf8Vr8U9LxI+/cdZvN/Nfd+w5tc/7dKEwD9B8dIRD+7WS\nQ4q/iDQTkb0isk9EBqXy751FZFvy11oRecAR47rCN4v/JIb5LH/jI4f3XSRfDl6v9DFD1vTVrfru\nwJe2bUxPu/pVuZptt9defWKvc5euMWLLSwwL+9Thy6
r4+QkL+39CdMJkZq/d4dC+rWJ38RcRP+Az\noClQEegkIhVua/YX0MAYUxl4D/ja3nFdIT4hiZd+70ufUh9yT6HcThljxNOtyWm7h54TvnFK/55u\n+Zb9+CXm8oltG9OTL1cooVfLM3vdNqujuKUOH4+jkK0Kb3Zo6pT+K5UsTKci79J9Vh+vuOrKEUf+\nYUCsMeawMSYBmA60TtnAGPOnMebmguR/AmnfFeUmun78BUEmF5/17uy0Mfz8hE+fGMnMU+969Q0l\nWfVLdBRFknTK56Z7A8NYtF2nfm534Pg5ll8Zz/dPj3LqOFNe6gVAz88nO3UcV3BE8S8GHE3x/Bhp\nF/eewCIHjOtUMUfPMOuf4Uxp/7nTrzLpFF6Vu5Mepstn4506jidafziKygW0+N9Uq3gYm09q8b9d\n5wmjqGBryyNVyzh1nAB/Pz557COmHnvb4+/Ud+mqniLyMNADqJ9Wu4iIiFuPw8PDCQ8Pd2qu1HSe\n+D6VpD2t61Z0yXiTnn6XR6fXIvZYX8oWz++SMT3Bgesb6FmprdUx3Ear6mH8eNi5R7eeZnPscaIT\nvyGq53aXjNejSRgRvzeg4ydjWTHsbZeMebvIyEgiIyPt6sPuhd1EpDYQYYxplvx8MGCMMaNua/cg\nMBtoZow5kEZ/li/stm7XYR6aWo3tfXa7dK65/Os9uSt7cSJT/PLzZZeuXCf3B3k58eppn9q9Ky3x\nCUkED8vLX/0PU/KuvFbHcQtVh7yMv/iz8f2xLhtz9faDhP9Ug629d/FgqSIuG/dOrFrYLRooIyIl\nRCQI6AjMvy3YPdwo/E+nVfjdRbfJb1M/+EWXn2T8rOMgVsd9zvGz/7p0XHc1e902Qq6U1cKfQlCg\nP7mvVmPG2o1WR3ELscfOso3v+arHay4dt8GDJanm34OOX0S4dFxHsrv4G2OSgH7AEmAXMN0Ys0dE\n+ohI7+RmQ4F8wAQR2SIibjtpOXvtDg76L+anfgNdPnbj6mUpHv8Ifb76yuVju6Pftm3g3kDH3Kjj\nTcplD2P5Xrf9CLlUn28/p2xiG4fdfJkZ014Ywl6/mazdecjlYzuCQ67zN8YsNsaUN8aUNcaMTH7t\nS2PMV8mPexlj8htjqhljqhpj3PYM3oA5w2mR5w27bwvPqjFPDGHh+bF6LTew+WQUYcXc9q1imYdK\nhbHznBb/MxevEnn1c8a3c/2BGtxYpqVu0Av0+v59S8a3l97hm8K89bs4HriGb59/3rIM7RtUJm/8\nA7wx5WfLMriLY2ygVXU98r9d2zphnA7a4JUrTWZG368nUyS+Lo+F3X5bketM7v0KMX6zPfLoX4t/\nCgNmj6BJzlcs32mrf9gAftz/iU9/uA8cP0dC8Ela1rrf6ihup1aFuwGIjjlmcRLrJCbZmHtyHBFN\n3rA0x82j/55TRliaIyu0+Cf7feM+jgQuZdILL1odhTc7NCPB7yJfL/7D6iiWmb4mmjxXq2d4sxxf\n4ucnFIwPY9Yfvjv1M2LGYoKS8tKzaW2rozC59yvs85/D6u0HrY6SKVr8k704/X3CQ/u7xZ6dAf5+\ntCzcnxHLPrY6imWW791AuRw6338nlfKGsfov3y3+E6In0KHUi26xzHfZ4vmpHdiHvj9+aHWUTNHi\nD0Ru+4u/An5l8vMvWR3llo97dOdY8FKf/dN+54UNNCyt8/138kiFMPZd9s3iv3r7Qf4J3sCYZzpa\nHeWWr54dwG6/aew6dNrqKBmmxR/o/9NY6gQ9T4nCeayOckvxgrmoZLrw6k9fWh3F5Ww2w5ngKNrX\n0+J/J+3r1+BCtk0+uRf06z9/QTX/Z8iXK9TqKLdUKlmYCkkdeH7yp1ZHyTCfL/6xx86yy28aE7v3\ntzrKfwx7vDfrr04mLj7R6igutXrHQSQpmBrlPGL9P0uULpqPwOtFWBS91+ooLnXhchzRCZMZ3e4F\nq6P8x6edBrLu+h
ces0Cjzxf/FyZNpEzik25xi/btnqr/AKGJxRk507f2+p395wYKJ+p8f3qKEca8\nTb419fPGlJ/JH1+dRlXcbz/nR6qWoWh8OC987RnLs/t08b9wOY6Vlz/nwyfddyvFtiV78eVGj9j+\nwGHWHtpA5QI65ZOeaoXD2HDMt4r/jNhv6Fmlj9Ux7mhE8zf49cw4rsYlWB0lXT5d/F+Z9BP5E6q6\nbOXOrBjZtQOnQlax9cAJq6O4zP5rUTStqMU/Pc0fDOOgD23ruHRTLP8GxzC0Qwuro9zRM41rkjOh\nDK9Omm51lHT5bPG32QzTDo3l9fquXRAqs4rky0H5pHYMmvad1VFc4vK1eC5n30b7h6pbHcXtta1f\nhWvZ9nrVpuJpiZj3HVX9u5AtJNDqKGkaWGcQ3+0f7fY3afps8X9vxmLEBPLak42sjpKuNx7tyYrz\n33rF1nHpmbNuO8HXSrnF/RbuLk+OELJdvY9Z67ZaHcXp4hOS+PPaFN5q8azVUdI1pF0TAD6YucTi\nJGnz2eL/0YaxdC830C1uEknPM4/WJMBk4/MFa6yO4nQLt0VRwl+nfDKqZFAYi3d4/9TP6NlLCUm8\niyfrVbI6Srr8/ISnSw9k3B9jrI6SJp8s/jNWbeVSYAxje3SwOkqG+PkJjxTsxoS1U62O4nTRJ/6k\nVjEt/hlV++4wNp/y/uL/VdRkWhbvYXWMDBv/bCcuBO3m59XbrI5yRz5Z/IfMH0uTPP3dfu4wpXfa\ndiY2YI7Xz+8eMetpW7ue1TE8xhM1a3Ec7y7+B46f42jIYj7o3MnqKBmWIzSIJrlfYsh81+0ullk+\nV/yjY45xKHAhE3v2Tr+xG6lWtij5rtUkYvo8q6M4zc6Dp0gKOstjNa1botfTNKtRnoTg0xw4fs7q\nKE4z+MefuOf6Yx63beUXPftwMHCB2y7R4nPFv9/UT6ksT7vVUg4Z1a7800zf471TP1NXradAXB0C\n/H3ubZllAf5+5L1ag+lroq2O4jSLj//Is9WftjpGppUonIcqPMOL339idZRU+dSn7OS5y0QnfsvH\nnQZYHSVLhnd8kjMh69l58JTVUZxiRex6Hsxb1+oYHqd8zjBWxHjn1M+KrQe4EnSA19s0tjpKlnzS\nZQAbk77l2D+XrI7yHz5V/Pt/+x1F48Np8GBJq6NkSaG82SmZ0IqhP0+zOopTxFxZz2MP6Hx/ZjUo\nHcbO895Z/N+fP42KtPOo83Mp1a90L3cnNOHFb91vyQeHFH8RaSYie0Vkn4gMukObT0QkVkS2ikgV\nR4ybGfEJScw7+TFvPvKKq4d2qOfrdOP3k99bHcPhLlyO49/sW+ncsKbVUTxOh3q1+Cf4T7e/qSiz\nbDbDmgs/0q9hF6uj2OXd5q/x29mP3G7JB7uLv4j4AZ8BTYGKQCcRqXBbm+ZAaWNMWaAP8IW942ZW\nxLQFBNny0ae5Z08rDGgVzvWA08xdt9PqKA41Y/VmQq9WoEi+HFZH8TjVyhbFPzEnizfGWB3FoWau\n2YbNL45ezepYHcUu3R6tQc6E0rz+3Uyro/w/jjjyDwNijTGHjTEJwHSg9W1tWgPfAxhjNgC5RaSw\nA8bOsAmbx/Psfa94xE1daQkK9KdmcFdGLvKuE7+/bltPmWDP/sVspXuoz4w/1lodw6HGLPmRmqGd\nPP4zCzCg5kAmx4xxq7/OHFH8iwFHUzw/lvxaWm3+TqWN0/y4YjOXA/9iZLenXDWkUw1q3pVN13/y\nquUeNp9eT3gpne/PqjrF67PuqPcU/8QkG5vjpzGouWdP+dz0VsfmJEkc439ZaXWUWwKsDpCaiIiI\nW4/Dw8MJDw+3q7+hv42naX7PuqkrLU/Wq0TgrHx89utqXn4i3Oo4drPZDCeD1tGp/kdWR/FY7WvX\nZ8asUVbHcJjPF6whKDG/W6+4mxkB/n50uvc1Rq0Zw2tt7F9PLDIyksjISLv6EGPs
+zNERGoDEcaY\nZsnPBwPGGDMqRZsvgJXGmBnJz/cCDY0x/7lmUUSMvZlS2rjvb8ImP8DBl//yyGv776TF+x9y4Pw+\n9n7o+Wv9r9h6gMY/NiRh1FGv+BPfColJNoLeLMjG53ZQrWxRq+PY7b43+nBv7lIsejPV60c80oXL\nceR/pySbtoPrAAAW/ElEQVSzWi91+BpFIoIxJlMfHkdM+0QDZUSkhIgEAR2B+be1mQ90Sw5ZG7iQ\nWuF3hv5TP+MB09WrCj9AxFOd2BcwmwuX46yOYref16+nWFI9Lfx2CPD3o9D1evywep3VUex2+Vo8\nMX6zeftJz1nOISPy5AihUc5+DPplnNVRAAcUf2NMEtAPWALsAqYbY/aISB8R6Z3cZiFwUET2A18C\nfe0dNyNOn7/ChoSvGd/RM2/qSkvN8sXJfa0K789caHUUu60+tI4aRfRkr72qFajPiv2eP+//wazF\n5Iy7nzr332N1FIeb0ON59gf84habMznkOn9jzGJjTHljTFljzMjk1740xnyVok0/Y0wZY0xlY8xm\nR4ybnpcmTaHI9Yfccr9PR2hdqgs/bP/B6hh2O5C4inZhDayO4fFaV63PvjjPL/5Tt/xEi3s6Wx3D\nKcoWz08l04W+331qdRTvvcM3PiGJOcc/YsjDnn1TV1qGd3iKE6HLOXjivNVRsmznwVMkBJ/kqXoP\nWh3F43VqWJ1r2WLccimBjDp+9l+OhixiePt2Vkdxmo86vsyfCV9x8txlS3N4bfEf8v1cgm35efHx\nh6yO4jQlCuehWFxjhs2YZXWULJu0YjUF4+oTFOhvdRSPlyt7MLmvVGfqyj+tjpJl78z4hULXGlC2\neH6rozhNoyqlKRr/MC9+M8nSHF5Z/G02wxc7R9K/6mCvP4n4dOWuzD/0o9UxsmxZ7CpqFNQpH0ep\nmKs+i3Z57tTP7NifaF/BO6d8UhredCDzT48nLj7RsgxeWfzHzFlOolzlnS4trY7idEPaNudSyA7+\n2H3E6ihZsi9+FW1rNLQ6htdoel99dlz0zO0+dx06zZmQPxjavpXVUZzuuaa1yJZYnP9NnWtZBq8s\n/iPXjqRb6UE+sS58ruzBlLe15Z25nrfSZ8zRM1wPOUyn8GpWR/Ea3R+py4XsG7l05brVUTJt+KyZ\n3Bv/OIXyZrc6iku8WG0gX+380LIlH7yuOk5ZGs2lwFg+fs77/3S8qW/9Lqw863lX/Uxavob81+oS\nEuSWN5p7pHsK5Sb71fuYvMzz5v0XH/uJ7tV953P7TpeWxPtdYMJv1kzTeV3x/9+ikbQu+JrXLOWQ\nES+0qE+i/yVmrdludZRMWRKziur5dcrH0Splb8ScLSusjpEpq7cf5HJwrMdu2pIVAf5+tC32KiNW\njLFkfK8q/gs27OFE4Bom9nrO6iguFeDvR82Qzoxe7FlH/zFxq2lTTYu/o7V+sBFbL3hW8X9vnmdv\n2pJVnzzXjVNBf7Io2vXLcXtV8e87YzhNcr7qM3OGKQ1u3pVN8Z6z0ufhUxe4li2WLg/XsDqK13mu\ncT0uZd/C6fNXrI6SITabYfX5H+nbwHemfG4qkDsbDUJf4NWZY10+ttcU/9lrd/B3YCTf9e1ndRRL\ntK5bkaDEAnwyf5XVUTLkmyVryHulFjlCg6yO4nUK5c1OrivV+HqJZ1zyOWfdDhL9rnj8pi1Z9eVz\n/Yjxm83anYdcOq7XFP+X5kTweN43fHonqEcKdeXL9Z5xzf/8XUsJK/Co1TG8VpU8D/PrDvdZOz4t\nH/5+Y9MWX7g6LzXl7y7AQyH96PHdcJeO6xU/7WmRWzgZ8CeT+75gdRRLRTzVidiAOR6x0ufe+KV0\nreM7J/dcrU3VRuy84v7z/olJNjZdn8Ybzbxj05asmtr3VQ74/8aCDXtcNqZXFP+X573NkwUHky9X\nqNVRLFWjXDHyXKvKiJ9/szpKmqJjjpEQ9A8d
w6taHcVr9Xi0Nley7eHwqQtWR0nTFwvXEZiUx+Hr\n23uaewrlplmugTw/422XjenxxX/c3JWc89/FpL69rY7iFlqX6sKPO9z7qp8vly6jWHwjn/0z3xVy\nZQ8m35XafLVktdVR0jRx7U80zOd7J3pT832/fpwIWMcPyze5ZDyP/vTFJyTx5upX6H/fKHJlD7Y6\njlsY3vEpToSu4MDxc1ZHuaNlB5fy8D065eNsNQo0YsGuZVbHuKPL1+LZI7MY6mWbtmRVgdzZ6FDk\nbfr9+ppL7vr16OLfZ+IUAk0OxvRoa3UUt3FPodwUi2vCsJ/dc6VPm81wJGAZvR/V4u9s3eo2ZU/C\nYqtj3NHo2UvIGVeBehVLWB3FbXzXvxfX5TwDJzv/8+uxxf/42X/5/thQPm0x3utX7sysblW68ush\n95z6mbNuBwGJuahf6V6ro3i9Dg2rkBRwiRVbD1gdJVXfb/mJx+7WKZ+UggL9GRn+CZ/sHciZi1ed\nOpbHFv8WY4ZS0taYZxrXtDqK2/lfu+b8G7KbdbsOWx3lP6asXUrZAL3E0xUC/P0omdSML5a539H/\nyXOXORy0kAgv3rQlqwa0bkjRpNo8NX6UU8fxyOI/eUkU223T+W2ANWtiuLscoUFUsLXlnbk/WR3l\nP9afWkyr+5taHcNntCjfnFV/L7I6xn9ETJ9Lobj6lL+7gNVR3NLMXmNYEzeB+X/udtoYdhV/Eckr\nIktEJEZEfheR3Km0KS4iK0Rkl4jsEJGX7BnzalwCfRf1pve9Y/SNk4a+D3Vh1bkfLFsuNjXHz/7L\nuex/MqClHvm7ykstGnM6dLXb3fvx874pdK3U3eoYbqvWfXfToeA7dJ7xLPEJSU4Zw94j/8HAMmNM\neWAFMCSVNonAq8aYikAd4EURqZDVAZt+8A45uIvP+/j2TSHpef6xeiT6XWHmmm1WR7nlo/lLyX+l\nrk/fhe1qpYvmI+fVB5i40H0u+Vy36zAXQrYyrKP3b7Zkj6kv9yHQhNJmzHin9G9v8W8NTEl+PAV4\n4vYGxpiTxpityY8vA3uAYlkZ7KNfIll/7VuW95+sJ3nTEeDvR1hoZ8YscZ/lHn7ZvYDwYo9bHcPn\n1MzXjFlb3Wfef9jsqVQ0HfTy7HQE+Psxt8e3LLw4iu+XbXR4//YW/0LGmFNwo8gDhdJqLCL3AlWA\nDZkdKOboGQau68bwapN4sFSRLET1PYNbdGVzwk9O+7MxMxKTbOz3+41+TVpYHcXnPFO3OTuvL7Q6\nBnDjUt9VF79j4KPdrY7iEcIrl+KVshN5bnE7Dp4479C+091CSUSWAoVTvgQY4K1Umt9xgllEcgCz\ngAHJfwHcUURExK3H4eHhVKtZh7CxbQjL2ZW3OjZLL7JK1qr2/QTPKMSnv67itTaNLM0ydflGghIK\nEF65lKU5fFHnh6vRY8m/LIzay2NhWZ5xdYgvFq7D3wTz9CO6lHdGjX2uLSuHrKPW6K4cGTWPkKAA\nIiMjiYyMtKtfMSbrJwRFZA8Qbow5JSJFgJXGmPtSaRcALAAWGWM+TqdPkzJTYpKN8oO6cy3pMkfG\nzNIlATKp5QdjiTm3i30fTrI0R4Nhb3M96Tob3nPu5WsqdQ8O7sddOYrx+1upnZZznfKv96RM3vL8\n9r/XLc3haS5fi6fEkMcpFFSSXSO/+M+0t4hgjMnUXLi9lXQ+0D358TPAvDu0mwTsTq/w3y4xyUal\nwX34J/Evtr49VQt/Fgx7qiP7A+Zy7tI1S3NE/TuX7nVaWZrBlz1d80nWnZ1raYYzF68SGzCHER26\nWprDE+UIDWLH0NkcTthIraFvOOQqPnur6SigsYjEAI8AIwFE5C4RWZD8uB7QBWgkIltEZLOIpDt3\nc+biVcq80ZUTCTHsHbrIJ3fncoQbK31W572ZCyzLsDBqLwn+5312sw538GKLBlwNPsCGPUcty/C/\nH2ZRIK4O
VUrfZVkGT1Y0f062vPo7e66uocKgnlyNS7CrP7uKvzHmnDHmUWNMeWNME2PMheTXTxhj\nHk9+vM4Y42+MqWKMqWqMqWaMSfPSgwUb9nBPRD1AODB8MUXz57Qnps9rX+4Zpu60btpn3OJZPBDw\nlP7lZqFsIYGUSmzJhwt+sSzDtP0T6VWtj2Xje4Pydxfgr7eXczHxNIWG1Gf5lv1Z7sstP42t5jag\ndfHe/PXhDxTInc3qOB5vZLd2nA3eaNkaL+vOz6RnXV18z2rtH3yS5X/PsWTsGau2ci3gGEM7PGbJ\n+N6kUN7snBg7n2ZFu9J4Ri3C3nwjS/24ZfHf3Gs70157Qa/ld5A8OUKo4d+DQTO/cPnYv2/cR3zg\nPzz/WD2Xj63+v4FPNOFi6FY2xx53+djvLPySBjl6ERKU7gWGKgP8/IRZr/cn6tntXEu8krU+HJzJ\nIXRO0PFGtuvDpqTvXH7id+yiWVT0a6NTPm4gX65QSic8wfDZ01067vGz/7LHbzqjOz3n0nF9QY1y\nxdgx6vMsfa9+In1EoyqlKXC9JoOm/uyyMW02w+rzP/J8/Y4uG1OlrXedriw95drlvl///ieKxD1M\njXJZurFfOYkWfx/yfPW+TNs/wWXj/bBiE0lyXad83MiAVuFcDzzl1NUiU7LZDLOPfsLL9fq6ZDyV\ncVr8fchbHZoT53+KyUuiXDLemGXf8VCubnruxo0EBfpTLbAzoxa6Zs2nD2YuQUwAA9s84pLxVMZp\n8fchQYH+tCz4MkMXj3b6WJeuXGcnM3jnqW5OH0tlzutNu7Lh2g8uWfNp/B/j6FrmVT0AcENa/H3M\nxF49OR60it837nPqOBHT55M7rpJu1+iG2jeoTHBiId7/2bkrfc5dt5PzQTsY213P+bgjLf4+pki+\nHDwU0peXf3buLmiTdnxO90ovOHUMlXXtS77AxI0TnTrG63PH0Cjni7p0s5vS4u+DvnquPzH+s9h6\n4IRT+p+9dgf/BsUyouuTTulf2e/DZzryT/AfrN15yCn9r9h6gL8CfuOb3i86pX9lPy3+Pqj83QV4\nwHSl56SxTul/6PzPaZi9D9lCAp3Sv7JfgdzZqCxPM3C6c278e/7HETQIeZEShfM4pX9lPy3+Pmry\nc4PZnDSZ6JhjDu035ugZ9vr/zLiuvRzar3K88R1fIirxG46cvujQfldsPcB+//lMeeFlh/arHEuL\nv4+qVrYotQJ70/WbCIf22+OrcVRIaq93aXuA8MqluDehOb2+cuy9H71+GM5DetTv9rT4+7Dp/QYR\n6zefBRv2OKS/A8fP8WfCl3zVbbBD+lPON/6pwSz792POXLzqkP6mLI3mkN8ypvUf6JD+lPNo8fdh\nJQrn4bE8b/Ds9NccsjnEM1+Mo1xSG72804O0rluRIgn16PrpJ3b3ZbMZ+v/2Mt2Kv6fLsHsALf4+\nbvrLL3FJjjBw8iy7+vlj9xHWx0/k2+6pbe2s3Nl3XUay5PIYdh48ZVc/z0+cSqJc48sXnnFQMuVM\nWvx9XI7QIMY/8iUfx7zM4VMXstxPx28H8VBIP+pVLOHAdMoVGlcvSzW/Z2g7Ieu/uDfHHuebIwP5\nuuU3BAX6OzCdchYt/ooXWtSjol8b6o3qlaXpn4gff+O4/MnMAVnbVEJZb/aAocTKb3w8b1Wmv9dm\nMzSf0JuHQl+gS6NqTkinnMGu4i8ieUVkiYjEiMjvIpI7jbZ+yfv3zrdnTOUcq9/6kHNmP53HZ+6u\nz5ijZ3h3W2/GNZii+yx7sBKF8zC08te8tuaZTF/62eKD0VzhNL8NetNJ6ZQz2HvkPxhYZowpD6wA\nhqTRdgDgmnVkVablyRHCr91m8vPp4YyY8XuGvufSlevUGvsUtYKfoX+rBk5OqJwtoksL7g9oSfUP\nOhIXn5ih73lz6nyWXPyY5b1nkyM0yMkJlSPZW/xbA1OSH08BnkitkYgUBx
4DvrFzPOVEj1Qtw+cN\n5jB0S1c+nL08zbaXr8VTcejTZPcrwOqI91yUUDnbnxHjMMbGA//rle4vgLd/WMAHO3syucl8at13\nt4sSKkext/gXMsacAjDGnAQK3aHdeOB1wP7rCZVTvdCiHh/Xnc3gqC60Hjku1QKwOfY4dw9pRpJJ\nYEfED7pFoxfJFhLI9qGzOJ94nHsGtWbXodP/aRMXn0jjd0cwYntvvm70K90erWFBUmUvMSbteiwi\nS4HCKV/iRhF/C/jOGJMvRduzxpj8t31/C6C5MaafiIQDrxljWqYxnkkvk3K+FVsP0Pa73lz1O0nz\nwr2oX/ZBLl69yoJdy9lqplI/uC/L3hqmV3Z4qatxCYS/+yYbkyZTw78HLSqGky04mFX7trD0zDfk\nSLqb3/t+p1szugkRwRiTqU0T0i3+6Qy4Bwg3xpwSkSLASmPMfbe1eR/oCiQCoUBOYI4xJtVdPkTE\nDBs27Nbz8PBwwsPDs5xRZZ3NZhg9exnfRf3MyYR9BBDC/blr8X67Z/VGLh+xdFMsw36ZTMy/0SSR\nQLHg++hVpwMvtWqoG7RYKDIyksjIyFvPhw8f7vLiPwo4Z4wZJSKDgLzGmDve2y8iDblx5N8qjTZ6\n5K+UUpmQlSN/eydrRwGNRSQGeAQYmRzkLhFZYGffSimlnMSuI39n0CN/pZTKHCuO/JVSSnkgLf5K\nKeWDtPgrpZQP0uKvlFI+SIu/Ukr5IC3+Sinlg7T4K6WUD9Lir5RSPkiLv1JK+SAt/kop5YO0+Cul\nlA/S4q+UUj5Ii79SSvkgLf5KKeWDtPgrpZQP0uKvlFI+SIu/Ukr5IC3+Sinlg7T4K6WUD7Kr+ItI\nXhFZIiIxIvK7iOS+Q7vcIjJTRPaIyC4RqWXPuEoppexj75H/YGCZMaY8sAIYcod2HwMLjTH3AZWB\nPXaOqzIgMjLS6gheRX+ejqU/T2vZW/xbA1OSH08Bnri9gYjkAh4yxkwGMMYkGmMu2TmuygD9cDmW\n/jwdS3+e1rK3+BcyxpwCMMacBAql0qYkcEZEJovIZhH5SkRC7RxXKaWUHdIt/iKyVES2p/jakfzf\nVqk0N6m8FgBUAz43xlQDrnJjukgppZRFxJjU6nUGv1lkDxBujDklIkWAlcnz+inbFAb+MMaUSn5e\nHxhkjGl5hz6zHkgppXyUMUYy0z7AzvHmA92BUcAzwLxUAp0SkaMiUs4Ysw94BNh9pw4z+z9AKaVU\n5tl75J8P+Bm4GzgMtDfGXBCRu4CvjTGPJ7erDHwDBAJ/AT2MMRftDa+UUipr7Cr+SimlPJNb3OEr\nIm1FZKeIJIlItdv+bYiIxCbfINbEqoyeSkSGicix5CutNotIM6szeRoRaSYie0Vkn4gMsjqPpxOR\nQyKyTUS2iEiU1Xk8jYh8KyKnRGR7itcydMNtSm5R/IEdwJPAqpQvish9QHvgPqA5MEFE9JxA5o0z\nxlRL/lpsdRhPIiJ+wGdAU6Ai0ElEKlibyuPZuHGhSFVjTJjVYTzQZG68H1PK6A23t7hF8TfGxBhj\nYoHbC3trYHryjWGHgFhA3yyZp78wsy4MiDXGHDbGJADTufG+VFknuEnt8UTGmLXA+dteTveG29u5\n+/8BxYCjKZ7/nfyaypx+IrJVRL7JyJ+D6v+5/T14DH0P2ssAS0UkWkR6WR3GS2Tkhtv/x95LPTNM\nRJYChVO+xI03wZvGmF9dlcMbpfWzBSYA7xhjjIi8B4wDnnN9SqVuqWeMOSEiBbnxS2BP8tGscpx0\nr+RxWfE3xjTOwrf9zY3LSG8qnvyaSiETP9uvAf1Fmzl/A/ekeK7vQTsZY04k//cfEZnLjak1Lf72\nOSUihVPccHs6vW9wx2mflPPT84GOIhIkIiWBMoBeHZAJyW+Em9oAO63K4qGigTIiUkJEgoCO3Hhf\nqiwQkWwikiP5cXagCfqezArhv7Wye/
LjVG+4vZ3LjvzTIiJPAJ8CBYAFIrLVGNPcGLNbRH7mxh3B\nCUBfozcmZNZoEanCjSssDgF9rI3jWYwxSSLSD1jCjYOlb40xuiR51hUG5iYv4xIA/GiMWWJxJo8i\nIj8B4UB+ETkCDANGAjNF5FmSb7hNtx+tpUop5XvccdpHKaWUk2nxV0opH6TFXymlfJAWf6WU8kFa\n/JVSygdp8VdKKR+kxV8ppXyQFn+llPJB/wd4SoPIFchgagAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot(x, vj0(x), x, ss.j0(x))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This was run on a Macbook Air. Running `sysctl -n machdep.cpu.brand_string` resulted in:\n", - "\n", - " Intel(R) Core(TM) i7-3720QM CPU @ 2.60GHz" - ] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python [default]", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.1" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/numba/examples/notebooks/numba.ipynb b/numba/examples/notebooks/numba.ipynb deleted file mode 100644 index a48cb47be..000000000 --- a/numba/examples/notebooks/numba.ipynb +++ /dev/null @@ -1,804 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This example uses trunk version of numba at Github." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Populating the interactive namespace from numpy and matplotlib\n" - ] - } - ], - "source": [ - "%pylab inline" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "from numba import jit" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The most important api of numba is the decorator: `jit`.\n", - "\n", - "The `jit` decorator returns a compiled version of the function using the input types and the output types of the function. You can optionally specify the type using `out_type(in_type, ...)` syntax. Array inputs can be specified using `[:,:]` appended to the type. If no type are specified, it watches for what types you call the function with and infers the type of the return. If there is a previously compiled version of the code available it uses it, if not it generates machine code for the function and then executes that code. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def sum(arr):\n", - " M, N = arr.shape\n", - " sum = 0.0\n", - " for i in range(M):\n", - " for j in range(N):\n", - " sum += arr[i,j]\n", - " return sum\n", - "fastsum = jit('f8(f8[:,:])')(sum)\n", - "flexsum = jit(sum)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "179700.0\n", - "179700.0\n", - "179700.0\n", - "179700.0\n" - ] - } - ], - "source": [ - "arr2d = np.arange(600,dtype=float).reshape(20,30)\n", - "print(sum(arr2d))\n", - "print(fastsum(arr2d))\n", - "print(flexsum(arr2d))\n", - "print(flexsum(arr2d.astype(int)))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10000 loops, best of 3: 135 µs per loop\n" - ] - } - ], - "source": [ - "%timeit sum(arr2d)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The slowest run took 8.39 times longer than the fastest. This could mean that an intermediate result is being cached.\n", - "1000000 loops, best of 3: 827 ns per loop\n" - ] - } - ], - "source": [ - "%timeit fastsum(arr2d)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The slowest run took 15.67 times longer than the fastest. 
This could mean that an intermediate result is being cached.\n", - "100000 loops, best of 3: 3.9 µs per loop\n" - ] - } - ], - "source": [ - "%timeit arr2d.sum() " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The speed-up is even more pronounced the more inner loops in the code. Here is an image processing example:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "@jit('(f8[:,:],f8[:,:],f8[:,:])')\n", - "def filter(image, filt, output):\n", - " M, N = image.shape\n", - " m, n = filt.shape\n", - " for i in range(m//2, M-m//2):\n", - " for j in range(n//2, N-n//2):\n", - " result = 0.0\n", - " for k in range(m):\n", - " for l in range(n):\n", - " result += image[i+k-m//2,j+l-n//2]*filt[k, l]\n", - " output[i,j] = result\n", - "\n", - "try:\n", - " # py2\n", - " from urllib import urlopen\n", - "except ImportError:\n", - " # py3\n", - " from urllib.request import urlopen\n", - "\n", - "bytes = urlopen('http://www.cs.tut.fi/~foi/SA-DCT/original/image_Lake512.png').read() \n", - "\n", - "from matplotlib.pyplot import imread\n", - "\n", - "from io import BytesIO\n", - "\n", - "image = imread(BytesIO(bytes)).astype('double')" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAQkAAAEACAYAAACgZ4OsAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsvXmMXdd15vs7d57qjjWPZE2s4iSJpERKlETRkmPFlu1E\nmZwB8BTDQWD0QzfQSBqNRoBGA8EzEqQBo9FGN2x3hO44rxOrIyt2FEWyKIkixcGcimMVa57vXHee\nz/ujtDb3vaJtqtF6fAG4AYJVt849Z59z9pq+9a21DdM0eTAejAfjwfhZw3K/J/BgPBgPxv+/xwMl\n8WA8GA/Gzx0PlMSD8WA8GD93PFASD8aD8WD83PFASTwYD8aD8XPHAyXxYDwYD8bPHR+bkjAM43nD\nMG4ahjFtGMYffVzXeTAejAfj4x3Gx8GTMAzDAkwDzwJrwDngC6Zp3vw/frEH48F4MD7W8XF5Eo8B\nM6ZpLpqmWQX+Gvj8x3StB+PBeDA+xvFxKYk+YFn7feWDzx6MB+PB+Gc2HgCXD8aD8WD83GH7mM67\nCgxqv/d/8JkahmE8KBp5MB6M+zhM0zTu5biPS0mcA0YNwxgC1oEvAL/detDx48f59Kc/jdPpJBgM\ncvnyZXK5HMFgkLGxMebm5mhrayOZTFKtVrl16xaHDh2ivb2dUqmEYRgkEgnsdjs3b94kGAxSqVSo\nVqusrq5isVhwOBzk83kajQZOp5NarUZPTw/pdJparUaj0aBQKJDP56nX67hcLkzTpF6vY7FYqFar\ntLW1USgUGBoaYmFhgUAggM1mo1AoYJompmkyODjInj17eOGFF3C5XKRSKf78z/+cra0t8vk8FosF\nt9vNvn37GB8f5+jRoywtLfHNb34Tv9/PH/3RHxGJRDBNk0ajAYBhGFSrVQA1H4vFQqPRwDAM6vW6\n+rxer2O1WtV8vve97/HVr34VwzCwWCxUKhUEpLZYLBiGof41Gg2q1So2m41Go4HFYsFms1Gv19V1\nrFYrAI1GQ83PNE3sdjumaWIYd9abzE/+16+r35tc63vf+x5f+tKX1Ofyv/ws70M+s1gs6j6tViv1\nel2dS463Wq3YbDZqtRqA+jmfz7O4uMg777zDCy+8QD6fJ5FIAHD+/HkSiQSFQgHDMHC5XIRCIXp7\ne6lUKgwNDfHee+9x8+ZNxsfHcTgclMtlgsEgLpeLTCaD3+8nkUhQq9UYHh4mkUhQLBbJZDKk02n1\nzOQd1Go1XC4XHo9Hfc9ms2GaJoFAgEgkwr/+1/9avWP5jjzXUqnEu+++y/nz5zl48CBPPfUUNptN\nrWOA7373u3z5y19W5zVNkyeeeOKehfljURKmadYNw/gG8DrbIc13TNO80Xqc1WrFbrfT1dXF9PQ0\nFouF3t5e0uk0//RP/0RfXx9utxvDMAgEAng8Hq5du0YkEsFqtWIYBoVCAbfbTTQapVgsEo/H8fv9\n1Ot1vF4v5XIZl8uFzWZTAicvf2FhgXq9jsfjoV6vA9sLXBadHNtoNEin0xw/fpx0Ok2lUqFUKuFy\nuSgUCrhcLsrlMvl8ntdffx0Av9/PwMAAm5ubmKaJz+fjc5/7HPF4nFdeeYVoNEp3dzef+MQn2Ldv\nH8FgUL1AWQAikLrA1Wo1tWB0YdEFUBYgQK1WU89KF8BGo4HNZlPvQRaUHCvPQxdKi8WC1WrFarWq\nucn55TP5jq4E9KErE1E8usBra6jpb/Jd+V3mB+Dz+chkMk3KSBSo3KM8L7fbTVdXF5FIhNdffx2f\nz0dnZydDQ0McPHiQzc1NVldXiUajWCwW8vk8MzMzTc/M5XIxNjbG+vo6TqeTdDpNV1cXHR0dpFIp\nurq6WF5eplwu43Q6SaVSah4ej6dpnh0dHRSLRUqlklIgxWIRv99PJpOhq6tLPU9RiLI2rFYrbreb\nT33qUxw/fpyTJ0/y7W9/m71793LkyBHcbjfVapVqtUosFqOtr
Q273U6pVPo50vvh8XF5Epim+Rqw\n6+cd09nZSVtbG5VKBbfbTalUwmKxUCgUqNfrjI+Pc/v2bXp7e8lms7jdbhKJBDdv3iQcDitNLt5E\nLBaj0WhQqVSwWCyUy2UAIpEIPT09pFIpFhYWSCQSeDwenE4n2WyWUqlEOBwmHo83WcXe3l4sFov6\n/MKFC2pxmqaJx+NRWt1qtbKxscF7771HrVZjx44d1Ot12tvb6e3txTRNFhcXOXLkiFJibW1tPPnk\nk0rD64LZaDQwTVNZQBFCERQRKJmrHC8CLUOO14VTX2itAq0LohyrW3S5lq5wLBaLeg6AmoPdblfz\nMk2TarWqlJAoD91DknPL77ow6cpQvgPbHoJ4akDT+9AVrJzHbrfj9/t59NFH+ad/+icqlQqJRIK+\nvj527dqFaZosLS1htVrx+/1UKhVlOGKxGEePHqVYLNLV1cXq6iqmadLV1UV3dzeJRIJ6vU4ymaRW\nq7GxsaE8tHQ6jWEYDAwMMDY2htfrxev1Mjg4SL1e57vf/S61Wo1KpYLNZqNUKimFor8T3SBYrVa1\nNux2O8888wyPPfYYp06d4s/+7M9wOBzUajVu377N1atX6erq4ujRoySTyXsT4g/Gx6Yk7mVMTEwQ\nj8dJJpNEIhEVIsTjcdLpNG+99Raf+MQnSKfTWCwWUqkUmUxGLbKhoSHq9To3btzAYrEoy14sFrHZ\nbIRCIQDcbjfBYJCZmRmKxSKmaeL3+3G5XKytreH3+1X4Ua/XcbvdAAwPDzM5Ocn3v/99LBYLS0tL\ntLW1KVeuWq2qkEOshdvtxmq14nA4OHLkCOFwmImJCf79v//3LC8vU6vV8Hq97NmzhwMHDqjFLItb\nhEoWdrVa/ZAXIEIJdwS2VTns37+/KTSQUELOIZbNNE3K5bLyBgAcDofyuuQa+vd0JSLz1cMX+Zvd\nblfz1BWEKAJZ8I888kiTMOgegK4g4I5S0cMk3VuRe5DnWalUcLlcKnSzWq24XC7a29txuVwsLy/z\nqU99inA4zObmJnv37iWbzXL58mXy+TwAuVyOnp4e3G433d3dPPvss4yMjJBMJlleXsY0TSKRCOvr\n62quPp+P3bt3EwwG6e3t5datW8TjcZ566ikmJiaIRqOUy2Xsdjvlcplyuazeq9vtxmazYRgGpVKJ\nYrGovF25v0qlohSE7gW73W6eeOIJ8vk8pmmSyWRYX1/H4/GQy+X4u7/7u48sp/dVScRiMbxeLzt2\n7MBms2G32wHYvXs3CwsLtLe3Mz8/T2dnJ9lsVrn+brdbLZrZ2Vm8Xi8+n4/5+Xmq1arCHlwuF88/\n/zwul4t4PE5bWxuGYeB0OlleXqZYLOLz+ZRG9nq9bG1tkc1msdvtTE1NcePGDQzDoK2tTc3RZrNR\nLpfZ3NxscucbjQahUIhnn32WgYEBSqUSyWSSH/3oR6yvr1Mul0kmk4yOjjI/P8/AwADd3d1NrjXQ\nZAnFGus4gW4hdY/CarVSrVYxTVMJnh6GiNDK7/I9+SeC1Yp76LG+KA8Rat2Dke/JfMXlbw1zgCYM\n5KGHHlLzafUo5Hv6s5Ghewt3O0530WV++nfC4TArKyu89tpr9PT04Pf7WVxcVCFirVbDbrfj9XpZ\nW1ujt7eX5eVlYrGYwrB27dpFo9EgHo9TLBYZHR3l5s2bjIyMsLm5ySOPPEJPT49a2y6Xi83NTdra\n2pQRW1tbY2trC8MwcDgcyntxOBxkMhn+23/7b9hsNiYnJxkdHaW9vR2Hw6Hej3gehUKBWq1GuVxm\nY2ODaDSK3W5n3759rKysUKlU6O/vp6Ojg/Pnz9+znN5XJZFIJNjc3GR2dpZ9+/ZRqVRYXV0lFovR\n29vLzp07icfjXLhwQQnM2NiYerlzc3OkUikGBgao1Wo4HA6FPbS1tTE4OEgsFmPXrl2Uy2WOHTtG\nKBRiZmaGd955R7ndIng2m
w2n04nValUYg91uxzAMvF6v8lBk4YubFw6HqVQqHD16lMHBQR5++GHy\n+bwSgpdffplcLgdsC8G+fft47bXXeOmll/jSl75ER0eHsrh6+CDza/UcWoVHF9hWLECEUr4PdzAI\n3V3VlY3cW6v3ouMAunejz0dXMmIN9Wvq55Pvts679X7k+61D/t4KugJNoVdrCCXHPfroo5imyblz\n54jFYiQSCcrlshJ+Aa537NjBtWvXyOVyzM/PUy6XiUaj7Ny5E4/Hg8vl4vTp0xw9epQdO3YwMDDA\n9PQ0e/fuJRAIYLVaiUQi5HI5stks0WiUo0ePqucunpF4DPKZxWKhv7+fb3zjG1QqFTY2Nrh06ZIy\njB0dHTz99NNUKhXq9Tqbm5vcvHkTn89HsViks7OTnp4eVldXaW9vp1AocOzYMSYmJviv//W/3rOc\n3lclUa/XOXLkCIcPH2Zqaopr164psMflctHb28vw8DCNRoObN2+qGO7KlSskk0lyuZxCk0ulEpFI\nhGKxSK1Wo1QqkcvlOHr0KKOjo0xPT5NKpRS4JItLX4SSAZE5yOcCTgnOcejQIZ566in+4i/+AtM0\naW9vZ2hoiJ6eHiKRCFevXuXll1/m61//Oo888gjvvPMOa2tr6vx/8zd/Q61WY35+nrm5Obq6urDb\n7Ur5yLx0N1+3guJ26opFBEEXWD2E0YVUrqULvY536J6NCIpcW3ASUa4ihLpwihAKJqQDsbry0TMy\nesalNXSS4/TP9XNIaKJ/X88e5HI5PB4P+Xweq9Wq/g+Hw/T09NDe3q7A8VwuRyqVolwu8/TTT7O6\nuqqutbGxQSqVor+/XwmfYRjs2rWLnp4eJicn1Xk/+clPKmzM6XTi9XoJhUI0Gg3a29ub3lWpVMJu\nt2O32xUQ7vF4mpSn2+0mHA7z9NNPc+zYMQqFAgsLC3zrW9+i0Wjwm7/5mypLePDgQfbt20ej0eDd\nd99lYmKC+fl5RkdH6evrU2HUvY77qiSsVivvvfce8/PzDA0NYbPZ6Ovro6uri3K5zPT0tMIIbDYb\nq6urZLNZcrkcmUwGt9ut0GG3261ia1EUt27dIhAIMDc3x+3bt/F4PBiGQUdHh3pRmUwGu92O1Wol\nlUrhcDhwu90kk0kajQZtbW0cOHAAwzA4ffo0jz76KE6nk4sXLzIxMcHU1BS5XA63283evXtZXV0l\nkUgQjUb59re/TblcJhwO4/F4aGtr42tf+xrvvPMO0WiUer3OW2+9xe7du1UopOMRoghaY3JdgO6G\n/LeCdboirNfrTWGFfh05h2RQ9PekYxESUugKovV8epjS6g20pi/vlsLV701/Hq1hjn6v8ruuAPP5\nPFtbW8zNzREIBNR5EokEIyMjDA0NUavVOHDgAMlkklOnThGLxYjH4yrUzOfzGIZBJBLBbrfT09PD\n1tYWw8PD2Gw24vE4x44dU4rI7XZTr9eZnp5meHgYh8OBz+dTz1bPWMj7EMMgnoS8Z7vdTj6fx+fz\nkc1mmZ+fVyHq3r172bNnD6+99hr//b//dwYHBxkfH2dlZYWOjg46Ozv5tV/7NWZmZshms4yOjvLK\nK68opXqv474qiWg0SiQSwe12UywWOXr0KG1tbeRyORYWFiiVSmSzWcrlMl6vl3g8rmJiSY0uLS1R\nrVYJBALUajWSySRerxeXy4XL5eLatWssLi6SSCRwu93cunWLyclJdu3apdzqdDpNuVxWlkfcPnE9\nY7EYL774Iuvr63R3d3P27FkWFxeViygewdmzZ0mlUmxubhIOh1lfX6dYLKq4e3BwUHketVqNSCTC\nzp07VQwqAijYgcPhUIIlVlQsqp6NAJR7L5ZVz4DY7XaFVQj4KN6Bbvn1TIj+v4Q7ujcCd3AQ4EPC\n3YoP/KyQqRWY1EMbESSbzdY0P/1ncdn1bI08C1Gwq6ur3Lhxg3379rG1tcXDDz/M0NAQ165
dY2xs\njIMHD+L3+7HZbBw6dAin00kgEGDXrl3U63VSqRTRaJRbt26xsbFBNpvF7/dz48YNwuEwBw8exO12\n4/V6cTgcxGIxzp8/TzabpaOjg3A4rIyb2+2ms7NTeaqiGDweD9lsVikS8WgbjYYCON1uN21tbbz+\n+uvs3LlT3euxY8f41V/9VZWpOX36NNFolM7OTm7fvs2lS5colUrMzs6qbN5HGfdVSQwODirAcHp6\nmkAgwCOPPEImk6FWq6mXU6/XldZfXFxULlkmk8HhcBAKhZSA62mj3t5e3G43w8PDpFIpHnvsMVKp\nFD/4wQ+U+57NZtULGxkZYX5+XglaPp9XeMZrr71GuVzmJz/5CalUSi1Wh8OhUmvpdJpoNKpcWrfb\njcfj4fd+7/fw+Xz84Ac/oFar8YUvfIFisUi5XKa9vZ0TJ04wPj5OT08PgMqyiOIQaysWXk+H6VZV\nrKfOEZCfdeHWCVp6SlKIPKJQBFPQXWMR8NbwQ4ZkRiTt5/P51LvRMxpwB8sQK9qKIYhy0zki+n2K\nMhGeityDKNmtrS1u376tBPutt97i8ccfV97F8PAw4XBYPR8RdAkvxdBYrVbW1tYU0JzP5+nu7mZ5\neRmr1cr4+Dh+v18BmMlkknw+TyqVIp/PUygUqFarTE9P8/DDD1MoFJQBkGddqVTw+XyKfyPvr1wu\nk81muX79OjMzMzidTmZnZ0kkEly5coWHHnqII0eO4PF4CAQC9Pb2Mjg4yPLyMktLS7S3t5NOp5ue\nmaz3ex33VUk89thjLC4uKoblxMQEa2trigVXLBaJxWI4nU6KxSITExOYpsnCwoJiEJqmSbFYxOv1\nKguj8yRM08TlcvHUU08RDocZGxvD6XQSi8VYX19ndXWVcrlMtVpVrM18Pk+5XMbj8eD1ellYWMBu\nt9PX10c4HMbv92OaJqFQiK2tLcXGs1gsrK2t8eijj6psxD/8wz/w93//91gsFg4ePMj4+Dg2m41I\nJEK1WiWfz7O0tMTY2NiHXHhZKOI5CJYAd9iOegZCJ4EJmu5wOIDmFKikXPXztAJ9OnCoW21RCPq5\n9AyH4BCC60iKUt6VDvrK3AXJ15WhKAXxnOBOSCXKTM8Cyb2IV1EoFEgkEqysrJDP59V7np6eJhgM\nMjs7SzKZ5LHHHlOe19bWFqVSibGxMd5//31WVlYIBAIsLi6qEMHpdHL58mVFeHK73TidTiwWiwqD\nNzY22LFjB11dXezcuROHw8Hm5iaLi4tcuXKFz3/+8wSDQba2tshkMqysrOD1etWalmdXr9eJRqO8\n/PLLBAIBRf4rlUqKPHb16lVM0+TJJ5+kr69PpVAHBga4dOkSb7zxBn6/H4fDoQxopVL5SHJ6X5XE\nxYsXcTgcnDp1isHBQW7fvk08Hmd8fJxYLEYoFMJms5HNZrl9+za5XI7e3l7y+TzxeFwJUVtbm0qL\nWq1WMpmMElin08mFCxfo6urC6XTi8Xjo7e3FMAzeffdd/H6/AreEyAXw/PPPMzAwwMmTJ7FarQwO\nDnLw4EH6+vqIRCIqBZXNZllfX6enp4dYLMba2hqjo6OUSiX+y3/5LwoY+43f+A0ikciHsg5er5ff\n/u3fvmssLj/r9Gg9a6ALi8S2IpDyHTmXfC4CrQuXWGFdOTgcjibB1j0H+acLv46BiLDrGILFYlFE\nIT0zAigh1dO1relL/T5aFZUoPPm+sGg9Hg/pdJr19XV2797N6uoqvb292Gw2kskkN27cIJfL8fTT\nT2MYBuVymZmZGSYnJ5XyS6fTDA8PY7VaOXXqFJOTkzgcDubm5hgeHmb37t1KsRWLRQzDUGvI7/er\nzIZhGJw7d055C6lUCthOfxYKBSYmJpQnKp6HPJtkMqkybNlsFovFQjKZxOVy4fP5mJ6eplar8eij\nj9LR0aFStOfOnVPlCzt27ODIkSO8/fbbynDc67ivSuL
FF1/kypUrxGIxZcVtNhtzc3PUajVmZmYY\nHx8nk8lQLBbJZrNMT0/z0EMPcfbsWVVTIW5hOp2mWCw2WVyfz0elUiGVStHb24vD4SAYDJJOp/nD\nP/xDvvWtb2G32/F4PMRiMRWznT59mgsXLigg89Of/jShUIj//J//M1arlaNHj9Lb20s0GgWgUChw\n6tQpTp06xfDwMPF4nI2NDSqVCs8//zwOh4NGo0GxWGRtbY2enh6FYIuV17MFuhDKEDdad9nFZRXL\n3BqCwN2p0LoSuRuGoBN05HidY6ADhjJ0AW8FHXWA02q1UqlUPpTJaL2WPke5h7uBmqJk5BmJgHm9\nXp588knOnTtHR0cHTzzxBOFwWLETp6enGRgYYHl5WZUHjI2NAdtebnd3N1tbWwwODpJKpRgcHKSn\np4ehoSHi8Th2u51QKKTCl+XlZSqVCp2dnVy8eJHDhw8rMFPS9wIo3r59WwGhQh4U3EHWgeBSPp9P\nhVHy/oTwd/36der1ukrVCkg7NzenlJbdbieTyTA1NaU8o48yPpbOVPd0YcMw33jjDf7sz/5MIbge\nj4dIJMKOHTs4deqUcvFv3bpFMpnE6XRiGAY+n0+lktxuN6Ojo5w5c4ZcLqdiWGHayYs0TVNlJqxW\nK0eOHOHSpUucPn2aubk5yuUyPp9PpYf6+vowDIOFhQXK5TKDg4McPnyY48ePs7i4yPr6Oj/+8Y9x\nuVx0d3fT0dHBmTNnWF5eJhQKqbk6nU4++9nPqsKhoaEhfvzjH/ONb3yDHTt2KJdcxxV0VuHdEHwR\nHp1jAM1pTD1jUa/Xm1x/EVTxIHRhFPKWfF/HESRM0UMAmbco5lbFpHsJOhYiXqAoHj2lK/fZmlqV\nn/WCLlEo+nkky+VwOFhYWFD3LDR7UYxS17CyssLly5fZ2NjgqaeeUilLj8dDR0eHiuGj0Sjt7e3q\nvoVhubq6SiqVwu12k0qlKBaLdHd3Mz4+ztraGrlcjnA4rIyZaZqcOXOGwcFBQqEQ6+vrnD17lnw+\nr4xAtVrF4XAQCAQYGhoinU6rc5dKJfX8EokENpuNzs5OPvOZz3D79m0KhQJWq5WbN29Sr9dVWKob\nk5/85CeY91gFel/7Sbz00ks4HA56enooFouk02k8Hg9dXV08/PDDuN1u5ubmVPGLaW5XxgWDQaUo\nAG7cuPGhYiWd57C1tUU6nea9997j/fffZ3BwEIvFwsjICPF4nFKppBaW0+nE7/fz9a9/nc7OTgWM\nxuNxXn31Vebm5lTabPfu3fT19XH79m36+vr4rd/6Lfbv388nPvEJQqEQwWCQlZUVTpw4wZtvvsnp\n06d5+eWXlcspcTg0hxgiqCKUrdZTzyjollgES8+ICI6gW26dFamnN1uzIzrfQKyVzk+AZo6Grsjk\nGkIf1jEkmRugBLv1fKIYG41GE29EFJfcp7xn3TgIjlOv1+nt7aWtrU1llHQl53Q6aWtrUzUbuVyO\n8+fPqxBWz8ik02ni8bgCDQuFAvF4HMMwiMfjXLlyhVQqhdVqZWFhgaGhIbxeL4ZhcPv2bUXGgjth\nyLlz57h9+7YC4D0ejwrzAoGAKjOIx+MkEgmq1aqiBIgH2d7eTnd3NxMTE2QyGW7dusXc3Bybm5vs\n2bNHZf10IFxPb9/LuO88CdF6HR0dJBIJZmdnSafTKpUJqNxxOBymra1NlY83Gg1FpJKXXywWm/LM\nHo9HWdBMJkO5XOaVV16hv7+foaEhtra2sNvtytKKQP3gBz/gscceY2lpiVgspuiuf/mXf6mESRZR\nOp3mpZdeYvfu3Yq6/dBDDzE5Ockrr7yCw+FgZGSEjY0Ndu/ezXPPPdcEVOpWUf9dXqgel+seAtyx\n5Lr7LcfBnbBCDyH0MEPOrZO5dCq1CKndblexrI5L6N6H/F0Wpbxj/Vyt5Cnd85C/yfVlTrpX05qe\nvZvnpYdEVqtVcRT
keFGYgPJmDh06REdHB9VqFZfLRUdHh6ogrlarZLNZZmdnuXr1Ks888wzd3d3q\nuY2OjiqAfGlpicnJSebn51leXmZ9fb2pfiWbzVKpVBTFe2Zmhu7ubgVcdnR0qPcva0lATECVDVit\nVrxeL/39/ZRKJcUFKhaLOJ1OIpEIAwMDhEIhLBYLKysrTE1NUSwWcblcH0lO72u48f3vf59z587R\n2dnJjRs3GBoa4tatW8olGxwc5Nq1ayqD8eSTT9Ld3c3p06dJJpOKVQl3SEOFQkFlQ9xuN263W1X+\nifb1+XzKDRQ3TCylLDoJZ4TyWqlUFKfC7/czMjJCuVxmdnZWhTCNRoNdu3bxta99TWU7yuWyUlyV\nSoWtrS06OzubQghdOEWAZGFJXNrq+uveg6TRxELqtQpwh+Itz+mD5/8hPED+JgpXT6G2CrAuqK3n\n0pWbni1pBU9bLZquHMWbkWyVCLWe0ZD71MMsnUMi96srztZKVFk31WpVZcMknSnvSGoi5ubm+Md/\n/EdcLhePPfYY/f39qt5HgMSNjQ0FKEciES5cuIDNZmNgYIBcLkexWGTnzp1sbW1x5coVdu/eTXt7\nO+fPn6dcLhMKhejr61P4WmdnJ93d3Zw/f17RAqSo0DC2a4oaje2aoV27dnH9+nVV5ez3+/H5fPh8\nPsXbkMzfuXPn/nmEG8J6m56eZs+ePSoT8PDDD6sa+CeffJJIJEIoFFKsyEceeYSdO3d+KH6GbVdd\nisEkVVmtVpmbm2NmZobNzU2y2SywjeqLeypxqCgKm83G5uYmmUxGKSJRPIVCgVwux/r6unroPT09\nuFwuZmdnWVtbw+FwqOIgi8VCNBrF4/HQ39+P0+lUrrTuTbSmHoGmRa+HHjpTTwRAFJouUK0App66\nFBcd7lh8PSSpVqtKOenpUb22QBcm4Y2IdZYMkMxX5iBhgo456KQo3SvQBV9+1usd9HnrXpX+XOXc\nQpgTBSMhkCghj8ejwliZWyaTIZFIcPLkSRqNBsPDw0xMTLCyskKhUCCVShEMBtm7dy/d3d3s37+f\nHR+0CfjpT3+K1+slGo2SSqXYs2eP4gSVy2XVvsA0twl2drudZDLJ1tYWLpeLWq3GysoK77zzDna7\nncnJSaU85b3lcjnK5TIDAwNUq1UikYjiVgggKn+X1gwf1TG4r+HGzZs3GRsb4+jRo4qRJhTUQ4cO\ncfHiRTweD/v27ePSpUsEg0Hl/ktsJvGZLBpAWQK3202lUlHEJskPr62tKeuYyWQUGUsWlCx2SXFK\n845Go4EnzN+gAAAgAElEQVTf71f0WGni4Xa76e3t5Qtf+ILCWVZXV6nVavh8Pl599VXW1tb4pV/6\nJSYmJposZmtGQVcSIhC6+61nI3ShFtf0bopHt9j6OVvPBc1sSf27urXW/9cFW8dQdKWm4xqSyREQ\nU64huIN4K3KcpExb60V0r0Z/DrrHoocfglPp9yxDZ2zqNSKSSjUMA7/fzxtvvMGzzz7L4uIiy8vL\nLC8vMzIygt/vZ9euXaqvycrKijIQeu+J8fFxhb1NTk7S1tam8KlGo8HevXvJZDLEYjEGBgZwOp0k\nEglyuZxSSELC2trawuPxYLPZ8Hg8rK+vq7SqEA1N01S8o56eHgWo/7PiSaTTaV599VW6u7tVLJhM\nJjlx4gR79uzBbreztLSEYRiK2SjU1UKhgMfjYWtri0AggN/vZ319Xbl91WqVVCqlFqO4/OI+OhwO\nld6UJjdwh08gbfB0Ykuj0SAWi6n2Y6urq2rOly9fVoSZv/3bv+VTn/oUp06d4vjx4/zkJz/B5/Mp\nL0VPJcpotcjymU46EmHRMQn5XMDB1thchi6ArcpJvDAdP2j1RnT3Xy8ck+/pmRQdCBX3XoacQ7eI\n8rsIs16wpc9dVwiiNPUUa+vc78YrkXcs70FXVvrfdO+lXC6rSt1r165x7NgxbDYbM
zMzrK2tATA1\nNUU0GmVpaYmuri5CoRChUIi1tTWWl5exWCycOXOGcrlMLpdTjM5G405R4cLCAsFgkGAwyO3btymX\nyzgcDjo7O8nn8yrsNE2TtrY2RfmXeyuXy8pTEAC0Xq9TLBZVmrenp0e167vXcV+VxM6dO6lWq3R1\ndbGysoLb7eb48eO8//77zM7O4vf7sdvtKpXZ1dWF1Wrl5MmTqknM4OAgPp+P1dVVBZ4JeCOsQwGY\n5IEKT15+FxQ6k8molJEAqhIu2O12crmcSidKylQWeyaTUR1/pqammJ2dJRKJ8Kd/+qcYhsFXv/pV\n9u7d21RTIItfLLvuBciQxdra7MU07zAPxfrpaUi5hk48EiUgWINu8eVZAaqnQStI2ircumuvz1Xc\necFqdIBT7lNXyq1AYytwqnteMg8d0NSflV6nopOz5Bw6YKl3/apUKuoZtIYzkvb8jd/4DfL5PLFY\njHA4jNPpJJ/PqwK9a9euMT8/z8LCAplMho6ODmw2Gz6fj97eXhKJBIlEAtM0eeutt1Q9j/SvkF6r\nMm8BMp977jneeecdlbItFAoKuHe73eRyOba2trBYtlmf4hlLTdDW1pYK8SYmJvB4PB9JTu8rJuH3\n+xkdHaXR2C6Aam9vZ3l5md7eXkKhkAIb0+k0fX19XLp0ifPnz7OwsECj0cDn8zE4OKgqQKVaTwRb\nUqciFLJoAZXN0KnM8rOkJ6XTVL1ep1wuEwgEFJ6ghwviygLqZyHJlMtlSqUSJ0+e5Ec/+hGxWKxJ\nCbRaR1mgulWXRaNbZF2hCD4hDWV0voL+mQiU/ncRVgmnqtWqslC6a64rBv2Z3S08EsWnl7LrKWqx\n7HrGRqdZi4XXQyA9/GkNw/T3oCsIOZ+M1nSzPAfJfoliFaxH5690dHTQ399PIBBg9+7dDAwMKJCz\nUqlQKBSIRqOEw2EMY7tiVJofud1u1tfXAVQDmnA4TKlUIpFIqAY2xWKRXC6nCrxkDf/DP/yDAu+L\nxSKFQgG/369Kz+V91Go1xdaU9eJ2u1WI4nK5qFQq7Nmz5yPJ6X31JASwuXnzJtlslrGxMRYXF3G5\nXGSzWWq1GsFgkHK5zLvvvqsa0HZ0dKiHsb6+rnpb6lZWFqa8KAGuAoGAEuRisUhbW5vCQ/S0obAb\nxSUUzrz04hQLqxOUJFVYr9dVOzsR/jfeeIO5uTnGx8cVCCsCrC90aEbkdfddB/LEMsv5dSsr82nN\nFogr3eqWt1ppfejUap3ZqLv/+nPT/yY/y/V1xSPei4RWkn6W7+nX1e9RB3z1eetrSvdC9DBEfz7A\nh7yi1mNlzmIY2tracDgceL1earUao6OjWK1Wurq6WFpaYnx8nM3NTdVhe3BwkOnpaa5fv654EEKS\neuqppygUCty8eROLxcLg4PYOFIlEAofDoXCKer2ueqZsbW2pDIysEVlvNpuNTCaDx+MhmUw2rQ15\n91arlcXFxSbFeS/jviqJy5cv89BDD7F//35u3brF5uam4p1Lo9DZ2Vl1U4lEgo6ODkVzjUajxGIx\nYLtxhzwMr9er2qJXKhUCgYBy2fWctbiLsM3F0Cne0k5MF6hqtapqRCRsEXyjWCwC2wtTSszL5TJ+\nv59yuawW1fDwcJP1lXPrOESrwOrIvnxPB+3EqusC0krxluNEuEQR6kIn52j9ni6kelyvX1N+F1BM\nri90dB1zkYWr09H1a7emNXXFoV9fn19r2KIfczfFIWtAV5wyD73/p4CqosA8Hg8Wi4VIJMLx48dV\nE2VpaJtIJEgmk1y9ehWLxcLAwADZbFZ5DFLlLMflcjlM0+T69etYLNsEL0m51ut1gsEge/bs4cqV\nK6py+KGHHqKzs5MzZ86QTqeV19nW1qZ6W3Z0dBCPx2k0GvT09CjOz8bGxodSz79o3NdwY2Njg2Kx\nyKVLl1haWuLs2bNKWx45ckTxDKQ/pOShl5eXW
VlZUXlniU+FKTc8PKyQXPEg3G63EgQpKRfwyzC2\nG44K8WbHjh3KwkhaURaXXE+uJa6xFNLo6UF5GaFQSJUcS9pNX5yiBFpb2OkovigdCRt0wRCloguc\nzWZTbqYIpG7R9dRq63zlmjovQ/caBHiV6+meDDR32JbFrmcv9GcrKVZ5D+I2i5cl55NQqDXsab2+\nhA+tilO/ts5A1P+mp2F1IFaGvFvBgcLhsKJbS8q3XC6rlvypVIpabbtzunBkbt68yeTkJI8//jjP\nPfccgUBAhaTiqeqFYul0Wt23aZp4vV7y+TydnZ0888wzqllRLpdTnrMY0EAgQHd3t+JhCCO01fv6\nReO+ehKBQIDr16+r1M4OrQ39iRMn6OzsVOQRqdSEOzXxEmPt+KCwRQDF9fV1pWBkIYjAiMIQpSAZ\nAfEyotGo4uR7PB7C4TDJZFItdOl/EQqFVA8JcUklri0Wi6qILJ/PK29FXmgrIChDFpl4TnJcqwDq\nIKxO0dZDDX1xi6Drv+sWuzUMudvfdACw9Zw6R0VSsfocBH0XTop8Jv/rnpGeXZB7kZCxVXnKM2oN\nn1rDC8EvBJvSn0Nr9kUHYeU7Ok9EFIhk2mR9Dg8PUygU2NjY4PLly4pvIViZhBDr6+tEo1EcDofq\nvCYNiPr7+5s8Az3dGQwGiUajTeeyWCz4/X4Mw6Crq4u9e/eSy+XY2NhgamqKZDKJxWIhnU4TiURU\navSjApf3HZMQQpU8TD1H/e677za5UZK9aG9vp16vk81mGRoaolwuMzk5SW9vLzMzM6yurirBFFdR\nzq1bQQlB5MVIHl0UiDDc5DyixWVzn1gspioDx8bGeP311xWfX/oqSFVqR0cHjzzyiPJodIxBB9J0\nodTRehE0PVbXv9OaGdGFQxa5HoKIgOsWVv+s1Z3XPZ5WvKAVVxHvSo5pJWTJu2+18q1ApB5u6J/p\nmQo93JIhwg18yNvRn4vca2uIonsmujcl8xZQsFQqsbGxoUhagGLyimcgtGlx/YPBILFYjNu3b1Ov\n19mzZ4/K1GWzWWKxmPIMarUamUyGtbU1arUaQ0ND5HI5xc2ZnJyko6OD1dVVCoUCvb293Lhxg6ee\neore3l4uX75MrVZTRq+np0cB0x9l3FclIe5VIpHAat3uEmUYhuqkI3nhbDarYv/u7m6V4pEuUgcP\nHqS/v59kMsnu3btZWVlRbErp8gPbL7hSqShkWSjTwoMQNw/uFFHpCLcwNa1WK8FgkN/93d+lt7dX\nkV4OHTrEiRMn+F//6381ZQ2E3z80NNREb9bTdXBHYGWhilW2WCyKNObz+ZTAx2IxFYKIhdMtte6B\n6AImv8txcq+6sLUClDquIEP3iuT74oFtbW2pXL1YRgnJJMzTBVDOoeNGMi9dyblcLuXJiWdQKBQo\nlUoqcyRhg95YRxSHKHDdU5B5y3xFCcjzkLnm83mWl5ep1+t0dnayvr5OtVpldnZWeRbZbJZisUil\nUuH9999nz549qgO81Wpl9+7dJBIJVTlqmqbiRnR3d6utGgTbERmR8Eza6QvDt7e3V9V7FAoFRdI6\nevQofr+fs2fPEg6Hsdls5HI52tvb2djY+Ehyet+7ZYulkxy1LHbpICwvx+fzqVBkeHhYbZMnGv36\n9evk83nFgjTN7fSUdEo2DEMVxjz66KNcvXqVtbU1VaMvYCjcAa4E3BTilgjer/zKr3D8+HHV/0LI\nWF6vl4MHD2K1Wtnc3OTUqVMEg0GsViu7du1Si1qETY81xeLqLrPNZqO/v79JWHTra7PZWF5eVgCq\nbvF05qBuKfVMiXwuIZcMyejosb+e3tRdbxl6yCI1MTLuZqXlGcg9i/ci4ZXuLelCLkV++n6a+n4s\n6XRatT+U5yAVoPl8XtXuSDiqhx/yHOS8EjaZ5jYPJpVKsbW1xY0bN4hEIkQiEYLBIH6/X1WH1uvb\nO7OJF3Dmz
Bn8fj+Dg4MsLi5y9uxZldnZtWsX+/fv5+zZsxiGwa1bt9ReNA6HQ7X/O3ToEKlUio2N\nDRYWFlSPCa/XqzpjOZ1OksmkalLjcDhUeLtjxw6q1SpTU1OqcvSjjPuqJMLhsPIgTNNkaGhIIbjV\napVMJqMAK9iOfS9evMjc3Bx9fX34fD5WVlbY3Nyks7NTEUpEGQgPwmKxKGKJzWbj7bffbkrBZbNZ\ntX9CsVgkEAiwubmpKKyS6/Z4PBw5coRSqcR3v/tdrFYrBw4c4IknnmDXrl2qQWpbWxv1ep3du3cT\niURUmzOv19vkPsMdko8sVrFgQvXWAb1WroTH42F8fBzDMJR10r0BUcJ63K4Lpc510HEIHRzV56t7\nIeIJybxF+enAmC6A4nXo15bjdU/vblkeGS6XC7fbrd6n3qvRarUSCoVUhfDGxobqQiYNWoRYJByb\ncrmsGhrrYZMcKz0mS6USS0tL3Lp1SzU6ymQyXLx4kQMHDtDd3c3MzIzygCUcNU1ThRmCf4kxGRoa\nUg2Z33//faWEpEmScHyGhoaIRCL85Cc/YXp6WnlS7777LqFQiMHBQeUduVwuAoFA056fTz/9NKdP\nn1beuuxw91HGfVUS+/fv58yZM4rnXqlUVH2GLEKJ6YWzIJvfzM7OYpqmKmF2Op2qFb7QUyuVispA\nWCwWPB4PPT09Kq0qAif0YUBx72WIFyE7Lr377rvY7XZ27NjB5cuX+elPf0oqleLTn/60KiiTJjqy\nZ4jk1XUrqacdZYgQdXd3qx3D4O75fmj2LDo6OvB6vWr3agFtZcii1616q1DooY5uRVtBQ13ptJKa\n5FnrQGjraM1I6DiDnvHRPRYBjqVmQd6x9G2UectWfOFwWL1POZ+UUMu8PB4PpVKJra2tpmyKPB95\n//Pz81y/fp2NjQ1Ve7O2tobNZmNqakr1JbHb7SrdLdtAyDMTaresLZ/Px9TUFBcvXmR1dZXOzk7V\nSjEUClEulxWlW9iVAnA+/fTTbGxsKCNntVrVejFNU3WyyufznD59mq6uLmUwHQ4Hfr//58pl67iv\nSkJaf7W68yMjI2rfzFQqpXLVeqPQeDyu3Pe2tjZFbRUBlcItoVIL92JmZkbRvJ1OJ1tbW03FRFLp\nWalU1IvN5/NUKhXGxsbweDxcvnyZUqmE3+/HYtnm5B84cIDh4WHq9bpafEBTLwAdlGzFCEQgpMQX\nPty3Qf9MFqD+mcvlUoIQjUZVaz35brlcVt/T6eH69UWB6VkG3cvQzyeC1Rri6AzGViKYsAkl09Q6\n9IY3rcBnuVxWG+eIctCxBvGa5LnrIYvMsZUj0NnZqfbjSCaTpNNplfUQTyIWizEzM6OYkZJehu3N\nqK9cuaLwJQlZpLhPnpt8Vzzjq1evsnfvXur1OouLi6rmQq4vAPnq6irf/e531VaWTzzxBGNjYwwM\nDKi0eFdXV1MzHVHyYlBh2whkMhnC4TCPPPIIf/u3f3vPcnpflUShUKBcLjM2NqYIJ6urqzz66KMU\nCgX1sPv6+qhUKqyvr7O8vMzjjz/OgQMHmjjvAjwK0qwj/pIuEksnOzYXCgXlRRjGdl5aahYk5i+V\nSkph/P3f/73CIYrFInv27ME0TV588UVGR0ebLKekA1u9AX2/C1nUutWVz3Th1bMSenYGmrtTiYAY\nhqE6eesFTPJcJJzQhVvHCVqJWDIPOY8MUa76Z7qgAh/qg9HKQ7gb10MWut5HQtLUkUiEQCDQhB+1\npoPv5sHonph+Xw6HA5fLpTzNjo4OlpaW1DwkpJFrptNplWXwer1NXq5gW0LT9ng8BINBlR4Xoyj0\n642NDQqFgtrBTa/NkGtLZbRhGPT39xOLxVSbvEZju1Hv4OAgO3bsoK+vTz1Dye4dPHhQcS2EZ9Td\n3f0LZVMf95VMJZpXwCe73a72MazX62QyGSYnJxVHvbOzE9M0mZqaorOzk+H
hYbLZrAI1hW0mWYF6\nva56FOphh8WyXQgjwm8Yhur/IItlaGhIhTE9PT10d3czOTmJaZoMDw/T3d2tOlEFg8Gm+NkwDKVM\nZMGLkMncdKHRMxJyLj00gWavQn7XSViidOTvQvYRb+VuWYpWYWr9uyD+uqKTa8MdopRcV8dMdKKW\n/l25Tiu/QQhKrZiAzNPj8ahGsk6ns4mxKeeQ//X7ar3u3Y6T/6WnyMDAAJFIhM7OTux2O8FgELvd\nzsDAAF6vl8XFRex2u1pjwqKVPWI8Ho/q/C6l4YuLi1y+fLkJt7l+/bpqniR9S6RrVLFYVIoAUD0i\nJF06NTVFd3c3Q0NDarOoQqGg+kicOHFCdZgPBAKMj4/T2dnJ5z//eXw+3y8WTm3cV09CUpvnz59X\nvSvr9TpXr16lu7ub3/md36FSqSi0WpBdaQkmOxYJz0HKwYUX0d7erjYdXlhYAO70j5TQRMcj9C5Q\nGxsbCtzq7+9XaLNY7K9//et85zvfoVqt8m//7b/lX/7Lf8nIyIhKV8rCr9frqiW63h1L5wyI9RVs\nRreed0P6W//pwKIe5+sKQ4ROnoHcsw4WyrnEOoZCIdXRWSx7PB5XYR+gyF+iUGTeejghz6I1xJJ5\n6SCzKApJAerPQMce5Ni74R/6Z3f7p2duyuUyy8vLjI2NKSMhGQuA9fV1ZmZmAEilUkxMTHDjxg26\nu7tJJBLUajUGBgZUyldX/NlslkOHDuH1erl+/briRgi+ZZom0WiUoaEhTHM7xW6z2RTILutTPIP1\n9XX1zPr6+giFQspw9vf3c+XKFbXfyPz8vPIann32WdVwxuFwKLzmXsd9VRLyUOv17YalPT09zMzM\nKIqrw+EgGo1y4sQJ8vk8/f39zM7O0mg0uHDhguouLK5ZIBAgk8moDlPlclnV7QvO4HQ6icfjOJ1O\nlRcXb0b+Sdt9IW4NDg7y9ttvU6/X8fl8PP/888zNzfHrv/7rqjz4W9/6Fg6Hg6985SscOHBAhUqL\ni4u89957ZLNZPv/5z6tydx0YDAaDKgYW6yvehgzdI9CH4AbQXLthGIbyrEQh6oKmhxeVSkUpR6vV\nqtqeCXagV3D29/er7wm4JvwSURjiOenZDGgmNulCLgKh/+zz+dja2lL3IlmNVuKWzEvuS5SI7hG1\nEsfkO7JFQ6VSYWpqin379qnQVYzC4uKi2ktzxwcb7szOzrK+vq6M2uXLl+no6FAhbzKZZGBggEKh\nwPT0NIlEQvVmfeGFF3jiiScol8t873vfY2Fhgdu3byuqdb1eV0pCygf0NbCysqJAy1qtpjabeued\ndzh58qRqdAPbDN+RkRGi0ah634FA4J9XClQsrNTbh8NhlpaW2NraYn19XbXJLxaLSpPu2bOHkZER\n2tvbuXTpkirKkkUhYYvL5VJgUKlUasIrxFqL6y+8DPm+eCkTExNN8xCBjkQiHD58mFpte/PXUqnE\nD3/4Q5LJJPF4nKmpKeWCv/TSS9y6dQuHw0GhUOCLX/wiIyMjTWXrAs5JibJkduRvrUxF+VyGLjDi\n5kt4lsvliMfjH2q4IgIlv+tKSpS0Xsughyzyz+VyKbxIXF0B7vSQRvd4WoVYlKV8Ljl/fVe2YrGI\naZokk0lFDGoFIOU5tHoN+jPRh2FsM3lHR0f55je/yc6dOxWD9u233yYYDKp2/CKwCwsLqkS7s7NT\nFUzpuIy0kOvu7qZQKKitAIVQd/DgQWZnZ3nzzTe5cuUKg4ODivYvvSakK5qAwBLyCRlN6O0zMzNc\nvnxZyY2EzxaLhUAggNfrVZ7a6uoq77//PocOHeKJJ574SHJ63xmXkvNeXFxkcHBQdRqOx+Pkcjly\nuRwPPfSQWpAdHR0sLCwoz8Fms6luURLT6f0QhDwjYUVr7CztzOFO30ghJl28eFH1ypRa/p07d/LD\nH/6Qxx9/nNHRUcXqHBoaotFo8NZbbzE
/P09vby+pVEqlal0uFz/+8Y/p6Ojgj//4j9X1KpWKOl42\nOF5YWOCXf/mXlfKTod8DNHsOIvC6MHs8HrXzeiKRYHNzs8mCyzkED9CtuaD4uosvC1cHPcVLa2tr\na9qKTgcvBYPQvRdJ0wmjVUIz8Qqlnbzb7VbNXPV70+9dlIC44q1hGKA8S8k4iKK6ffs2gOoj2dfX\nh9/vV5vcuN1uQqGQKsSam5tTre9dLhfhcFhR8aVK2GKxcPXqVZV5qFarHD9+nM7OThXeXLt2DYDV\n1dWm0ErfuUtnn8o7sVgsaisGoVsLP0OATyFaDQwMKBqB3+9Xu5e1eqO/aPxCJWEYxneAF4BN0zT3\nf/BZCPh/gCFgAfhN0zS3PvjbvwG+AtSA/8s0zdd/1rmj0aiy5BsbGyoGnp2dpVwuc+HCBeVlHDhw\ngK6uLs6cOcPVq1cVAiyxr7hQsgOzWOZEIqH6TYg7aLPZ8Hq9+P1+NjY2lEAIcUpcb+lABKiW+DMz\nM6qXxbVr11S7/a985Ss899xz/Mmf/IniKoibLB28hcyzsrLC4uIiJ0+eVOHUv/pX/4pPf/rTnDp1\nips3b3Lw4EG1PwjcSTfCHeISNNcS6KxVXZAMY5tHYRgGm5ubyovSXfe7KQa5higF8Xh0fATuKCiv\n10swGFSei55xkHPK821Nf4ryEwUt4Y7FYlGgswz9/vVwIplMkkqlVNGfPA+hSC8sLPDiiy8SDAZV\nyLNjxw5FhzYMg5mZGYaGhggGg2o3LgndYrEYe/fuVYZEPKFdu3axvr6usnUWi0V5mmfOnKG/v583\n33yzaZNhCSOklZz+fHQ8Ra+Bsdls+P1+wuEwKysrFItFlbVwuVzK2xDZMgyDa9euqXZ6gv1IGHev\n4148ie8B3wJe0j77Y+AN0zS/aRjGHwH/BvhjwzB2A78JTAL9wBuGYYyZP0N1dXV1qV2JKpUK169f\nx+v1srq6quKqWq3GT3/6U+x2OwcPHlQPenZ2VmlOcU/z+TxWq5VkMqma1wrmIBZEcAl5oK2CIMon\nGAzSaDQU1XZubo6dO3eytrZGqVTi6tWryiK6XC7Onj1Lf38/x44d46233lLMuWg0is1mo7e3lyNH\njnD8+HHefPNN/uN//I9qUUgH5R07drBnzx7+8A//sCnEgOZCI7GCYmVk/nq8rWcKZAhJR9J0OqAo\ne5uIt5LJZFRXIx3HgOZQR8Iq+Xs4HG7axbqVpSnusn4+sZiiVOTeJGwUj1DHHPTMiVzf4XDw3nvv\ncfXqVX7913+diYkJRV0+dOgQV69e5a//+q/57Gc/S3d3txJ+6RYl4LaAzJLV8Hg8qi3c9evX6erq\nUkI5MjLC3NwcIyMjSkk5nU5GR0fJZDK4XC4FuDcaDZ599lkymQyLi4tqQ+JCocDW1pYCNCW7l8/n\nlaIUpZZMJhUjU/Cznp4eLJbtzt5Sx2K1Wpmfn1fvd3Z2FrvdzsMPP0wwGPzFUq+NX6gkTNM8aRjG\nUMvHnweOffDzXwIn2FYcnwP+2jTNGrBgGMYM8Bhw5m7ndjqdtLe3k8vlWFhY4NSpU4yPj6u0nrhI\nTqeTUqnEhQsXlIZNp9PEYjG6urqYmJhQC00ekLhustBkox+J1ePxuKJj6xv66OcRtmQulyOZTLK2\ntobX66Ver6sqP7F8pmnyL/7Fv2B8fJzBwUFmZ2fZ2Njgy1/+MoFAgP/wH/4D165d4+rVq1y6dAnT\nNPnc5z6HzWbjl3/5l5XSO3LkiAIS9QKkVsBOhFN34cWKwLZS0ck8IlhdXV00Gg2Wl5dVEZwoTxG2\nUqmkPB+4szO5XLfVY5HnKkNCPt0bac3O6MpDYmy5viheqauRVLnNZlNNf1p5G6ZpEovFmJ2d5fLl\ny0xOTrJz504F5F24cIFsNsvm5iZPPPEE169fZ2trS7nrHR0dpNNpLJbt7Q98Ph9jY2PMz8/T09PD\nrVu
3ABRjU9akFBQ6nU7279+vBNXpdDI9Pa14Mfv378flcqkNciS9KwowFAo1EaKq1arqhv2Zz3yG\nTCbDW2+9hWmaaid7u93O8PCw4k50d3eTz+dVVzePx9P0HqX5ktDU73X872ISnaZpbn7wcjYMw+j8\n4PM+4LR23OoHn911bGxsKFxC9hwQNltfXx/9/f1sbm6qtnPCmhQwxzAM1tfXVbs72SNRFqEsEAEk\nJcMhQJHEweK+ykYrEq5I7wmp5ZdKOklVitWQLdna2tro7OwkEonw2GOP8e/+3b/j/fff58CBAxw8\neJB//Md/VHH44cOH+dVf/VU2Njb40Y9+RH9/P+Pj42SzWV599VXGxsY4fPhwU0mzriD0NJ4eLojQ\n6IKsg5niPfT396veGyKAUjErgis5+1bB1oE63auQz0OhEHCngrb1WFFYYo31il99+0ZRQgLy6uCr\nWFe57vr6utpqTzZG0nETaX5cLpd57bXXiMViFItF9u/fT6PRYHZ2lv7+fkKhEOfOnVNEtI2NDdVm\nQP9klrYAACAASURBVAR3586dCmg8ffo0LpeLaDSK1+vF6XSSTqf5q7/6K9Vi32azceHCBSqVitqi\nQealZ55EDqQJs9zvD3/4Q9U7RcJOUShLS0sqU7O4uKiaR0u2SY6V4jePx0Mmk7k3Kf9g/J8CLv+3\ntgGTqrRqtYrf76enp4elpSWCwSC/9Eu/xGuvvUYikaC3t1exJNva2piZmVE1HY1Gg8XFxSbSCWxr\nfCnFlXBEvlMqlZSi0YEi0fpCiBH2pRwrAjs4OKha6QtXo1Kp8Od//ucYhsH4+DgOh4OLFy8yOzvL\n5OQko6OjAJw/f54vf/nLPPPMMwQCATo7O/nsZz9LJpPB6XSyubnJ97//fZ5++mn27t2ryFW6Img0\nGsp91Xse6ixG4EN7W+ggp5DHurq6WF9fV8xUiXtFWeuCqKdQoXn/Ut27cTqddHZ2qk2MxEvQ+RA6\nocflcilvRQRIQioZkonSM0JyjHiZly9fJp/PE4lEGBkZYXV1lYWFBdVmYOfOnWrvC9nXU1iccn9i\nlAYGBujo6FDhLWxvAbG5uYnH42FiYkI1FEqn0zgcDpaXlwmHw+pZCIArG/iEQiGeeeYZpqam+IM/\n+AO8Xi9vv/02b775plKokkaW70tVMcDMzAyxWKyJoCbfkbDV7/eTTCZVtkqMsLRP+E//6T/9f+ZJ\nbBqG0WWa5qZhGN1A9IPPV4EB7bj+Dz676/iVX/kV1tbWWFlZob29nYGBAS5cuECxWGR6elpVc0rD\nmfX1dZUWMwyDoaEhFj7Y9RvuVB3W63XlSkrKMxwOs76+rl6cLF6JuQXoFKsm7p8UykgFXSAQIBQK\nqZb/4rJXKhUuXryoYlvZsSscDhMIBJibm+Pw4cO8+OKLPProo1QqFb7zne9w8eJFjh07phRELpdj\nfHyc5557TjFJdW6BLGa9vFsHEOEOJVysuC5YeroUUJ2NZJ9Uq3W7q7JhGE3ZDmjeqFgEtBXElM/s\ndrvq+SA1IwJI6mCpeDmmaaqeGXr6UhcI3SvSsy2wrWikj+SePXvo6uqip6eHqakp3nvvPR566CE2\nNjYYHR2lr69PbX+wtbWl2gmEw2FVFyThrsViUaXgInDRaJRMJqPo1PF4HL/fz9raGgsLC8TjcZXR\nEk+0q6uLSCTC7t27efzxxzl+/DjxeJwf/ehH6nlJiKunvePxOF/+8pd5/fXXOX78OP/zf/5PhdeI\nFyY4TqFQoFAoqIyGKFbYVr7hcFh5yR9l3KuSMD74J+OHwJeA/xv4IvCK9vn/MAzjL9gOM0aBsz/r\npFNTU/T399PW1obf7yeTySi2m5SPC3BYq9WYnJxUXoTD4VCMNwEbAUVE0beRq1arbG5uKrcrm82q\nmFJ4+4JUC84gWQ8hZ1UqFSKRCD09PczPzytUWSy54B+y0IeGhshms
+zfv59kMsnLL7/MgQMHeOqp\np2g0Gpw4cYJXX32VYrHI9evXaTS293P82te+xhe/+EUlvHBnnwkd3BMhFQ8C7lSRCgApC03nK0Bz\nqzuJXYU8JNfSFZBsUCQu/N1SjHKszFdi67a2NoUH5XI5FTrovALZxFa8CZ3noM+lFayU67vdbkZH\nRzl69CiLi4vkcjmuX7+uLOyVK1ewWq309PSoep/h4WFWV1dZXl6mWq2yf/9+4vE4a2triptx9uxZ\n1U9EXH2p/ZFCNQkZhBUspemGYagCq7m5OcLhMJOTk7S3t6vQ2OPxsHfvXs6fP6/6p0iYIZiObFZ1\n7do1lpaWmjqbhcNh8vm8aqAk70iKFMVzkzS//CyK417HL9ww2DCMvwKeASLAJvAnwN8Bf8O217DI\ndgo0/cHx/wb4KlDl56RADcMwP/OZz3Do0CEMw2B2dpbDhw8zOzsLoLpnWywWVSFqsVjUrkgS+4mA\nyxbrIrR6fnxgYEARfvx+/4f6TIiwiOcinoXe8l0n40i3IOm5KQtG9uuoVquq8YrVam2yXENDQ6yu\nrmK1WhWr0DRNPvnJT/KJT3yCJ598sinOF2Wg12VIJkOPZUVh6H0dZF56RkTOqf8Pd/o95vN5VWGp\nYwBwx5O4m8svc5PnKinZVoWiewK6l6I/X/28ck4JV1o9G3lflUqFzc1N3nzzTebm5njuuec4d+4c\n+XyeRCKB2+1mx44dtLW1MT8/z+DgoGJEzs7OUq/XGRkZUV5jKBRSlH8JPwHlykuGrF6vq6bLjcb2\nVn3VapVAIMDa2hoDAwP09vbS19fH/Pw8r7/+OpFIhE9+8pPYbDbefPNNTp482ZTiFiMnIG4gEODG\njRvYbDZVJCa9WER5STcrvRJUQHYJsy0Wi+JxvPvuu5j3uGHwvWQ3fudn/Om5n3H8nwJ/ei8XDwQC\nXLlyhf7+fvr7+1ldXaWrq0sVyEhcJfGaxWJRpd/Ccw+FQmQyGZVGFSUhlG/5jgileB7SXFV2q5KQ\nRXAJAUgFDS4UCqpfg2lu8+yl0YzEexKP61baarWysLBAR0cHhw4dwul0Mj8/TzabVbTykZERfv/3\nf1+lsj54jk3KTv9cF1IREp25KN8TyyxDJ1u1kqoAhUfo3oe4wPp5ZQ56CCTz0VObuussSkL3EqBZ\n4cj9teIReiajXt/e00SaJ0tLwGq1yk9/+lPefvttDMMglUrxwgsvUC6X+fa3v00mk1Eb5ty8eZOF\nhQUeeeQRDh8+rNi8Kysr+P1+vF5v09YNUlEpDYQSiYQS5GQyqYq+isUisVhMFYp5vV42NzcZGRlR\noLpsWP3WW28xPj6uGivBnYbCArTLc69Wq3R2dlKpVNi9ezfz8/OKMxQMBunt7cXhcKgGN6lUSjXH\nke0l5bmKx/tRxn2tAu3v71e7cHV1dTE3N8e1a9eaNGgkEmnaLTudTivFIfGuIOKSsZC6jEZje0MV\noVSLRZAGNfICxAqIUMuuXfLypAZCgECLxcLExARf+tKXCAQCTRpd+BpSfm6xWGhvb1f8AQlVALVw\notGoitvvFuPLNXUBE09BwqvWcnJhUepehN6vU67RylCE5tbxgthLcx9xr1txAf1crYpA/xya29nJ\nPHRQVA9b7vbP6XRy7do1bty4wenTp2k0GmpjaJ1JG/h/qXvz2DbvK230IamdpEhRJLXvqy3bku1Y\ndhxnceJsTZpM2zRI0/0rOm3vFHOBOy3u910MBm2B6eDOdDDtoFNM28HMNNOkSZC0aVI7dWJl8b4v\nkm3tO7WR4k4t1ELeP5Tn6OiNeht/+D4YfQHDEkW+fPnyd87vnOc85zkOh/AICgsLJd3Mzc3FzMyM\ntAA4nU7k5uaivb0dJSUlSKVSGBsbQ3Z2Nmw2GwoLCwGsNXvRcWjGY19fn1wbSVbz8/Pw+XwYGBgQ\nlWzqnPj9fiwvL+Ptt9/GlStX5
Lth5SkvL0/S32g0img0KjqWfX19cDgcGBsbA7DmWGZnZ9HY2Chc\nF2IPBOyZMtJRz8/P35Kd3lZa9s2bN9HS0oLCwkKcOHECQ0NDcDqdUkumIdB7s9RjtVo3hJn0kBkZ\na3MX4/G4/J5IJOByueR1wBqwxx2T56RBEC3mF6Zr9LwGov4FBQVoamoSIRu299LYSa2NRqOYm5vD\n2NjYBqYhqwn8otlyDGxUa9IOQwOZLJUxxWFJEcCHXgd8WKGaaQmNWPdE8LVGerXJZNpQ9TA6Bf5O\nEJmv04ZvfExfl44gjM6Ln4WG8MMf/hBPP/00tm3bhlAoJNjA3Nwc3njjDbz99tu477770NfXJ/KB\noVBI1Nbn5uYkenrwwQdRU1ODoqIixONxHDlyBF6vV5i1xL3olMlBIY6WTqeFvr+4uCil8NnZWczO\nzqKyslIEgaLRqDQOUgqPJX6ObSDpj7gcW8Wj0ShycnIwNzcnXcXhcBgjIyNYWFgQUhanhRGHoOPh\n4KpbOW6rk6BMG+dTsMzI2Z6JRALVH8zUoBwd+zCysrIEVMzOzhbpL7PZLDlgT08PgDWKqtvtFqKQ\nyWQSPUw9oJUt1Az1SktLcfPmTQDroTR3ra6uLnzzm99ESUnJhrx0aWlJ+BYsz7F3goudnaQdHR0i\nqjM4OIiGhgbZtfSuqsFJ/TsrM2Qj6oiL0QANkQ7ACHLyYNlts8qCfoy/89DXqB2EjjT0e2hnQSdo\nPAc/p34/HQFNTU1hYmIC7e3teP3116V5Kjs7G3l5eZiZmZFBNGVlZWhtbZUU5cqVK/B6vSguLpZ1\nVVFRgdHRUYRCIYyMjGDbtm146qmnRLmssbERly5dkia96elpJBIJxONxuFwu5OfnyyZRVVWFgYEB\nmM1mhMNh1NbWwufzYWRkBC6XC+FwWJzJwMCAcCe4UbHSQUA9OzsbTU1NmJycRCwWE7yHFbjc3Fz0\n9PRgamoKs7OzYg92u10+nx4nQUD0Vo7b6iSoJ0kgzuPxSAcoS5HUjlhdXRVyE/NtphXLy8soLy8X\nT8y/VVVVYdeuXUgmkzh9+jQaGhpkxLuWKmePfzgchslkkpB6cXFRSq4M1W02GxYXF+F2u7FlyxZR\nLiJwmUwmZcQg05aamhqpHKysrKCmpgYHDx5EU1MTmpubRR7dqD1oxCUIbhl3Yf18Gjp7COgMNGi4\nGVjN78BYggTWdR/0++hr4KKjA9osIuG1ARs5ELovhCVRHnTKTAUJvpWWlmL//v0oLCxEMpnEuXPn\nUF1djfLycjzzzDN499130d/fD7/fj4sXL8LlcqGkpGSDUhd5BayQXbt2TYbXTExMoLm5GW63G/39\n/RgeHsZnPvMZPPfcc0KuWl1dRUlJCXbv3o1EIgGfzwev14v6+npYrVY0NzfjzJkzMnuzr68PNpsN\nNTU1gmkRIKaD1tJ+6XQaiUQCXq8XX/3qV/H3f//3wgviZ5iZmZHu5vz8fIk8qG7Fah0VtenktIDw\nRzluq5Pgzjc1NSW5I8E8Uq3Pnz+P1dW1mYgEjFje4W5aUVGB2tpaWCwW9Pb2YnR0FKurq1KXdrvd\nkq9dvXpV0gamFowc7Ha7lLfIZiPAyUWVTCYl4kmlUoKhNDY2wmazob29Hbm5ufjJT34Cv9+PSCSC\ngYGBDVHG97//fWzdulWcGqXPjM093F1pXIwEtCHxefo5dCxcePocWtiF0ZE+Dxes3t31efmzTmF0\nJEZHxHPy4PUYqxh0XsZ0RTtHYONwX85f0UzX5uZmmExregulpaXo6OjAmTNnpKo0OjoqjYDM7ycm\nJoQ3sH37dhGF4fro6OgQbZGrV69KLwsp0V6vFyUlJZidncWnPvUppNNpnD59GrOzs+jo6JA+ClbB\nKLWv0xKei+Q9TddOp9MoKirC0aNHEQ6HpVIXj8fhdDpht9sxNjaGYDAolHyz2Sx6HOzRsdvtkja
T\n2Xorx211EpyVyNztypUr8Hg8cDqdqKurg81mw9TUFIaGhhAKhURg1uv1YmJiQhqniouLMTAwgObm\nZtEenJubk6EnXFjXrl2TUI2oMuXygPW5C0T1s7Ozpdqha/IUJuGkc6fTiba2NtTU1Mhotb/+67/G\nuXPn8POf/1zKr0wBEokE+vr6xOmUlJTIPaGRAh8mSfFnXRokZkDQloc2MD3fw3hejUnoxjBjpUS/\nbjPgUTs1Y8l1s8PoTLRT0tGSjmKMDikzMxN33HEHlpaW4PP5EAqFsGfPHiQSCdx7772IRqM4c+YM\nLBYLCgsLJSSvqKhAc3MzJicnBbxsbm7GwsIC9uzZg5GREZw8eRL33HMPLl++jMrKSkxNTSEUCqGt\nrU1Un2w2GyYnJ3H27FmEQiGZ5rZt2zYcP34cFosF27Ztw/DwMPbu3YuDBw/C5/MhGo1KKf93v/vd\nBmo678vCwgLy8vKk6sJOWDoa6lbSiQUCARQWFsJqtYrEHdeT1WpFaWkpent7JXK+leO2j/mjN7VY\nLCgtLYXVaoXb7Ybf75fatt/vh8fjQXt7O3p6euD3+6WvIJFIoLOzE8vLy4jH4ygtLRWZuWQyiY6O\nDqyurkniUaHHZDJJaK95CFrKjrVl9uITvyDQuLKyIpWUjIwMHD58WKYr9fT0YGlpCTt27MCOHTvQ\n3d0tqLfD4cDJkydx4cIFFBUVobi4GF/84hc3SIppIFAbMADZ+YzlRqYym+kFbJaeaPxARyP8LlKp\n1AbJOQ0sGsuZ+n14buM16AqIjhqM60Gfk59JO04d/QBrTEL2/XAivcViQfUHQ591d292djYKCgok\nBcjOzsb09DSWl5fx7rvvIiMjAz09PTh48KDc8x07diAYDKKyshJ2u11EaD0eD1wuF3p7e1FSUoJd\nu3YJ9f7atWvIzMzEvn374PF4UFtbK9ojubm58Pv96OnpQTQaleunHgSZozabDel0WqopS0tLaGxs\nhN1uh9lsxsTEBDwej0yVu/vuu4XE1dfXJ3ND6TRIqCJAfSvHbXUSpaWlsNls6O7ulsaq6upq9Pb2\nym5OlZ/c3Fx0dnbKdCN2iNrtdvh8PtTX12N8fByBQEAauObn59HR0bFBiYoVDIJFugmGOz3ZcPyf\nKDFzReIPXPQEJW/cuCEszszMTJw+fRqJREJCPbfbjfb2drz55puyczz99NOioKXBROPuDKwbux6K\no8uW+mBqolMP487P59EIddUC2BipbJae6Mf19WknoaMRY/phdDI8B52UdpakmWviGM9rsVjgcDhg\nNpvxm9/8Bvfddx+KioqwZ88e9PX1YWZmBqWlpdiyZYt08o6OjiIQCAiVmfd19+7dKCkpQUlJCc6e\nPYt33nkHHo8Hk5OTG4DWYDCI2tpaPP300zLWcGpqCoFAAFlZWdi1axfuv/9+0UVxu90imkzpATJF\nCdbX1NRgaGhIvjeWxTl2IhgMoqqqCg6HA/F4HGNjY5LKxGIx7N69G2NjYygvL8fU1JRwPugYKioq\nUFdXh8HBQZw+ffoj2+ltdRJFRUVSzrx69aogxCxLsqXb611rMu3t7YXH48Gf/dmfIScnB8ePH4fX\n60U8HsfIyAh27doFt9uNt99+W6jXRHbJowewgWILQEBRGgBzeafTicXFRTkX6eAETllRoBQ7ezQi\nkYjs+BzMA6wJ/77zzjsyxKWhoUFyR/ZwaNk6jREAG9MEGhMBPW1sBFl1JyEdnMYkNMNPk6x0yK8P\nnYoYwcs/BI7SkImLaEezWXSjO1m1U9CPGX9Op9Ow2+04cOAAIpGI6JPefffd2Lp1Kzo6OvDaa69h\ndnYW27Ztw+7du3HixAlRvSbOVV9fj6amJjQ1NSEWi8Hn8yGRSGBmZgYulwtZWVmwWq3Iy8tDS0sL\n7r//fpE64BrJz89Ha2srCgsLpfTe1NQkji47Oxv9/f0IhUJ
CsqI2K/GDpqYmRCIRKalbLGsiNWaz\nGV1dXcjMzITD4ZB1lkqlMD09jYsXLyIjIwMTExMoKCiAzWYTvMJms+HmzZsYHR29bV2g/1PH6uoq\nzp8/D4fDgeLiYrS3t8Pr9YpE+ODgILZu3Yq6ujp0d3djYmIC8XgcHR0d2LJlCxYXF3H16lWhzBYX\nFwswxfCNtFTWnTU7kIuehCyCSdRZDIfDMomJfHiWagGgsbERVqsV58+fF69OAg4b02jEWgSH6PP5\n8+dx+fJl7Ny5Ezt37kRLSwtKS0vFoBgua+MjYErjIdBlxBeYEtGotaKUJmhpoFQ7CI0vaMYkD53u\naMPVEQrfl+fk9RkdkC6fasxFv6dmb+pz6HNzVKNOzcxmMzo7O2X69oULF0Rrklwap9OJ+fl5lJWV\nwev1YnFxEQUFBXj88cdRWFiId999F7FYDHv37oXX64XL5RIw3GxeGxP53nvvwWxeEyvi/NbCwkIs\nLy+LmnsoFEJPT48MjqIqPD/j7OwsgPXWdr/fL3qaIyMjiEQionh1+vRpwbocDgcOHToEj8eDCxcu\niMPyeDyIxWKor69HMBhENBrFwsLCLQOXlu985zu39IL/Vcd3v/vd77S1tQn2QKSYhBD2P+zevVtE\nYHJycjAzM4OtW7dKSy4A0XH0er3CeuSiB9YWGyXTiDgDkBJndnY24vG4oMJsJWcPP9uqNcGLAijN\nzc0oKysTsg1fz8XKMqvJZEJdXR0qKipw5513YmBgQJyS2WzG5z//eZSUlAiYq9WyidsY8RMjrdqY\notAJ6tBcGyRfw8OIL/D5ev6FPi+fZ0xF9LmM2In++x/CM4yfRZ9fOxjjcze7lrGxMbz99tvS1xMK\nhTAxMYGmpiYcOnQIFRUVCIVC8n2YTCaUlZVJydBut6O0tBT5+fkya7WqqgolJSVC86cQz/DwMPr7\n+5FIJERfMhaLob+/Hz09PZiYmEBdXR0AyPRxStCl02lZa8S+qLzNyCEzMxM3btxAMBhETk4OlpeX\nhbWbSCTEQfn9foRCIUSjUTgcDsGaOK1ucXERfr8f3/nOd777UWz1tkYSw8PDkl9T66+vrw89PT0i\nPd/V1YWGhgZcunQJwWAQjY2N6OrqgsPhQPUHOoaBQAAFBQWYnZ1FOByWUJ8hW1FRERYWFuTLZpWD\nTWGkXTOPz8vLE+/NUJ40Vxro7t278dBDD6GhoUG0BC5evAir1Sqh4NGjRyWqoOzd3XffjZMnT2Jp\naUkWB9W277vvPrzyyiv42te+hrq6OnF0BBNprNyddWmTj2tsg6mKdho69NdRAB/TOb9OdTRl/A9x\nNTYzYP34ZhiE0Xnw0Nekz8PXGK/BZFpn3fKwWCyorKzExz/+cbzwwgtYWFiQ4T7z8/P46U9/iq1b\nt2Lfvn24fPkyenp64PP5YDabhW25vLyM8fFxGeFIkVl2AFMzA1jjytAR/fa3v8W2bduwtLSESCSC\naDQqdGmHw4GBgQEZy0jcy263Ix6PS+u6HlRVXFwsgCv1MahbSfGg8+fPIzc3F7FYTIYGP/bYY+jq\n6oLf75deE2NPzx87/mgX6P+uw2Qypf/yL/8SdrsdNpsNvb29qKyshMViwbVr1+D3+2WwSX9/PxYX\nF7Ft2zYAa5HDiRMnUFFRAYfDgYmJCYTDYVRWVgp7k+pIwWAQHo8HX/nKV/Av//IvaGtrw4kTJ/Ds\ns8/iF7/4hXSaki7NPhBOBtfkFZPJJMrNjz/+OC5duoTHH38cmZmZuOuuuwCsh9nd3d0YGhrCyZMn\ncenSJTidTvT398PpdEooqYexcPFYrVZ8+tOfxvbt2+XzElg1akFqo2ekQCFgphE6NNc9FJSr012b\ndDC6mkJjZVcijdBo7Op73fB+PB8xIO0ojMa+GRhq5Fboz8vz86BzM1ZhfD4ffD4fgsEgXnzxRYRC\nITz55JNwOp04f/688CA
yMzMxPj6O9vZ2hMNhuFwuzMzMiEbFjh07kJubi6GhIVgsFgnho9GolNap\n9AUAbrcbd9xxh1DFAaCtrQ2pVApDQ0NYWFgQHRFGlAUFBXA6nZiYmBD6dFNTk5RAp6am0NfXh2Qy\nCafTKUOBSMmnXsf09LQ0HRYWFgr/x+v1YmBg4H9tF+j/zmN1dVUG79psNpmQVFRUhEQigcHBQemo\nu+OOO5Cbm4vR0VFs374dx48fl1JURkYGXC7XBj5+QUGBVCpCoRD+6Z/+Cbt27cKXvvQlPPHEEzhx\n4gRMJpNwFZg+ZGVlyf80RI6fP3jwID772c/CZrMhEAhgampKGtRY46Y8XXl5OaxWK3w+n/DlWWtn\ngw8rMDRuOqaRkRGMjo6ioKAAtbW1YswaYDWZTB+aXG0ymQR32YwlCWw0Yv6ucQgdFRiZkBbL+lAh\n/o0GrB0C34/4iz6PEcDUzsYYieh/xihCX6cxjdJl43Q6LVgDW78rKytRXFwsTvfw4cPIzMxETU0N\nCgoKNjQPUg8jEong5s2byMjIQENDA+x2Oz7xiU9gcXER77zzjkS6+fn5mJubg81mE54OFa1sNht6\nenpEVZygO7kP/D7Y7EXMiWulsbER9fX1sFgs6OnpQSKRwJYtWwSDYKmXmwjv18DAAFpbW/HZz34W\nKysreOGFF3DixImPbKe31UnMzc3JrAx65ZqaGpSWlsqgErLGampqUFJSgkAggFdffVX6Iy5duoSi\noiKUlpaiu7tbFhXbzVdXV0Xya35+Hi+88AKeeuopOJ1OJJNJaaghhyGVSuHxxx/Hrl27cOnSJeTn\n58Pv98NsNuOxxx6DyWTC8PAwIpEIysvLsbCwALfbjZWVFVy+fBk7duwQw3E4HHj88cexuLiI559/\nHrt27YLD4cCvf/1rWaAtLS3C7uRC3b9/Pzo7O9HV1YXGxkbZhZkiEZdg2Kh3TWN5k7uykaVpxCaM\nJCa9kxsNWbca0xh1msLnGQ16szRBH3+IvKUf42u1vob+7PxdXzuv0W6349ChQ8jOzsb169cxPDyM\nffv2YXR0FHV1daiqqpL7eurUKVFHY3h/1113oaGhAUePHsWOHTswODiIubk53LhxA9PT0wDWpA29\nXi+6u7tFmdztdiMcDmNsbExUrHJyclBbWwuPxyMqXiMjI5iZmYHX60V2draoX7W3t6OsrEzYydnZ\n2SgrK0MgEMCBAweQSCQQjUYRDofR19eHSCQCADKBfWVlBVNTU3jjjTdgsVhQU1NzS3Z6W52E2WyW\nasTIyAiWl5dx9epVtLe3w2q1Ih6Po7a2VvKr6elpkQnnXESSnXbt2oVgMIjp6WlMT09jcnJStADY\nQ8GcbdeuXSLXRryBXyjzzpWVFTz44IOoqKhAOp0WAVOLxSKpw9LSEurr6/HSSy/h+PHjcLvdKC8v\nx/j4OGpqapCTkyNDXVpbWzE5OYmLFy/ikUceAQCcPHkShw4dwtNPP42xsTFBqvPy8nDgwIENpUL+\nzJ1csy5pNNxxCGrSQXBHpYGy8qGN0Mhs1I6BjEA+X+96xE02Owcf19dvBE11NKErLMYUQw/vYYSi\nncNmTsfoDFkGpzMjdfpzn/ucKFYTrM7OzkZdXR0uXbqEzs5OVH8wn0OzIFdXV5GTk4OpqSmpfpGn\nQ35MZ2cn7r33XqGQm0wmUZOifsXCwoIMG06lUhgZGZHW9snJSUk1L168iKNHj4rT2rp1q+ANIMmG\nWQAAIABJREFU1dXViMfjSKXWGhOTySTuvfdevP3225iensbY2BgikQgcDscGhu9HOW57F2gsFkNn\nZ6cs4ObmZqG82mw28ZhHjx7F4OCgLAyq/ppMJkxOTuL8+fMyJ5RCKQyBKVsWiUSQSqXwj//4j6j+\ngJHHXZCdp/F4HL/85S+xsrKC4uJiLC4uYt++fWhqasKuXbtkZ1xYWMCLL76IhYUFYdu98
cYbQtWt\nr6/Hs88+Ky3Kw8PDaG9vR1VVFRobGwFgQxPW8vIy6urqcOTIEaH/1tfXy/QovTvq6gKNB1ifiAZs\nrE4A66QpGgyJU3rH52EkXunowehANC6gy6mad6HJUTotoVPg+TbDL7Sz0FwPXenREYS+diPOwmsk\n+9LhcODIkSOIRCL4xje+AafTienpaYyMjKC3txfHjx+H3W5HcXEx9u/fj0AggF/+8pdIJpNwuVxY\nXl7G7t270dHRIeuOla28vDzZfM6dOyfMSpfLhfn5eUlnAoGAMCIpmEuJA37OV155RZoO+XmKi4vx\n1FNPCfZ27do1NDQ0yPCn4uJiNDU1wWq14saNG+jq6hJODyONj3rc1hLoM888A5PJhHg8LrJcBQUF\nqK+vx+joKNLpNSHQkpISmfZVWVmJubk5TExMCJmJDDb2+gPrISdHr3H3Ic2VpSWzeV14hg1jn/rU\np4TB1tPTg0uXLuHs2bPo6+vD1NQU/uu//gu//e1vMTg4iPn5eVy/fh29vb0CPFVVVeGee+6BxWKB\n3++Hw+FAOBxGa2srHA4HLly4gOHhYezevRvt7e0oKCjAyZMnhchjs9kwOjoKAHjxxRfR1NQkmpfG\nsqAO3VkO004AWCdB6XZzXSo0/q9Lq5orwftqLDVqI90MY6DD5k5Jx0PSlzE10eflazY7F1NFXfc3\nvo/GNDTOkpmZKaMV8/LysGPHDiwuLuLChQs4cuQIzp8/L/cqIyMD4+PjAi6bzWYcOHAA2dnZ8Pv9\neOihh6RKxtGEVVVVeOKJJxAIBLC0tITi4mIsLS1JT4bD4RAiHRW6FxYWBHhkpMfeDEorJhIJGT14\n6dIlTE5OYvv27ZiZmcG5c+ewf/9+3HHHHaLC5fV6UVpaKqJBKysrqK2txenTp/80SqCTk5NSu83O\nzkZ1dTU6Oztx6dIlRKNRGWnG1m4OIbFYLHC73eIguEgYgnJEG40GgDRxkdVJtSXWz7mwlpaWcO3a\nNezevRu5ubmyOywtLeHSpUu4cOHCBiGa+fl5CQetViu2b9+OhoYGAGtG43A4kJubi4MHD4p68d69\ne2EymUQFaWVlBR6PB8vLyzKLsrm5GaFQCI8++iiKiorknhkjCm1ULJkRY9EcB75Wlzz1js9jsx4J\nDRLq1m4an3YwOl0gCKfLrTpl0Y7IeJ38DJoVqolU5CdoHU46Fraqa81HjeEQ6HY6nfB4PEin14YR\nnzp1CqdOnRKl9ObmZpkx+8lPflLIWnl5eThz5gxaWlqQmZmJq1evorS0FCsrK/D5fFKhmJ2dFREb\nTgCjrkRNTY3MAWVKyPkdZOqyWrGysgK3241gMIiioiLY7XbcvHlTvvve3l5EIhEMDQ3hxRdfxI4d\nO9De3i5Vj7y8PJSVlcFsNmNgYAD9/f23ZKe3tQT6pS99CbFYTMJtt9uNWCyGqakpZGRkoKysDFNT\nU6ipqcH4+Lh0grLywLJOJBIRlSdSbNnAY7VaYbPZMDQ0tIEUlJWVBZvNJjLywWBQFhNLWGwJ1/M3\nWI2hUbDdm9Rau92Ohx56CF//+tdlYXOUG40gKytL2Hpc+IwAWBLVobUGIPX3ZQT4jBUN/XwjoUkD\njJudj6XWPwRG6lyfi1w/pt+LpCEdFRA30demcQ0jx4POQJeC9WfW167vGbCO45CVaex2NZlMmJ2d\nxd/93d9J1YBaDGazWRx/e3s7BgYGYLPZcO+99wqL9oc//CEWFhbgcrmErbl3717YbDZ4vV5Eo1Gc\nOnUKoVBI5nTU19fD5/Ohr68PXq9X0mfSqEkNcDqdWFhYkAib82inp6eldYDjJ6PRKBYXF3HHHXfg\n4MGDaGhokHtJctjCwgImJyfxwAMP/GmUQMvKyhCJRBAKhbB9+3bxwMXFxfKhWWOuqKgQvgIBGora\n5ubmyhQvPUQnmUziK1/5iqC6pFyTRzA7OyvCH7qEy
UVMIJOLlcQXdt4RUKWhlZSUyNTo5557Dj6f\nD7Ozs2hvb0dDQwMuX76MpqYm3HfffRvmT3D3A9YjBCMByWh4misArDsEY1mSz+X/enfWu6/GEvj5\naXzc7TS5SXec0oiNVQ5eL9mnxAPo3HkuHSUYWZX8XX8enUpoB2GMmLgpaEdIZTJeK1mvFGdxOBwo\nKipCIBBAdXW16FNGo1Hs3LkTzc3NyMrKgtvtxsLCAkZGRqRCQl0SMjVtNhsuX74sPJ7W1lYcPXp0\ngwRefn4+VlfX5sQwqmWqSGAdWHO0bPLi65g++Hw+BAIBuV+cK9vQ0CDrgufMzMz80wIuE4mEzFwA\ngJEPxtZzshINtqioCFVVVRgbGxMlKoakZrMZ+/btk8EoVN9hOvCDH/wAdrsdO3fuxJkzZyQMJSeB\nVFiTySRzREm9psMhUElHBGwcMLyysiKzFBh+XrlyRQgwr7zyCn7wgx/g0UcfxdWrVzE8PAyv1/sh\nXoAx7wc2RgfGyoDePfWur5+nz0OjNJYa+Txj1GHM6c1mszS46V1fRxjG3J9GQwMiwGcELDczcO30\njGmQvif6nunftaM1llP5PcdiMbzzzjvo7++Hz+fD/Pw8du3ahcHBQfT19aG+vl7KlhyPWFJSIlEJ\n0146AM6oJWszNzcXRUVFuHDhgsgPkK7PdRcMBmVKOFMoYmVscMzMzMTY2BgCgYDcL1K6uVFRuAZY\nVwdjyZz3kxTyWzlua7rxyU9+UtSCioqKcP36dWRkZMDn88mXQDZbQUGBKCJv2bJFePTz8/Mym3Fq\nagrJZBIejwejo6OCdq+urorqNm8iNQQZ2uuxfgSgGJ7RYXDnMZvN4tgYlmupchpAUVGRvB8H0B46\ndAiHDh2SsizPbdwljZUC7TiMACYPbeTc3bXBaxxBRyh8jTZ4Xgt3fBop+1h0+ZIgoiZOaWyADorv\nR1EUvavzvvFzaDIQD4bOOgIzRk98jb4v2tHoqVcmk0n0JHw+H4aGhnD27Fn4/f4N96KpqQn19fXY\nunUrnnrqqQ3rOBwO4+bNm4hEInjzzTdx//33o729HaFQCC+//DJ2794tQ4RTqZT05hBIj8fjmJ2d\nxeLiovSOkPLNQT7EULZu3YqCggL09/fLeMLTp08L1sU29ba2NnzhC1+QUYp6Y+EGW19f/6eRbtBQ\nqPbE2QLl5eWIxWJIJBKyoEOhEHJyclBYWCjlJHbNxeNxmEzr6tHEF6hWTc4FsD4Cjzx7enYCieFw\neMPOyDSAhszfaSjsIKWTIfjJGR05OTm499578YUvfEFyT4qD0JCNxq/DaQ0E8nHd/KVRf+0QaJQ0\nCB40MDo1vqeuEPCz605UYzeqEfAkiAyslx61NKDmbzDEzs7ORiKR2MC7YJqjxybonhRgvXtV/26k\nogPrfA6dVvFvyWQSfr8f09PTaGlpgcvlQnFxMfr7+0VjgrjYvn37UFtbiwcffFC4EXp9ZGVl4ezZ\ns3jyySeFnj8/P4+77rpLujF7enqQnZ2N4uJihMNhnDt3TqJPfm6KIFPrklEupQ0yMzMFf2tvb8fI\nyIjwjFpaWvDyyy+jtLQUzc3NACDOjgruJSUlYhO3ctz2dMNkMkmDDIGXQCCAnTt3YmFhQSZ6MUTj\njsVFQvQZWB/6qxH4vLw8NDY24vr16xs0I9jRyfyxsbERly9flu5P4hcZGRnSIUqnQqdFtWKStmhQ\nubm5aGpqwtTUFMLhMKanp+F2u/Gb3/wGDQ0NuOeee+T8JNjodMMY6jPC0aQo/Xwexh0U2Dy1YJqg\nG8XobOhgeGiwke9BbEhzEoyVC/5N4xm8dobHvC79uXWlSmNEPAejE6aCOkVjtKdTHu3w9T2KxWKI\nRCI4cuSIEI+IHyUSCUkx9u/fj8XFRRQVFW0o0dIBB4NBHD58GMFgEIFAANFoFMFgEL/73e9QVlaG\n0tJSAEBzczMik
Qh8Ph8WFxexZcsWAehNJpOstbKyMoyOjkq/EqMzOghGIu+88w4qKyuxY8cOzMzM\nYHx8HFu2bJGU5dVXXxVRXw4xKi4uRn19PSoq9LjeP37cdlo25by4Q0YiERw4cADl5eW4ePEiKisr\nN5SEAoEAent7BSjjrMVkMilNWiaTSSYXORwONDQ0CCMuGo3C7/cjPz8fs7OzwqMYGBjYkLdmZmaK\nPibbehmqUR+CC5rj/VhmJYErFAoJJfZb3/oWpqam8Dd/8zdIpVJ45ZVX0NjYiObmZsEujFGB3lH1\nAtUNZwzXjTm9/p2vMzZ9GR0HIwqqcKVSKXHIfC0dAHcjRkJ0Zpp0ZWRi8tp1tymfuxkmw9cajZ0O\nR5Ov6Ox0bwkjGP5N4xoul0se+9nPfobDhw/jS1/6Emw2m1SinnzySaRSKdy8eRMPPPCAaKDyfvh8\nPvj9frkvoVAIly5dwvnz56XzmFW71dVVFBUVbYh0zGYzAoGAsC4pWlNUVIT+/n55L26OPT09cn9d\nLpdIOUYiEeTl5Yl+yejoqBDG0uk1Md3JyUn4/X5kZWVhZGTkluz0tpKpamtr4ff7sbCwgFQqJcb7\nxS9+EQcOHEAoFNpQIq2pqRHl6VAoJIuvsrISCwsLIm7LykRWVhaeeeYZOJ1OvPfeewAgg02YomhU\nPRqNYmlpSdhubPvVlGDuWGz1ZiRBjw+shd4EUcmso9oxp1i//vrruPvuu1H9Qbv7ZrwFXh8jJjbx\nANjA7dBcACOwyd+149F0aT1pjOem82OYq3EMLnBiLTyMhs/7oSMCjSXwcbbL8/Py0A5EV3AYWRDP\n0dUnAtHkseh7aSx5ck2trq4KkNzR0YFYLIYnn3wS58+fx9zcHKqrq+H1epGfn48LFy6gpaUFq6tr\ng6QvX76M+fl5tLS04IEHHsDY2Bimp6dhNptFZ8RkMmFqagpWqxWDg4M4e/asqL4DQCQSQSQSEcCd\naziRSAij2JhK8ruen58XaX5WPrQD4kaXk5ODnJwcrK6u4ty5c7hy5QpGRkb+NMhUDBnZaclFQFHY\nBx98UPrjOzs7ceHCBSG6kLxCSToqWHNhcKd76aWXRFzmq1/9KiorK/HSSy/hwoULokPBASlcdMQr\neI3AuoYkFzxFbLgwiUXwy2HOSydBoPTYsWO4fv06fvSjH2H79u0b8AIuXGOVwtiHwZCdBxc7rw/A\nh5wDjYhRBI1L/65bz/k5GbXoBcr30DwGOhtjtKJfa6ST6/ehk9GOjodOR3RpVadpxkhKg7O8PiN9\n3G634+DBgwAgknLBYBBWq1UmlM/OzuKhhx7C1NQUlpeXcf36dbz11lt45JFHUF9fj1/96lcoLy/H\nPffcg/z8fLz99ttwu934xCc+IeuWI/kWFhZk13e73bBardK4tby8jGAwKMOwGR2xEZHRG+9bMpkU\n7dZwOCyfi5gP0xdqXtB56o7Tj3rcVidBYIozD+kswuGwYAasgLS1teH111+XgSRcHB6PBzk5OaKQ\nzbISd669e/eiq6sLiUQCly9fRktLi3AliDinUilJH8inIJsyLy8Pq6troh/j4+MCvnECGHdUHdrS\nILhLplIpuWaHw4Enn3xSJPYI+GkQUTMpdQ6sdxVGN3SK2giM6Qaw7uT0zszfmSoYsQNGKdSe0Ofn\n37iY+bN+b2M0Q0yITVaMkIzELU2U0q/XkYgRiKSj0JUOPldXeXSVxWw2w263o729HRcvXpT7evz4\ncQCA0+mUifQ9PT3YunUrTp48iePHjyMQCKCxsRHDw8MYGBjAxMQEsrOz4Xa7ZRhOZWUl+vr6hJoN\nrIk/WywW+Hw+mZ2h0xVuKHa7XTpPOSrQbDYjPz8fk5OTkkqzS5prlRsk+32oHk5hm2QyKXKPH/W4\n7ZL6DKFJUqIGg9/vRzAYxLe//W3k5eUhPz9f+jtWV1dlUjhLoF6vF5OTk7JrsES
5f/9+HDx4ED//\n+c9x7NgxnDp1SnLBnJwcZGVlIZFIyALWuS0dRyqVQigUgsPhEI+sS1VacgxY36lYMiUVPC8vDyaT\nCUePHsXly5elec3r9cr4t/vvvx979+7dwA7VWIkGGIGNgrnGHcK4y/IxnTbov+nQnX+j4WzmJPRz\n+Jk14KrLofqz0JlaLOuDjZmyAR/maQAb+SI6jdC4hcY/9KHTNmOUMj8/D7/fj+LiYty8eRNzc3OC\nD0QiEYyNjeHq1avo7u5Gf3+/YGSXL19GX18fGhoahCxF+YKVlRXcuHEDra2tKC8vR05ODs6cOSMG\nTb2UlZUVDA0NIZ1eE4YhAM8+DVb9KioqMD09jfLyclnbBQUFsFqtMgybUWxxcbFE2SUlJaisrBTw\nsrKyEvF4HFeuXLkVM729ToILmyQmLpgtW7agublZhqsyrMrLy0MikcDS0hIeffRRPPDAAzCZTPJl\nzszMYGlpCclkUkAbYE3Z59FHH8Xp06fR1dWFHTt2iJy+MYxPpVJwu91oaWnBxYsXZRfiwsnLy9tQ\nJuUgVl47KbCrq2sTzAOBAFKplMjmc04Iy14rKytSvWBrsNfrFfk64MODbPTvdGL6b8B62Y+Hjkz4\nev6uqeAao+B3whAX2MgI5XsyFNYRj5EarfkN/DuvSwOom6UbRpzG6HiMoK7xfrBkrIFUlhT5XV29\nelUiUVbdVldX8eabb0p62d/fL8OmuS7YT5Gbm4v7778fd955JzIzM+HxeNDd3S0sYI/HIzwdRo8j\nIyMyG6OmpgYul2vD5jc/P4/u7m5UVVWhtLQUs7Oz4nRGRkZkSLK+ZxQi4v3Izc2F2+3G7OwshoaG\nYLVapXP0ox631UnMzs4iHo8jPz9futsAIC8vD8XFxWJIwFpb+ZYtW2QC0pYtWzA2NibqVbW1tbh2\n7RpisZjQbO12OwoKCmC321FUVITPfvaz+NGPfoShoSEAkEiCob6mxOp6PeeEkj5LNifTBRoSw7iV\nlRUJCwHg0KFDuHHjBiYnJ6VXhMK6fD7D74qKCpSWlm4wAqOD0LsoF74xrUilUhvYkdoh8PV0FjR8\nI4i5mZOhMzBGDEYj1uE/X6/Lnvpn/tM0b6ZS+rPzPPxutJMwphl0ZFpuj9eocZPMzEzU1dXhvvvu\nEwHmiYkJ+a6ANVyK07yvX7+O3NxcVFZWYnV1FVVVVTh06BA6OjokIpiZmcHvf/97TE9Pw+FwoLa2\nVjYgOhfNsVhaWsLY2JhEqUzxAMicUZfLhbm5OSFeMQWsqqqS587NzSEcDqOiogJOpxPDw8Ooq6uD\n2WyGy+WSOSPGmbN/7LjtojNU76WhMgLgYufkIe7k5C34fD50dXVhcHAQKysrkjvqmYo6F02lUvB6\nvfjyl7+Mn/zkJ4Lca84BCTmTk5MyDm52dlbUs1gOpYEbQ+aKigoEg0HE43EZdGyxWPDWW299qE/B\n7XZjbGxMyDlMV/bs2SOt7dqgjHk+d0vjDm3EIrTxEFwFNo4FJA6gowB9Th11UKtDA6s6gtF4Bu8r\nX0+MRr82nU4L0EbEnvgNPxup+Nq58X+9kQDrVRXj9ZAMp6swvC8ulwuf/vSnZb1dvnwZo6OjOHHi\nhEwWZ77PNetyuVBfXw+3241UKoUvf/nLGBsbw7vvvouJiQlMTU2hoKBAJpbTua2srEiFKzs7W3b6\nsbExpNNpWK1WLC4uwuVyCU7Hasr8/DzsdjtaWlrQ29sr5MLFxUVRwGIxICcnB4lEAmNjY3C73VLW\n5UZwK8dtdRKcoLyysiKzMfjlJZNJTE9PC/16cHAQFosFdXV1mJmZwY0bN4R9RoSXXyBZlQz/LRYL\ntm/fjhMnTuDhhx+G0+nEP//zP8uCZxTBxU3NzSeeeEJ4/SaTSdSJ+QUzeiCjbmpqCgBkB2KNWw+F\npSrR5OSkGALBpMrKSjQ0NGwwWJ4HWHcw/Du
NSEcE2gCIz+hzsImIi5bn0efXaYmuFpCoBqzTpXkO\nOkQdIeh+ASNzVEcsdrtdzpmXl4dkMily8IzsWJqen5/f4Ai0eI7+LNpBaW6GHmRE5iu/y6mpKaTT\naRw4cAATExOSktjtdplM/rnPfQ7p9NpgYALRAwMDGBkZkSh3cHAQS0tLcDgcuHjxIoA1UVx+z6ur\na1PsQ6EQKioqZODP3NwckskklpaWZCrcM888g2AwiNXVVfT09CCdTkvrQVFREVZXV4WwFYlE5H15\nT2ZmZpCfny+YD4HSWzn+qJMwmUzlAJ4DUAQgBeDn6XT6n00mUwGAlwBUARgB8HQ6nY5+8Jr/AeC/\nAVgB8H+m0+m3Njt3KpWS8g7zxpWVFcRiMVEJOn36NKLRqABJJF9RXYegVSgUEm9ptVplxxwaGkJR\nUZG0eZPxxtyVIR7ryDabDX/1V38lKkEPPfQQWltbcf36dTz88MM4d+4cjh8/LtOcTCYTyPegk+Mu\nRsp3Op3eQIKhbLsOzQ8dOoQHHnhArt2I4HPx6+hH90Ro49SMSWB9dzVWG3i/9Q7/wff3oQoLD52C\nkK5tjCaMkQKwlto5nU5Z5BQe5t9p0Ol0WjQQdAqTlZUFl8sFu92OiYkJ+e6Nn1enYhokpROhUyHT\nlRgSq1lHjhzB9u3bN1DKLZa1QTvhcBi//vWvMTk5iaWlJbS1teH48ePYv38/rl27htbWVpE6iEaj\nWF1dxSOPPCJ8BjohCh6x1D4+Pi68Hn6vbBx8/vnn0draKqI10WgU77//Pmw2G3w+nwg2ud1uwTTY\n35FOpzE/P4/x8XHZAIqKigTU/6jHR4kkVgD8X+l0+qrJZLIBuGQymd4C8GUAx9Lp9N+bTKb/G8D/\nAPDfTSbTVgBPA9gCoBzAMZPJ1JDWK+2DgyQqLlDmWmzBvXHjhsxGZG99fn4+8vPzEQqFZKJ3bW0t\nysvL0dHRgWQyiUgkIvM6r1+/jsHBQdjtdtxzzz147bXXMDQ0JBqEmptA/YnCwkLs3LkTg4ODmJiY\nwPnz5xGNRvGzn/0MNpsNHo8HBQUF+MxnPoPr16/jwoULcDqdmJmZEZ2KeDwuWoncscj21GBaKpVC\nTk4OnnjiCezfv18iBSOgp1WcNAZAI2IorfECDfLpXNgYjm9mYJpvsll/iVEFm3/nuenA+H4knQHr\n+g7GlMUIdhrfk9iJ0+mUKpcx9eHzGdVoHgoBWJ4rGAxKBBQIBPD9738fPp8Phw8fRkFBAQoKCtDW\n1oZDhw7h8OHDGBoakn6TQCCAWCyG/fv3Y3p6Glu2bEE0GsXw8DDm5uaQl5eHhx9+GOl0Gn6/H8PD\nw6itrcXVq1dRUlIiZD5WPBobGxGLxaR13WaziXASZ9zG4/EN7NtoNIry8nKsrq5uYASn02lJv6PR\nKEZHR2G1Wv+nOBLAR3AS6XR6GsD0Bz8nTCZTN9aM/0kA937wtF8AeA/AfwfwBIAX0+n0CoARk8nU\nD6AdwLlNzr3Bc3K8HkOkRCKBqqoqzM7OSmNMJBKRQTzp9Jqadnd3t3hpisAwVx0cHJTyant7O+65\n5x6cPn16Q2hns9lgNpsFxJycnJTOv0AggMXFRczPz8Nms2Hv3r144oknMDAwgPn5eezduxdjY2Oi\nFFRfX4+RkRHU1taisrISTU1N6OrqkknTpAPn5OSIMygsLERNTQ3i8TguX76MtrY20ZvQ+b2+bzQO\nCtikUikhzTAyY6qheQc0Tn1OCrFQ50FjQnSiGhQkBmCMFrSD4muZAurdi4aqS6e6QqGJXTo05nUz\nHVxeXhZBG51y6c/Ie8Rr4z2gTsSrr76KLVu2wGazScMgcQOXy4Xc3FzMzs5idHRUjJ1l++bmZjz2\n2GMAgFAohM7OTkxPT6OgoADBYBDvvfceHn30UTidTtx11124ePGi9PhkZmbC7/cDWG8eZCTAEZP8\nTkKhkEQ
d3NgIUNKpxONxSdcWFxcRi8UwOzsrkSlxtVAoJOSuj3rcEiZhMpmqAbQBOAugKJ1Oz3yw\nWKZNJpP3g6eVATijXjbxwWMfOojismzDnC0QCEiF48KFC2hra0NFRYV4VOaW7OcgnZUlTeoQLiws\nYGJiAl6vF/v27UM8Hscrr7wiNzsrKwvZ2dniUJaWllBSUoKHHnoIwWBQ6uDt7e04efIkQqEQjh07\nhurqauzfvx/f/va3MTMzI+AWNRPN5rX5kI8//jgaGhpQUVGBZ599FgsLC5ienkZ2djYKCwsxMTGB\nGzduwOl0SqXmhRdewPe+9z20tbVJuKwjAlZVuBuTnmyxrEnGz8/PC+ZgNAxt6AQKqX3ICg0jLA2O\nfvDdf8gAiSdpkFO/hrs5RWf0Tq/xDr7WyPjUm8hm6RfxJ6aWxBD4Gn4XmqVK/ARYc47T09Po6ekR\n7g2fz1mhL774ovBZksmkOP+VlRU8/PDDojrG6ePPPPMMRkdH8f7770s61draKkOM5+bmRMWM9yOZ\nTCIYDKKwsFDGRRKX42fksGr+vLq6Cr/fj8rKSlRUVEhfhtPp3NBdHY/H4Xa7MTMzI9wMYzr6x46P\n7CQ+SDVewRrGkDCZTMb04ZaFKQh6cUcnyh2JRLBjxw5cuXIFq6urGB8flx3DZDLJnAK20lKhios4\nKytLwjmr1Yr9+/fjvvvug8m0xsAka5PGlJeXJ+25/ILq6upgt9vx2muv4d5770U0GkVPTw9qamqw\nsLCAvr4+5Ofny7h5RkNjY2PweDwIh8M4ffo0rly5ggMHDsDtdmN5eRl79+6V94hEImhubsbY2BjO\nnDmDmzdvisAO8OHwX++uzKU1eEnjJ5uUz6XkH4WAaXAkjGnD4s5DQJjnyMzMlDzeZrNtSElMJpMo\nJ9Gh8X1pKDpFovFrPMPoQHQKw91T08D5N7ILQ6GQODL2KujxA+yP0ZWOVCoFq9WKycmnwcuZAAAg\nAElEQVRJWK1WVFdXI5FIwOv1yjAfu92O6elplJWV4cEHH8S2bduQl5eH8fFxFBUVoaenB3v27MHH\nP/5xHDt2DFarFdu2bRNFdKqv9ff3o6WlBX6/f8Pmtry8LDL9nHLucDiEsdnZ2YlwOCwVQALgTGln\nZ2dlo2Oa09vbi/r6emkOZKuB1gG5JTv9KE8ymUwZWHMQ/5VOp3/7wcMzJpOpKJ1Oz5hMpmIA/g8e\nnwCge1HLP3jsQ8fw8LB82VarVXYd5q5er1co0F1dXYJbZGdnw+l0iuenFiEXieapz8/P49SpUzh7\n9qwYJ52NLh8CkPJpIBDAm2++iXfffRft7e1wOp348z//c4kYBgYGRIuAiy83Nxd79uxBbW0tOjs7\nsXv3bhw7dkzSn8bGRrjdblRWViKdTos6Vl1dHTweDxwOB5566ilhxwEb+yV0NEG0n4vGGJabTKYN\nXar8nIwWdC6v2YpE+nNzc+W+8v0ZrRCM1e/Fc3NXX1lZkbFzGmfQ169xEh1l6NQF2KiXoZ2hfm+T\nyYT8/Hy5Xla0WF1hCzmw3rVqtVrR0tKCr3/96+jt7ZWZGr/85S/xzDPPiHLY8PAwZmZmsHfvXtTU\n1CCRSODw4cPIycmBy+VCdXU18vPzUVdXJ3oQ5eXlct+ZRjQ3N+Pw4cMAgM7OTkmnCwoK5LOFQiFY\nrVZZTw0NDbh586ZIBfKzk4DH9cw+Ik6993g8gt1waFAgEEAgEJBN9FaOjxpJ/DuAm+l0+kfqsdcB\nfAnA/wvgiwB+qx5/3mQy/RPW0ox6AOc3O2lFRQWKi4ulD9/r9Uqr7pEjRxCNRlFVVSV1ZY7bA9Z2\nkNraWiE3sVJBbQdSps1mszTApD+oQzPiMKK83D3+9m//VoYFx+NxHDp0CFVVVaKDwJw5GAzi2LFj\nooY8Pz+PSCSC9vZ2PP/88xK6v/POO6ivr8fU1BTGxsaQnZ2Nq1evYufOn
bKAW1tbYTKtTbTejGfA\nQ9f32fuiGXc0WI3qEzWnY2TYylxdGygNmBEG7yFTEp0G8P0ASH9Abm6uYAS6OxPYyAKlcwA+zLLU\nPTB0UIz0+N460gCwoQeEKYV2sJulT2azGY2NjUJjDofD2L59u1wf9T8ee+wxZGVliRDztm3bUFdX\nJ5O+LRaLNGwRHGUnKiPMX/ziFzh27JgIEVFWLhQKiQQeX0u+yLFjx0SOnzbAe0a8gexfguNerxd+\nvx+zs7OSesTjcXg8Hrjdbok+r1+/vplJbnp8lBLoXQA+C6DLZDJdwVpa8f9gzTm8bDKZ/huAUaxV\nNJBOp2+aTKaXAdwEsAzg/9isssHFkE6nJXy12+3YtWsXrFYrxsbGMDExIfJd0WgUyWRS+idIn2Vv\nhFbLNuaxGtAiy5Gv598zMjLg9/ths9lQWlqKjIwMVFRUyK7CL354eFhGrbW1tWFlZW2W5MLCAk6e\nPCm7MfNj0wdVje9+97vwer3Yv38/vF4vrl69itHRUZG1a2lp+RDXQQOBNGitacES3Qf3Xf7nTqqx\nC+IHNHZGa8bKAB8DIIxAvSvze9vM4Pg6OmKtOKWvT38fOm0BNqp38/kE+nhtmnehqyNsYjLeO2IW\nvA/8WXda8trZE5GVlYX7779fSoYEEHNzc7Fjx44NDo7XxwgslUphbGwMb721VvlfXFxER0fHBnU0\ntiOwCsbvhBsZcQqv14vCwkLh0/j9fsHwWA7ndxSNRlFQUIDMzEz09/ejqqpKnEhdXR1WVlYwOzuL\nWCz2B6x98+OjVDdOAbD8gT8f+gOv+TsAf/fHzp2dnY2xsTE4HA4AayHRzMyMSJOzNEWE3W63S45l\nMpmkFZs7CI2dXxpLp6urq3A6nXIjOXuUOyrzb0rRVVdXI5VKYdu2bTLmb3p6Gm+88QbeeustqYNz\nBIDH4xEZdn0tBGXJnAuHwygoKBCNyytXruAzn/mM5I/Gxc1dmItHpyFME3QUoKsDOirQ//TzjaVQ\nRjAaoNQ0bGDdIWgD19eisRMOLwbWjV+nQMDmnAojEEqnpCnWfA+ek9fJ6FBXgFKpdT0LPp+vp0PV\npK433ngDTz75JLZs2SJt3BbL2tQsABIhAGsalwMDA2hqakJeXh6ys7Ph8/nwzjvvIBKJoLu7W9q/\nNUbCtIFAJJ06O5PpZDlrdvv27bh27Rp6e3uxurqm2coJ4VNTU3A6nYjFYujt7RVHGQ6HJfpYWVkR\n7QmPx/PHTHPDcVuFcHfu3Llh0TF8psiH2bwuBEPR0JycHNFI1PMRtMgIOQ8EgZqbm7FlyxZcv34d\n165dQ05ODmKxmOTdxBW4INitydF7FIxZWFhASUkJbDYbgsEgurq6JJxkDs5dnuGmDnkzMzPx+OOP\n49Of/jTy8vJkwDCl0bRB8WcaPXEGDV7q8QE0BtKtdUelNio6Ip1m8H10VYMhLB2FsUeE59FgomY7\nahxBRwus0GjWpU5fWG7VkQnfQ0eIuk2e16Eb0ogHaEfDe6SjFb4Pn8dJWiR7aZapZqYyQuzo6MCr\nr76KdDqNXbt2CVOYEQx7eJaWltDQ0IBnn30Wzz33HEKh0AZ5A2JlpKTzM+zatQsulwuHDh1CNBrF\ntWvXsGfPHlgsFvziF78AsDbkKpVKwePxwGw2SycrnaHVapW0iFTun/3sZ0j/KQjh2mw2IaewwsCo\nIRgMwmKxoKysTPonbDYbcnJyhNcwNzeHubk5mM1mKZFyIekZHUNDQ7Db7aioqMD169clXKXhUcmK\nv2dmZkqIV19fj5WVFQwPD2NkZARVVVVoa2tDaWkp/uM//gM+nw+RSEQASpZuSZri4qUcf19fH0Kh\nEIaHh3H9+nW4XC7x7Ho3NUYJNGLm5rp9W+MCXMh64riOSmggNFQ+n9qhvAbd+blZP4SRx8DzG8Nw\nI6air8XYqMXwXz9POxIjxZz3idejl
bv0tfEc/D4YGegNij/n5ORsuL86leL9Z0Rgt9vhdDpFaPnk\nyZOiM0I6PjkSiUQChw4dQnl5Ob7xjW/g/fffx3vvvYeZmRnZqBgtaaD1kUcegcfjEdJUVVUVBgcH\ncerUKbS1tSErKwtHjx7dADLz83i9a6yEZDKJmZkZeDwelJSU/GnN3QAgLMdkMinoMCW83G43Ghoa\nEIlEkJGRIaHXgQMHcPXqVdn5GCYybGQpzmxeG32XSCTw+9//HgUFBUgmkzJUh4rIOrzOzMzEfffd\nJ7JiY2NjuPPOO7G4uCjTm+LxOMbGxvCxj30MZrMZp06dwsWLFyXnDAaDEv0wlyYaffPmTcRiMZw8\neRIFBQUbjED3aADr2IDePU0mkwiMaFKT/qcdgY6QtOEYsQ9GOjo60KpUOjLQfAXgw0Nx+Bn4j86L\n16cb73h+RiI6XeN59HszDOe59CBdphVcBzw0NsHPxlRE8040uMuoUEdR/C7Jx8nJycGBAwcQjUbx\n9ttvy+aUTqeFZk0pw/b2dnR2dmJgYAB5eXkoLS3dIDPA71vjTpqmHQ6H4Xa7UVFRgcrKSszOzsoI\nyerqakxOTkpJPhaLyT21Wq1CxuIEsWg0eks2els1LhsbG+FwOFBRUYHc3FzYbDape1dXV8usRQrW\nsjkmEomgt7dXBGTLysqwvLyMRCIhWgC8ufn5+RJiEcRxOByyaDbbLTifdGJiAul0Gu3t7YjH45iY\nmIDP50NDQwNKS0tx6tQpPPjgg2hqasKDDz4oMyO7urokbNSzOS0WCxYWFnD9+nUR/33ggQdkkZBQ\nZsQB9C6uUX3tXLhL0tHQCQDr/R7aUei8n4d2lDo053vT2Pl67cxoeDoN0JUMXaLWUZJOe3RExMfp\n4DR3QjsQ3lftTIz3jJ+V16Y1J1i54XtqYplO1Vie12V0GrPT6UR/fz/a2to29BBpdmdmZiZGR0cR\njUZx5cqVD2EGvH8a14rH43C5XKitrYXX65WJ42azWfQzh4eH0d3dDb/fj1AoJNc5Pz8PAJJWM320\n2WwIBALw+Xz4zp+CxmV5eTncbjfy8vIQDAYBAA6HA3feeScyMjLQ2dkpH3bPnj3SmefxeDA1NYWy\nsjKh17IECkAqHtxVqqurMTs7K1EAySgks1BOjQtubm4O5eXlUrL86U9/KqPq/X4/3n//fcRiMezb\ntw+/+c1v4Ha78eijj+JrX/sahoeHUVZWBr/fL+ArDYYenBOqDxw4ALvdjoWFBVHm1l2VeqEDGzUd\ngI1isdp4NC5Cx0ED0dwQLkZdMuU/XYalw+W5dYivn2M8l8YftHPTuyevn0asHQ4fM7IEdRWLu7xO\nMflcXp8GU7V4rk6pjGQjDTCurq5u0BlhT1AqlZLy6bPPPgu32w23243f//73UkKnyExxcTFSqZQo\nWZtMJlRUVCAWi8nOrwFRRtYHDhyAzWZDOBwWjC2VSknqXVxcjO9973s4ceIErly5gs9//vPo7e1F\nXl4eOjo6MDk5ia9+9as4e/YsTp06Jd3Wt3LcVifBNCGZTOL+++9Hd3c3HA4HMjLWhudUVVXh5MmT\niMfjMnjV7/djaGgIubm5wuYjKw6AdNcR0IzFYhgfH0dzczPy8/Nx+fJlkStnKL64uCgzNRhyz8zM\nwG63Y3Z2VgClxcVFYfOdO3dO5j0uLi5KSnTq1CnMzc1tCN1NJpPoT5Ahmk6n0dvbi1gshsLCQtnd\nuHC1EfHQebWOAHR5kNUeGp5ukwbWsQJgI2HJmMMbnRNxCVaENEHLSJjSxrlZCmWMYHjQkWnHo3Uk\nCCBq7INpnPF+MSoktqCrYrweXp+eufmHUhUSxHTZ9LnnnsPdd9+NjIwM3LhxA3fffbeoVqfTacRi\nMdhsNmRlZWFoaEiaEgFIlFpcXIxkMolEIiGOiIQ2RtHT09NYWFjAHXfcIdFAbm7uhtmkeXl5aGtr\nQ
3V1NYqKivD888/D4/GgpqYGN2/elIiHUeKtHLfVSezZswc3b94EsHbTbDYbiouLMTU1BZPJhPLy\ncgEVfT6fVBh0BcLr9cpCpD4mqccMG0OhEM6cOSPzF6menZOTg2g0KsZM7rvZbBYVn4KCAhlyy/Cy\nq6sL+fn5KC8vh8/ng8lkQl9fn0yBOn/+PDIyMiQ6KCgogM/nAwAhg7GkRrCNjWbARtahEVHXuzbz\nas1HMO7+bIHXO7EOw/Xr9cHdms/j7s4qEolNOnXQuAM/w2bvB2wkUGnHpdMmXemgcTFa4GLntWsu\nBACJQMng1cxS/TtZpLwOjtubn59HSUmJVJDoDJnWXrx4Ed3d3TKwmt3K7PxltMsyZm9vr1SLGDUs\nLy9LOwGnfpNRnE6vCfycOHECpaWl2Lp1q7BCddk5JydHNCTI5xgfH0c4HMb+/fvR2tqKaDSKN998\nUzhGpBx81OO2Ogmn0wmPx4OZmRn09vaKiEZZWZnw1dva2jA6Ooquri6UlJTI4Nba2lrMzMygp6cH\nTqcTZWVlaGhoQHZ2Nrq6ugBAiCXA+kJ85plncPbsWVkILpcLs7OzApq5XC4Eg0FMTEygurpamsH4\nxXHID6+BwiHV1dXw+XzYtWsX0uk0uru7kUwm0dTUhObmZnR2dsLn8+Gtt96SaCISicj5CgsLN+xu\nGk/QGAWNhuAad1Iudm2gdDK6sYm7opbO46HTFWM6AWyUywPWgVFjtUNHEDqV0CVSDZoyojOWZjf7\nPBrkS6fTIgnAg46c164rQsZITKdedGSRSASDg4Po6urCY489Bo/HI70eVHo6e/YsAoEA3G43rl27\nJgDjW2+9hXR6XV0qPz9f+mD4jxtXMBhEZmamEAnn5ubk3jBNZd/S4uIiqqqqMDU1hXg8juLiYklR\niaNwk8vJyUF9fT2++c1vwmw2Izs7Gy6XC0888YRE1hcvXsRvf/tbfNTjtk/wysnJkXKh1+uFx+OR\nMuHU1BRaW1vR2toqqtjhcBiTk5PiCIqKikTZhzoS5eXlyM/PR25urugDUsTW5XJJOsFFq9mEZLS5\n3W709/fL4rJYLFKyTSaTMvAnEomgoqICZWVlqKysxOnTp/HlL38Z4XBYmsBcLhf+8z//Ey0tLdi7\ndy8+9rGPwWazITMzE5WVldKgo0FFYH2SGI2Ki1nnyjq6MHb3MdLggmN0kJWVtWHGJF9Hg+XzNA1d\ncxW0YRkf19fDz6CjG0r3E6DTeIoGPGl4xJW4c/Iz6xKrvl8ax+A5tHPg87WT0iCrw+FAaWkp/u3f\n/g1LS0s4cOAAGhoaMDo6io6ODqyuruLkyZOYmZkRoJmOtqGhAclkUnRXtcgzMRNiYOzboLNmdygd\nF+9/MpmEz+fDiy++iNLSUpSVlWHv3r24cOEC7rjjDuzZswd2u32DM+emwNSTmMfVq1dRUFCApqam\nW7LT2+okxsbGUFNTg5ycHCwvL0vJcXJyUrgTubm50rPBATq6vr2ysiJy+jMzMygpKYHL5UJhYSGa\nm5uxuLiIYDCIiooKGS3/9NNP48c//rHsQKRoE+wjRsEdgd2P//AP/4Bz587htddew8zMDF5++WWZ\nzcARbOfOncP09DTuuusuVFdXw+/3I5lMYs+ePVheXsa3vvUtGc5Cw9WLnYYAbET79W6qnYUx/+fv\nPB8NR5di+XeGuLosyPfm31iSS6VSIvfHdIPOh4uf76Edj3YeuvKhjV1XJ/Tn53O1YzGmXNqodDTC\n9ID315gG6WhJ3+v5+XkMDw/DYrHg/fffl16af//3f8fw8LBEgLxOOvGcnByZvcE1S2fLMi0rJ/xu\n3G63KF6z9aCwsFCkFJl6sBeGk+cmJiYQCARE0Pmuu+7a4JR575iuBQIBXLx4ES6XC+FwWBrQPupx\nW53E4uIiotEoioqKsHXrVmGhOZ1OmXOxuromKZdKrYnQms1m3HX
XXZiYmMBDDz2EQCAgtd/8/HzR\nkBwcHEQ8Hkc8Hpf6sN1uh8PhQFdXl0w2ysjIgMfj2RCOU5uCOSTP2dPTg2PHjonmoMvlQkVFBbxe\nL6qqqjA5OYlwOIyjR4/ipZdewte//nXU1tbC7XbjL/7iLwTQJJDGhUCjYJRjrF7QiDRqTwPhY0YF\nKRqY5kFwh+aOqolUwLqcHdl+dBCazUqj5r3S5VeduuiKg3Zcm6Uu2pEZHYn+LPynz6P/biylsrSp\nqx7GKhAjJ5NpnUjX2NiIhoYGnD17Fjk5OXj33XdFDo6NhgTXaeCJREJmfdJ5eDwewccAyEiIvLw8\n1NTUICNjbeL9zMyMcIX4HTASWllZm4O7sLAgFG6qt/n9fvzud79DMBjEwYMHkZWVJWV+dpOm02kU\nFBSgpKQEjY2NcDqdggN+1OO2OgliBpFIZEMdWodk3KGpThwMBjE1NYWSkhKcOnUK+fn5uHTpElpb\nW7G6uiqAYzqdltoxsBaChcNh/Ou//qsQqahfkUqlUFhYiFgshvLycnR3dwu4xTJXOp3G0NCQiHvE\nYjEEg0F0d3djenoapaWlyM7OlmnR09PTws8ggy4/P192CbvdjtXVVWn0Gh8fx6c+9Sl5L2B9sRvL\nicA6wKfxCu0odPjJnZg7GndkfX6joWnHwQoDgVA6Hy04o3dVABJh8BxGsJWfRVc7+FpdguXPfJ52\nHjqK0qkOnR8btWi4HCrNqgcdMp3JysqKDOiJxWLIyMhAV1eXVAdYBWEFTfNbjKxd9ut4vV6pfGVm\nZgpOwR6NcDgsTplOk41qiURCdCeIW/AeUSj4/PnzMrne4/EgMzMTiUQC27dvR2lpKbKyspCbm4s7\n77xTNr329vZbstPb6iS6urpEsWdsbAytra3Iz89HPB6H3W6XuYlLS0uCFJeUlEiYNjU1hb6+Ptjt\ndvT19ck4NJa7uDPphhYuXIvFIpJf1dXVsogmJycF2OMXxkay119/HXNzcyKzZzKZZFjQj3/8Y1RX\nVwNYM8rS0lIpaU5OTqKiogKZmWsq3lQ54pDjkZERBAIB6frTJCS9e9KBMoow7ojM32nompHJBU7A\nE/gwZZu7OGeD8Lz8x+fSUenz6JmpfJ4GM43EJJ0a8LMacRntNPg5jI/xOmi8xuuhI7VYLCK+A0DA\nTH6PqVQKFy9exK9+9SsRhvH7/Thy5AgsFouko/n5+fD7/ULPZqTFmStM48iLaWxslNbtnJwcVFRU\noLm5GYODg7J24/G4cCeIUegSNI+5uTn5bCR0pdNpjI+PIzc3F8ePH8fS0hIOHjwoIs18fnZ2tpx3\ns/Lz/99xW52E3+9HS0uLzDksKytDVlYWIpEIenp6PrQjsrwzOjqK3NxcUSNiTwR3eS4MOgQiy0Se\n+YUDa8QrTnCenZ2V9yROQmyCxCtNkdb4yPz8PC5fvizkGk5Mqq6uxtGjR/HFL35RctVoNIpgMCh9\nK8PDwygqKpIUSA9P+f+oe7PYOO8zzfepIotLFVkri8VdpLiJWmPF8iY73pJJ3EnbGaenJ0mjkZ6e\niwHOXJxZLubMxVycAQZzDjDAuRicMxczyKDR3ehOI+glmSS2E1mWLceStUuUSIqkuJNFslhVLC7F\npZZzQf9e/ctJn5YGfSCkAMG2zCp+9X3v/12e93mfN5VKaW5uTqdOnTJnsbW1ZcNeHKb19fVfkUsn\nCv/4xz/W2tqavvrVr6qtrU1ra2vy+/1aX1+3UuvzxCxSa3grv67DwMstBVwMwHUAn6eBk/XwmbQ0\n3c9ysxr3vS4W4fV6LbK7zozy0f07SiwX7OR6h4eH9Zd/+ZeKRCK6fv26kaDI/HgPe1d+XUZEufjG\nG2/o0qVLmp+f197enr73ve9paWlJn3zyif7Nv/k3tuv2zp07hmdApHKv0+XM4Fi5FwRBMqCJiQkL\njh9++KEGBwf1xS9+sWJUwb3
Xj/N6PFbF3/OrUCgomUyqvr5e7e3tGh0dVW1trQYGBipIQIVCwQRJ\nUQ1mPVogEFC5fDDKTZ9ZeqjjCHWb9hCIPlL+DL/QSgJcImKGw2Hl83k1NDRUREmv12syY/TJ8/m8\n0um0/b5cLqdMJlOxffyTTz7RrVu3dPv2bXV0dOipp55SKpXS1NSU/vN//s/6T//pP5nmYrlcViqV\n0pUrVzQ/P6+f/exn2t7etvX2XMf8/Lx++MMfWrrrHoJisahkMmkjy/Pz85qdnbW/W11dVTqdrmij\nVlVV6fLly3r33XeVSqUMVFtaWqrAA0jrMehMJqONjY0K/sbCwoL18/k5Ij8HgMPAfXLbtrzcn+MZ\nUULV19cbH8LNfFx6uet4XDwjn89rdXVVw8PDunz5smEOiO3AyeFZ19TUaGhoyD47kUjon//zf66n\nn35a6+vrNtn58ssvq6GhQdPT0/qv//W/amFhQdlsVv/qX/0r/Zf/8l/MXiBfuZ0dSVYG19TU6Dvf\n+Y6+9a1vKR6Pq6GhwagA2Cj3ldLLzUYBPAH/+fM4ryeaSfj9fjU2NioWi2lsbMzStomJCZPhIm11\nt143NDQoFApZC4leMroQ29vbOnTokDY2Nkzdio3NEHOY3KO2xvikh7ThYrFosyDZbNYmBMlIIPBs\nb2+bKhHqWMiZo0A9Pj6uRCKhn//852ZIu7u7unfvnlZXV20NG9uYPv74Y/u+d+/e1ezsrPx+v9rb\n2/Wzn/1M3/jGN1RXV6exsTHNzc1pampKMzMzOnnypInxnDt3TrW1tbp37568Xq9u3rypL37xi8rl\nctra2tKHH35oJJ+3337bMCGQc6ZfGxoaTOnZ6/Xq1q1bOn36tEKhkK0rWF5eNmYgcvLsmqBD4BKj\nPv30Uz3zzDOW8aXTaaXTaS0vL6uvr0/Nzc2WbdBd+nwm4gKwxWLRGI7SwwyB9uPn0+z9/X3duHFD\nP//5z41wt76+rqtXr9oEp/t8uTeHDh2yTCafz5sO5cjIiNnR4uKifvGLX9j7dnZ2jEMRDAbV1NSk\nRCKhYDCoZDJZkZ1RhiGUWy4f7Om4c+eOjh8/rmAwqNbWVo2MjGh4eFiFQsFWTaRSKRUKBb3xxhs6\ndeqU2SjOjuDx+Vb53/V6ok6iubnZ2I+xWEzz8/MaHR21jgNePRKJmOxXbW2t6UOixNPW1mbekojD\ncpR4PG6alkh5MS9BaQKTkwdEzccOSPcGo0XB+kGGaTgcyWRSVVUHG6Fv3rxpBzIYDMrv9yudTtvB\n2t7eVjwel9/vN82BcrmsixcvanZ2VqlUSm1tbVpZWdH29raNITPVevfuXRuRzufzunz5sqLRqO7d\nu6fFxUWNjIxYOpvP53Xv3j1VV1drdnZW9fX1mp2dtdVxV65cUVNTk44ePapbt27p0qVL2tnZ0Qcf\nfKDvfve7Nr14/fr1CqETyG/T09NqaGgwp3Ljxg3LQq5cuaJ8Pq9gMKjnnntOyWRSw8PDGhwcNOGU\njz76SA8ePLDnvb29rX/9r/+1JFlJ9HkiGZ2hpaUlPXjwQEtLS/rWt75lUm5u94fnt7OzY5lINpvV\nuXPn7Fk2NDTYsmlsAzCWTd+Tk5OmCEYWOzg4qK2tLd29e1ebm5tGVHL5G1x7NpvVjRs35Pf7DZCk\nVOIAf76Munv3rnw+nzo7O9XY2KgHDx4YU5NtcV/+8pdttOBLX/qSGhsbVV1dqYlB9+Q3ykkcP37c\nHnIkEtH+/r7u3r2rYDCoUCik3d1dBYNBTU1NmZ4gOwSmpqZMt4EtyS7SDoq/urqqcDhsG5G8Xq9l\nEEQxDr4LYpEiU4JIB9HJ3SgNcBSLxVQsHqgFZbNZbWxsWETL5XI6dOiQ9vf3bdksgGcmkzEwCmMJ\nBoO6fPmycTU8Ho/t4FheXtbPfvYzHTt2TPv7+5qZmdHhw4etnXrnzh1TxB4cHLRI5ff7raMzP
z+v\n5eVlGzYrFAoKhUK6f/++Wltbdf/+fQ0PDyufz2tjY8OQ8/39fXN4KysrWlhYsGi9t7eniYkJ+Xw+\n46rMzs5qZWVFQ0ND1h1IpVKmNj07O6tSqaT79+9rZmZGIyMjyuVy8vv9mpqa0vb2toaHh7W8vKyX\nXnpJPT09Zjflclm3bt3S9PS0urq6lM1m9emnn5q+KRlZc3Ozrl69qmQyqZMnT7vng6EAACAASURB\nVKq/v1+XL19WMBjUiRMn1NjYaGQ+6QD/efbZZ1UsFjUyMqJAIGA4FPhJU1OT7W2VZKUhbUVsiq4K\npSqtTcopfoaWpru8iM+BBj89Pa2WlhbLfuLxuKlm1dTU6NixY3rttdcUCAQqgGu39Uy7VXpIYX/U\n1xN1EpBPAIdWV1fV1tam1tZWTU5O2g4JFupCg66trdX29rbu3LmjRCLxK7P/LvYAK44HTXcAsgkA\nJGg0KLALeqF8xQCY13swp0/6DbV6YmJCHs+BmC1yYcXiwd5HImA+n1cikTDnhTOgZkZFmTHhrq4u\nTU1NSZL10qGEs/m6v7/f5lBu376tYrFoegWUVtvb2xWOqbW1VaVSSYuLixodHVV3d7du376tjY0N\n4/5TFl27ds2AVTI0Iq4ky1hcAHFjY0O5XE4jIyO2kb2urk537tzR3bt31dDQoJ/85Cc6fPiwzp8/\nX9Gh2dnZMeXq7u5uay+n02kNDg5qfn5eH374ocbHx/XCCy9oeHhY+/v76urq0oULF3Tt2jXV1tZq\ncHBQU1NTKpVKunbtmgYHB5VKpeTz+fTJJ5/oi1/8okVf2vA3b96U1+tVLpezWR9XRwJxIVYRnjlz\nRu+9915Fuxb1tMHBQb3yyiv64z/+Y7MH7Il9HW4Z5S7OxgZfeeUVbWxs6N69e/rpT38qj+dgAOyr\nX/2q/t2/+3cqlUqWpY6Pj2twcNAyJl48K+ye5/aoryfqJOhRk/51dnbq3r17RoACXccBzM/Pq1Ao\nGF6BWChkJw6iq7DEarTPzyHQ515fXzfyiiQbgIEfQJrJAXBl6aiXObikrFVVVdYNKRQKWlxcrJhY\nZTmLz+ezf5JqTk5OqlQqaXBwsCK1hu+B/ufa2pri8bjV/YuLi+YwC4WCNjc3VSqV9KUvfUmjo6OW\nnlK2HD9+XPX19baoeHZ2VuPj40omk7/CghwdHZXf71csFjMnCWekvr7eFikjzkt2AUM2nU6bs56d\nndXS0pJCoZBOnTqlixcvKpvNam5uTn6/3xx8Q0ODFhcXFQ6HdenSJV2/fl37+/t66qmntLCwoJGR\nEe3v7+vy5csqFova2trS2tqaxsfHlc/nTXoA8VqifX9/vyYmJjQzM6M///M/N0dfLBZt+1U0GjUG\nKfeMQ+YuLs7n8/of/+N/WECpqjqQivun//SfKpVK6ZlnnlEqlVIsFjMiFZkLpdLn7RXOBmLPvb29\nJly7vLysWCymN954Q/F4XGtraya18Omnn9oiKjIKvpcLuLtn4JHP6d/Xgf+feX366adqa2vTs88+\nK4/Ho+vXryuRSCgcDmthYcH69l1dXTbc4vf7FQwGlc1mFQqFDBlm1Tq7Bj4/2OWyDhFh4ZDzcy6t\nGPwBh4CicalUUjabNedGVHC7Hmx3LpfLRq1NJpPq6ekxkgyLj+FZFAoF2ywdi8VUXV1txhUMBm26\nlfQ0GAyaoULUcoefhoeH1dfXZ/gJE4rlctn2lpJlrK2tGeXa6/XaODKAItOQZGbgGKlUSsvLy+ro\n6DCNjmg0qgcPHhhdfWJiQuvr66akNTk5KelAU2NkZETt7e2qq6tTIpFQLpdTNps1ngrZEYeoo6ND\nd+/etWXN09PTtn8VDgoIP/tiJVlGh15qLpezrMqdZ6EjsLm5aQeJ781zRBWbAMDgFbZ07Ngxvfji\ni/L7/bp7965GRkb08ssvq7W1Vdvb2zp69KjeffddXbhww
UDLz3cn3OyCBU6pVErFYlFf+cpX9Pzz\nz5sc4ujoqD788EMFAgGNjo5qdXVVoVBIb731lgVXd3aH3/c4ryfaApUOpu7oAmxsbGhwcNA0G7xe\nry2voSUEYLW3t2ft0+bmZqvpmeikr4xmhRutedBQXTmIbisOOiwlhzvLAC4BeSmTyZjXP3TokAGy\nDJbB0YBhx7+zZYzvur29baI36+vr2traMkOGou46NcbcQfHhZ5BZzc7OGjkH7ITtZZFIxFrLMETd\n7lBPT48xLLmvEMlKpZLW1tasPbi0tGQ7Thkmcksjj8ejaDSqhoYG5fN5zc7OKpPJqL6+XrlczsqX\nz4vvbG1t2TXu7e3ZJnmeSTQatXs5MDCgWCymoaEhI0KlUilrZ25ubmp9fV03b97U6OioNjc3Kwhd\nZBtkD8ViUZFIpKJDAo+F5w7gLB2MpLe2tqqqqkoXL15UoVCwtRAej0fXrl3Tj370I/37f//vde7c\nORsTgNDljq/7fD7LKJqbm9XX16f+/n7j4mAfH3/8sSKRiGpqavTHf/zHunz5slZWVhQMBpXJZPSD\nH/xAs7Oz5hQymYz29vZs5+mjvp5oJgFVdG9vT1NTU4rH4xWAmNfr1czMjGUC8XjcQBc2NjGT7/P5\nLKvAyEjxABZLpYfbvIvFogE51IYuYYh0ziUIUdttbW3ZUlgi79ramtLptCKRiI3nHj9+XHNzc0qn\n03atiUTCdok0NDQom82agWCIzc3N8vl8CofDVl6BhIfDYWuhYdCzs7OWCnOvKKOamprU2NhYsTSn\nqqpK6XRa+/v7mpubs1oZ7KZUKml9fd0AXkmqr6+3UiebzVqGxrpEnEh1dbVyuZxGR0eVSCSsrFta\nWtLm5qa6uroUDAatPGELPOAdbUP4D+l02hY0wYVIpVLWKsbhk124i5iJvrlczlqaUJ29Xq9isVgF\naerzoB9ODnami2eRzQKux+NxdX8mLXDx4kVdvnxZNTU1am9v18WLFyXJBgxRR4ceDgDpiupIB6Ul\nzxGVtuXlZS0sLFiH7sqVK0qlUsbc/PDDD7WxsaHW1lab7fhH/+gfaWVlRTdv3tTXv/51nTt37rHO\n6RN1Egxleb1edXd3a3x8XLOzszZkg6hHW1ubpYqkhBi2JGtDzc3NWe+c3ZfgDvwsNTOOBcFSOhkY\nPbsQiMDpdNrKFIAhDB0Dpw6XZHVrS0uL1ZccOIzZ5/PZekPKBlqqAwMDlhrT//Z4PGpra7P2L0bK\nkBA0cjCb7e1t2xVJtMpkMspkMkokEpJkmEGpdLCODkJWMBg0xzk/P6+uri6rn8PhsH1Pd7kMhg/H\nYXZ21kBIdw7BZV9y7yh3IAsNDw9boHBZmGRgLFZyB+ZA8YeGhgwHyWazNvRUU1NjYsstLS0Kh8Na\nXFw0ZwIgjYITBD2cJb+fLAe6c29vr/b29mzwr6rqYBt4a2trBZCOA06lUmZHtOJhB7s8EHgbu7u7\ntmV8a2tLly5d0uHDhzU6OmoiOV6v1xTnM5mMdajW1tb04YcfWkl569Ytvfvuu491Tp+ok5iamtLx\n48dtoCaTyRgnfnx8XBsbG4rFYorH41pfXzcuAuxFHnw2m9XNmzcN+QWtDofDJhjjjgSDYXx+nBfO\nBkAYSDDXJT3kz1PPEz3BFWhvFgoFi6Z7e3vq7u5WJpOxjo10EClwRIFAQKurq+ZUstmsTcZiREih\nUSMD1qFdUFVVZVqepM75fF6NjY22UCiTycjj8SiZTFo2VFNTY7MmLCgqFosWPSORiH0P2qrlctlU\nzTm4Pp9P8/PzVgfTCq6rq1MqlbLZCbImViOQmfT29toqPTAan+9g1WI4HNbU1JQxSOHNcFi9Xq+O\nHj1qOqZnzpxRTU2NRdDV1VXLMkKhkILBoOEdlJF1dXVqb29XLBazLsT29rb29g62za+vr9v9Z0qz\nv79fHs/BHk86ZS5mV
FdXp0AgoGAwqNnZWYXDYVsLQQYI6E1ni/KTNvby8rKdGa/Xa3IKsCkpC2lz\nkslRyt2+fdtU43/84x9bBv2oryeuccnAUyQSUU9Pj81mhEIhm2C7fv26sSvBB1jU4y6hIRIDelE/\n1tXV2RJVACuiH9cAcQvPjhFIsrrWFfMgTaWFS8TjwXEwOzs71dnZqerqauNrTE5Oqrm5WX6/X4lE\nQul0WuPj43ZY/H6/1dGxWEwrKytWf4OjMGXo9XqN+ky0Y4M59HV3sIn9Jbw8Ho+VHRgS7VUyt3Q6\nbdO10WhUuVzOnBcOgs3biBoDXG5tbdkUJGVGTU2NQqGQSRJWV1cb45Ssi3QfHGd3d1dLS0vWTqVr\nlUgkDDvAEfl8PjU2NioajRpTlkwvGAzaZOby8rLq6uosU8XxzM/Pa2dnx/bUbm9vK5PJmJPBAVdX\nP1wNubu7q5aWFpvPQewlmUxaeXP27Fmtra3ZjFEikTCMiMAkyUqnYDCoUqlky6h3dnaMQEdniRkV\nHC/cjuXlZW1ubpomCp2vpaUltbW1PdY5faJOYnt7W3fv3lUmk9HIyIiKxaIymUzFHP78/Lxqamps\ncpJUziWKMI0HGuzqAEoyinQ4HFZbW5uBebW1tXr++ec1Pz+vkZERS/WIouAA0kMMBOQZIBFnQzrs\ntrAYNQ4EAurs7JTH49H09LT12dva2rSzs2MPHLCvo6NDqVRK2WzW0nWv90B8dXd3V3t7ewYiAkBm\nMhkzdknWJQFIJKoAPFLjcx9jsZhhAbFYzOY0CoWCORVqXcDimZkZq+MXFxft8KLqDLchl8vZdbkg\nNG1kuilzc3PGGSEjw4kREMCJKA9SqZS1unO5nIaGhnTmzBmTlzt9+rS11Dc2Nmw5bzabNdASDOns\n2bNqbm7W3bt3jaEI8xfBF5crQ9cLvIlsk3H0VCplax1XV1eVSCR0+vRpPXjwQNeuXTM8CtwLW+Vs\nzM3NmZ1xj+LxuC3AxiGUy2UDhXHIrmRAOBw2hmdHR4c5o0d9PVEnkc1mtbKyYq0u6YA7AVcfFBZj\nogPgAolu6kpNThYA0Ej/npvqCuLevHnTWmS0hygnyBwQ33WxDgyDayaqFovFirIIY2PbWKFQUEdH\nhzExWTc/NTWl6upqDQwMqLm5Wdvb26qpqbEo1tPTY2DZoUOH5Pf7tby8rFKpZIY8Pz8vv9+vTCaj\n1tZWy5rg+6dSKQNRwXrInihbmpqaTAvDJZZBESczcYVc0um0mpubLXo1NDQYV4P7R9azsrKipqYm\nRSIRi8y0F7s/W30AdZopXDIz2IxEc3dkmoh69+5dTU1Nqbu727ouKKsTLJgD2tvbUyQSsX2zw8PD\n6uzsVE1NjSYmJkyl3QUZAUo5zDjs+vp6ZTIZzc3NWZYZCARMTR1Kd7FYVEtLi8kd4BThiASDQSs9\n19fX1dDQoKamJpXLZS0uLmpra8uATbJK2uoEnZ2dHcXjcS0uLtqEcCAQUFNTkzKZjFZXVx/rnD5R\nJ9HT06NoNGqpLYd6bW3NJkM3Njas5elmDXhf0j536AZuPXx4vLHf77fUDkfkMjNxHHRCcBQAjtls\nVolEwiIszkuSRU3qTchD0WjUSEWAXqDW9+7ds7FwgFNJun37tg2SobbF5B+HoqqqynACF3+hpsWR\n0o1gUAiuRVtbmwnxhkIhxWIxzczMmMN4/vnnVSwWNT09bS1BliRTykgyZ0y3xOfzmaEy/yAdDFRR\nR7NMCaIc7EYiYD6ft1YxwYGfIULiRHhWsFdp/9JBYtaHstPVDCXobG1tmXOEnyIddDjAWKSH+h6w\ngOG7sICJLk11dbWampoqsj+6GOvr67ZAmM8DdAwEAjp16pRqamrMoQQCAetc0NKOx+P2TMDI0F9l\naQ/ALZkw5TlK3o/zeqJOIp/Pa3l5WVNTU6bvx+GF4bi0tGQRmYPj9qklWduNQyg9VGN
yU2uiP61C\nCCmRSKQCnMQREengJtCSpCQBFN3d3TVWHCUJkRY8g8hTLBaNOVpXV6dMJmO0bXgSkUjEyF5kJ6Dw\ngFQQnWhd0pUJhUJmMGgyUnvjUBljR9uzqqrKpmUBSMPhsKHw1NkIDkPiqq2ttW1n7nUxC9PV1WUb\nrWBRkolsbm5qcXHRHJw7qwC1fW5uzhw0ZY3f77c5m1gspvX1dcMtEFrBkSLx5vf7VS6XbQkT/A73\n90EQm5qaMpFbnj9lCc+dLhhlEcGJcpPfI8navlNTU1YOsFOmVCrZqDizG5OTkwacejweDQwM2MQy\n4jQocTNHAtt4fX3d+DV0tNzy05UcfJzXE3USq6urKhQKamxstMNPF2BnZ8fad+5oNTVhOBzW3t6e\n0Wbr6uoMGWenASAWkX9mZkaS7NBy09bX1w1TcNNKd7ScuhDtS7ACNyph6BsbG5YhoAgOWo5Bu6Ii\nKysrFk1A2CcmJkwUFefFLghmKNCvgP9BJwCnwQvQzxU1oSwqlUoGkPb29toBuXTpkra3t80hAMgy\nDg4GwOHhoJCVEY03NzfNuaPvkc1mjWlaV1en5uZm1dXVWaeFTJFskfvLM3WZn2BPYFi8hyhKtsLh\n4KDTupUOnD+b40ulkgUNSHAuY5cuxNTUlOrr643gRVeO78xBPHz4sHw+n9ra2myc3xXKfeaZZzQ2\nNmataKZ8wRiWlpZsyhf7oTtG2UgGSVZB6cV3ZbxhcXHRHMrjvJ6okyCV5MJ5kHQ7MA5AQ6IqFF/Q\neIAiAEOyDhf5pR/f19en2dlZa1O6w18w/7iJIOwYBxkNwjJkEfAlULxijp9yBqcDL4NIAP03Go1a\nyYTn7+vrs/IARD8SiSgajerWrVsmFEzmEolETDuRayZSUceylAWn19nZKUmGE1RVVVnKTMeADCke\nj6utrU11dXWampoyuXocJs4VBwStm7+HFIYGBVOda2trhl8gjAyVnO4TPIVSqWRlhyTrfkiqcOLS\nQzq1u8qRmRpJFvW3trYsqOD4sQVAXzIgOC04M+6lO9yGA6PMSaVS+ou/+AudOnXKnBlYV6lU0tTU\nlHK5nCKRiBobG9Xe3m5nQzoocQOBgJLJpAKBQMV3qqurU0dHh4aHh01UhvsPkxi8D2cBP+hxXk98\nwIvUkcg9MjKizc1Nra2tGehD27Ozs9PAMSY6oU5Tergq19Tk0K4TiYRmZmbssNfX16uxsdHAU4Rx\nOfg4hsbGRmN24pwAnYhacDZcDw6YCpmL+h0AjgyI1iJtOyjErmpWKpVSTU2Nzp8/r9raWh0+fFi7\nu7vq6ekxvIHrw4AwbnAVGKlwAwDxMpmMIpGIzp8/b5EZh7S0tKTt7W2LcIFAQJFIRLOzs+YwibLR\naFTpdFq9vb1mnIDFZHibm5s6cuSIamtrjRfilnt0Gz7Pw4DL4PF4zJGDN2UyGdPqoFQku+Hai8Wi\nLcchCKBxwZAd2cnKyoppgJDiw8mA5k95AohJuz0YDJp9eTwemzWhDc19r6urMyFoShs6PysrKza4\nCCmurq5Ox48fVy6X08LCgvL5vKampqx8IXPjO2Bf/L7NzU1JsmD6OK8nOruB19/a2lI4HNaXv/xl\noyTz9xhIPp+3/YkDAwPq6+tTd3e3jhw5YrP2HAY3RUfaDJSaVFd62Bp1t3PxXiILjErp4fzH7u6u\n7QHFs1OTu61QVw+CyOPqI7otLEBJtA3ACyKRiAnq5PN59fT0WFeCg5hMJi1bcMeNi8WiWltbDRT1\ner0m5wef5NdJ84VCIeMtgLzz2ZQDRFAwC7gjoVDIMAE6ExjrwMCAtV1bW1ttKCyXy2ln52DxM++j\n++GOTZNputgKU5yk2e5wVKl0IAzDBC6dDkkVToTnRrYmHXB42M8pyTIiSjAAWo/nYMcm2hRgSeAS\ntbW1CofDBoCGQiH5/X5ra8PN4DxsbGyYU8HhAAQ
zFcvv6ejoqMgKqqqq7Nm5U8pkRnSLOHeP+nri\nPAm8/uzsrLXTJNlN8nq99uBisZgNPA0NDenWrVsaHR01bURadu5MPnP6a2trCoVCam5utt9BtsGB\nBlwjErj1LUQsDNRdmuJGDp/PZ4w88BDKHfAAl8JM5Ea0pVAoqLOz09LexcVFI3CFw2E1NzfbtULF\nJmOifUubEDCUmpyOC/Mf6+vrJt66sLCgqqoqEzThUAF8NjQ06NVXXzV5wc8PPoEBdHZ2KpVKVSyn\n3ds72BPR2dlpYFowGLTsgClRnAuEKhiEULpJ0SlDCQRknGQJOFjakJQbZHSUP+BCNTU1FggoUV0C\nHQeRNidShdjI3t6eOVXpIJrn83m7J0wNT05Oqre31xwaziMWi5mTRmcEkWRaxNJB0JydnTUHkEwm\nzdYow2GFMoHLgBddOwhYj/P6O52Ex+OplfShpJrPfv6H5XL5f/d4PBFJP5B0SNK0pN8tl8vrn73n\n30r6Q0kFSf9ruVx+79d9NpG2XC6b/gEtmrffflvz8/O6du2aDh06pGAwqNOnT0uSksmkksmkvvWt\nb+n27dv64IMPzOvX1tYqEAgYCETLMR6PK5VKmcHRy+ZhAzaiH8EhcLMSpj+p71wgMJvNWj8eJ4AR\nIt0vyaI51wetHKAKEg7ZDFOvpK4dHR2ampoyopRr+Bg5HR0GwJjF2N3dVWdnp40o5/N5NTc3G0ej\nqqpK0WhUCwsLhtIDPC4uLuqdd97R9773PWOKuspaYEAMOxWLRQNLMdCNjQ1br1hXV2fqY8zS4MRc\nXGN5edkODO3k1tZWK0PpskiV28nI4Mg8yIBoPTJ/gaPDDuiQ4RzcNju4T7FYVHd3t2ZmZowYBggL\nZwfb5vfD9AyHw+rp6dHm5qba29vV1NSkubk5eTweu5cu3kR5AC5BaeNK7FFa0AQg+4DP4g6muZjb\no77+zp8ul8u7kl4tl8tPSfqCpDc8Hs8zkv43Sb8ol8uDkt6X9G8/M9Kjkn5X0pCkNyT9P56/xXXR\ns2Z2gs4ABjY0NKShoSEdO3ZMR48etRvR0tKi6upq3b9/31o9tPhoU3IjSDlpK5KOgkdwyGmftbS0\nWDeC9JWSh4NPO45DWFdXZ3MYZAx0N4rFg01fRGx620QvOjA9PT0KhUK2aRzNAoyBmZI7d+4oFovZ\nfhCcLBOUPp9PkUjEMBgcU21trXp6elQsFvXaa6/pX/7Lf6kTJ06ov79fp0+f1re//W2dPHlSPp9P\nra2tmpiY0M7Ojrq7u00o5+TJk5JkdXhbW5u15MjWEKNtbW1VLpczrQXpYK2jJCvvONgbGxtaW1sz\nx8Z3JjtqbGxUOByWJNv5SjlB+RaPx43Kjy3we3GAsVhMLS0t5vhbWlpMawGugcfjsQlLgkIgELBy\ntVwu69ChQxaU3O4OpUkulzM7AsOKxWJGjEJW8caNG4bThMNh9ff3q7u72yaNk8mkKYQBYELdb2ho\nqChN4/G4cUUymYyVcLTiKePoSD3O65HKjXK5vP3Zv9Z+9p6ypLckvfzZ3/+RpA904DjelPTn5XK5\nIGna4/GMS3pG0uXPfy43mDSRh/kP/+E/NObbl7/8ZavzFhYWDOA5ceKERkZGVFVVZbJki4uLFpFI\n80nL6BCgXs2DBSgjakNCAagiylAakN675YPrMPg5DLyqqsrqYJB0jNSd/cjlcgaQIuPvGjCEsoWF\nBdXX1+sb3/iG3n33XSvZ6AbQMXG7LTApX3rpJb333nv65JNPNDk5qXg8rmPHjml8fNwcp8fj0crK\niuLxuAGH0kGb8OzZs/L7/TbsNDY2ZvRoF2D+5je/qcXFRc3NzWlsbMyuHXIaRsuhJAsiaOzs7Bgh\nDRyALA0ynCRjNPKsvV6vWlpajLUIGWl1ddWcvcsZcdmhPIu1tbWKMgFpwt/7vd/Tz372M6PB8115\nTr/927+t+/f
va3Jy0jgsOI6WlhYdP37cCHS9vb1qa2vT2NiYVlZWlEgkjOMA9kT5hGYp3Qvu3dLS\nUoUAE3oYlEkEXf4JFwbq/+O8HslJeDwer6Rrknol/d/lcvmKx+NJlMvlZUkql8tJj8fT/NmPt0v6\nxHn7wmd/9ysvNAl2d3dNOowoxKZkjJA+cCQSsSGV3t5ezc7Oqq2tTXNzc0adJsWmN08KyZgy7UDS\nS0nWssLL4iBwCEQs9z0YE0QWWlTugFJVVZU5gObmZtM2AHDb399XNBpVf3+/7XwE9a+vr1d/f7/u\n3r1rRKqamhqdPn1a+/v7RqjJZDKanp62Vi0jy3wHUs61tTUj6vT09MjnOxCuff/9922j++rqqhGt\nWHYMwPinf/qnmp+f11NPPaVoNKqhoSFjOdLZWVhY0H//7/9da2trFcxPHDfZEUzLpaUlNTc3a3Jy\n0lB/KPSojLW2tlaAukxySrJWLJ0FqOzgKZQ24AKurobH49Hq6qoRmQCgwak+s30Fg0HrVDHdCQZB\npvD+++9XTPhCaOrq6tLRo0etO+d2ORobG7W6umqaosgA0gZmehmMpKGhQaurq2Z/sF4/O4OWlbv/\nDSuV2Q8ypsd5PWomUZL0lMfjCUr6K4/Hc0wH2UTFjz3Wb5Yq0GDqWmpF+vU4kkQioXw+rx/+8If6\nwhe+oK6uLkOjAfPOnTtnRBa4C0QTHI4rqkIp4mYG3GSXtkvUgZ+BEeGUyDbIjBj6QUcA59Dc3FzB\npaBuhedw9OhReb0H0nOpVEqBQMAOTyAQUDQa1dramoaHh5XL5XTs2DEDuVCK2tjY0KFDh0yrkszr\nn/yTf1JxcNjCXigU9Pbbb9v069ramrq7u221AYcGxL6+vl5zc3MV1wRXpVgs6g//8A81Pz+vtrY2\n3bhxQy0tLTbejXgMpRCRD5DU1QyBUxAKhUwTlExNOigj2cxOF8Xn85lSlMstISNC0wHw2ufzmfYF\nqxirq6sr8BwEa4eHh239H3R4dpzcunXL8Ara8a76emdnp4GdXq9XV65csXISjQ8c1O7uro21Qy+H\nEUrWS1YB5wa6OmPrZEAuCQxgFiD5cV6P1d0ol8s5j8fzgaSvSVomm/B4PC2SVj77sQVJnc7bOj77\nu195Ef0KhYMFI/X19eYo4vG4ksmkJicnlc1mdeHCBZtQpJaEI18qlewBk5q7/XnYgoBIZAWk84CU\nZBwuBgFCT0osyYA0tyYFJOKz3Iwmn8+bRB9DT4CmKC81NTVJkrEv29raTMK+v79f8/Pz6ujoUEdH\nh3Z3d3X06FH7/tevX7ea88yZM/r93/99ff/739dLL72k2dlZHTlyxJS7KIf29/d16NAhazOOjY2p\nr69Pg4ODWl9f1/LystW1yNGfPn1a6XRa165d097ens0gEHXL5bLOnz9vslm7+AAAIABJREFUXaJg\nMKi5uTlzQPPz83Zw7t27ZyQq6QDE5p7DkYDvAtlof3/fdpCEQiEbgmKrO8rjXq9XTz/9tGmRjI2N\nWURtbGxUMpk0hemuri6bGAUHaWpqsm4Pg2fz8/Oqr6/XwMCArQgAP6PsIGOC/ckhn56e1pEjR0xZ\niu89NDQkr9drknRuJ4igRIlMpusOloHHfb6UikQiJm2I7bqTuK4tP8rrUbobTZL2y+XyusfjqZf0\nFUn/h6QfSfoDSf+npO9J+pvP3vIjSX/q8Xj+Lx2UGX2SPv11n3348GFDpPF6pVJJExMT6urqMkQ/\nmUzazfD7/RobG7MUFIASrT9SXIbByAQoO4juRDD+222XuYQot03GkM5n96UiFeRaIIXt7e0ZJ4EO\nhDvvAHfhyJEjevrppzUwMKBisWgTm5IMjLx+/bqSyaSam5ttU3Rra6symYypPzFq3t3drVKppNde\ne03xeNwcKmAs5Qc0apfGC833xz/+sc1XkC0dOnTIdBqY4SClPXbsmO7cuWMl3
Xe/+135/X7dunXL\nQGicck1Njebm5jQwMKC1tTXDhfr7+23N47Fjx1RfX6+xsTG9+OKLKpVKGhsbM6Ic4HQ0GrUSb3V1\n1RSoWlpa9Nxzz9lAEyxG8A+yPPAeysLR0VHTv3zttdf00UcfaWNjw6YrGRHwer169dVXNTk5afZL\nq9q1mY2NDdP0dMWZuU/Ly8vy+/2KRCIaGBjQ7OysPSPuFYGHrBequCS7flr5qJXBxQCnorVNqVlb\nW6v5+flHdhKP0gtplXTe4/Hc1AH4+G65XP7pZ87hKx6PZ0zS6zpwHCqXy/ck/YWke5J+Kul/KfOt\nPvciPeMQQt29evWqfvCDH2h6elqHDx+2WlI64MVvbm7q/PnzllK6cxiUMKT81dXVVlPCNIPc4z6E\nUChkD9w1KOpoHAKfCfOObAE6LpkFk6eQryinYDsyI/Dyyy/r9OnT1u92uRlQ1C9evKj5+XktLi5a\nBwT+SCqVstba7/zO7+iVV17R7u6uurq6KshfCNAgogJYBybU3d1tGhevv/66vvnNb9p1XLt2Te+/\n/76VSvv7+1pYWJDf71d9fb1Onz4tv9+vjo4OS7fJlk6fPq2Ojg6dPHlS3/nOdzQ0NKTf+q3fUrFY\ntOGrwcFBvfrqq6qvr9eJEyf0+uuvq729Xe3t7RoaGqoQjHWXNr366qumru3xHEj7tbW1aWhoyK6B\nUoj6HICVUg8nWC4fbJevqalRJpPRxYsXDcN6//331dbWZqI5tbW18vv9+spXvqKnnnrKBuKY2ATD\namho0Pr6uo2vAzwDdi4vL+vevXsql8vKZDI2nAWwCkhLxgt7FKfvEsPIkhkY29nZscE5FLiYdfl7\nBy7L5fIdSad/zd+nJX35b3nPf5T0H/+uz8Zjut2GaDRq9RcalzDmINDcu3fPIvJLL71kEungETge\nUjXaSpQi9LEpK6hr3TkAHA68CmZHJNlhwQngOMBXYrGYCZS4Enn8IetYXV3VyMiIKWqTujPSjkT9\nd7/7XZ0/f14vv/yytQJhiLa2tioWi9mCHKjXlEIwLd0OAfcRHctisWiRr1AoqK2tTR9//LFFTaI3\nTq67u1tTU1M2bNbd3a2hoSH93u/9nuEX5XJZX/ziF1UoFHT48GHdunVLpVJJR44c0f7+vgYGBtTY\n2KhnnnlG4XBYw8PDeuutt2xxD6QsgOCNjQ0bpuIelkolHTp0SC+++KJGRkZ07do1nThxQsePH7dA\nAfAK7oWd0ZEhcKCn0dHRoWQyqaamJgUCAR0+fFiZTEadnZ02KsDiHQLIzMyM/TulrtfrtTEConcq\nlbJr4jrAw7a2ttTZ2an5+XkDz2n3cv1kjNicpIrPgQ9Bxlcul7W1tWVkM+ZRYHg+6uuJMi75sjgK\n5hiY8FxaWjJSDJRS6t+dnR09ePBATU1N2t3dtTVr5XK5AhT0er02nFRdXa2ZmZmKNXzMObgdFkoK\nWmREBnfcm1TbxSBwZExOktVg1HxnmHxkSBzGXC5nJCXqUZfqjUQ7hC8OdlNTk/L5vMbHx81RYSBk\nSu3t7cbGA82HGYhBuQ5lampKkUhEL7zwgk6dOiWfz2elQVtbm82FnD171lqvMzMzpmgOzlRdXa2x\nsTG7N+70abFY1M2bN20BzcDAgG7fvm3EpOeff17SgRbo008/bSsKARN3d3c1ODioq1evmkMMBALW\n8YB4BAgqqeJ+uqI68CFqamp09uxZtbe3686dO1pZWdHJkydtTiYej9ui45WVFT148EC5XE5ra2uW\naTCsVigUtL6+rs7OTutaUebRvaH9jU2Fw2Hr5pFpfeELX9D29rbu379vZD63ZNrY2LAA42a5TAXD\nCAYYhcPzqK8n6iQkWe1LpAfUgaknPUz9aV+6U6OZTMbSR95PZOS9VVVVevHFFxUMBjU9PW3dCjog\npJV8Lo6IToYbIXAqPBh+xp3hwOBIEWGCk
uYRvaUDR/T666/bfAoAKj9LGk+Uppwhm8HoBgYGbPyZ\ng0DfHRo5jMBCoaClpSUrSWpra62+JYP5+te/rmQyaTJ68Xjchs0oKV599VX77t/85jdt0Axq9dLS\nknK5nClHISE4MDBgrUQXyJ2YmLC9pGRvkLVIw4nuIyMjpn+RTCY1MDBgW74hE9XU1Biln6lbMkfs\nC34BA3Hd3d36+OOPTWz3H/yDf6D+/n4jZu3v75sOSjKZ1CeffGJMyEwmY86flj3ZHY7J6/Xa82Fl\n5erqqnE+6Fww2dzU1KQTJ07YzEYul9PY2JjhX3Bv+B6cC8BLd9aGgUgy4kd9PVEn4bYc+e9gMGi1\nMsgxg04cPHrNm5ubGh8fNzYbEZsalAi/sLCgy5cva21tzYySz4fARPrKIXMnFwHHoFzzXh6aqyvB\nIZYebgIDGAVI4/35fF6//OUvlc1m9Z3vfMfSf6IEzgQ8A4DX6/VaN4TfQb8ejKZUKlnEuHnzZgWe\n4vf71d3dbZoTUJ+lhyI5wWDQRFjgJEQiEbumnp4em7zld9EN4kBIsq1rOGVKR/r2sVhMNTU1Ghoa\nMieLYyWzBPQdHx+3yVMW7XR1dWlgYECJREJf+tKXrKx5+eWXdeTIEbOxVCqlRCKh48ePG/2+XC5r\nenpaqVRKoVBIvb291m15+eWXVf5suI0Ol/RwVQI2iqIXvBRXNAl7hPbP/UJMGOdZKpWsZdrU1KTV\n1VWtrq4alR52ZqlU0q1bt6z05RrYIIetbG1tKZ1O28Ik2q9kML9R5QYHms4F8xt4Q0kGCPJQSYuJ\ntPv7+5ZNYEwQakj3d3d39cEHHyibzVprDf4E2AEtSQ45LSfahXDriUKxWMxqfyKSe82UR0Rn17G4\neMj29rbu3bunBw8eVFBrIUJhYKSWZD8uXwOw0wVwXXWkkydPGqM0k8mYQyXDIHvCAUDdJUW9cOGC\n6T8EAgF1dXUZa5Rr4jMo8yRZKw+AGS1MMop8Pq/Tp0+b4Ay8GIA6nhUaEMeOHdPg4KCSyaS6u7u1\nsbGh4eFhJRIJPfXUU7aEqaGhwaIvg1YDAwOmM4rDogVNi/ru3bum6LW6uqr33ntPPp9PHR0dikaj\nWllZ0fLyspaWliyVdwliZHVkg9guz4fnjk6Fa28ez8HWeBSoXE2STCaj27dvS3o4ZgB3gmeEhqlb\nYlG2EmRcx/U4L8/f0nj4//3l8XjKAFsM13g8HgUCAUvNiZqNjY2WgpEaf35yj4OOeCgPz6Ubo+/4\necPGubjr32lToTcAvwDnAp0XfgKfD+AJqizJnAjlDJ9DloFi09GjR/Xmm2/awQdklR7OX8Cyc4ej\n+NmVlRVD3plNocvDyDR4gDsYJcl4BC62kslkdO7cOU1MTFivnZS1UCiot7fXkH13ZeHRo0etfIPo\nRhkJrVk6KKVOnjypUqmklZUVc25gQBwwd/cmkRlS1cjIiO7evWvBoampSRsbG1pdXTXgNBKJaHl5\nWaOjowqHw1a7E7UpuSDo7e3t6cKFC1pfXzcHxXdGxcwtG8h8OIAEGJwHpQ/AqbtXhBKQe092AZgd\ni8XU1dVlvBLawNhnPB7X2bNn9c4776hYPNgmPz09bQI4rJoEk6EUvHz5ssrl8iMNcTzRTIKIvLKy\nYlGMqMuwFIeTm8OLmp5/4lH5WReYgtLqjjZLD7UsaWsBcLFslr40jgwsA4NnGq9UKlnXQHo4jcih\nhINPS5VyB7CW1HFyclJzc3Pq7u62Nhag2u7urpLJpMLhsOlHoo0RjUZNeDUWi2lzc9PUnCgxEOlF\n6owSiUwJh+bz+bSysqLOzk5ls1m9/vrr6u3ttYlQ6t/5+Xn19/dbycXKQghPW1tbevHFF62LsLCw\noKamJhvC2t/fVywW009/+lPdv39fkvTqq69aB0WSOUUyE5c1iFOrq6vT0NCQpe2Mwnd0dJh4D+1B\n+BcwL
30+n3Ve2tratLq6qq2tLdtbmsvljAMD3sM9g7VLq5XIzncj6JHNQTDDIRDV2ZgGyQ/siNIT\nDKK2traihUnrem1tzUB7eBEnT57UgwcPzMnCkUkkEnrw4MFjYxJPNJM4efKk1ei06CRZSwdgDsCp\nr6/PVtsRlRsbG22rFDUZ7S7S38bGRgMGyTY4qEi1uXgCHp2uCp8lPeTkSzIDousCqEnNSisTo6b3\nTbYyNDSkUCik1tZWbW1taWBgwEoMhElIi110HmMpfzaExFgxVF4MELquO0dCd6VcLquxsbFiRLum\npsYyMiIe9wjD52doM1OyIIXvlj60lmESLi4umnitezCi0ajGx8fl8Xisk7K/v29OxyW6kQFls1lV\nVVXp6tWrWl5eVn9/v1566SUDmqnVWQdIRO/o6NDKykoFKxT6NozYhoYGLSws6Ny5cybMg4NHxbtY\nLJpCuduSdRmQdHPo/mDXjY2NVl6TSQB4hkIhVVdXGxdmZWXFlhNRepL9IgIUDoetJe7xeIxsxqAi\nmTr3vlAo6K/+6q9+MzIJ+APV1dWGE2DQDLu4rcT5+XklEgkTdKEVVlVVpdOnT+vGjRva2dlRJBKx\nGpi0t7q6Wk899ZQ+/vhjmzCk9uNgYfAcKLy1q10oPdTmxJhxKrSueOj8DAaDQxocHLQ1ho2NjRoZ\nGdE3vvENBQIBI0BhYNT9REAyHhfYdceryWhwSgCJLkHLLWF4kTrzOThFgGK3B09gAccpl8vWhoXR\nSRnEffB6D/a9kg3ghNPptM6dO2fZCyWW3+83FXG6Whi7JNsglslktLy8rHg8btR8MCo4H5IMFNze\n3lZXV5eBeVwLz56R/0AgoNu3b2tubs7akhxCsCBAVYIBpTC2xP3hfvDvlJrc63K5bDgRAjmwdnlG\nyPMR+AA1ASQJVmS7TMNyDtgFw9891jn9nzrdf48vDoEbyVtaWuxwkW7iUPb29tTT0yOPx6OhoSHd\nuHFDqVRKr7/+uqanp1UoHGzSnpqashQNw0RshYwDgwfs83g8dsgweFbCcZ1EC4wwFotpfn5ewWDQ\nDJR6kbTUjeSbm5u6f/++IdQNDQ167rnnbMcIDx/cBeCJbAaAEfUmsBVJhrG4rVIOO5mHe9Awdgwf\nvIMDSebhDgThqABXuS5KOepuPpNMggOJEwkEAnZ/z5w5o5/85Cfq7u62naQu4MfB4mCCH1Bz48DX\n19fV0tKi5uZmsxlk5siCcJAcVJeMJKnCETz77LMVk7ssLYL4RouezIu2NG11V37PJQ2S1RJs+IMu\nByUCIkRgMg0NDaaMxX11eT7Mo/A5lPO5XE7RaFQtLS3mmB/n9cS7G9TUbvQ5ceKELl26pOrqaiOc\nsJSko6PDUjrSxnw+r/Pnz1svfHp62owVZBtCy9zcnEVhBD8aGhrk8/nU19dnwz8w42DkQYfFgGiV\nFYtFJRIJwznIGjgw7rAXB5Vso6enR7/1W79lmQ/tPxwQYBcRhSlPl/zltkA5SLwXKi7gLd+bJcdu\neUW7j/TeFQQCaGW2BOMm0sNp4DtIskPHixKQyAvm5Kp5o5NAqs2hczsmVVVVtvqRUu3b3/62stms\nrl+/rubmZg0ODkqSkc4oNymrcEAu10WSlYHsQwFLIa1n4Q/2CgGKe0V5xWd+vm3OZ5OtEpwk2XMK\nhULa39/X0tKSrYmIRCKWWZDxSQ9l+7DTnp4eTU1Nqa6uzvbHZjIZK7vW19f1zDPP/GY5CVJZIhVR\nA9VmBmsYzKqtPVAFxqhIv1h0wgALPWIUmBn8SaVS5p0xdDYsk5739fVpenraugMcDtJtJN4ZwNnd\n3a1YJkvkJXXHGUmy7snu7q7a29v1j//xP7Z2FiAc2ZOrp4EBVlVV2QFzo6PH47EuC9oRRCuyDDeL\n4D1ERXcalhqWMoHDSVcnnU6bU3W/qxv93UwCXESSqUXhwCmTIFfBCUB
hjGBAJ4muQmdnp0XvlpYW\ntbe32/2h84GSNW1CeC9uliTJMkb+FItFzc3NaXZ2VufOnZMkww54vpQDZGI8XxytS/0nYGBLtbW1\ntneFYECmSfkBp8UtN1pbW7W8vGwSB5wVSh1aq3V1dert7bVMDwEcHMfdu3d19OjRxzqnT9RJuAAY\n6TkGHovFbPUaBrG1taVPP/3USCl+v18vvPCCyuWyJicnrUVFTQvrLZlMmrMhkkiqODDFYtFUnUgn\nDx06ZKUGNSfUWVqLGLxLIKIUoMsBiEk2Ado8NTWlEydOSJKVXCDeGxsbSiQS9tlkCu7EKVGf0olD\nQWnGfAlRCAdDquxmSqTdLi5DliHJAGFkAOlAubgH3wHMiCxEkmEikux70vGpq6tTd3d3xfwLDoVu\nByWBO8XK5+CU+M68KB14D9gTgcJ1hNyjiYkJffzxx1pbW6soBXBS+/v7pvfgZjk4H6/Xa6sZ+Hm6\nKW7Lm2uF2AS+haQA2qeSLDgAWLqOm39HS4NhPu6DW47s7OzoypUrWlj4tcoNf+vriWcSeHIMoqqq\nSmfOnDFm4MrKivr6+hSLxTQ+Pm6RD62DBw8eKJ1OKxwOq7e3V1NTU9rf37dVen6/3x4kC1r4XaTn\nAD7Ly8u2mHZnZ0fDw8P6whe+oFQqpfb2dqVSqYroQHZBZ4DIjSFgxKFQyIyO9y0vL+vP/uzPNDk5\naYDe0tKSEomE/H6/pqam9Pzzz5tiOCUD0YUyyx15JqXGGWJQ/F6cgNuxcYFRsitwDQ4oXSE+z0Xu\n3XkaCEaAbIj0kGGQcbnCMa7TIxjwe8g0XM4Iz4zPYPaEvwM45GCBG/C9uHbuFfbHQU+n0xUiym6J\nQn3vDvtJssPLs4ZuzvcCKKSM4z5XVVUZDuUSyMrlsmXLLuELRyxJR44c0VtvvaV0Oq3FxUXduXNH\nOzs7Blhev37dHCjXAADPJrtHfT3RFuiJEycstd3e3raH+txzz+nQoUO6ceOGJOnMmTOqqqrS+fPn\nDYkmSuGVBwcHtbS0ZJ6YMgE0nyhCGodXJwqRorKzgEPCnAZgJuIgbr1OpCHC8k+MtqmpScvLy5JU\nwZMgmgJOkpY+/fTTymazisViOn36tKWqOAQ6Km6Hxp03IWrgAGib8jlEdCLy5/kjlAkYMhqLkqyl\nzHfgd7oRF9wDESGcGPcbDQ8MnqhHVuLqO1K+SA/LUxwTQYYyzqU519TUGLDK76qurrb38blkkwDY\no6Ojmp6eNocdiUQqNFLhQ8DboZxksAqZfISP6+vr1d3dbbRql89DgOIMUMKCd7nrBWjTurwP/lRV\nVZkDTaVStm2MjNst+bDtixcvPnIL9Ik6iaefftpuNjcPgPKFF17Q2tqaJiYmjJEIdZc0cWdnx3AC\nsIJyuWyOAq4A7SZqdkAjAD8ecn19ve1/YJEw0RLSEN6Y0gQnhdd2B73IeiDmYOxulC8UCmYIpJ6n\nTp2Sx+PR6OioOjs7bXiKAwa9l0iM1iX/DakHo8apgR/AS3HLIuZi+B0uWY1uxN7ennK5nJqamiqA\nP0pF7jNOn+4D2Yjb5ZBkjEOcARgJzFuwI0A9rs3r9Wptbc3mKgD0pIezIoDA2PfW1pbW1tZMudzF\nfIjalHXl8sH+C1SqEdBBaYrITHAplQ60QeLxuJUt9fX1qqmpsUXYq6urlrVIDwFumKLuMma3PITh\nCdBMaYdTB5/gvZKsBcs9A+SllC0UCvroo49+M3gSRDsOvavpcPnyZfOYSIoRYUgNMWy3nUgLdHl5\n2QycsoCbRFShDuRGcy1EX6TbJdnBJ+13W3z8k+viUAK0ra6uVtByMWBUnnK5nOEeknT9+nW1tbUZ\ncewv//IvFQqF5PP5zBCJ8IFAQPF43JwHHQMk1UKhkGprD/ZmwCCMRqNKpVKGoGezWVVXV6u9vd3a\nZ7AySds7OjqMyIPiUjqdttFttCt
bW1uN+o5DXltb08bGhg1z+f1+ZTIZtbS0KJvN2r5Tt/SkC4I+\nJZvScTKAiTgg9B84ZI2Njerr67PR+kwmY9ff3d2tzs5Ok86fnp62Thdg6c7OjmZnZ02AGKfv8XjM\nOVKmuQAtNuTzPdSuJKP8fECBLg+wSrbi9XrNYeJMXO6LO9WJXJ3bMYlGo/ZeAF2fz6dvfvOb+uij\njzQ6OvpY5/SJZhLHjh2rQHPp/zIPADuxUCiY7DiHlelNHhAzAdXV1RXqP5LsAQHWSQf1IRRnXk1N\nTbb4JZVK2cNEXt8tJ9w0lahMxkFmAq0YQVccCWSxxsZGA5ooY8h2UDk6c+aMcrmc4vG4HRaiLk4J\nZwEtt1Qq2QQgRpbL5YzLQTnGbktYj9zLlZUVm1LE6XZ3d9sBnp2dtaW5Lp6wubmptrY2OyDodgA6\nl0ola7NKD7sBVVUH0nGhUMjuSSQSMae5vb2tWCym6upqra6uWtR3ZeTr6+uttudZugxSMp6+vj6V\nSiVbSj0/P69AIKBgMKj9/X3dvn1bi4uLtj2d7IvgRGaDrQEEo1re2tqqQqGg9vZ2TU5OVjhMxrQp\nRXneMDfBxlwuDs+U8otMCsapS65ra2uzlQuUND6fT/39/Tp+/LiuXLmiiYkJeTweffzxx78ZmQRA\nGlHeFdKAcZdMJlUul20fAulxJpORJGt14Y0BeTjAeFMivvQQyXfFSzmcMzMztoxme3vbiDQ8DKTA\nOKBkQ0RyohXOgAPBztFDhw5ZabW2tlaRCTAPQNnT3d1tv4/FvEivU7dSXjGNynWm02nrGPB7JNlh\nSqVSFkkxTHZaAvy68wrHjh1TIpFQLpfTxsaGFhcXLa1Fdg3HFovFTCuBzwAfWl9fV1XVwc5KDlA+\nn6/Q3SiVSobhSAeYRzabrVAK456CQwBoUn4Gg0G1tbUZeMrBI2CwP7a5udmex+bmpsbGxpTP501X\n1eW/RKNRG+IDG2FuZnV11UDB+vp6dXZ2KhwOV3BfAB7p4PFPMhSXCAdD0nUC9fX1+v3f/3319fXp\n0qVL2t3d1bFjx7SwsKBLly4ZHkG5g23fv3/fFiBR2jzO64m3QLkJpFMQUCSpq6vLxD2pAVkALEnP\nPvusdTdoMxGtudEAl2AY9fX1Nn/hgkIuE5E6FEwD7gB1NSkhyDkpO+mw286klieCVlVVmURaOBw2\nDkd3d7cxRru7u21bOB0Xrhm8hAhJCYUBABwiJR+NRo3KXiwWrUdPZPd4PGZc2WzWIqHbvdjY2LAs\nAQIV2QbYxcbGhuLxuAG4vb29Wl9f1+rqqmZnZ81xt7S02PPHQSCOg4oSz9EFTyGGlUol061sbm42\nseHm5mZtbGwoHA4rGAyqpaVFsVjMIi94EM+FQ+dOG9+6dUte74H+CIAy5RUZbXNzs6LRqHZ3d+06\n5ubm1NDQYFlTbW2tZmZmKnbdSgddEPggOA7KG5eYhfOgC9fX12cdPAb7Ojo6FIvFVFtbq4mJCZ09\ne1atra36/ve/byRDOBSRSEQbGxuKRqPKZrM6fvy4fvKTnzzyOX3itGzKB+mhjJjX69X09LS1vTBi\n2pOlUkmZTEYffPCBvXd//2BZDSSSRCJhYKSb3rkMOa/XaxgEDohroAwhtV9eXq6YjXAX8eJMcDQM\nIAEegZXU1NQYcu4Oh+3u7qqjo0N7ewer89544w07+Dxolx7tHmCX1QneQiRNJBJKp9Nqb2+3Uo5o\nzRIeMJn6+npT6YYbQRnkTtAiw1YsHuzScO8vztnn81knZXFxUR0dHUb0qaurU0tLi133zMyMtra2\njEcCKMzAE88rHA5reXm5Yl4CR88cSG9vr4LBoCH+lEJEZ565Sx/HeRaLRTtElIr8nEvNf/vtt/Xi\niy/qnXfe0V//9V9XlDK0Luk0uISyQCBgDrK29kCtOp1OW5nBTAXXyDU3NDTomWee0QcffKBkMqnv\
nf//7am9vl8/nsyx7fn5eg4ODikajppvB4h/uH7tL6NA9zuuJlxuUAAA6HKxQKKT5+XnraWN829vb\nSqfTthiFssPjOZh+42ERtYnAsVjMcAaiFZiGJAMf3TkBZiiy2ax8Pp9pS0gP+foYAYdIksn4k+6R\nJrsHCKAOLsLY2Jg2NzfV2tqq48ePK5fL6erVq4ZFoIXBfQNZZ6qvVCpVaFW2tbWpqalJfX19Rs6h\nHOG7pFIp44Cgd9HS0mIgZDQald/vV1NTk9ra2mz0nT+S7FBxyBKJhAnk+v1+9fT0GMGts7OzYis3\nm80oEWtra02xis5RuXywmAmWK/gB28g47FDp3SlblxciPZQTcAlkkixYtLa2amhoSBcuXNDGxoZt\neaMTViwWde3aNcViMVv8u7i4aEIxBBqyldraA2Hd8fFxs0f3WtwWrMtVAZ9DF+Ov//qvLcNgQG11\ndVVf/epXtby8rEAgoPb2di0uLqq/v1+ZTEYLCws6fPiwOjs7VSwWNTo6aq3sy5d/ZePm/+friToJ\nUlCiKmAQdS0Hi9SZm1Qul21aj9Ye0dtl+1FOQFmldcQDlFRhRO48A6PopIM1NTW2jIZshw4CrUd6\n9gB00WjUUsXh4WF1dXXpq1/9qhoaGvTgwQPbG+mK2X7lK19RKBSe4aPUAAAgAElEQVSS1+vV6dOn\nNTExocXFRYuQpLR812AwWMHfqK+vVzweVyKRUGfnwY4kF2ylpKuurrYyTDqYA2hvb6/ABnDQ0WjU\noroku7+g6WQt4XBYsVjMMplgMKgHDx7Y2DuLlXCgZAXI+0HFZi6kqqpKiURCpdKBKjZMVK/Xa4I3\nYDmI7oI3MXJNm9ttC5JNYCsuE7Knp8f2ZDBs5fIbFhYW5PP5dPz4ce3s7OiHP/yhcR6am5u1trZm\nkdrn85mwzr1795TJZFRTU6PDhw9bSUPpQ7lMQILXI0nPP/+8rly5YuVzuVzWW2+9pd3dXTU1NenI\nkSNqbm62sfIjR46ourpavb29OnnypK3GvHz5sm7fvm3aIY/6euKMS9JQ6Mp7e3saHBy0yB4KhXTq\n1Clls1lNTk5qcnLSPOIzzzyj69evK51O67d/+7d14cIFm31wdfyoJ3EApMhoHVKm4LBaWlq0vLxs\nWQXDSNCIJRnQCjhHikzPGln9X/7yl/beTCaj0dFRfe1rX5PX61VHR4fm5+etfuzr61NHR0cFUSaX\ny1VkXKTHRN2lpSXt7e0ZyAmQFovFbIWhyylhQMhVZ+Je0ypzDxtRlxdOGgccDAZtroZePwc/l8vZ\n9cM4ZUAKx8YBbWpqUiqVsq4PUZ7O1uLiooF9ALsAkDj/qqoqa4u6nAxswZ0poc3o6mTs7e3p+PHj\nSqVSho/QRQBczefz+tGPfqRXX33VeDnYLw6IQCJJw8PDBo5KBwpXY2NjJlaD7eEYwONooQPalssH\ne0Hm5+d17949FYtFvfLKK2pubjbwHP5HS0uLjh49anyh4eFhpdNpA+PBkR719USdRCwW087Ozq/U\n3I2Njero6FA2m9WDBw905coVeTwHIijHjx/X6dOnNTc3p2QyqZWVFdXV1dmyHkAm0juiPg6Bupx2\nU7FYNMMnRWbPxOzsrAm81tTUmFo0kRbvD9MOUDAcDmtubs6ozKSz6+vr+vTTT22oqVAoGNjX1NRk\nezFczQbKjXw+r0OHDhnnIJFIGAefbKC9vd1S/Hg8bi1Kl6EHUEdLju/P7wZohYYcDAb19NNP28El\n++jp6dHc3JwSiYQ6OjpsI1ljY6Md4P39fdsFWldXZ45LeqiSTtmCejYlmftZpN8rKyu2gg+HB3gK\n8c1dbuMCgO58h/RwVgY2KH/8fr+eeuop5XI5DQ8PG1BM12d3d1fXr1/X5uamrTDEQQAe0w4H2B4Z\nGTEBI3fYCwxDkjFmkQMoFApmr1evXlVnZ6e2t7d15swZtbS06
MSJE1pfX9eFCxfk8/m0urqq7u5u\nyzTr6uqUTCaVTqd1584dlctlvfLKKwqFQlaiP+rriZcb1dXVJvwiHXjQv/mbv1FjY6M6Ozt14sQJ\nDQ0N6cSJEwYOXrhwQR0dHQqFQpqcnFQqlVImk7ED7YJGpIk4D8oLuh/Uva5mAu03sgRKFiKSOzxF\nWSPJOiLME6RSqQqaNoIgP/nJT/Txxx/r7bffttkMorL0EHPweg92ggYCASPmlMtlHTlyROl02kBS\nSp1yuayWlhZbkcjkoiTL0siiuKZCoaCGhoaKWprdGSsrK2a8RGamYxOJhKXHbNlG4IWDGIlE7P8X\ni0VTdebZMGeQz+c1MzNjsvblctlKFu6Fx+NRNBo1YLaxsVH379/Xiy++aM6YA0YElh5mD4CBLqUf\nvgjXQ6CIRqM6e/asAb/j4+O2w5NsYn5+vqI0g7TnAsroX1RXV5skAWUa3SmwFEhrXBtZK8FibGxM\nb775pt58802zU6/Xq9dff92yq3feeUeXLl3S0NCQBgcHNTIyomQyqf39fSUSCcOgotHoY53TJ+ok\n3DFjd7z6d3/3d81jEiUxvEKhoL6+Phvn7e3t1ZkzZ7S0tGT7FXnw1M6FQqFCFJZar1wumzYjGQKT\no4BrgJ8sAdra2qpwGi7PwmXDoY5E6tvY2GhZDZEHoVhJNiaMQUsP9354vV7j7bsELA4qrES/32+E\nHYA2N4LSPeCgEtnc6+fAEGmZC6EbUVNTY0pNfr9fiURCiUTCyg5+jvYm2ABS7lwnzxzA1e/3q7m5\nWVtbW5bBkJXV19crlUpZB6unp0f7+/vWzcF2uNd8ZzAHyhH3xX2VZN0NsAucaVdXl3p7e3XkyBHb\n4PVnf/ZntuAJglwgEDBMiIwXJxQOh9XW1qZ8Pm8kP9ehISYDPd9l/OJg+vv7de/ePf30pz9VbW2t\nvvWtb1m5V11dbaVYb2+v7t27p9u3b6u1tdWegdfrNQVwHNDjvJ44T4JIwSGuqqrSwMCALQDGOQA4\nVldXq7m52UghkUjEiCwTExNGauIwQqZxRW0gxBQKBaVSKUky9B+Bj/7+fpv4Y2wX5JxZB5egxd+D\nGeCgXNIMUaZQKFgaODs7q9dee63CiF22nNulcQEt2mwQzNiQjeITjojD4mIFOA8XbafMgRgWCoVM\nfRunA0chkUhofHxc+Xze6m2cLOWVJBswgkAkyURVqIsBoYm+0JT5/dCt+a4u3ToSiSgYDCocDlsw\ngNPiakYAVLqzLPw3fyc9HHjz+XyW3cbjcbW3tysYDKqjo0NjY2PWSsRRRSIRtbS0GOcFHoTH41Fv\nb69aW1t16tQpffTRR7px44bZubscCAfhjv5T4r399tv6nd/5Hf3Jn/yJ3nnnHb3wwgtqb2/X0tKS\nUeij0ajC4bA8Ho+Wlpb03/7bf7PuVE1NjW1jr62t/c0SnXHBMtL/UChksvpuKuj2t71er5FaUqmU\nenp61NbWppWVFcMc3BYTh4QI6qLarmw9I76xWEwnTpywSb7z588rEono3XfftWlVDj1RDEPj8OVy\nOTssAJeAWtTZIyMj8ng8+vrXv24RHSOrqamxFBTqLtkBmRHOLh6Pa3Z2VtFotCJFd78nBwJHS3nH\n50mVh6m2tlatra2qqqqykojsArXu/f19hcPhCnUv9+dInXEen28VS7LI1t/fr9nZWUkHzgWuCzyR\naDQqn8+no0ePamNjw5za9va2aYO6+hB8H34vGAA2xO/GUXK/cfzMBlGWQdR64403NDk5qeHhYfs8\n2pInT57U5OSkZmZm9OKLL+rq1asql8u6dOmS7t+/rzfffFM+34Fq99LSkt577z1zbjwHRJhwzGAd\nL730kv7ZP/tn+g//4T/o3Xff1R/8wR/Y9SeTSS0vL2tiYsI6PmBLhcLBxnXKr729PbP3R3093paO\nv+cXaRfMNdqHRHkeOK0rD
KGhocEeqiRDz8kK3K3ZRAdSQ9JbUlK3TpZUcYi2t7c1OjqqZDKp2dlZ\n6zJsb2+bIAjZEHMbpLdEY1J0uAwQrmj5fu1rX7OuACQe2rOSLOrwXlqE0kHND3+hra3NcA/4BDhb\nN+Xm8BJta2trDZPAobidAkA2F/RLJBLGZKRHz3Nxh9ukh06TVrJ7r8kuAoGAtcP5edba8Xw9Ho+6\nurpUW1tr3Jd4PG4ODCfrOiDshutwW6C8h5/jVSgUdOfOHf385z+377u4uKg/+qM/0uXLl5XNZnXq\n1CkdOXLE+BKUXA8ePNCtW7eUy+V069Yt7e7uanx83MDKixcv6tlnn1U2m9X9+/crhGtcwR2ubW9v\nz7oVZDUDAwO6fPmyLl68KI/Ho7W1NbW3t+sXv/iFvv3tb+vLXz7Y4Q1GFYvFdOzYsQpdDsDiR309\n8TV/gC6oSaXTaTN2DMZFoqVKw6P0qKur06effmrqTBgjdT2y+/SmOdjuxuVIJCKf72C/4o0bN5RO\npw2c498B8sBSIGAxQMZh4qARzXFagFcclFOnTllmQlaFc6RsgWhDFuUyRrkv1O6uxgCHku/KdbvA\nJdeLgbqsSSISPyvJnkd9fb3S6bR9V5deTKlFa4574bZWKZtYmExrmsVHZDr7+/sKhUJqaWkxXCeb\nzVq3gUyF3+uWEzgslxrNveX7YE/83e7urqampvTLX/7Shs729vb04MED2ydy9uxZ/Yt/8S+0s7Oj\niYkJ3bp1SyMjI9rc3FQgENDrr7+uM2fO6N1339Vzzz2nVCqlF154QdlsVlevXtWHH35o9sAzpPTg\nOWA3qJP9v+29aXCc53Xn+38bjX3fFwIEAXBfJErcLInaJUt2HNuJ4tQolivjVCof5sO47r01N3Zu\npVI3ya0sX1LXX1y3PLnliTOOxh47seK4HJlSQm2kJFIQV5AECBDEvjXQjcYO9Hs/NH4Hp1uKTM7Y\nJlUXTxULYKP77fd9nvOc5X/+5zyctv7Zz35W/f39+sY3vmFKKhaLKZFI6E/+5E+Un5+veDxu9G3S\nx7TuB/u6fv36Le/TO6okfIoOAAhtD4vPx8NoQ2+RcatLS0u1e/dunTt3zpQDbrMk656NK+ddYjRu\ndXW1WbS+vj4DlGpra42izAaSZMKfSqXU1tam/v5+qz6lZDkSiWRwKDj0h/s4c+aM2tvbDc3G8kkb\n7clQHqT3UHxgFSgj0m9kNbDMYRhabwKem2dgHZhnlAjel/e8mHvfoDY3N9cqNPG0vALj2mAKrAub\nmPmkHyPVq9TBoAQI/Tzw6jMfXI/Y3qe/mVfCCIBaYn8UGSHGo48+qrm5OSUSCfX29tocP/fcc1Zd\nyrmojY2N2rJlizFJoYhv2bJFX/rSlxSJRLRnzx6j9B8/flx5eXn66U9/apwIvEwULtmQpaUl9ff3\n6x/+4R/U1dWlPXv26NFHH9WuXbs0NTWliYkJxWIxHTt2TAUFBXr33Xe1tLRkRYllZWVGqFtbW9Pg\n4KDJ7e2MO6okfCERRShYB4QHoZCU4UkwsWyk1dVVDQwM2GtYFyY+DENrFIOlI/uAd8DGysnJUWtr\nq1paWjQ3N6eWlha99957hjNQY7G4uKiHH35Y7733nlkFNg8xJsrBx77c3/Lysn76058qkUiovr5e\nn//85yVtZDWg6WKV/eFFDO4dxYDnAVDpMyp8zs+pJMNLmFs2OsoCtB9rjft76NAhw2WI+cEm2KS+\nitJvUu9Wsz4As8xNWVmZiouL1dbWlrGhpqam1NbWZuEScsIzeuq9T1V7mUJBAWjzHgzGc889p+Hh\nYU1PT2tgYECNjY3auXOnVXZiuMIwVHt7e0ZDH5ianv6NEigvL9cnP/lJrays6OWXX1ZhYWHGcZC+\nUpXsRE9PjxKJhM6ePav8/Hy1t7drfHxc169fVyqVLnsHP0JJVVVV6ejRo5qamlJdXZ0piFQ
q9fGq\n3fBCTMWc5zHwN4QcJcH/gyDIADjhXbDJfM8FOiSBU7CIEKuCIFBTU5ORjKampjQ+Pm6FRyUlJWpt\nbTUvBSsShqEeeughTkQy4InMBBuSzcF9I6CSjPfxG7/xGxnWGEAR4fbgo7QRc+ORYWFRHMTa3rXF\n7WZT+5CE4S2+/26ujefnvye7otdnV/AssNx4JF7J5+TkmCKSZOFLVVWVAZM0UvENYlk7z4b1IYe3\nzoQezLHHWvgMP6PRqLZt26b29nbdf//95glwr9FoNKPfKJ5IKpWykJLnzg51qOeB1u/DUtadjBBk\nLmjjHR0d6urq0sDAgPLz85VMJtXT06Pe3l5JsjT40tKSrl27pkQiYVkNeq1+rE4Vl5TRGISmGsXF\nxTahCJPnS2BlwR5wSevq6myi2aBS2sq2tLTYZLOg3pNZWVmxw1kbGhqUn5+vqakpDQ8Pa3Bw0HL8\nWK+bN29qZGTEFBwKj0NZ6HsA0MZn/Sal6/b+/fv1e7/3e0Z08m60t3peOQJ2eaFeXFy0LA7Ci5Ll\nfd7Lwbuhrb935/33+PoVz73g7ygSr3TwDLxyQokDikYiEXutvLw8owKTrFNzc7OV6S8uLloFqrRR\noEXo5LNhPnXu582HsR5glZShnH1qlDSzB5z5G4MwTpI9M+Qz31aO+gsK8/z3cF9sYtr7TU1NqbCw\nUPfff78RB2mlRw1LT0+PeW2xWEy5ubkaHBy0uZ6fn1ddXZ3J5u2MO5rdQFDCMLRFmJ+ft3jeKwdv\n+QAMPSCGNvaLxUJFoxut2fgsJCis3549e/Tss89q+/btKigo0NDQkHp7e62kenJy0th1eXl5dv7H\n7Oys9bKEYLS8vKza2lo7SQr3G8tAwdPKyoqOHTumr3zlK6qpqTFlBQOUZ/ZCCbDp8+ts7oqKCtsw\n3jL6jAKUZ/gPKBW8Gv7OPfueDig6AD4AZ77LW2dJFjqiiOGM4Ilg/VF4ELegxzc1NdlnX331VS0t\nLenq1av2HTU1NZI2kHxpAyfy6W/vhfosD7iHVyzIDs/vFa5Pn2Z7bF7++DvzREaKa0QiGwVsNFlC\n6YKDraysWBMmDOWNGzfsKEt4EclkUrFYzE7nYi0J2eLxuAYGBrS0tKTx8XFTyrczbvndQRBEJJ2R\nNBiG4WeDIKiU9N8ktUq6Iek3wzCMr7/3a5J+R9KqpK+EYfjyh10TDcvG9pbK04E92u9dVwSIBUEI\n/SKhuUH9AQzZeABEPT09evfdd60VPNqW1ngAVpw7ydkLQRCYpg6CdP3J4cOH9eSTT2p1dVXvvvuu\nzp07ZyW9lZWVisViKi0t1TPPPKNt27aZxfCpMJQh946F53fu37uOPqvC/8FdsJw8uwddPX/Av5+5\nZYOjYINgozCJ9cnGdrylRVn7bJNnk3qMiPcWFxcbUYs8P53CoOeDH8XjcdXW1mYA3Hw/ios58YQq\nj0X4n17JZTclIkvAvDCfKCmYkMwBG5Lv88B0bm6u1Xsgtz7UDoJ0te+uXbs0PT2tiooKPffccxoZ\nGdGJEye0ZcsWu49kMqmdO3dqcHBQCwsLdl2fMcNQ/SIZl1+RdFlS2fr/vyrpRBiGfxkEwe9L+pqk\nrwZBsFfSb0raI6lZ0okgCHaE3q9bHx7YwdoBRPkNk63BPTaBgK+srFjazrvKuNa4d3gexKaEKqlU\nyprTEvNSYptMJtXa2mrVfb29vdZOL5lMWrqOxSgtLVVbW5tyctJdqHJyctTT06N9+/aps7NTzz77\nrM6fP68nnnhCk5OTprhgKPqUoyQr7eZ5mQufzvQ1Cf79WCg2Ybag8nkEH+WBdfYl4t5NB5BFaXhM\nwyt1FDrrTeztwxDuh+MHwzBUc3OzwjBUT0+PbWbOPeHahYWFGhsb0/j4uJ0M7+/Rt37z9+Dfg0Jg\njqRMOjyeYPZc+mv6LBmGw2dfWFMUBn1AqB3yIZtPQwd
BoOrqav32b/+25ufnjWiYk5NjhLP77rtP\n09PTdi7N1q1btbCwYKe8c7/0xoDxezvjlpREEATNkj4t6f+S9L+uv/w5SY+u//5fJP2r0orjs5Je\nDMNwVdKNIAi6JR2V9IFOF7QjwwKhGDwohoCycGtrG+dP4FnAU/BxN410pfQiZre0h8pME5m6ujp1\nd3dn1PvHYjFrLNLd3a2KigpNTU1ZL4fZ2VnF43GLqemydePGDb366qt68MEHVVVVpS9+8YsKw1DF\nxcXat2+f9u7dq+eee07RaLr/4M6dO80jwEKy2QlReFayGB5P8RvNh2S+QAhrn71xs3EEL1jZXg3X\nQZA9HwU2pS+i8yChx05QRHhrWE5K26urq9Xc3GzpQ85KoQdDbW2thVxLS0uanJxUR0eH1TLwfL5S\nNxtI5Dk8Z4WNubi4aPwdFBKbHu+Ca2GE+DuKmrklLGYOfHWyZ8ASanqshPQ7zXxycnI0OTmpN998\n02p5du/erZGREb3++utWOl9SUmI1RslkUhMTExnds7PxlJ81bhWT+CtJ/0mS9wbqwzAcW5+MUUl1\n669vkTTg3je0/toHBpWPuIN+83vQUlJGGMHvCCWL5wFCyrZx+WgVD0qMa8s1CwoKrG3/4OCgEomE\nurq6dOXKFevVODAwoOnpaesMDaIfhqEVdBUVFWl2dlZ///d/rx//+MdKJpNGclpbW9Phw4dN2V2/\nft02DsNbObIGHkgEg2AeECafYmTuvHeAUCPI2ZgPP+FWMHzmA1QeAM5nn3xogqXiu3zolEqlTIHw\njLyfFDSdu3GXCwsLtWPHDlPIVN3m5KSZtvBSeH7kA54KVprv4j2+wNDLEocyQWv2WSYwBZ6Za/nw\nxs8588Iac1YIIZwnvzH/KFzwmVQqpfHxcV24cEGpVMq8vGPHjmlsbExdXV0aHBy0Zsv33nuvXnjh\nBRUVFen+++/P8KxuF4+QbsGTCILgVySNhWH4fhAEj33EW2+7Nz/ZgVRqo1Ozd6ERbJ/pWL+nD7jZ\nnvWHpkbQFhYWND09bQi7z26gNMIwtBQROeWlpSV1dnaqurpaS0tL1ukZ4HN2dtasPWw/yrobGhp0\n8OBB+36yIAUFBbp06ZI1xiH7we+euZjt4kqyrtEIFhsda4RS8TwRYmpKtrFmkjLmOBvZ996KD2dw\njymRZkN4XII1IjQkzPNWXJJtFjgC0Wi6EY5/vjAM1dvba6k8r/xRBGNjY0Zw8gqR7/Melw+ReK+3\n7MyZxyM8hoGy8dyLbA9CUoYiYaCYUEAAmoDoXuHk5KTrMl599VXV19crLy9PL774oqLRdIfy8vJy\nq07Ozc217mvce3V1tcn9ysqK9R/xbRFuZdyKWnlI0meDIPi0pEJJpUEQfFvSaBAE9WEYjgVB0CBp\nfP39Q5Ja3Oeb11/7wIBSzYQhKCgOH08ihAgPG4JFw0VcW1uz3hKec0AvQioK8T4oFR8dHVUkElFv\nb28GeIQLnpOTLtpCoKEVY92lNMo+PDysSCTNspycnFRtba0RxegJUVhYqLffflslJSU6cuSIpX1x\nN3keStZ5jsuXL+vEiRP6whe+oLq6Ont2z59g0/I35sQLLgxHrJVXMj7exir6DeXBUnptZGeTJGVs\nIN/hi43qgUQ2aTQatbaEKICcnHQvkEuXLtlaErIAoKZSKdsMKHE8LjY3fBofhnkugpcJn9FgDvES\nvUJFcQLgeiUobXhhKA9JpsQZAOtQsiGv8RwlJSU6fvy4xsfH9ZOf/ESXLl1SU1OTVUkPDAyop6dH\nKysruueeeywdum/fPl27ds0yYMXFxWpubjaq/O00nvmZSiIMwz+Q9AeSFATBo5L+tzAMvxQEwV9K\n+veS/kLSb0v64fpHXpL0X4Mg+Culw4ztkt75sGtnAz+4Xj525H0+5kNY0dDekyBtNDMzk8HSi8fj\nGWktvsPn4yF0YQ1
RWEGQLqSRZJwINPvU1JTy8/NVUVFhJ26trqY7Pnd1damvr09PPfWUFhYW9E//\n9E/mUo+OjqqhoUHz8/PWDITqvDAMNTIyon/8x3/UCy+8oEgk3eb9/fff15UrVzQzM6OmpiazXt6y\nArKx6Xz8yybwQskaeMuIoKIkWR+8CMIFH5OTUsWq8VmIZSh5nw3xDX9ItdbW1mbIyNLSkt5///2M\nDBWt3/DcZmdnlUqlrHYCzANlyf2jHPAU/Eb31pv58p4D94m8ehnmXvASPM+FMIdrI7N+fpHJnJx0\nndDKyoqlyXfs2KFoNGqdrZqamtTa2qrDhw/rjTfeMKO2upo+imLv3r2qrKw0DGx4eFjV1dWamprK\nKDK8nfE/Q6b6c0nfDYLgdyT1K53RUBiGl4Mg+K7SmZAVSf/hwzIbkiz7gIbGk/BAEsP/DtjpLUNO\nTo6mp6dtMf3C+C4/1DbQXgyQ8ODBgzp16pTa29uVSCT05JNPGhchmUzqu9/9rtGygyBQe3u7jh49\nqp/85CdqamrS9PS0bYKysjLNzc2pp6dHx44dM5zC4xOxWEzxeFyNjY26efOmBgYG9Oijj1pb+CBI\nn3r1t3/7t6qqqtKOHTvU29urVCqlzs5O7d69O0MReHzBg5E+E+Spyz4W9+64JNsYKB3ez4bie1HU\nPqb2IJ8/U5T74f5YF28oaKHH86ytrRmhDaIdgCT3xJmb+fn5Gh0dVXt7u92Hnwe+l+f22R42NPPA\nGvtUJu/DgGRn3DghjWfxoQdz5TN1NKnx7Fba3iHbKysrGhgYUH9/v27evKnFxUV95jOfUV1dnTo7\nOw34ra2tNdo4DYh+8pOfWFuCiooKO6me+7+dcVtKIgzDk5JOrv8ek/TUv/G+P5P0Zz/relgf6hII\nGVhM/q1fM2NRfaxNLO1dbdBzKixJrwVBoHg8bh4JFqejo0MdHR1WpNXS0mJHB165ciWjRkCSpqen\nlUgkdOTIEavdIC7Mzc1VS0uLVldXtXv3br322ms6ffq0BgYG7KRt0q10Vy4oKDD+xPe//3396q/+\nqu655x79zd/8jaVT6Z+xf//+D6TmqIRlHrDSXpi9xfSApHfTmU/vctOXkq7WAIO+VgTSEBsfGjXf\n6zEPj7kwcnPT55wS/6+urmpubs46ilPOLqXDVO6VU7+j0agGBwdNSbDBs7EN7iV7I3vPB8Xn+4FS\ngZqtVHmub37zm3r++eettsNzVSR9QHbBw0gHcy0/1zk5OaqsrLSmOlNTU9q6daspyocfftgKuqg1\nYi05ce073/mOamtrM07H8+nqWxl3lJYdBIFtZO9+oV3BBphYzz7zxCpfRbe2tlGnwQLj4qdSKfMy\n6DLlLWhdXZ1KSkq0tLSky5cvW0WitMFdoO3Yb/3Wb2liYkLT09MqLCxUX1+fHaHH2SDLy8v6wQ9+\noAMHDmh6etqaxZIlWFlZ0bVr1zQ1NaWOjg51dnbq4sWLSiQSOn36tPr7+21D7NmzR4cOHVI0GrUy\naYQZSwfeAU7g8+5YLp7Fr0E2/sDrxOHeffbZCxSUZy2SJeA6EIVQsjSO9ZgA4Q+pRQ8w0ogG4JWO\nZDwfVb6rq6tGgiM1CqOXNccDwgP1nlNubq6lsLHqPIfHvnx2DZzj2rVrikbT54hUV1fb/GWDw8yZ\n7zgOcxVKNUdD+NRrWVmZNWg+ffq0mpqa9NBDD9leoZE0zxaG6VT5yZMnrT0CHAk899sZd1xJUDdA\nqEH/Av7OT5+x+LAF86g2ZzMQC3MauUd+vaLBLSOmjEaj1kRk69atGX0R1tbWVF1drfLyco2MjOjo\n0aPat2+fTp8+rTfffNN6RrAgIyMj5kbPzMxknOGJkPD9e8kqyfIAACAASURBVPfu1eXLlzU6Omrd\nmisrK/XUU09py5Z0Ftmnbtm8kLy8a82c+XQxFtO7v1zTp06ZF+aOXp7MH14fHgRKn
TBAkikSNoQH\n+/zzk9KMRqOqqKgwRUGBEsIfhmkCV01NjZ0oJqWVR3V1tSmvvr4+HThwICO74WUDBeTnypOnfFqZ\newYj8ClfNqQk7dy5U5FIRF1dXTp58qR27dqlmZkZJRIJjY+P23kjhw8fVm1trXlbyBypXrJPZHkA\nbVdWVsxwDQ8Pa+/evcYBiUQiFm4BsDP/nMlBwyRSyx+rs0CJeZPJpAks7EPvHkqZMTUpJ+/+ofH5\nvN8ghBvSBnGHTcSJ31hAvru2tlZnz55VY2Oj1TdwT6urqxoaGrL4DvZbLBZTXV2daXfANI56xzoi\nwB7gi8ViGhoa0sTEhCYmJnTq1CmLJ7u7u3XPPfdYRSrPjNVFWHwGgg3NPJGKRdFlYxje9Wdj+awG\neASbg1AxO7bP5mfwWnaVJtZakh2a7AG1GzduaHR01E50ZyN1dHRYmIXn4jEKPFAUG/fHT882JFvG\ncyBbXpnxd4yLd9V5ttzcXO3cuVNtbW06ffq0Tp48qYqKCrW2tqq1tVWLi4t666239MMf/lANDQ0q\nLi7W4OBgRlgDH8R3NJek9vZ2FRQU6K233tJbb72l559/Xlu3bs3wUjCy/lnm5ubU1NRkIWgymVRZ\nWZkd1Hxb+/S23v1zHiwSVh9kHHTdg2OSLF0qZR4RKG1sfh9r+1iUTUKqETcVzoF3ka9cuaLZ2Vm1\ntLRYlgQhX11dtQY1r7zyisrLyw1VvnTpktUW4P56dx8hQ/vjSeTk5Fh149DQkNbW1jQ2NqbPfOYz\nev7555VIJFRbW2teB0LLPWOtsbAed8DqMTf8zPbSUCx+s+NBIbReqaytrVmKFiWEAvMKimyR55Lg\nefDegoICS1mnUim9+eabmpmZUUVFhdbW1gzZb21tVU1NjT1TKpWyM1Lm5+eNx0LYhQzwk9Sy92a8\n/PjwEwWzsLBgYSKyxGeZK2+Qjh07pkOHDpnyWltLn5laX19vp7kNDw/rpZdeMko+IZ0/CwbPCeX/\n1FNPqa6uzjAJ5AicCEMG+SsWi6m1tdVOGtuyZYvGxsbU19f38SoVj0QiBiiura1ZZVx27t1bSS+E\nAHZManV1tS22T615liXZjWQyaVkO745GIhE7QWx2dlYXL160BrMUdRUVFamrq0uJRMIswsjIiLZs\n2ZJRJ4JLHoahtcJfW1vLoItLG94TzWxaW1sVi8Us/cVJ3wiQJHv/3NycvvOd76impkbPPfdcBkMT\ngfEsRB+3SpmVmt4jwLL5EI3PAQqWlpZmgJOevMU1vEXmNZ6Be6VfxMrKit5//33DFQhFiouLrQMU\nCp35A6sibERJwCPh3oNgo6ydzc6mIsyUNrIzkgw3kpSRdfDsTUIjP3guNnpjY6NZ/9LSUm3btk3/\n/M//bHO+uLhop7Uzx8gE85BKpbR//357dry6WCxm7NB4PK54PK7p6WkdOXJEqVRKN2/e1M6dOxWL\nxXTkyBEFQWDnytzquOPhBh4D3XxobCptgGd+Afmd131HKKw1C8T1CVkQ1CAIzMr78zX89T2Yxmak\n7RxWb8+ePaqvr9fy8rImJiY0ODiomZmZjNy5T+lhWSorKzPwD8DMRCKhz33uc/rUpz6lmpqaDPDM\ng42RSPqIusuXL+vcuXO6cuWK6uvr1dnZqY6ODkP4Ccd8SOLxAdaATeH5EAwsFtZ7ZWXFjsDzTEz/\neZ4TpU4s78FCNi28h7W1NfX29ioWi1lD3aKiIlVWVhonBQ8EucDVB5eg2I6wgmIrb1Q8TwYFhryR\ncVlbW8tQENKG5yBtHG4NhoHsrK2tZTBHPbaDYvQyD6bmyW0+pYzseW+Ye/FguyQDysvKytTX16e9\ne/dqcnLSTpYLgkDXr1+39oe3tU9v690/5+FBPlJfdLr2C+cXheHpwAgprbtIXVGKjveAUMFn4PoI\noLSROmOB9+/fr1deeUX5+fmanp62MKWhoUFvv
vmm5ubm1N/fby3VL168qMnJSRMUNiUKjIWGBERs\nzuaqq6uzjE8ymcxIqfGsgGtlZWUaGxvTfffdp6qqKp08eVLbt2+3zet7erJxvKAzEHY2CFiJz3qw\nJsT7PlXIT57VYxBYU+ZU2ugo5ZVjT0+PiouL1dHRYd2VfLduj4v4UJM1zfYwqPPA4nr0n+GzaYSF\n3vig1Pw1PADs7wF5+rCN7LM2KCu8YL6H5j8oFO85e/A1W9ECbq6trammpsYwF+qiUEptbW2WTaSo\n8VbHXdEtm00AoSabFSlttGpDcD0/gFQpQCR4AIsDkBMEaUoxIA8Kgvyxj+F9vp+Yj8kNw1Bbt27V\nyy+/rHvvvVdTU1N66623tH//fjU3N1vTXGkjlPDFawCbYZjud8HpVysrK/rWt76l6upqffKTn9T2\n7dsthGKzSeljCK9cuaLBwUEVFRVpfHxcpaWlGh8fVzKZ1BtvvKHHH3/cNr4Xfg9uYjHBOng2PuNd\nea6RrcDJfgDIQqcmBYnQ8zdpY1MRMiUSCe3cudOYlFg7n8VBqWFt2Ths8Ly8PNXU1CgIAmsExFx7\nxYV3wT0QgkgbGRnmyW9UAG7//LwXxYNXAO/BMz49WM69RSIRO+aPtcXLYL6QE5+2hQODPPn0byqV\nMkPDNbZt22brHYlErFnPrY47jklkCyWLyd/BGFAmXot6LAEAjAUBc2Dhof1SWYf7jeAAljLpbMow\nDHXkyBG9/fbbGUJHcVdPT4/27t2rxcVFtbS0aHh4WOXl5ZKUcVJ5GIaGj6C8OAgXYaUb0fLyskZH\nR7Vr1y7bCJFIxASms7NTp06d0vz8vJ566ikNDg7qzJkz+vKXv6zz589rcXFRx44dM3KTzyQQ5jB/\nWHwfq6N0s3tYgotwTZiyhGaeQORP/mLNGB4PmZqaUkVFhUZGRgyoYwMvLi7qzTff1Kc+9akMkhcn\nrflj9gCAwa9Ig/N93gNgsHGRNeSKz3hehS9c8ylUPBv/Hu7Bt/7zIScGyJeNI7++xmRxcVFjY2Mm\n9z4b41P6Ho9JJpPKz8838tfs7KyBqJJMbm9n3BXZDTYqlibbLZQ+WFvAYiFUaFXvLkPQouVcXl6e\nJiYmMmJyPAnvHnpLt7qa7sKdk5NuMQ+3g5Oz9+7dq4WFBcXjcX3ve98zN4/NjtvvBZQzLH2+mueF\n9HLixAlt27ZNu3fvNgEZGxszglFDQ4MmJyc1NjZmfRQvX76srq4uNTc3mxfhMwxYEsBGj3V4t5rN\n6OeEDeXxB5Qgz+YVfTbIilvvvQkpfbI8Gx/jAA4TiUT0+uuv6/7777f38Z7p6WmNj48rkUiorq4u\noz8ofSCuX7+upqamDDyCgSx5DAq5Yn7Y2N4L4lrIFaGJvw5hDnPAvHtFzzOjoFEwpJc5mcx7Cxg1\n329Tkh2ROTMzo+LiYsOzJGXINm0Bb3fcUSXBAvjTukk3+pJZtCRCh/eARQP4BMBcWFiwFBgAGCky\n3FM0NrGyR5qxEIuLixofH7fDiImjY7GYFVmVl5crNzdXY2NjisfjKiwstBgRQcQjodQ5Go1m0MQl\nacuWLbp582aG13HPPfeYl3Hu3DmdOHFCvb29VpBWX1+v4uJiO2F8dnZWVVVVOnv2rJaWltTc3Kzm\n5mZNTk7qE5/4hGEJuNh4Xh7YRIF6gBhshPBkdnbW7j2bEIXST6VSVhqPtfQ4ElaQ+UdhopglaWBg\nQEeOHNH169ftNDLP4SgoKND58+ctc0WmgZobDljmlDY2S3aI4LNn4D3MhyR7bp+t8WEvHitz4OeO\nATjOenpmKpWg7Ie8vDwrWsNgegwH0lQ2CMqBSSMjI2pvbzflw5wSdtMK4VbHHWdccuisP9yE4+15\nj49rPYiJxkaTI8i4+N6FRoABEH2HIbQ1QrywsGDIO63IUWSRSEQ9PT2amprSgQMHNDIyorffftvo\n2b4jNNiGJ
AMR4WiQG49Go5qZmdHQ0JAKCgpUVVWlZDKpmpoavfbaa5Y1GRsbU25uriYnJzU8PKyR\nkRGlUilVV1drfn5e+/btU3t7uy5duqTi4mJ1d3frxo0bFkfv27fPQgLO1/QehucI+PtcXV3N4B3A\nCkQRsgZYYL9JUPgoXg7hxYtgHRcWFlRZWamSkhJbJ86X6OjoMK4EbFwMxtzcnHbv3q2VlRW99957\n6uzs1Pz8vEZHRzUzM6OysjL94R/+4b/JBcnGAjy4CUaEDKHMsfRcx/dm4L5QitKGQfNeLl6mV0ae\nFMhp4qlUmtRHpoMQzpPwfFYuPz9f9fX1NudlZWUZyisIAp05c0adnZ23tU/v+KniWG9iLSbYV8mh\nUZk8NLTXtMSAHonHqnvEF0vhAbDR0VHjLrB4eB+ULp84cSIjlTY8PKxHHnlEL774oqanp61lGD0L\nEQxJ5h2g2YkPvTLD0yCWHBgY0NDQkJWgc2AwfI1YLKaKigpFIhFVVlbq4sWLGhwc1IEDBzQ7O6vB\nwUFNTExo69ateuKJJ1RfX29AKR2REDQfZrBRcPeXlpZUWVlpCoP7ZvN4HgJryuZnU+BJoVTgb+Tk\n5Ojy5cvq7e3NAKUBCiWZB4IxgFMAiEy61HMMCgoKVFJSoocffjgDl5Ayq0MhQXFfnk8iKSNs8MCj\nd/U9LuH7gWAg6HLlvQWf5eF5/fdQ50J7Rx/OSRuG03s7KJilpSU1NTVZY2b/rKurq2pvb1dDQ4Ne\nf/31W96ndxyTIGZikiGP4IqxgNLG+Qqg5ygOvAzyy0wWhTLEmCwc3AQWlY3KxkCQWTzeH4/HVVRU\nZD0uv/3tb5vV4/RvTtimWIjFnZ+fN1YpFX5zc3P2eYRhcHDQNiQYChulra1NExMTtvi4pNDKl5eX\n1dPTIyldPlxSUqJnnnlG7e3tZrGzsxg+a8L3YpVSqY2W+5IyrDFKlvv0uXeE2afvyJRIG8oxlUrp\n3Xff1fnz5w1HIFW9urpqnaYA3qhjANPBG4ATwTOmUikdOnRIjz32mFl8FBdrzH1iwTFU2R4HIztL\n5PEMnwLlJ+49IDr3iCflwWMwJ+5FkrGDIWpxbyhT7tcrLOY0O63rDee2bdsy0t+3Mu74MX9YXklG\n8cWdI56XZMJMsY2PnRFUz46TlLH5UqmUpZvCMMzo7bBt2zbLWbORieV8rhxBlWTvq6mpsc3ulZhv\nfYYgFxUVmeCwyaLRqJ2SPjExoeLiYlVUVCgajWpgYMCUSDKZ1ODgoKLRqNrb262F+rVr1xSGoZqa\nmjQ3N2cdttjok5OT2rFjh20s5iqbaCQpoz0cc84/5hZLiEDigfCM0obbjfJAmZFKXlpa0sLCgqqq\nqtTc3KyBgQHb3Hhdy8vL1qPDV7ySMqZVG2tNleT8/LwqKyuNL8LaEdqxIdnwhAEYFuTSe5oeLyA7\nxWscp8Am9JgBmxfgnDnJDp3xIhgoALwp5t+zLaUN4J9nkjILAJF1z2NhL9zOuKNKorS01DQmAsHJ\n3jyMjx2zc+0+vkRrMnGUFfsmNv46/A4By6eppMwNAqsPoUGJdXR0WP9M7pmNgQDj4UjK8FhYwOrq\napWUlCgej2txcdH6AyC88CsQhoMHD+qJJ57Qt771LZ0+fVrHjx/X1atXNTQ0ZN4FnbL+9E//VOfO\nndP3v/99tbe3q7293c7qRDF8GAYBVoPX4UFYH6L4fpx4Jh4EJKWck5OTQRQqLCxUVVWVFhYWdOTI\nEVVUVJiCgVpMTQLYBx4h9GXCwWw+C2nmb37zm/rCF76gxx577AOArP+dEAOjIG3Ue4BLQA3nWfgp\nbZD6uA5eFkrIlxEgXyhJT1pDpuCcpFLpvq/MI0qM+h9kEg/S7wN+RxFlZ6W8h3Qr446TqTw/n41O\njQOcByYaDesnHQEmRMFSU1eAqw8ewPXZ7LjUq6urVpfv3TNJGUxJrtve3m6
oOYApIRCCFYlELOzI\nzU2fjMXRfisrK5ZxGRgYyPBkuEb2KV0cHkufzZWVFV24cEFSukEuOExlZaXKysp07do1ff3rX1du\nbq6ee+45xWIxPf744xl8B4+oe8uJey1t9MX01hfSVLb77vP8bBLWhU0AGJqbmz4n5Z577rH5iUQi\n9iw9PT26ceOGlX6j1HNy0v1G8a446Wt6elqVlZXW1fzSpUs6fvy4pI3QBwuP/Pjw0ocZKDieL7vk\nPhuH8PKEPNLHgWfm+XkWmg15BjAbHu8BUhqf9xkiT5bjNZ9KRUn48Pl2ORLSHVYSsM986hPrhTVm\no3gEnvgslUplMC49/dafM0BIQvaEv/O7b0nmvRGsAgvHQmD9e3t7zUKiPHgGOAQeUE2lUorH4wqC\nNCuwpKREyWRSpaWllkGAJ8Gm9C56WVmZ+vv7NT09bUqpsLBQo6OjWl5eVmtrqyQpkUjo0KFDOnv2\nrCKRNCX8xz/+sX7t137N5gOrh5JEqeHNQIdG+fK7t1AAm1hqDwyj1KnFwZVH4WWfnYLnRQeq3Nxc\nNTc3G9jm+Rsozry8POsaNjIyYvMUjaZ7UzQ2NmZYU48xRaNRo7/7bAvhTrYCJDslbYCVHsfideYC\npQNIiwVPJpMZc++9hCBI1xRNTU3ZMZDSBofGDxQ4Ms41PGDswVr//Hz2VscdVRK+W1MqlbKOUgBc\nTCYbz8e+uJhYLlB7SsK99aI8fHV1VWVlZZb+o6aDDcHkelq2X1DqLYqLi3XkyBEdPnxYFy9e1M2b\nNzU2Nmb5fhY1EkmfN1ldXa1EImEMvIqKCq2srFgen4X1br7HRaS0kMbjcXNHi4uLtW3bNmtyynNF\no1EVFRVp+/btWl5e1vbt2/XDH/5Q4+Pj2rdvX4b149qzs7MZIBjVuNIHGYg+3Sll0uU9IYnzJbB0\nKBDejwFgPVF6KGNpo2ENXbnz8/MNfMVLmZub09DQkHmiKysrxjrcuXOn1tbWbMPhQaHA2DR4tAxS\nub72hmdn8DoygrfhCVtY9uwMi89WeFYs18PjJe3sFRGKymc4uC9fVOa5EcyzD0NuZ9xRJcFkAFDO\nzc1lWDWsqa/s5CE9q5KNjQssZfLwfWHQ2NiYysrKrEcBrhzxNZPJREajUSv9npiYMOWzfft2LS4u\nateuXXrvvffsHFFi/tzcXJWWlurw4cO699579Y1vfEOjo6OSZN4Ewue9HZ92A1eBZ0BHqCBIsz4n\nJibU399v3hMbk8Y1jzzyiCKRiJUpV1ZWGhDIAHPxeI23vlhVL+y+ohTF5zEcT3zjzBEPqFHxyHPS\nV8FT1lH2HCCNsqASl89zHMHi4qJ5Y+Xl5WprazN2IfOa/TzeZfcbDZnA++NvzI+0URTn0/D8zT8/\nShnPKZVKGZbC3+AG8Ux4rz7bwpx5j4B/2YAx12UtfDiTSqU+Xk1nsAbSBqjoc8nZcTEKxQNDHrsA\nNKTtvUeouSZ/RxlQ2utTnmxavgdrw/WLi4utJV0YhnrwwQfV0dGh+fl59fX16Y033tC9996r48eP\nq6OjQ4lEQl/4whd0/vx5dXd3G9hJmENIwSYBQ6GildQerL/Gxkbt2rVLN2/eVEtLi+bn5zUxMaGG\nhga98MILmp+f144dO8zLKi0ttbZ5KAisLhuF2Jn1wG2HIeqFDFow12JNvJvLtT1RiLXCOHjsgoYx\nnAdRW1trYVUsFlNeXp6R7FBQKDK8MsI4QsKysjJbW7wCvBlpw41ns7HBWHuu77MSGB0UDkrCv99j\nNHiGfI5rs3mRWzY73gWhKusBfgOt27M2PQjt07T89Gle1uh2xh1nXOIO0jHbu0ksAHgFQiVtNC/1\nCgMCFgAXv9Nq34NLYZgmFjU0NBhhysfVPrOSHdtRPMQ1o9H0oTINDQ2anp5We3u7nnnmGbW0tBiw\neeTIEe3bt08vvvii3nzzzQw3trGxUal
U+oAZeitgLX3ue25uTqWlpYrH4+ru7tbw8LBqa2tVX1+v\njo4OjY2N6erVq+rv71dVVZWxRT1Qy7NgMVGUdPFG4EpKSmxzJJNJA5h9JSWZChSCJAPaPDDoC5kA\nHvES8RQpcGMTQFsH5PTEtEgkYgoNcBggeHV1Vfv37zeWLEqINcVjZNOCXfkN7tOd3nsAD8ETwQP2\n2IrHCHyKGKCYUA28wisslDJyR0paknkXHnz0RDAUjgf4Ca98JSn75XbGHW864x9+YWFBRUVFhlOw\nuMRhuGIg0kw01oF4nesxGSwEk0toArkF6wPwiGX0nAIAzlQqzX/nXuEe5Ofn68aNG7py5Yra2trU\n399vCqelpcWsQkNDg4FguOeJRMKU2vj4uFnceDxuig4CWV5eul8n9zw5OZkRb//gBz/QysqKTp8+\nrerqan3lK19Rbm6uzpw5owceeEDSRq8DFB2eAvUvfrDp8SCYFzYA+A/uO2vmMQivEPy6eMvHJoMD\nA0NxZmZGy8vLqqysNA+wqKhIJSUlRmGnK9PKyop5GjQMwqig1FA2bGRpI3RAKXlvlrliTbLnx7vy\nnl+CIvLZCB9a8n5wJO6RQ6SYXxSgDwNRYt4DQr55zXugKCXu7WPnSXgSFF4EeWTPfpOUYeV9atS7\nyLQn5yDipaUllZeXZ9C8SUv6WNMvhKfrcg9ofZ/7T6XSjW6HhoaUm5urLVu26PHHH1d/f79+9KMf\n2XN94hOfUF1dnR544AEdP35c77zzjqV9E4mEEcTwGnD98XaIuwGhwCjIiqyurtpZqm1tbers7NTE\nxISSyaT++I//WGEY6pFHHtHw8LAaGxtt3jz2guAxn2wYFK9fCzZRthsvbWwUPCxJGRYWzIXvx4LX\n1tZqdHTUlD4KGgUCRXlxcVHl5eUGRqZS6fMoEP6Kigp1dHSYDOH1SBsnsmEIvOdA5SWeA9djrclC\n+H4n2UVxKFCey/MvkCEG18DYMW/01OD6yWTS5py59Fwh77nwGtcnfEFJe/D5dsYdr93AgjIRlGN7\nL4BJ56fPbWejysRdsVhMxcXFxmTEGwCDABUm7ehDFxSEt0SAe8vLy8aLyM3N1alTpzQyMqKSkhK9\n/vrrdoISfQSrqqr0xhtvaP/+/erv71dTU5NGRka0tLSkqqoqSRv0dBY9JyfHcAnSqdDVfZiQTCYt\n9XXjxg3t3LnT6lDwCkZHR7W2tqYf/OAH+tGPfqTnn39ej60TjHzKDCtOyo6NBCgMJoQyYJNj9XBv\npQ3LzADY43Mg+mQRCgoKVF1drZmZGbtvmIxsFozGwsKCamtr7d78Rk2lUmpsbNT8/LxKSkpUVFRk\ncuJBSowOMuWxFd4LxuIzaf79eB4oPa7nrw8eggfos3YegPfgKF6IpIxmPjw/18xOH/v5Zv+gGPgO\n+EcfllL9qHHHyVRMRG5ursrLy5VMJo2i65FcwDIWESuDsKdSKaMzew8DwopntjFZubm5Ron2FsZb\n1zBMn8uZTCbNTVtcXNTJkyf15JNPamFhQQMDA9q1a5f6+/u1vJxuuvvAAw/o/PnzystLn+78zjvv\n2D3gutI9KAgCUy7SxpF3hEDE38yVJ6D5Cs1kMql4PK6ZmRmzLt76056frIfHc3wBl1cIzD1eB9R2\nNgkpSR/ncm3vXqOMUNTgPwzo6awn37+ysqJEImFl7mEYanR01OjwZICWlpbU0NCg0tJS64mJ5SSF\nzsZnE/tsEiEja4ACY47wYsEpPNsU/MH3cOA5AH69u4+nCk+IaxO+oqwoHfA0cR86kM3zpQystQem\nfTjkPfdbHXe8xyWodRCku/iy0T3uANgDyQllIX3wJGtAnPLyclsI8vbSRvMUro0w4OJyHW85BgYG\n7HfPDBwfH1dVVZWKi4t148YNww0WFhbU19dnaP3+/fu1tLSkiYkJhWFoFpJ2Yx6s9C4qzwPASQs+\nngG
rRRcuSoIBLUtLS41Pwb099NBDFl5hgUg34lbj1UA0wm33m9czEgEdsZT+uswb4YPPhGAcWFNI\nVLApuR/wErJL0WhU09PTZiTgUTQ0NKi+vl7SxvELVLxi5TE6/vm9UkOh5ObmZrQ5ZHhZ86EoHgCK\nQ9ponuO9Ba8kUTh4tBhGFHtZWZl5tYRBPvzxILSUeRpbGIbWugAAHOP5seJJoBWJrz03ASvJYpAF\noR8EbhNWECEgnKABDMPHi/6oeDaqz2BgTRCYbdu2ZVClFxcXFY/HdfbsWROOlZX0eZV1dXXq6+uz\ng3qGhoZ07tw5bd++XfF4XMlkUlVVVZqenjZlR9GYFzIv1JIy7pHNl5OTk3E6+OrqqhKJhIqLi60Z\nzcLCgu69917t2rVL9fX1piDW1tasjBliFHPsgTG8Nv+TUMzjNJ5XgKVjw/tUNtaQMMYj9QsLCxnu\nPpgI1h6ryebm/TU1NcrLyzOeCDwA8AiPk/B9eDVYfN/UxadufbgFHuC9BjAKn2bn84RDqVRKJSUl\nGdmh/Px8y+DwOV845nlAPiRE1rz3goHgd67h58172LdLzb6jSqKhocGqE2OxWEZlIYw4HkzaqO6U\nNths0kZtgf+/r7wLw9DSllh6GnKQewfwxBrHYjED8LinaDRqBVSk5TggeMuWLbpx44ZGRkaspiOZ\nTKq8vFyjo6MaGxvLUD5Y5ZKSkgwKOW44i56Tk2PFTHNzcxYeJRIJ668hpRVrfX29rl+/bkKwbb26\ntaqqSjU1NRmkLe+RME/SBhjG3COgvowchYknlt3w1tetcHCO/17WCvq3pIzT1qempmz9yI5wXKLP\nrhCjs4E5ftFvRA+OZmdkUDbZYRHrhEGgTwjKEwUDJZ85xKvy84Yi5Bp4sKxdGIZmEKUNQzE7O6vC\nwkLNzc1lgLw8K9+LxwOZbmFhwQBtzxpFSXueyK2OO6okksmkWSRCAh+Tes2KAqAgBisHmIXriYLx\nWheBlmSpS0kGEFKNSlHQjh07ND4+bnhGd3e3pd/ANLZv367p6Wklk0nV19ebwiGORMArKyv14IMP\n6syZMxofHzfB9KkoFh1kG8uOqzs5OWkKJZlMqra26KJVhwAAF+tJREFUVmEYmtJgA+fm5qqurk4d\nHR2qr6+3wrH33nvPzqvEoqBIgiAw197jP2xq4lgElHVj49FfkeegSA4MwHuC3vPD1fb0biw3r6MM\npqamzLpKacZmTU2NGZX5+XmVl5drampKCwsLam1tzcCX+OfrKLg2skeotLa2lsE5wBslLEDZZAO2\nUMp5Xg9E+sIvH9Ksrq5aVou5nJuby8h2QZHHgAGwg2eglPleeC1gXhgD9pkHmG913FElIW0gvTyA\ntKFNPYsNgfV0a0IV3EG0sQc2cbXYxAgviDtKpaSkRGVlZWpubtaJEyfU0tKiiooKvfvuu8rPz1d+\nfr55G4WFhTp79qza29tVU1OjgYEBOxyGLMThw4fV3d2t1dVV3bx50w7xQfhxuysrKyXJFpQNiyuP\nMEoynMBbbzbZ0lL6QNn8/HydPXtWQRCorq5O/f39SiaTOnr0qO655x6LwX3GAbYnm8pzRVgjvBs+\nzxqB+0Ajx0vg2r7Zq1cYPmwA6GMeUJaTk5MmJ8TXzAH3lUqla36gYpeUlFjpPxsXWUCWcO3hgGRn\neggfgiCw78R7AkNDMfL+bBo23q/v2O6fHcXlCxwJw8B88AzY5B4M9nRyz5SVNlL6nm/h99vHKruB\nVS8sLFQikchQDJFIxMApAC4PRPpDVdG8pBSj0ahqa2s1NDRkOAYAl89uEMosLi5qZGREIyMjJlBn\nzpyxEmUsPQK8urqqmZkZXbt2TTk5ORodHdXQ0JAaGxvV3t6utbX0WZ4HDhxQTU2NVlbSXaX++q//\n2o4FjEQiKikpMcGFMYhHtbS0lBFOcB/eulA4Njc3p7KyMpWVlenJJ
5+0M0mnp6cVjUbV1tamS5cu\nZXTe9tkjD7wBklFMJclYf7zO531aEksIOU3aOKjGK3OUDeBbKpVSIpGwGpXKykoji7GR2fDSRlhC\naOBdfhip8/PzRsrzgKHna7BRKW4jvs/OonFWrCdS4YVQVJaTk2OYQxBsNAf2nhkesb8HjnlAzn0N\nDViPz1Z4EBulQHNnX1XL9/pn9bjJxyrc8If34lJ5diEWCpfNNwL1hCdcTmmDbsyGI+2Jtp6dnc3o\nzg3tu6SkRFevXtWlS5cUiaT7Ri4sLKi5uVkzMzPWJYiYrry8PKMqsbi4WPfcc48OHjyoCxcuqKWl\nRefPn1dXV5dqa2vV1dWlxcVFc53hCQAsEaeCSRBK0Z0aK+9BWzpZFRYWqq6uTmVlZXr33Xe1f/9+\nPfroo1paSvenhGdBvr2goMAsl0+v4QmQIgOo5TWfUqMgjzgbgeb/KAN4HpFIxPp34g1h7QgFOXKx\ntLTUUp25ubkqKytTLBazTQbWQIXt4uKiqqur1djYmMFzyU7t4j15L5TKY0JWNpUkw62ysQhp4xQy\naeN4AR+iodQ9t8TPSSqVsvWPRDbOxPVkqZyc9DEOKAfmn3+rq6sG+HtFzxrRrRzDx719rFKgkswy\nsTD+AGGfR/dUXzYQmhu3XNqIKT0CDdaBlfYkHElWsfnYY49Z7rm9vV0LCwtqa2vT+++/r5KSErOY\nPlXV2tqqhYUFHT58WOfPn9dLL72kz3/+8yopKdHJkydt8YaHh1VZWWmuMl6CL/Lyng19M9lg3L/P\n9UNbLi0tVX5+vlnh119/Xa+99po6Ojr0xS9+0dLKWClQc2njWENJGd2oPWiLAoCtCviHJZUyTyf3\nGwuXnrXBbec78E7YIJ7ynUqlTB6oK2E+wzDU9PS0cnJyrIQfAp0PJVEWnrrsvQLkBZyE58Z78aly\njBNW2mMUkjLWCe6CZ6V6r8azNvGK/f1yTUKNpaUlC0XxNJCD7LJ31sOXDfB9t6sgpFtUEkEQ3JAU\nl5SStBKG4dEgCCol/TdJrZJuSPrNMAzj6+//mqTfkbQq6SthGL78YdclLufMQt9DwdNpPYjJ79n9\n+7yLSkELi5FIJMzDIE5FKLwQRCIRHT16NGPhY7GYhoaGzGoAzoFzrKys6LHHHtPCwoKlGVFKJSUl\nampqsmwGGYiFhYUPFIn5XDZKq7GxUZcvXzZhIZNC7whPQ+/r69Pk5KTm5+et3iGRSOjTn/60Kioq\nzPX0m9RbHU/VRpGA2PPMpKp9dsS74Fyb5/JW028KhFxSxmZAuUvpzBegbSQS0cjIiM0RWZPm5mYj\nuRUWFqqsrMwAYO6LTYvM+BQpG5fnwAOQZLiP5ynwzCgy5gcsisGc8jnWmDmampqyjevJWr4UAcMH\nDgRwzb6hf4rnoTDvXlngmXulSPh4q+NWPYmUpMfCMJx2r31V0okwDP8yCILfl/Q1SV8NgmCvpN+U\ntEdSs6QTQRDsCLlrN3hwXHiEvrS01Nw0tCq4AoKORcWdkpRxrgNVjBCNiOGIWUkxkh0AUccNj0Qi\neuWVV3Tx4kU1NDRkLFh+fr727NmjtrY2jYyMqKqqypSaxxR2796tSCSi06dP25kaLS0tOnv2rHlC\nAKqSVFlZqVgspurqalVVVWlqasrumV4KnpeAUFCL8vzzz+vChQu6cOGC4vG4nnrqKbW0tJi76j0h\nlCNAmE8fc0+44NKG6018Tm/QbMTdN/+Bc+F5DZ5w5UNGLDjWnKxTGIaamZkxBcQGSSaT1q6O4wZy\ncnJswyIXGBNPwMtuwuJlTZLxT5g3Qlq8WDw87/0RKqPI6DKOLBBeZhMBJZm3hkfhe3Rg+T3Q7ENs\nn/5H+RGOes5HNv/kdsatKolAUjZN63OSHl3//b9I+lelFcdnJb0YhuGqpBtBEHRLOirp7Q+7MDfN\nYrEwkjKsHpp8YWHBBNPHs8PDw
zp16pSBYXSfIv7GcnulgjfAhkUAcQvvvfdeRaNR7d+/X5cvX7ZN\nRvrx5s2b2rVrVwaJhkUuLCxUQ0ODrl27puXlZW3ZskU5OTl2HDwgaRAE2rt3r3p7e/XII4+ou7tb\nFy9eNK+BWBRGYRAE6u/vN+WJh5RKpfT9739f0WhUx44dU15enn7lV37FXE54HdQz4GaD/vs8vEfj\nUSqkPQlVsKA0p8UzWF5ethCD1/D0WE/PJZBkoYTvq0HWCcXv2/HzHfn5+SopKcnoZ8qxfigGNlR3\nd7dlPlBKHqRkw6Hs8Zz8fWaHMPAQwIv4Pi+3c3NzGWExvA4GPBk8TUnWSt/LlU/X4vXgfXF98DoP\nYpJd8lmy2x23qiRCST8NgmBN0v8ThuF/llQfhuGYJIVhOBoEQd36e7dIOuU+O7T+2ocOAEbPIiPv\nTP4aa4Tlp4jLhyWDg4OWOfBxsqQMF5jMhkfy+V4mHyGqqKjQoUOHFIvF7B6xdtevX9fevXuthyOC\n4y1SU1OTAYrDw8PKy8vT8PCwCgsLLUtCNeN9992nXbt2qaWlxRidnNeQn5+v1tZW7dq1SzMzMxob\nG7NY1IcxFLRdvHhRv/7rv66SkhITXDAbLLonInmS2urqqnkNfAfgpXeRFxcXdfHiRV26dElPP/20\nKWNOJkMJra2t6eTJk0omk9qxY4d2796t/Px8wxNQqIR8hHIoeMh2XtBLSkpUW1trGwB8BHIRXilW\ndnZ2VpcvX9b4+LieeeYZ1dWlRXVxcTGDU0M9CKEWitjLBPfgsSEURLYywdMgLGA++AxcBjwXsjLc\nO8aO7/N8D59FAb8hxE4kEkY84zN4hb5o8VbHrSqJh8IwHAmCoFbSy0EQXFVacfhxe326teFyYmVA\n8kG9farKx4KemJNKpfT222/rypUrZk2gKksbriUTxmLgsoOHeDAQ5cK1Z2dnzTriSRw7dkyHDh2y\nMAbvAxcb4Gt0dFS7d+9WPB63Y+mwbIBh3d3dktJnX77zzjtG8SZlW1xcrK1btyoej6ujo0O7du3S\n1atXdeXKFWN1fulLX9KhQ4csjq+srDRlS7hGrA7IRWjmBd67uawPm5e5SiQS+pd/+RcNDQ3p6tWr\nmpubU0tLi1ZXV3X8+HFVVlYqlUrp3Llzqqmp0dWrV23NU6mUampqDEuJRtPHHDJvUMVRZtxDVVVV\nRg1NTk6Oamtr7fyN6upqpVLpxj1cA6B1YWFB3d3dikbTrQirqqoyGi/jOeCuU46OzOHR0esBr8Az\nTLNBZRQxHhHzToiHfPh6FrwEPInl5WXNzc0pHo9bdTRG1GdB8GC5NnIKMO49Iv+5Wx239O4wDEfW\nf04EQfAPSocPY0EQ1IdhOBYEQYOk8fW3D0lqcR9vXn/tA2N0dNQWHdpwcXGxgT1ob4QFjjpxYEFB\ngZLJpM6cOaOqqipj+qGJvQbFc8CtxYJJG6Cbpw0D8OzevVsnTpzIAIhyc3PV1NT0AdIXwBv/YL+9\n8847ysvLs5OqoOnm5aUPH5akwcFB1dfXa+fOnZqenlYikdCRI0cycJj3339f165dU0tLi7Zs2aLf\n/d3fNZ5GEAR65ZVXdO+996qxsdEUpveUCNG4V8+oREDX1tbs3piTkpISc3lR4KWlpRoeHlYikbDD\nkouKivTaa6/p8ccfVySSbo3f1dWlVCql4eFhTU1NKRaLqaOjQw8++OAHCFkNDQ32HeAKRUVFKi4u\n1ujoqFUJwy8hBVhUVKTx8XEVFRUZngXmMDk5acoYpdbQ0GCcCOJ2Qk5pg7jFM3uimc8YSMqwysik\nZwDzbJ6rgMIgVPGpZWSH60H95wwOvASPqyDvXAOj6+/rypUrunTpkv3/dsbPVBJBEBRJioRhmAyC\noFjSJyX9n5JekvTvJf2FpN+W9MP1j7wk6b8GQfBXSocZ2yW982HXzsvLU3l5ubmQXgHAG0Dz+di
L\nyYVAMzs7q7m5OZWXl2toaEMfscg+RkQ4UBx4DUww8TKLVVNTo/vvv19vv/220a1RKhTU+LQc94db\nvnv3bp09e9ZwhJmZGe3evdtAyZmZGbW1tamxsVEvvPCC4vG4vv71r2tkZERTU1NqbW21npXhOncg\nmUzq6tWr6uvrU1lZmcbHx5VIJDQ+Pq7du3frq1/9qrq7u3X48GGtrq5mdNHyDFSUCKGWt1QwOr0X\nxzNNT0/rvvvu05UrVzQ1NWXHHoyNjWlxcVFtbW1W8zA3N6dEIqHl5WXLPj388MMGWubn5+v8+fM6\ncOCAhQ/gR4SUhBKkaFOplHkRhIUVFRXGvcD7mZ2d1aVLl1RdXa3HHntMJ0+eVHV1tf71X/9V5eXl\n2r9/v21WLK3PtrDp2YSkNDs7O3XkyBGbN4wR98tP72X6FG8ikciQITw3QoHsox7Y8PBzPJBPSIUc\nSxv8I7zw8+fP67777tP+/fst9Pze9773s7a+jVupGa2X9EYQBJ2STkv6x/WU5l9Ieno99HhS0p+v\nb7bLkr4r6bKkH0v6Dx+W2ZCUAerQ9HRlJd0SfX5+XkNDQ+rq6tLMzIyReTyl+fr16zp79qxWV1ft\nBCyAOGis2RMIyQRFMzU1ZZvMu2Ug1VhN8AHPkfB1JAg1LjveAhuspqZGZWVl2rFjh5566ikdOXLE\nNmFpaalZSUl6+umntWfPHt28eVP9/f1aXFzU+Pi4tm7dqvb2dsMI+vr6dO7cOfX391sMGovF1N/f\nr7feestcaIQfr4fnZF5QCPydOYLJirAuLCxocnJSr7/+ui5cuKBUKqVt62398eRmZmZ08uRJ9fb2\navv27crJyVE8HreTz5977jklEgkjgq2trens2bMaHx+3DQ5YOTAwoJMnT2plZcWKoMga0EW7rKzM\nMA1wFlKFiURCBw8eVG9vr06dOqU9e/bo6tWrOn36tK3n1NSUWXzWCxwAHMcDsGEY6sKFCxmgqCQ7\nLwS58IAmWQ0//3gQrAthrseCeG1iYsLkl+/Mppr7DM74+HgGK5OjICVlALG3On6mJxGGYZ+kgx/y\nekzSU//GZ/5M0p/9rGt7NBjWZRiGunr1qiYmJjQwMGAL1NjYaOdjMAFXrlzR8PCwOjo61NXVpVgs\nlmHRIUx5phqpK3LTs7Oz6u7u1pYtW9Ta2moKgMVCWUiyuJmKTJ8VIdTBRYxGo4bU79u3TyUlJRoc\nHDTrv2PHDp05c0Y5OTlqamrSl7/8ZQNrn332WR0+fFjnzp3T8PCwjh8/romJCU1NTZlQk7pNpVI6\nevSoHnzwQf3d3/2dHnjgAfX395v1Q1ChPUsyoSWuhiLsm8GwIebn53Xq1CkdOXLE6hx27dqlV199\n1TypiooKDQ0NKR6PG49haWnJStYfffRRnTt3Tlu3btXNmzdVVFRk7Eh4CydOnND999+vtrY2TU9P\n69VXX1VRUZE6OzutO3h+fr719Lx69arGxsbU0tKilpYWS72CNdTU1Oi1117TwYMHdfz4cfX19Wlu\nbk4jIyP69Kc/rcLCQr3yyit64IEHrKU9GAYZFDI4pDRJR+JREXKAV+H1gk2AOfhDl/E0pY1+KqQ2\n4fBwPb6TStDsMCc7LGf/wMVBllEk0sfwmD+P1noLd/nyZVVWVhpv/vz582poaFBPT48VYvl0Es1f\nxsbGVFBQ8IEy76KiIs3OztriUfvAe5qamtTR0WFIOPeFIAByot3BPVKplGKxmCorK+1afA/Pk0ql\ndODAAeXk5KitrU1HjhxRZWWlamtrMwg/PlSBvt3R0aGRkRGNj49bWFNVVWVu8Pz8vFpaWvT000+r\npqZGf/RHf6SKigr19vbqxo0b5vngAksbPRJJNePeouz4HuZ3dXVV3/ve91RQUKCDBw+qr69PW7du\n1RNPPKELFy6or69PdXV1lgalOczExIQ1AO7r6zPeA99Dt6n
x8XFNTk6qoaFB/f39GhgYUFdXl1pb\nW7W6mj51bWZmRr29vVYmv7y8rNHRUcXjcV24cEGtra2WIcCdXlxc1LPPPqsgCPTWW29pbW1NnZ2d\n2rp1q+rr69Xe3q5t27aZ14Flz2Z/BkFgzV+w2Hi8bHb/GYr4PCsSRe05P8iRNzBk7/iu7PJxroNy\nwfBJG8cD4K2gtCRleL//I4zL4Ha1ys9rBEFwZ754c2yOzSFJCsPwlnrr3zElsTk2x+b4eIzbQzA2\nx+bYHP+/G5tKYnNsjs3xkeOOKIkgCJ4NguBKEATXgnRx2B0fQRD8dRAEY0EQnHevVQZB8HIQBFeD\nIPjnIAjK3d++FgRBdxAEXUEQfPKXfK/NQRC8GgTBpSAILgRB8B/v1vsNgiA/CIK3gyDoXL/XP7pb\n7zXrviNBELwXBMFLd/v9BkFwIwiCc+tz/M7P/X5BSH9Z/5RWTD1Kl5jnSnpf0u5f9n18yH0dVzrV\ne9699heS/vf1339f0p+v/75XUqfS2aFt688T/BLvtUHSwfXfSyRdlbT7Lr7fovWfOUpzbY7erffq\n7vl/kfS3kl66m2Vh/R56JVVmvfZzu9874UkcldQdhmF/GIYrkl5UuqL0jo4wDN+QNJ318ueUrnDV\n+s/Pr/9ula5hGN6QRKXrL2WEYTgahuH7678nJXUpTX+/W+93fv3XfKWFM7xb71VKe2qSPi3pP7uX\n79r71b9dpf1zud87oSS2SBpw/x/UR1SJ3uFRF7pKV0m+0tU/w0dWuv4iRxAE25T2gE4rqzJXd8n9\nrrvunZJGJf00DMN379Z7XR9/Jek/KbNo8W6+X6q03w2C4HfXX/u53e8db1/3MRt3Vb44CIISSf9d\n6e5fyQ/hntwV9xuGYUrSfUEQlEn6+yAI9unnUEX8ixhBEPyKpLEwDN8PguCxj3jrXXG/6+MXUqXN\nuBOexJCkre7//2aV6F0wxoIgqJek4H+w0vUXNYIgiCqtIL4dhiHFdXft/UpSGIYJpZsTPau7914f\nkvTZIAh6Jf2dpCeCIPi2pNG79H4VuiptSRlV2tL//P3eCSXxrqTtQRC0BkGQJ+nfKV05ejeMYP0f\ng0pX6YOVrv8uCIK8IAja9BGVrr/A8f9KuhyG4f/tXrvr7jcIghqQ9SAICiU9rTSGctfdqySFYfgH\nYRhuDcOwXWnZfDUMwy9J+se78X6DICha9ygVbFRpX9DPc35/2ajxOsL6rNKIfLekr96Je/iQe/qO\npGFJS5JuSvqypEpJJ9bv9WVJFe79X1MaGe6S9Mlf8r0+JGlN6cxQp6T31ue06m67X0kH1u/vfUnn\nJf0f66/fdff6Iff+qDayG3fl/Upqc3Jwgf3087zfTVr25tgcm+MjxybjcnNsjs3xkWNTSWyOzbE5\nPnJsKonNsTk2x0eOTSWxOTbH5vjIsakkNsfm2BwfOTaVxObYHJvjI8emktgcm2NzfOTYVBKbY3Ns\njo8c/x890C13QIRTqAAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "imshow(image)\n", - "gray()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Python: 1.250645 s; Numba: 2.591848 ms; Speed up is 482.530126\n" - ] - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAQkAAAEACAYAAACgZ4OsAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsvWmMbNlZrvnuyCEyMjJyOqfOOa6qU3bhwgMeRJvBlpFw\no/YVF9TGFkjWbRoJ2kwSf5D6DzZCavUPZMwfdLvVQmIUtGhh+lpgBE0Ll4TBjVwufI1NuQoLmxpd\nrpN1ppwiIofI2P0jz7Py3V+uHRFZdQ5BSbmkUEbu2Hvttdf61vu937DWLsqy1Hk5L+flvNSVxrQb\ncF7Oy3n5913OQeK8nJfzMrKcg8R5OS/nZWQ5B4nzcl7Oy8hyDhLn5bycl5HlHCTOy3k5LyPLPQOJ\noij+Y1EUXyuK4l+Kovile3Wf83Jezsu9LcW9yJMoiqIh6V8k/XeSviXpHyT9p7Isv3bXb3Zezst5\nuaflXjGJ75X09bIsnyvL8lDSH0v64D2613k5L+flHpZ7BRIPSHrB/v/mnWPn5bycl9dYOXdcnpfz\ncl5Gltl7VO+Lkh6y/x+8cyyVoijOF42cl/MyxVKWZTHJefcKJP5B0iNFUbxe0kuS/pOk/yGe9P73\nv18f+MAHtLCwoLW1NX3pS1/Szs6O1tbW9Ja3vEXf+MY31Ol0dPPmTR0cHOhrX/ua3v3ud+vixYva\n29tTURS6fv26ms2mnnzySa2trWl/f1+DwUDf/OY3VRSFms2mdnZ2VJalFhYWNBgMdP/99+v27dsa\nDAYaDofq9Xra2dnRcDjUwsKCyrLU0dGRGo2GDg4OtLy8rF6vp4cffljPPPOMlpeXNTs7q729PR0d\nHUmSHnroIb397W/Xhz70IbVaLd26dUuf+MQntLm5qd3dXTUaDS0uLuqd73yn3vzmN+t973ufnnvu\nOf3qr/6qlpeX9Su/8iu6cOGCJGk4HKosSzUaDR0eHqosSw2HQzUaDRVFobIsVRSFjo6OUjv5y/Hf\n+Z3f0c/93M+lY4eHhxoOh5KkRqOhRuOYRBbFsZzs7+9rdnY23Wdubk6DwUCNRkODwUBzc3MaDofp\nI0llWarZbGo4HFbaxe8cp3COn9doNPTbv/3b+pmf+Zl0jn+o5+joKB2j7cPhULOzs2kcOS5Js7Oz\nmpmZ0WAwSP8PBgPt7u7qmWee0Wc/+1l98IMf1O7urm7evClJevzxx3Xjxg11u101Gg21Wi2tra3p\n/vvv18HBgR5++GF97nOf01NPPaU3v/nNmp+f18HBgdbW1tRsNrW9va1Op6Nbt27p8PBQjzzyiK5f\nv65+v6+trS1tbm5qOBxqfn5eMzMz6Rnm5+fVbrd148YNDQYDzc4eT8uVlRVdvHhRv/zLv1zpV/qi\nLEv1+3393d/9nR577DG9+93v1vd///drdnZWCwsLaYx+67d+Sz/7sz+rubm5JEvvete7Jp7M9yS6\nIR2HQCX9Zx2bNL9bluWvhd/LH/zBH9QHP/hBXb9+PU0IBL8oCs3MzCQBGQ6HOjg40NHRkQ4PD9Pg\n89tgMNDe3l5FYBAqhLIsSx0eHqaPXzsYDNJ5TKK5uTktLCyo0WjoX//1X/XII48kAOH+8/Pzajab\narVaWlxcVKvVSoNM/Y1GQzMzM2o2m+p0OlpcXFSn09HS0pIWFxfVbrc1Pz+fBndmZiZNJgCDcYrj\nVTd+v/u7v6uf/umfPnWeT9q7Vei32J6677Edv/d7v6ePfOQjr/j+8Zly/w+HQx0eHqrf76vX66nb\n7arf76f/+XtwcKD9/X3t7e3p4OBAw+EwjR1y8aUvfUnvete7Ur3IEP3A/V32Zmdn028zMzNJtvf3\n91M79vb21Ov1NBgM1Ol0dOHCBV28eFGXLl3SxYsXtb6+rk6nU+lr5MPlGPk+ODjQ4eGh/uIv/kI/\n9EM/pOFwqO/7vu9Tv9/X+973vqkzCZVl+f9KevOocy5fvqzl5
WVdv369MikkVf7Gj5/HOYPBIE1K\n6Vhbzs7OpsGZnZ1VWZba39/X/v5+AiU61tqdPgAPmrjb7aZ7o7kRgmazqaIoNDc3p2azqZmZmQRo\nBwcHCbwQLAABxjA7O6v5+XnNzc1VQCIHEJNMREAuHqeMA4u63yeta1xb/dqZmRnNz8+PbM9ZS47B\nSErsiPYA+IwP8pVr88zMTEWB8HtZlmnMUDIAAePLuHI9zKbf71eUCm1DHvigiGAI0snccKU1GAwq\nQIe893o9DYdDffazn03MadJyz0BikvLWt75VL7/8ctK2R0dHaeCcwtIZTGpYgCMo6AnyMzB0MiDB\ngDC4TtMpLkR0/vLycrqnpAqaU4+3GyHietq+v7+vw8NDzc7OqtlsamFhIbV5bm6uQpnrigt9/E6b\nvuu7vqv2/Fx9kxava9S18bxR9zsL9R3Vrrrizx61MGPGB9CanZ1NpqTLR1EUuv/++zU7O1sZK+4B\nAAAKkir/I5uzs7Pa399PcnJ4eKj9/f0Kk+U8Bxc32fw5eBbqgZXs7e3pypUr6na7kqRPf/rTZ+7b\nqYLEyy+/rKWlpTQ4h4eHkpQmI0gsnaD+wcFB0sw+0ABFNBdA4fn5+dSZ+/v7acB88Cl+jIFZX19P\n7aC9RVEk7d9oNCqayDUU2gTQwc9AfS6YDlCvpjhI1JVXanqMA4AIIuNMnUnaOq49XupYjE9kxn9+\nfj6NFQrDfR7IIdc0Gg099NBDCdTdR8J35A75QgabzWZit41GQ91uN/mBDg4O1Ov1kh/F2+Es25Wj\nAwTKkuPOKK5cuaJ+v6+yLHX16lVdunRJjz/++MT9O1WQuH79ujY2NvTggw+mDnWkd8cVE46HR0O7\nNmCQox3PJMTsmJubS5qdgc8JfqSHPtEj02HyuwMQUyO2Jw62U1zaH/vB//JMOQYRf6OMs9tjGWVC\n1N0j/pZr91nLKwUyZ6Te/84OcOQ5YwTU0eCMO6BC4djc3FwyaSOD5f+FhYVkKvhxnMF7e3vqdrsV\nWeTZ6TtYgjuy3YFZ5w+BvVLHD/zAD+htb3ubfvM3f3PivpwqSAyHQ733ve/VN77xDc3Pz2cdYHSG\nf9zD7iYKneV0DR8BTALvMULgbIHiHR7pH7+5T4R2ARQHBwcpyuCA4oNIcaCJvhb/PVfGae1J6f6k\nE7HO1KgzYV4pULxa5yr9iCKI8oMycdMT8AA4mPyYA7TJTU5AotVqJUDhOKyhKAotLCwkp7ablPgj\nnNFG9oPM4uCMppHLpvt2jo6O1Gw2k0JFFq9evard3d0z9edUQWJ2dlaf+9zn9MADD6SJ6mErnzgR\nICK1jw4ndyA1m0212+1EHfm9LMvUiZg21MOgUQfAEiex/4bdienkzlTXTK1Wq0JHnQlRZyx1k61u\n4voz5MrdMjXG3SMCBW0bV+8k7YgFJVGWZcU297Hw9rg2x4mM7EWQ8N/cLJmfn08MAZCAsbrPwpVY\nlN+6Z8MM2dvbS/I5NzdXMcdRgJg+AJe3352qn/rUp9IzTVqmChIbGxu6ePFi+t81dwSE6ByMwOHM\nw3+TThDf2YB0PHAuRO4cwvnoPg2iFqC7pMrkd/rp4FAURdIszWZTS0tLKRTK+dJpOp87NglQ+PX3\nooyb7JRRptKoeic9njvPqTk0ntC4jxn97hObnBDX4G7nwxBhowDEwsJCGl83G6OCGwwGFQBwVsI1\n0ScCIyLKRlQOZ7+H3ufn5yu+E0wkgKUoCr388sva29ubqD8pUwWJhx56KE1OtKpPeAcGH7xRGifa\nbwAAyIvGQNuQEOXhU3dAQRXn5+fTQEhKTlYEwzVEv9+vRFqgpO12W51OR+12O33QQjGqEX0qdSCQ\nm3iv1v5/tWbBOBZz1nLW+pjgHgr03BpyHtD+jLVr6egDcBPk8PAwJUDha6CeyAqjg5HjtMdNZ+nE\nDwYoAQ7MAYAMkIARuc/N0
wfc9CIq2Gw2z9T/UwWJ9773vXr22We1u7tbcS45ipMAw3c6SDoJVUZw\nwYabnZ1NCSWHh4dpkFutlo6OjrS/v69Wq5XAwidqZBNMZvIhYBQeXXH7kTAnyVFzc3NaXFzUysqK\nlpaWkgbyEG2dvyIek0aDwyQTapzJkrvvOPCIPopoZtTdf5I6z1pcluKzMZk8vMhvPrGazWaKmEkn\nYcrDw0PNzMxocXGx4nsAXAAj6mESu8Pd5RngcCZNXTgdPVzK+Sgod6oCBL1eL312d3fV6/W0v7+v\nCxcuaH9//0x9OVWQ+OIXv6hms1lxPMaQ5v7+fupcGIF0IjyOpnSedGJDxhwKd/JAGZ2euYPRB40C\nQHAPgKIuKsG1PF/M34Cq5pxXZ53IZymjwCZX/yT3OquDclzbXm0djJWbFEVxErZ2P1QsMAO0LmbG\n/Px8Ag7PhYBxeA4PH2RLOlFsMB1XfLSXc1E4nsvjpi5t8wgL7ex2u9rd3dXOzk7KJh0MBlpfX39t\n+SQ+/OEP68tf/rK+8Y1vSNIprVwHDkwoH2DPgIz0LmZjuleY+mKoU6o6EF1LxGw6D4F5iM1tXqeV\n7mCKLOLVTrC7WV7JhB91/iR+ibq6znI+44sSKMsy+YnwDzHBXS5i1MAnnqQkdzEq5mYxyigCj8sw\n/2NOABRuLni4HZny+0hKPhLMEfJ/CKvGeVMUhVZWVvTcc8+N7UsvUwWJmZkZfeYzn9Eb3vCGSge7\nH8K1M5OXCe0JJDE/gjoODg7U7/dP0ULWgUiqXOfFgcFpIBrTmYZ/b7VaOjg4SOFXwAB66t5nD4G5\nYP1bgcW4yXc3wOtuMJ6z3COGA5l4yBLgQd1oZncacg4MFYChbpcB90MwMV1OuEfMjYFJULevEXG5\n4vmiI78sjxd4AQwHBwepbQ5ArpyeeOKJMzO1qYLE7//+759aX4B25rt3dkR6j3rEAvXr9/uVqAYO\nx9iJrk28PZg87ummDtqJc8mz6ZrNZiU2jk8C5uCJM24TT6q9z2oKjKrjbjoZc+WVOlVHtWscY8Ef\n5I5lrnHnoDsm/XqAXdIpVhCVA34B90X44j43w5z5umNdUiUK4YrHZdxBwiMfPAvXYao4E+eTC7GP\nKlMFiUajkVJRfXIAFG7Xx86iIwCAHFDQif1+P53v3mK83xEoQHxPQgHpPRLhORE4RNvtdiVvwu1f\ngC6Gvl5teaV+gHsNDtLdZ0STOE/x+zBe0cxxP5Hb+pzD9U79OT8uCKRuZCg63mEeMdnPgcTbzO/R\nmcqzu/nDMV8f5Cw75x/Z3t6uLBKbpEwVJH74h39Yjz32WJrEziD8b8yMcwaRczRS6CgmJllr1AUI\nELL0QaNTYRKu9Z2SokEwI7B5cUw6QLiDVqouHJqk5MDg35MPI1dyz+c0PPfbWUu8Jo5T/I17+wSM\nSga2ATDEJeRS1eyN+TueeetrkKjbgcPTvx0Icuw6J+uAkvvZol/E+xsmPGmZKkjcuHFDjzzyiJ58\n8skKSFAi7aIzYnqt0z/pND0EEPwviA+jwJRgQB2BqZMBdUcWlLQoisriM+xEZyAkvrCPhGddurY4\nKyW/W0Axyiy4V/c5SxvO0o54j9z/aG9PsIv3cbnDv3VwcJCcoe7viCaAm6X8jikDSPgCQMwkBwqX\ni3gvN2FcpnP9R32dTke9Xm9s/3mZKkg89dRTetOb3lTxS7iTZTgcViIEPqGZfB7SjHa6ayxYQaSF\ncU1IdC45VYsakOw3BqvZbCZtw4BLSqHOxcXF9GFzmuiw9DJqktzNSUu9o+7r54zzB4w755W0yet8\nJc+eYy8e3Yr3RFGQE+Gszx2dziKkE/aKUzJOZK5hwke/kLOgaGrTpiijtCU+Y6y70WjoypUreuEF\n36N6fJkqSNy+fVt/9md/pu/5nu+RVN22TToGBV+GjV/BO5SOzIEEJech5n7RY+yD7lrdO9w
jLtiB\nbO7B1mduk0pKzswYhh034SbVmq90Uua0+t0AoLsJFveyxBwYijs5iXSQJwGTYPzcoe7FlQsO0rjq\nGDnwMKebvF5PrNe/RwYR5R8gvHLlim7cuHGmPpoqSLzxjW9M2V8+aaLd5QPhYU+37esSmihMfo47\nunP/2LFRwKMDie8MOjtX4cicm5vT7u6u1tbWkmDNzMyo3W5rcXEx3WfUpJzUBKjTuveinAVERoFF\nnb/iXhdXMn4sZ5JgHtAujxZgUsYx8L1KXEbrQu4wBGeycZ0Hxf0gbgp7yeUD8VlZWVG73T5Tf00V\nJJaXl/WWt7xF3W731CA50kpKpgIZmJge0UMcnZ/+HTswJ5x+XQSdKLj877TPhYjY9czMjHq9Xron\nfgmPXY8rdfd+pWWco/Qsk//VtKWuHZMwkFd6XzfTxjlUHSCkk8mJfDCmw+EwySQT3xf8RUDwjErk\nS6qGRI+OjrIbEHladvRJRAXIvbztjcbxxs5vf/vb9ad/+qcT99tUQaIsS124cEGbm5uSTmxEFxQ+\nDISvj/fsMzc7uJYCiPjE535RcCKj8evjd+nEK+7rTDwzj5z7VqullZWVSlKNl9ykqzOfcv9PUsYB\nxL9VmaQdk4LQ3WYezgpiNi0OR/anlFSZ0HH7RPclRLl2k8KXIWCWxDAo4OITH9nzRMTIht0fMTMz\no2eeeSb5yiYtUwUJdh2WTocD3bSIOQ2etOJmiaRTdbjtyMDzm18Thc1Nlxh+isXR21EdIfN2R80y\nSbnbbOJulVcKOnWafNw1o/6/FwXnsyseHOnsOObLAaSTHdI9GYuSi7o5SMQUbJQafeVmj2d9xsWP\n7qOI5kZuxfG4MlWQuHbtWtrGPK5fwEtM6MlTWOk80NFR1+PbkiroGic+1DDGyB19I3OI6zxgDb4K\n1EOqTiHdIVrni6g7VndO/FtXJpmU/1bg80rAJZqiXnKOvbrizHFcW6JSKMsyrQWRTvITMDlcXjxU\n7oos5z9wxkzBnHHlhsnq+1bwfhRn2a5cY6SE889SpgoSKysrevLJJ1P6rIMEFCqipE9oJqnTO0+w\noi6p6lSKgxXNED/f/RO+KIvVmyRQxV2KSYmN6zQ8dbuu5IQrZ4ZFrTGJ83Na5ZUCg4+fm4PSibno\nju044cb5ISjRH+H3zPV1BHP+z22XJ51o/nFAnGO0MBrkjkL+Dea4p3n7O2i83UdHR8lpPmmZKkhI\nxwlVly5dSqgnnewZ6RvHuAaWTjtlGFB3TkahyEVP4nHqdcDwHX5arVZKv26322mZN+1k/T5ZnLwF\nanl5WZ1Op7J1XV3xyRCfT6qChHvFnTV5X01C7/+9mDC54v4mxshpOH0RzblRbCFO2LpzAJ8okyzh\ndlPYMzLdhyadKCfYq1T1wXF/l11nAMgg/gRAo9VqVULu5GigqGDgmEcLCwsp63jSMlWQ2NvbSxvA\nSKqgnae2Qo984jBp47sq0PpSXvCjYOSAI+fM9PuxYxVAAZqT+t3tdtXr9XR4eKiFhQVduHBB6+vr\nWl5eTufnmEQOHNx5Fp/DhdeXw08aOYn3frVAUQdEryYaQfHEp8jGom0/yoHH+XFNQ07Le98y4QEB\nNDa/scELvwMUcYWpy1sMbUbfWq4/nVW0Wi11Op0KmyXbs9lsps1m3MF56dIlfetb3zrTOEwVJOjk\n5eXldCxqSYTfvbvQLN98NNJur8PrpkQQqBsUF04EFPOB5d/tdjudB9UDJJrNplZWVrS6uqrl5eW0\nXDyCRAQIXynKhKhjEvSPU91x2aKx3CuA8N/uxj1QEAAF45hjE36dt8/ZAYARwZW60MYAAD40f+Uk\n4AGbcNofHeT4BaTqalQc7JxL+3g+37UbRtvpdNTpdNI2CMPhyfs7XLH4OpJHH31U991335n6faog\ncfHixUq0ITKJONBu0zuLyGVLxihCnUMwZ//H3yPKM3E
XFha0tLSk5eVlLSwsJO8zA8WWee12+9Tm\nty4c3MsBwnc6ztHrCBJMHihnnQnCvWJ5NUyirj6/ZzzvLPdyNuerayObIEPXIwR+PSVm2jpj8N8x\nJTAh+fhmSE7xfatEXyPkfYupAZOIsuX3jxvRcB1KEr8YsofD3tcEFcXJi6WLokjrT85SpgoS3/md\n36m///u/18rKSiUpSjqd9+DA4BOIyeasoy4XgRJt9ZhIJZ2YPtzDtbqn6gIUjui+lp+wEy9o8Z2o\nXLu7GZWbCO4tz7EtBASh85V+Oe//3QaK2Lfj6syZT3XnRPbAXqMxPOn+Ga83mpI5kBgMBmn3KldS\n0Pfd3V1tbm5qZ2enEv50sHDzI5fwJJ3sS+nPlAvLOyj4c8V7cR+XUV4dCYDhP2ETpE6nc6YxnSpI\nrK2tqdvtanV1NR1zx5QvxfbJGbUHncHmG045c8KYs0O5r09e1+K+UxG0DopJAgw7KJNp57FqfBoe\nrvVndr+HmzRc40LOc0THljt/KZ60M0lxkPa/sf9GXZs7Pu7+8ZzIAtym9zB0DH/nMmSpB4WSc/j6\npkKueWETbM8PSMAE3A8S66HtLgt+T0pux3XGtyzLJAfsdsZ3Zy3eBgckTGJJarVaunDhgr77u79b\nn/zkJ0eOh5epggR2ng8eQOB0zLW5I62bJ55TH3MjqMsHIuesiv6JKJDcHwclNJT3PMIqFhYWJgpx\nelw9mlLunIsA4W3lu08WN7/O4siM9flfL56OLk1mOkzil4gMjz7y53MaHU1F5MhLZBJS9bWRnlND\nYam/Ry7wR2BGwmB8ckalE1lizK3xsH10urqyo51uci0sLCSW4PLtwAWbnJub09LSkiTpvvvu0/33\n3z92vLxMFSRAQ7fP6QiKh3xiYohnnXmp04ZRGKTTyUiulSJAuLPU98xEmPr9ftpAx0OpLqjOFmLo\n1QHC/RCRQcTi9Tj4RZt7VBmlsV3gc3Q9TpBJgGCSEoE65yOKQDGuPmdgsa+cVcAe2Gm63+9XWKOv\n3Yh94GDlzNjNZ/op52yOfjUc085o9/b20tvHiKh5W3g+2CUM5Ed/9EdfWz6J7e1t7e7unvIuuxD4\nYhkHBs6L2ixO9ugY9NBXjIZwnpseHtXw92+0Wq3EGnAs7e3taWdnJ5k9DJKkxAx8izueifbmJqT/\npeRoOQXKjQMv58z1kmMj3ueuub2vHIBitOAsDCO2IR73tkRnbgSKWB9t8P/dR+AJe2wSg79hZ2dH\n3W5X/X5fe3t7ld2nHZDHgRMs0UP1bnp4Rm6ubyVVQM3ZEC8JiovJPFwvHcsE+T3r6+sjWW6uTBUk\nSESixMH0pBM6AvTMCX2OjnPMHUFeotnhA+F03/0Kru0ZHByVbDTiL17BmQSwcL+4USuIz7tIeD4P\n8+ba688V2+rnxGiH95UDRMwQdaHKaV7oLZrV7xknVK7kJjv97g5rJkRkWt4u74vcMWTB2QLbGhLC\nHA6H6na7CSTiamNnNF6nP4/3q7c3goA7Fr0uB1tnBrAPmKvX7fKCvPn3mZnjFwS9pkKgt27dytJY\nn9hRaJmULM11Z1POWRkRv05Q/Z4OLkycGIoEydEw+FZ6vZ7KskwCd3R0vEqQfArAZG9vr5KOXhRF\ncjJFX0Rs5yhg4zvC6WCDaeTUOAp9Lh8kF1lxBoHN7JvEenSpjlHkxiKCckxnj+DAmNWZUx4xcLPT\nk6N4kQ1AMRwO1ev10ntEY0If7XSzIgKGr5twkymaOB4RibLo40mfe//7rlk+Jo3GyTtCefcsyuef\n//mfdeHChWxf1ZWpZ1xGz65nDnrJ2aHRu+ylzleBANKZMfOOc6jbhYKBiZTaJzrP47kSs7OzFcfX\n/v7+qX0ueX2gU9q6kjOVchTbgYJncE94HQtxwfaP1++mhZtN3j4H27rniD6FeG8Hh5xZwr087Ji7\nR5QZT47a2dnR1tZ
WOobp6FmU8f0V7meA4juQuqng4Xqud99FLi8o15+SkuLBNPIQsEdVMJNarZaG\nw2HlhdeTmoGUsSBRFMXvSvrvJW2UZfnOO8fWJH1S0uslPSvpw2VZbt357WOSPiJpIOkXy7L867q6\nX3rppQpISCcoTwf6S1zpDNf60Vnj5+QE1SMKUQPHen1JOtcCCiROwQAi5YNl8AbnXq+X8uY915+X\nBlM/4S1A0bfAiyxilEed+vy7+z2iM3MSO9Xrd+ocNXsE3ZxQRrY3ChyiX8oLgExfR+Dxwpiy7wfX\nABKYF0xCAMIdljx7BE7vS5/0zo7dH0Iuja+v8HGsA2batre3l2UnXI+yJcJB9G12djbt3zJpmYRJ\n/L6k/13SH9qxj0p6tCzLXy+K4pckfUzSR4ui+A5JH5b0VkkPSnq0KIpvL2ug68qVK7p9+/Ypzy4T\nI0YvECYHjBjfjhQ8nk890WfhaE17cEhi4vCb2+7cF6HmHGxGcuf9xSsxMQhgkE5i23znrwuDC170\nMcQoQAQHvhNVirZ07A9AIGfGUZ/by7GN1JWbvA44zliiPyQXhvbx39/f1+7ubmIB1OnPjHlIOBMz\ngkSpnZ2dFMVwf0HuXZ3R2cx3qbqHiSdkeb8hIwCLy6L3d+zrHOhE+aZNvGiYyAhvP3/kkUe0traW\nm461ZSxIlGX5/xVF8fpw+IOS3nfn+x9I+qyOgeNHJP1xWZYDSc8WRfF1Sd8r6Qu5upvNpi5dunTK\nVPDwkgMHHeULbjypJCJqLkSa02CRhdAGimtsJjMUkzeTEyL1CYOn3B1wnmOP48kZibcbj7jX622p\nc95GYIiAwTHP2Muxqjjpx01yB1Yfu6gjnJFEFpHzQfjkdIBHLpjovV6vMoF8fQe5LYSpoe2+IC9n\nsviz1dH0OIndocvzMa6eUOfMRFLa75UxjGwCuSQagmx6YZw8VwIZ4jlarVb2OerKK/VJXCrLcuPO\nA10riuLSneMPSPq8nffinWPZwqYz7Xb7lN1IQktRFMkexAbDZvQdn3xwveNc+N0mzBWf4FJ144/c\nBHXha7VaKTPUBZ56eWGPpAQ0vmKPbE0W78AwclraASLHJKIZUDcpmXwxtEe/RaYStVucNP7c0URz\nFud1jAKJOoDw4p5+3pzN/QBmxqnf7yfGgaYdDAbJ9MiZtd4vzqz8nDhGPBMTnMnulP/g4CCNMWwz\n+j2cqfi4RBbsY+OM2N8NSlsWFxe1tbWVlf+6crccl68o4f8rX/lKWgR133336eLFi6e0kHTyrs0Y\nWfAt7dzPMZC3AAAgAElEQVTZ6VpPOs0YckIf/3eqjKA6evu9PDMOwYyvUosawNcIoPVoq9ukOZ/J\nqJITXPcZRMBwhuYC6nXlKHAEFO9nf5YckEXAciBw/4P/Htud8zf41gKYhbAwAJmIBaZJWZ5Eovw5\nYAIxbZuxo7imd3PIneP0CRGH2dnZtJMV/cs7a71E5Tau5Pp6ODwOz7/wwgt68skn9cwzz/ybbTqz\nURTF5bIsN4qiuCLp5TvHX5R01c578M6xbPmxH/sxvfjiixWaxYC4gBLujDTTk2FyUQrp9KSg5IAC\nOzL3e2QqfMfedPvPAcXXAAAGbjeTuekJQ273OjhEoMhNlrpnHOXjccCiXbHuqOG8RJDw/nTfiR/3\n7zGiEs2i+EwO9lEG3BQtyzIty/fVnL7Xg0fUnNk4MMTFVLTHx5sS/QzR10KuB2aA5934c+b8HrG/\nXJGNYpaDwUCdTkf33XdfMs3PUiYFieLOh/Lnkn5K0ick/aSkT9vxPyqK4jd0bGY8Iunxukq/8pWv\n6KGHHkqoz2TiwVx4XUihia6dY7afdyyTw51s0ulkm5ymdnrJeQiFOzOdwjYaDe3u7iaBJ0btqz+J\nYPjCHXeOxckf2+laxp+jTvM6yLojMtL93PHYhsjUvG2u/R20OS8Ctj9vNIW8z7mv1
+fLtCOTRNHw\nXJ4270lMHj7nPpi0MVksnussyOUJZeCrVN0EyTGk3PN6X/vHxyX6JLzfvS8wp/r9vprNps5SJgmB\n/l+S/ltJF4qieF7S/yLp1yT930VRfETSczqOaKgsy6eKovgTSU9JOpT0C+UIjjwcDnXp0qXkwAPx\nGWB3vkCL/VofSEfTO22pnB/jw7nBocQm+0DhAOJ+ABQaDErKOg9SuBEcz9zjXLedPeLhy8oz41L7\nPU5Wfy7XcLQDIMwJXI45UFfOXItmyajiQBcnQg4ky7KsmGwIP2FExoDFd4PBIPUhkQxnfnWsk0no\nrLIOKLx/vG+lk1R8FAQ+CcyfyKJiX0cF5ffw8XUWFAGV775Oqi7xrK5MEt348Zqf3l9z/sclfXyS\nm6+srOjLX/6yLl68mOxYq6cSimJC5c7xErVnTvuNouix/nivUX4Cp+wIs5tOCCjP0mq11O/3VZbH\nKdrlHWcbwkTyS6SWse0++aPAx+eLVJRncvs41l/Xb3V9laPNo4BuXJupH2EnMjEcDlPY0reNw/fg\nS6txUNbtau1A5UqAz+zsybs2cj4W6vBoFREvz6ClDgc336TGJzkTOseCvY3RD5Ubh2gGnaVMNePy\noYce0ksvvZQazsN6yq9nVOa0l3SiIf29BNHG5ZhP8Jwg1hV3pjo99vo9VBafg5f0cK6HRBcXF9Xp\ndLS8vKylpSW12+2Udz+qXZMABG3PFbdtYx05MDprqfMrjALpUYBNn/r2cZ427UlqRDsIO7pvwcfB\n5QQG4Caca2v3XTgToT5f3et+Big/9RwdHWlnZ+eUf6TO3wQY5CJFub7LMUvvx263e4ZRnDJIPPnk\nk3r729+u69evn3IYUugUtzel/KpJH+S43oIC8IzSmj75ol3NeVHImGjRXEL7ISS+inR9fV2ve93r\ndP/99+vy5ctps1x/4/go4Drrb3EC5pyJkYLHuiYFi1HsYBJQ8BJ9SE79PY0//u4RsKhwfM9IdyzG\nwtgCTMiY50A4SDBmXMP/RL4wOcnNICpTF/6MJhC/15kifn1k1dIJez1LmSpIXLlyRcvLy3rppZcq\nZgEF+uYLlaSqEyiu1GTQY8RAOhYyf5cox3IlR/EdMKCgvtcmb552AUUA8DWw0KvT6ejSpUu6cuWK\nLl68mF7kyu5Xucnj94/gNWoy1k3yaMN633qpYyJeJjXhJgWIcX4N9+QXxUmyGvk1fDwkyjWuVJCN\n6BvA9APwfVOjmBUc0/L5eFTO+zcCWnTMOzBwjLbBbuK5tNllx89D3nu93mvL3Jifn9fzzz+f7Hc6\nkIkuVTeBwXZzM8LZAtcQTYjHPXOT4ijrHe4AEZkMbfC2ofnn5+eTeeF2vtusc3NzFRODt4x7BKSO\nRXh7os+kzp73kgOJnNkVBbSu5EzAUX4Ib+coxpIrcdxdVkhU87AkPgCAIj5T7OOYo+H1+Thi8qAY\nfDWvm8nRNPVVzBF0/Jmc1Xlf1SkEj3REeY79enR0dPejG/eyeNZbpEcxDOgswWmY5xU4SMAwHIFz\nmtM7P6ehvaAdYlQg552Pz+Nsx0GCd3eQWx/fI+LF2xmPxe+TAEVssx93G7iurtinuXuMui62eVxx\nQPCIDM7ARqNRyXrlHjC6mNPg2hzm4KwA04B6cTyzFocsSjJlXXbcxIzy6dE4ByNXRh7Viv4S6uA6\nr4Pi94gsJBcEGFWmChK3b9/W4eFhem8FhQ71jnR7ziecdyAoigngnQy6S1Vt7LQ1p50jk3AvsQsl\n7RqldWk3e2FidjhQxFz9XKljGbTZ2+7H6vwEDmL+v/tlzgIE40Alfp+k0CZehETfM5nJvPVNiWh7\nXOvgY81Edp+BVGWH7qfxBXhk1i4vLyeQoF7MWuryye2mj4+NO74998Ll3Bmtz4U4vh5q9/73NIFJ\ny1RBoixL7e3taXl5OQmlmw8MjmfGubngTIL68
BXEMFacPHXg4P/ngMPbHuvw33JsxCMkPtiukSMA\njGI29IGf60KduzYHENGeRUO79jkrUNT9/kqBws01xv3o6PgdE/4eDF5758/hIVPPzPWx9dClm5zR\nZ+D7nPo2hoATWrrZbCbHJcDj/ioAjbRxQIJ7Ef5088md5bF/AQY3gTGv6Wvm17/VAq+7Uh544AEt\nLS1V9pXw0KCkyiCRMxGR3WlcXC0a06bj28kjIMT/uZb6fTA4Jy5t516eyo3Qkfm3s7Ojzc1NLS4u\nJgrtVDfHBHLmxKR2fdQedfa5CyJ1R0/6WRlELK8GKHzR13B4snMZ8sHmPR567na7SbPnTEL3FbE5\nC+YETm5fodtqtZLfi5yWRqORHNQ+6TEvJaVIi6QKuNHfAJ/LjgN/TqG4+e1AEpkTffKBD3xAX//6\n1/W5z31uoj6XpgwSly9f1tLSkq5fv37KCUjH0nkMumctAio+ORFuHFZsRML1vj9AnbatKwyM77mI\nQAFqMdTG+bSRTU48ycYdVwy4L5KSRoc0R9H7uuLedGcRsfjx3IQ+i8kQ2/ZqTA/X9lyPDC0uLmpp\naUn9fj85hbvdbiW5zTNwI/h6VE1SytzE3Gm1Wmnlrjsjo+nAh8V+3l5P8opA7MlVdf3spjhtdpMx\npopLx2P+6KOP3pNNZ+5ZOTo60he+8AWtrq5m1w249vOdoKBS7gdwrQ+IuAfZJwWsxTvVaRmFeplI\nANjCwkLaXJTMSHeiYYsWRZFi9NjQvoiKtuLA5X/eRu7p2j7YLjDxewQ9njv2J8XP83MQXL82+lvq\n2hH7cZJyVkZB8XU51LG4uJgUBIu6WI6Nho4mqo8d/Tg/P18BFmQPBeEva/I1JA5iME/+1q0B8eiG\nP6NneXq/RoDIrU9yHxnX7ezsvLYcl5ubm9rd3dXly5dTiDNS/7g6TzoR6NzKOQbNqVr0DOPpZlDd\nK5xzAnmyjL/bk63n3EzwtOB+v5/Sg31vzMPDQ+3u7qZn8Sw+BIn3i3pilXTaCenf48TM+Tgo8Viu\nXhfaCBTUG9nAWcAh3nOU5sy1mfNiOJQJie1PRuPc3Jy63W524sHoyG1gnDBZCHlK1WxZxg8zEmbg\n7aJt/kZyn+QOWNTvz8ZvyDXXuwLL9Q0y6/27s7Nzz1aB3pPy9NNPJ8eNmxVxAsRQZkRT6URIY7yZ\nyesd5vtIRi+x5/K7V5k1FYS8VlZW0ns3WJAF0AEO3W5X29vbaUNVbOPBYJAWIPmuWl7cfJJ0ChC9\nf0b5CJyVjWITTnmdeeV8EaOAIld3Xdvi/Se5LgeEMaoUtTuTl2gEmtydznFCeQKWhzJ9abd0slWh\nrxfhXBaWMXakjLsj3s0Gng+m42kAzjQxcZAb90VEEzqyGuT4LGWqIPHwww+r0+kkreqaDyHN2ebu\nW4jFfQLuOPSQltflu10x2bE3+e70EiaxsrKS8hrcN+GC2ev1dPv27bTR6vz8vLa3t9M+ipIqvhOE\nyDfSoZ0+0JRx/gEmUM6kiJPUv9cBSrz3KIDK1V1XzsJAYltz7eO58R8sLy+nyTU/P5/dXsAVBIX+\nR5vzOz4vFAI7bqMEJCV2yYTGRMa0dAbgjMYjeLndyXDWOrB6uwA/Hzvvlze96U36l3/5l4n6mjJV\nkPB17k63fVIgCBzHRHCQ8E52p2WMFfukcXbiAu+sgbdt8fYjWMPCwoI6nU5iQdBPXqpzcHCgpaWl\n9G4NnGftdlvNZlO7u7sVkwnb1tvu7+2g7dEU8pKjnNEUi5rfnX7xOhfknF/Dzb9RZRQjGAUwo8wN\nf9Y6EwWQWFpaSvIF0LMaFNPC/RQ8b8yIhGnt7+9rdnY2mQ6sQsX3wS5q/qYxmAfjK50Aj4dXXa6d\nxUZfhe+25WzZlSPAERXCT/zET+gP//AP9bd/+7djx44y9RcGd7tdLS8vn9Imng7rk9/tTGyzuA7D\nQ6XRsSmd3
ryE4uYK3m1AAaAgXOmORwQeBgKdOzg4SGBDynWj0UhgQvvY2gxtwzsoW61W2jrdNVIs\nddrVQSVS0OhDiP6BnK/BzRH6cRyjyJk3Xrw+v2+uRCodn9frACRoG3uItlqttH0+TmV/zR5amOXn\n3i8ecZOUxgkHKdviIW9khka2ECevmxLItLfFFZlHAXGYe4Qv5mK4STMcDvVXf/VXeuMb3zhyTGKZ\nKkg0Gg1dvHgxrUqjo3J02JkG2tZfu+6ayZ2RbtOD0B5C9Unhfgef8B7CjHsKYK4sLCxoOBwmZyYf\nN18ADMwr2oitvLS0lByV7qTl45ml9Mu4Ek2E3CQbVU/OV5BzYNb5JsZNZmdKfjwHFj7RRgG9TyjM\nQ0wPolGYfYPBICkcXwzGWMMw4oR21he33Xdl4wzO28qzuIPet+HDP+d968zN3/Ppb1pzU9x9LwDh\nE088ode97nW1450rU18FurW1VemouCIuxyJ8ww4PUXGNswtAIadVXSBiqMjbhDB4LJzkGQCOF++s\nrKykDFJ/qTCmS6fTSSvxaBd+DcyZ+fl5dTqdyiI1N4e8jNLU8Zn9+XLaOE64yC445vX5xMhd74I+\nyofA3zqTKrKIXL2xHrfZmbQcI5WbXax4H4e/6s/byWQE/P35/Nl9NypnnMibR7O8Dk+i4neicLF/\nHaB9nJxxxnOZOxsbG6+tPImrV69qYWFBL774YoVFOCWjAz2ODb3jmAOLVJ0U7qCKuRBc7xoKUMDe\nwy8AGDgVZHMTScnX4C94WV1drbzGD8bBduqSTpk2AAIaz82UKPyAWw4ochPQSw4YJgGOKJjRLMmV\nOLHr2jnO5yKdXntRx1ByHzclGY+YJeuLDpEX+t9zJMryJDvYldJwOKywR2TRXyQdnyNqf+4/MzOT\n2uBMgrnh2bnOnOILn4uiSP6/RqOhhx9+ODtOdWWqIPHiiy8mJiGpkglJ5zr6ergpHo8eXen0W5Wi\n78J/K8uyIiC9Xk/z8/PqdrsV77EPLJpIOvZm7+7uppfPAiCdTictJZ6dndXS0lLyS/iqQ2cJCIAD\nSJxM3kfRIUkZ5SvwEidb1Np1E9H7sq4Nufs4KMd2jvNvRDbh9cS25Wx5+tkjBMPhSbo8oWte1gMI\nMGaYjO507vf7Ke8G88R9SBHgkENfbuBM2dsuqcKAOB5XPzMf8JktLi5qcXFRCwsLydfV7/f14z/+\n4/ra175WO0a58u8CJNbX1xMASCdUnyxF71xnGz5hPdwp5Sl2BIloZyIs/g6GGF6i+ODiGPPIC39Z\nwIa5wQDiewAkaIcv8Il2aK648Piz5ByK4yYwf+NkHAUSdQwl9nM0c+J4+TOMYiQxkhXb588cfQT8\n7mn/mH37+/va2trS5uambt++rc3NTR0dHaWNa3m589LSklZWVlIOBPkRc3NzyUxx0Kf+yKKif8Uj\nExyPLMidvFyHn204HFbWPxH6xWzFGfvII4+8tt4q/sADD+jWrVunFnF5Z0WnpAtKHUXNCaovyuJ4\nHYX3yep+j9gW2sHg9Hq9Si5GDLHyZi4iJVBXD1fRBzE5yDVLnBRecqDgk3QSZjFq4tedV3dOLNGk\niRNnEiZSZwpxLMqEh735H5AgmxbKT8St1+upLMvKPqTLy8taW1vT+vp6Ugrdbjc5tzFNfcyZyLnn\nRO6RLT8vgn0u69adk5hEJIvRdsLvZVlqcXFRc3NzevDBBycaK8pUQYJsxE6nU3lgz0J0B2QUdiZi\nnOT+fwyx1Wkd16AxGcsBzAtC56wBgcOmhBKyDgABJdpBGCu2IbZ3FA0fdzw6vurO9b4bNSHr6qqr\nvw7UcveYFKAmKU7zvc1RKXkEyc1WTIpms5k2CVpdXdXq6qpmZ2fTTufciyxIzA3p5N0g3gZky8Gf\na6KMeb+5ogCAnFF4Mh7hWF4jiT/FzZ9Jy1RB4vnnn0+0LU4OJpsnuOS0Jja
ZdHqrc6eY/vG6/JhU\nNT38M6pjy7JMjk5JKY/DU5yLoqiEOfGURybj3vAc1Y/9kGNSo0BjEgGJ2sz7Jud7GKXZ4/0j2I0y\nG/yeubpy38c9E0qIZCiiGUTL6F9CkAC9v0cF9kf425UBMgm7pLCEPSojb5uzyZx5JlU3sPHCc/EO\nG/dlYTbhu5ikv7xMFSRarVbSwtIJrfYceulkb0rpZJI4dXT7j3N80Dy/wFO53c6jzpygjupUF2Z/\nsTEOTU+z9Td5wS7cQZkzJ+r+z02icYM/jqr78eg7iPQ3p/HrJnrdc/m10ek8iYmTu0eu7vhMaF3S\nqXd3d1NSFAwQGSQ07XkvyFYu6uSKwe/p4JHbsBfzJ4b0aW9ubBxc+B8A9LUfLHWfn5/X5cuXE8uZ\ntEwVJHZ3d1UURVoUg23nNha/Y++5bSlV0Vc6vfefJ7W4J1ganUgU7Xnqyglz9FGgCRDEhYUF7ezs\naGlpSdvb24lNEAr1WLjf09syDjjqSvS15IAiTuIc/Y7t8TEYxXwiqNSZe6NKHZt4JcXBjwQqPr7c\nm4nr63jiZreYDUzs6HD3c3N96WHVyK4Aitin0bkb/zpTAgz29/fTM7RaLT300ENn6rOpg0S73a4k\njUCTXLviQfZ8+5x/wo+5h9nX/nuYiTBmjL2PKhGgIqJLJ5oRs4O8fhYBdbvdtKYgtj/WPapEB27O\nHPA6cuZDLD6J3F53jci9IwDl+o9z4uIp/y1nutwtUIj9mJMZjvtzutZ32n5wcKDd3d0UJuct5b1e\nr7IEO07mGIFDNh0w3FxBIeG7iOZM7HvqRO6cWcSMzKeffvpMfThVkCiKQs8//7wuXryYjhGPJoV2\nZmYm5SqwkAYqn1vAxV/ABvSM77OgAxEOF5DYRh/MHH2Ng+TfsRN3dnZSBibtISmHhV9xIuUmv3Q6\ny3IczY/njQIK/70uvBzr9v7IsQ5/hhwY5p5zUrAYxzLq2s19PLrg6x089O3gQMo0eTDu2zg6OqqM\na3xWT+yTqi+TgoHwP7kNbl7n0sO9v5E3X37AUnWu/5u/+ZvX1st58Ph75CDGeGN2nFR9D6KHjzyM\n5HsQ8k6LKBiuNXPC5slYEQAiZee734O9Azwnn4FDC62urmppaUlLS0un1otg/9axilwbIsOK59eZ\nBhH46kCzDlRi/bk+yUWixpXIjiJgxvPGFdfUUnWpPslUTHbpJFw6MzNTeVWjr9tgTGmX+8A4FwDi\nEx2RyKBvjQB4UGd0arupIylldbpzE8Cj74kknqVMFSSgQr40lk1d1tfXU1ozYUI+xLC9kwj7ACCw\nEfY7dJDwBC3XmJH+RQ0ZhdNRHBCJtDxqKAeJnZ0draysqNPpaG1tLYEFu155KI0SJ0ndpIzf+d+v\nGceOxk3iUcBV19a6UtfmUc9ed+/c9c4g/I3tMfwcN0rmmsFgcGqXMK97OBxWEuOQhaIoko+g3+9X\nQMIZlIdj/Tnc+e5gEoHC2+JzxbM+cWC22+3acciVqW+p78tpQW1i0mtra2lyx/gxNAqAwVHEJ+4J\nwaIc36nIOznS7xx993a7APl5fr0jtjtSEUaWya+srKjb7WptbS3ZtfRD7v65duYmTp3G5e8koBHr\niixgFFjFdtVN7kjLR4FJDgjisdzYedsxaf2Vi5iy+L+k44nOcnHPN/D8Cd8YhgkdczDwS7EbGbLn\n0TV3hvrzABCegs35zjRc9n3BoOfyNBoNveMd79AXv/jF2v7NlamCBHaXa0s6BXOBPRUWFxcrySJl\nebJoJk5UNDAbvpAW7c4nj6C4QFOfh0ZhET6AMdIRvf38deoO08F27PV62tzc1OrqatIyDPLS0tKp\niet11h3z45OYG3Umgpc6mj8JQ4j39zp97OryQ+pALpZJTA0KyggfkUc48Hf57lVEQthwBoDwdkat\nDxggbzjdYZYoMmefHqXjd+qDpbgy871
auaYsT95E5m+DQ/a73a7W19cn7itpyiBx/fp1bW9v6/Ll\ny5Xj0QzgmFR1hMXzPFPONQXCICnl2VOXF89Ko3MZGAbZNwXxNG3qim2OzIKFQ6RwN5vNtIPxzMzx\nOx1WVlYqJlGdKTDONIgTelQ98Xwv0c8xzmzImQwOCg7Ek5hD8dgkUahR7fOJ58pnb29Pu7u7SYn4\nq/oGg0GSDd/42Bdz4YfyrFsHG6kqH9GEdYYKQHhkgwSt2C/R+es5OH6s0Wik7QzOUqa+6Qx5EVI1\nXOTLsPv9fiXZxT3EEc2jyUGEw0OsrrUobp54BxM2jZ5o6fS6Ek/4onAPnsm3zyfX/vDwUAsLC1pd\nXU3OTtdiXnySTDLJJwGK2GZKZGnjyiQA4aG+CBTjWFMc91yb433r2lCWZVIK/nG/QsyYpT7aAUC4\nYqGdsEXffRsTxOUmJvBFpejylutbwAOThLZGRyd9AIidpUwVJHBIxvgxKLy/v6+iKCp56L7S0j9u\n6/Fh0rpgMqiRijFhWWLrCS50LBqF8/2e7vRyZ1hkO+4DcSDyPSW4N/0Qtc040yP+NsnvuZI7Zxww\n5YQ4BxDxeO7e8X5oa2TGGWVdG6JZQ7/GsfFJi4z47zH1GkbBG7zwFbDLVYx+0FaXA3/2CMj8Th/S\nhgiy0W/hgOuLFKkHs/ssZSxIFEXxoKQ/lHRZ0lDSb5dl+b8VRbEm6ZOSXi/pWUkfLsty6841H5P0\nEUkDSb9YluVf5+ouy+pu1R6p6Pf7yakJONDx/M53EJsBIVzlO1gx8TyBCUFx8MBp6rsK9Xq9dI2n\nkPtCMM/7Z9DcnyGdxLFpJ0uPCYMSqvW2jnKo1pU6lhHrqBMWP6/OHJi0Ld4f7nUfBxLxPkww+p26\nc6HaHEj4PRg390MgY67BofpeJ7Li6dq+loO6fd9Mz3/wsLqXyCicOfGcsGCAzL8jv85UPEWAeXHh\nwoXkxJ+0TMIkBpL+57Isv1wUxZKk/1oUxV9L+p8kPVqW5a8XRfFLkj4m6aNFUXyHpA9LequkByU9\nWhTFt5cZiWIHJ1DYV1Pu7u4mqu8bgcAo3I70PQZ9IvqEJApSN1jYp8vLy1pdXU0bxRwdnbxmnsVo\nDBzCQMo4k8EZgdus0klCDQ7bVqulTqdT2a4OFuSZqLkJGTVkTvvmTB/an6OvuXPryjj/hLM3n3Te\n/w4YdW3wcLWf78lJMXRY5wPhPHIjdnd3tb29rc3NTW1vb6fd2+k3X1uDrwsfF0lwMOKyLNMYklDl\ndaFIXFZ5Tlcmfsz7wJ9/OBxW9rl0QHPG6nk6zKe7/t6NsiyvSbp25/tuURT/rOPJ/0FJ77tz2h9I\n+qykj0r6EUl/XJblQNKzRVF8XdL3SvpCrDtnYzLpeHB/hwUr9nzLOqeLTuUAEPIRoKkwEByQ0ol2\nIK+i0+loaWkpMRkiLTi5nD0QIoMZcL4vCGIrOqI4Tp/ZX4K9MQEn+ieXn0Gb/a+NV6U/c8dH1ZO7\nJp436p5+Pv0aQ3g5ujzqedw34PkFfDjm40k9kYYzUaSTHBbf9RqW4oDgu0C5MllcXEy+LPdpeB/6\nRN3b26to9wjk9IszXAcTPwfW4FvV0VduwjtDL4pCN2/e1HPPPaezlDP5JIqieIOk75T0mKTLZVlu\n3BnIa0VRXLpz2gOSPm+XvXjn2KkyNzdX2TvSF93wsKQ1M4ielJRz4JVlWWEjoC2bz3Kc9G7XGO5L\nQCAYqLm5OS0vL1depEM8HZDgesJrCA/gw/9OEwGSxcXF9FYwv6/bpghO1Dg5RuEU27WUX5vT4lwf\nHaaujXNtyEUcfA1ODiRie3IAwb3cIReLA0V8tugHgXZ7oZ/d9AQA3EdUFEUCdZLeGDvAAeUDsBHJ\nYk9WWACy7uyGye/97/kT9AEOf5KjPCPZ63NGQX3f+ta3To3tuDIxSNwxNf6Ljn0Mu0VRRJ555phU\ndNx
JqiAtWju+YyNqDBcOp3a9Xi8NcL/fTz4F93E45XbURegQGOxOByCEl/Bl9FozKWATrEfBfnTQ\nAEjwhaRONY3CM8dnd+eXX8ffaO/Ga/nubMzvyTU+2SoDn7mHpOwkzY1ZbHPd/84G+N0nvGvgunv7\n77TDTURfto3fwc0lfzkTIAFTBBQAnKOjo8paIwcHz+p0ReVOTr/G06/pa+9L2IT7yhy4I2M5S5kI\nJIqimNUxQPyfZVl++s7hjaIoLpdluVEUxRVJL985/qKkq3b5g3eOnSpPP/10esDLly9rfX39VKc4\nSEQGEc0M73BfJ8FGHByPAOECROeC1mh6JgIsIkZYaJt0OkyJhvCX/PAdpy0aKi4bpsRIDH9HLQqj\neDs4hzojk8h53nP39Xoik/N25DzyXDOqjAKMHFBFjext9WdE48aEO5ZW+9jxuy8LAMzJv/HMS8xc\ngIKogjtJfY0R8hJDqnGcvE99jDyZkEggAOLKrixLbWxs6KWXXkpm8VnKpEzi9yQ9VZblf7Zjfy7p\np25/ZT8AACAASURBVCR9QtJPSvq0Hf+joih+Q8dmxiOSHs9VevXqVd1///26efOmFhcXT4EDQucv\nWo3+CDovTiwXFNeOTsOkqh3o/o9er6dWqyXpRJu7H4JdsX3DEhyNUXP4+hTWpnQ6HS0uLqZ6fe8C\np45xEue0Y86WdVAYBRJx4vJ/pKQ5NkCJAOFjkJuoOYfpOBYR6/eJ45rV7wtIRRAEsJEfnOeMG5OM\n5DaWBuC38hc2OetyHxumh+9TEX1hKCVXWM5aYn6P1x19NTjUcba7f24wGOi+++5Lq61nZ2f1T//0\nT5q0TBIC/T5J/6OkJ4qi+EcdmxW/rGNw+JOiKD4i6TkdRzRUluVTRVH8iaSnJB1K+oWyRm04gtLR\n/r4LjnlKawSIUcU7iU51j7gLk2fLQT1ZG8L/OYcbAsUjwip8hyCeJXX6HROmKE7e60i83Zf11rGE\nHJ2O53kZ5YD0a3MOtNz5EbjqQCJeV9emuuM5NlQHRHX9lDNR8BM5U1haWqpEHWCThDjddMQsYKsB\nkvwIp/r4s48rkQ03Q5EbN28BrPi+FvrdWYSDBm33vSQiu2ZMt7a2sn1fVyaJbvy9pNPeouPy/ppr\nPi7p4+PqXlxc1LPPPqsLFy5UnGxoXToQgIg+Cek0i3BtheYn3MgggN6STnU8qN3tdtMelNBNTAGE\nxfc8jAAG2BVFkXJBpBNgjF5xErnqVhnG5809czQ16qh9HaDE+7pZEe8R71s3uf33CDx158djESDi\n91H943LF7/gW3MHcbrcr40e9HpVigktKkZCjo6PEJNlUiAidvyPUmYQ/jzNn5NNXqdI+5MzTBNx8\ngQlT3Nyl7UVx/GqAuAxiXJlqxiUr6zyngIFws8CFPueAo3CdswS/rm5CABS+opSoSlEUCQAAA85r\ntVqJPjo4xPg9C4MkJSFBi3kehNPjSbRyTkPHZ6t7fp57XL/WgcKoe9a1nzIOFOr+z4FEbENu3HPX\n+Xlo6xhSL4oiOZYBF+nEPEGJ7e3tpfd1bG1tJWAg6c8diMiGmwPIHr/TbhiFbx8A4+VFUNQPCCF3\n/nFmivI7S5kqSPB2K9fMHjZzuxhTwVfOSacz11xAPORGJ3nuAedF4ZFOYugMBLtjMekZ1IWFhcpA\nFEWRQra+lwSUlWdot9taWVlJa0Woo66MMyliyWn2CASTmGyvtIxr36iSMy2i4I8yoaTTq3KdgkdT\nMxdWpX7khmMOJDjVycW5ffu2bt26pZ2dnRTVYNL7UgDMWO8j95vBJj3Z7sKFC1peXlZZHvvotre3\ntb29rZ2dHc3MzKRUAtiF94s/+8WLF/XAA9mMhNoyVZCQlFhEfO+lVH0BqieEOMPwCRq1hNt/ngeR\n25nHkZ2B93g3FNXv6dl2ngsAMwHhARY86B6tcT9AtPVjiRqx7tx4X
p3W9b91JeeTiL+NK7kJXdde\nb3d8hpzZEU0ZHzNnND5R/Fr63Fddxra66esmME5uz9q8detWcmZD/3F8emQlZp7WaX9P3efNW4PB\nQMvLy+mNY4uLi5X30JL/4/LHszz55JOvrRcG4zDCQeO7BTk9wwRwZPS9HYhruyA4Q/BEm1Hak2tJ\n5nJTx0EBRyY+hOFwmF7MQ9v8PaFlWSbzpdFopOgJgjRqEnN9/B4nTI7u5yZV7h6jJnsdSMQwaF1/\nTnLMj/skjp/4W2xjBI2cJs2ZMZF1um/H5QaZ8AjXzs5Omqw7Oztpi34UgTPj2K7Y/vg/fjB8Jisr\nK6l9/X4/LUZst9upLTs7O5UQva9JKctSnU5HL7zwQu2Y5cpUQeKhhx7SxYsXtbW1lQACROehfNWl\ndDIBoP7RucZAUtxr7JrFPcyuUfxeLrieO8FfYuYs7iFFlmu5nv0PYSmYL2gc91TzLJTIGHJm1iTJ\nMaNYRbxn7jf/wJRy7CK2c5L71AGEO9/8ewQ9n8hxLOPzR6c3f3M5FlI1KuIAQSq3b4aLKetZjsgq\n93G5juZ0VACeKQnrIAJGVIb8ml6vl3wXAJRnK9P2l19+ueLgnKRM3SdRlmUlo9HXxWO/NxqNU1vp\nI6gMKNd5AombHIBPnZC61vGwaXRKci+cV7z8V1KKUJAwRe4/e3JKqrwyIJpEUYtJ9U7AqGHjhM2x\nhpwWrStRC+ecgpGe17UxtiE3BqMAYhwjiRO/DsBife4gr/NNeen3+9rc3NTm5qa2trZ069Yt3b59\nO73ugQnNte6khFlievjCw3g/ZK3f76dXMWxvb6e9KWGwhNFh5EtLS+p0Oikke3h4WFmCsLe3p7W1\ntYmUipepgsR73vMePfHEE8ncYPJHreUfR2VYRmQRMZuSyAnFfRKO5tG77Z3JMXZMnp+fr7wgliw2\nf8cnJom/k6HRaKSdwJeWliqLv+o04Cjfw6iJTomTc9ykGwco8fuoNpzV5JgEFHLX+P91JlLumfzj\nIUqpOsnL8njLgJs3b+rWrVsJLHZ2diqveogZwb5Gw/eWcDmLY+xO0W63q62trQQM5Az52pLZ2dkE\nECsrK4k57O/vJxOENr7wwgtaXV2tHa9cmSpIrK6u6tKlS7p161ZiAaC6J7YwYaHzntvgCVNuizmq\ns1bCbVIGxtf9S6c1micWQdn4eKwaMwNgoO3NZrOSR18Ux9uaX7x4MS3oAiRGaeY6DTcKRKKt7scj\nZZ+keB/lWEXdJMy1aVRbIpOI98/9H31N7nAeZfr4PbDhCWt7pu/R0fH+kNevX9fNmze1tbWVwp2e\nno8M+u5itA/m69ESbxtyLR0rMt7XgnO/LMu0bcLa2pra7XYlUxd5RibZG7bdbicH68///M/r8ccf\n16c+9alsn+TK1N/gxY6+oCud6glPjrY4gaTqPgM4G1lsFSd/FDxnKbFED3g0RRAI39fCzSDMj5mZ\nmbSfoNu2CwsLWllZ0crKSrIxqRsbNvpFxk3m3O91lNv7dBI24udE0yMyrnEso85Jl7sutrXuOeva\nHIExV9yf4Quy2M2cEDiT9vr167px44a2t7e1u7ubkqo88pEDCfrKX5ZTF75HnqOMHhwcaGlpKbVp\neXlZ7XY7AQGKxrfgk44d+5x38+ZNvfWtb52oDylTBYnnnntO3/Zt35YWnoCCOPmg6NEZFT9ock9E\n8WSmXEHDxC2/HCActBBOD3/Nzc2l5ejU5S/XWVxcrDAg6iHbEg3gXnPP1efZEfg4UfndizOOOKHj\neXz8mXOljv4DEN7HOc0fTRkvdZO3rtT5a+L3nMzEehzo/E1rZE6SiwBb2Nvbq/ghfIXnKJDzyR5l\nN9e3+CTiM8f8CMzWdrudkq38pcaetXl4eKjFxUU9+eSTunr16ql7jipTz7jc3NxM9NUzz+goNzWc\npkUNULfHhJe643ECupkTC5OZi
Uwyy9HRUXJKkUKLvwFnptNb18JojrIs03M6m4jPnNOQORCoMwty\nQuptisI7zpeRu8bPn0T758Ai+lJy58fJmXvWyBYjOwT4sf8xJ27fvq3NzU11u91kYjJJ+R8TwrMy\nYbzcA/brDmvknXyZ2G8u/yhMdnpvtVq6ffu2VldXk2+r0+lodXU1bYaDCYs5gnx+6EMf0le/+tWx\n4+FlqiCxtraWdtjxJCqf+C6AhB/9GJ3JoEHpPNkqAkmkrznPOh9Jp5yebrrAAvr9vlqtVnKm+mYg\nbFLiAuJvSseG9G3PuG+0m+u0UM7n4D4e/81XF8Zr6H/vj9zkz4HMWU2Cujq95MAnnheB0n+vy+fw\nSQoz7Ha72tzc1I0bNxJjIDkKE5OU6whITMg6xuu/u0zGxMBYHCwwqT0fw6MatG15eTm9c9aTCSVp\nZWVF73nPe840NlMFia985SvpRSF17MD9DpIqE43jhHt8wYt3PFo5t8Q8Z6PH/6PJ4V5vwqFs3Ev4\niXRaFonFdu/v72trayvRVQYTanhwcJCcUf78Hs7NgVi8hyed8Qy+1Dlq2pibMEn+QDTrPF8hp8Xj\n/xEAR53vxyJTyE3OXB0809HRUVqYBSDcunVLN27c0M2bNytrMRhz2IOHrBkTTET6x53eHqHLmY2u\neGh7DJNzrS8S8/fKxtXNtMc3o/HEwknLVEFiY2ND73jHO/TNb35T0skEiNEGTyjhLx3Gg0egcIdR\nDDv5hIsx9lwyVs70oF7agT8B6thut9OEBNEpZVnq9u3bunnzZsq5B+2bzabW1tYqu1RFVuNb7kuq\nvGaOc8qyrGzR5+zLs1s99d37GyaCbew5LAAvzxJBgntRt4OLT+AYco5g4bstef95XXVmFaAbgYfn\nI2+AhVnXr1/XxsZGAgjWYJA96cqKtvkk5v/YF/GZca77s/v4ev/Rh6z7wMHtTlKiMZgTvJGMdvq7\nRIbDYdqt7SxlqiAxGAz00ksvpf/jgLo281VycdL7luhuarjWy4GO38s1Y6T0kWXE6MdwOEyaGbMI\nW5UFbHGdPxqLmDYTotlsqtfraXFxMbEQn+SeleqC4H4MBwnq94nozlXfZ7MoilNLj4ncQJdzIMG4\nRM0JSPh1njiGLwYzEXBn0uHTQdC9fqfsjJ/X7SFl95sgSzCIzc1NXb9+Xd/61re0sbFRCXHSf+5E\ndCD2Zf2+gNDBxGXZ5ddZbZ0S8n6nHySlEK2DcFwXwnURaJGHs5SpggTv6bx9+/YpWuWAgQBJ1dVy\n7nRyFuHbesU4eaTQlJhUNcr88PNcwEF6VvnhcAXBHfSgt91uV8PhMA3m7Oys+v1+2m2byeQTzLe4\nK4oiTSBsXiYwa0RwfHFvHKu+CzimDf3puf9MOpxhgBr9Ql/4pHEK7q8KiOYSC5LKskzhxKIo0noF\n1icATFzn7DGCj7MjlyG+E8XY3NzUxsaGNjY2dO3aNV27dq3irPS9VV1p+KsSKJEtRXPDTWk+ESj8\nWmdDfBh3XxoAELTbba2vr6fcG5SMb8HAc+QWOI4qUwWJy5cva29vrzIAlLqJ7Q/r2gQEp+Ol006r\n6GPIlegD8OP+1891n0VRFMk+ZHB514G3F+85b5p2Ddjv91OIFO0knezo7ZEe/gcsfHKgCX3iM5k9\nM5QNXSVVwJYcEDJK0WZosThe3i6nyqyUlVTZ5QuPPb4TQo0wMPfct9vtVHdMdYa1OUjwybFTXtR8\n69YtbWxs6OWXX9a1a9d048aNtE+D91dUKDkZ5Znpf2caAB9yihMUp6Sb17TXV0ZjshJS5z5sM0Ae\nxNramtbX11P+BHumutJlK8WzlKmCxDve8Q4NBgM9//zzleMRZSPlRyjc2ZMrjs4UZwYRCNzuzSW7\neMmZRQgCzkj2oGA1oIMES4vZ2sxDZw4S7ntwSu1LjUkJ97BXo3HyzhLPRoXOI4QLCwva29tLIMH
E\nI3uQd6B4XoezJi++1RrtjUyC/7HNSRlmnQF90Wq1kpe+3++r0+kkzQ2LYJEVk9CB1v0sbkqSObm5\nuambN2/q5ZdfrpgYPLNPXMY7KhuXIffDwAIYh1yClTOhKMOeb8OGyZ1OR8vLy6lOfoNJ0F+eO+Fv\nF6Osr6+/tkDixRdfTCG/CAh15kfOSUaJAynll4b7oDiC5zRkrM/vG9kPdRMG5fWAZNlJJwLOJrqA\nhD/vYDBIAuDP4CDmsXdPCfdXzgNYw+EwvRgG4QdMFhYWKjtnRWBhc1/fqs/fF+HUmDa7HwAw8v7F\njDo8PKwkK7HMGZDY3d3V8vJy8jnBmMgtIGwJSLgmp198nBgbj2bcvn07RTG63e4pZujjyhj4hPdj\njIu/qIfxBHRRJNQfTVjqg3GxyhPT3BOkfE9U9p1g7073BUWn773aLfueFLRKpITRUx2dVdKJD8HB\nJQcedVSR33KTf9LiAuh1IbRoRekkAoGAuXbxfAbPjcC34iFF7oEw+vkIJx98JdBaTzhDEDkGOOKs\n9CjR7OxsWgoPXY1gTl3+Emh3rHLMQYLkJPbV2NnZSeZnq9VKe0QywXg9HT4olkH7fqK0BcD1CUl/\n93q9SuYi6dUxyzEnF+4sdx8M17iZwHNTL/evK/4MDjRuHub2RPVrMS339/cTGDjriw7VScpUQeKx\nxx7T/fffn3bccfMhN1mdcVDqTBNH/ggUdXU7M3BwypkWFAezSBtzlDRn5vAMdRGV6J3nuRjw6Mjz\nrdL8ntF08418HJhgEJ7m7v6cuGCNyeHOYs9VASgBCRgWExZQ8jRnxtR39gIk8JvAMNyxCJvgedk7\nwX1XsCU+/ia3OKY5JeD9iKZ28MYUYJLyO87scZPUna9+fs4560ylLMu0qxog7+0oy1JLS0sVZjFJ\nmfr2dZubm1pbW6uwAXc++qR1Te0TxK/J0f/YKXHy+3cXityEje2KoBKBKjq/3HsdqaC3LzpK65gW\nFNo/9IlrughMPuk9PAtIwBY8ouAszrNhuQe+F0wCD1c7HeccN2niql/fHYy6AaL49m5JSaOSqwIY\n8CzuJPScGl8xXOforFNcPpYwmOhwpF7GyV9hycf70Z2e8eP+Hp4HU9KZzczMTPLlsK8Jbb9w4UJ6\nPeWkZaog8d73vlftdjvlSrjGc4HxEtmEg0QcUNcuPvDu9Iy/SfWLjnJMJV7DIDmFp93eligoTgNz\nDMKP0xb++opZ7gnAuRbzvovg4wCQmxTR1PNMToCOhDLXbp7TQnvQqLQ9xw5hD4wrIOEmFG1kcnjO\nAPV5ZMfXW7gDkWfgE+UtMtU4fm4aEErOjZn7GqiT54tKhHMdMBywAYiDg4O0w5lfz+YzbIBEG97w\nhjfoM5/5TFa+68pUQWJra0srKyuVRCnXhlGDx0kaNVwO7d3xGUtusnN81LHo+0CQGGhovAsjYc4I\nfjkQoz7/3UOL3nbvM/qQ8zyJKYKog4zbtf6c8V5+T59gPKvTYW+Xs0AHizi2fs8cY6OdMB/vN9ei\nHnr05/doAiUmROVYmj8Pz+kT2X1DkeF4X7sfyT9uQgJS9In/j4MbswLGxu7cPm6YbkQ56K9//Md/\n1F/+5V9G8R5ZpgoSTz/9tN75znemgQPtiSOjNVy7Rc3jwhu1rNty/p0C0NQBgJfIROL9OIZgOjCQ\ncOXtcO2Sy170UFruWaIvwgU4N7E8N8Q1oz9TDjRzE9nNKOoFpHieaJL4faK/x6+JE8XDhhynXu87\nT4vOjVlUOq48PKRMe5A1vz6Gd/062A5A4e2lHyMTAdD4zdlDZCIktrkCoS+63a729vYq44yZRWIe\n5fOf/3zaBm/SMlWQWFlZSZ5zvMC+G1DO/HABR9DqbEbp9JutGaCo0aTTDKKOoeSYhLcnNylcyNxG\nd5YQzQnaFk0T6nfHYqTE8VwX+MgC3PcwikV5fzCBPSOR3xF
8j6TQF94Hfk8yOd1+j4rDn5s246R0\npkU/xBclOTD7/T1Dk9+9r3wsol/J/VYoNsYA88fNBu+XKHNujri/Cr8RvgSPLkkn79pwWXQzzUFi\ne3v7tfXejd3dXT3xxBMpNo796J7x6HNg0F0wXUPE4jZedDRJqtBhB4vcd6+zbjIxcflfUgojurca\nTeWaJU5C9x3E53FhjH4EBwnq8w18XOt5gXq78Mfni/d1k0I6yQDlub0tPKubWj4OaGZfwOROR0p0\ngkqqLN+mfe4QjTLik90jBv7sOeXi7XYlFXMf4rqKXM4CdTgz8u3ofLtGD/l7jgbHIkBEBcIzXL16\nVbu7u6dkd1SZKkjcvn07bfHt9pxrRf/un6jdKFEIvNQNdo4ZxE/8LZ5PfVJ1zwm0I1rChdGFK7Y1\n3sPb78AQaT/FwQ97Vqq++5R7OvDk2JYfj/d0NhE1bo6JRf+Dr+nwLFJAIu4N4n3C5OM3zotJVz5x\n4jNJJ4vTog/AxzsnO7QDEHMAnZ+fTyAfGQLXozh8i3tyIOJ7aNyMiWtWXD59fqAIOK8oCl26dEkb\nGxs6S5kqSLzxjW/U+vq6vva1r1WEN/fQkWJFH0OdL4Fz48ePe8kBQ64ub4ef6xOTdnnkwbWWnxe9\n6rHk2hWBM3eu95+bPICI+wKiLRyBJ0506vVkMATTwdeTefjN/S4OPM5Q8E05G6AOSZVJAyUnu9XN\nFswOHwt/Fp9E3r/RH+b975rdzSQHuJwPgslOf7m56awjMo9ockfg9HvS7/QNdRdFoYWFhdfeC4Ov\nXbtWefuV22t0cs5+k0ZvqxZLBAOK32MU0FCHA4GjtGshqWr3OxDEdRfSyaYxPkkdwFxLejtcI0dK\nGieBZ1a6lnfQygGN91383YECZuLvJuHaHGvxunxsqNfzPnxSeD0eZcF3wasUo6b3tjIWPqnj80lK\nAOUKKvYJY+NAOxwOKys0SW13sIRdFEX1pcT8D4uILMmBkfY78OTmi5+3sbFxCvzHlamCxLVr1zQY\nDNLKQkdsqT670TtUquZK0CFO9VywJFU6PPo8/H7jQIOSYx3xGdz8YBLx8WeKjCJeFwXb2xoZhP8f\nU7LpHxgF93d/goObg2IOIHKsxgEh92zUHcc43iOaZQ4qcbwIWY4bIx9rJp0v/aY+B/vc+Pq5/izk\nZPjiqvi80cTi2WBCzq65h88TvrsJGp8P8KLup59+ulZh1pWpbzrjQlRHb5lUcdJTXKCl+smd60AX\n6jrGMqq4RonX+f3i7z7Ykk4JircrBxyuYVxofIJ5GNIzKN2u9pBt1EIAg09c/y0XSqUO7xen8ZTo\n+HMwi2NTB8KRLUSQrysOenXj4/Xm6sw9k/c9/hTMSs51xeDLyl2uo9M5gr8DdJ1C9Wu9zVtbW6+t\nnanm5ua0tbWVFvxIOmWfccztOjreKal30ihnZHT85ShanCAUF/5JS24QqTuChNNRFwrvGxdut9k9\noYrni+1wDe8TNDIdj55ERx3fPT/AnXo5IHcQjkDm7XCgGgW4tCECV7xvbqzod/dPjaLfZ1EY/ly+\n6pPnwSfg0Q5MjehbiyzKfT/4WdzMy4FZZJWSkulzljJ1nwST3UHBIwG+WAlvMANLbj4LdNxZFBE6\nmicRoV3gcg5N2lUHROPKKAdp3aSI2j1Oco8yRFocAUiq7lMJ8Hr/5sDDtZvX530UAdyBwvuaMYx9\n4JPBnYxRC44rDqCTnOvAkmMSUQ4iq4r3isDIxAYEGCNSt5FPX4wXI0cRIKKJ7A7MqMRyjIxVtmcp\nUwWJbrdb8bLzIQfeF8pEG54MtLhPg8eEIzh4iVqtrkSnmlNU/z06LyPNdps1sgX/Hs0FFwJfxOTC\nGim738Of30NxvgUatBca6mDgQux9VxRFWi/Aue6sY5x8XHDq5dqd8w9Rb93kpU2uiTnfGU4cq9zE\nz5kPse9yfZCry9tQZw5
SvM6oyABNd+C6XAAcnO/tiiyE39k39SxlLEgURdGU9HeS5u+c/1/Ksvxf\ni6JYk/RJSa+X9KykD5dluXXnmo9J+oikgaRfLMvyr3N1E9VgAjUaJzvusLchW3Qh4FButpxH8GL8\nmE6n+MDkNNQoAJi0xAHnbxQmFxT3RTib8snD+Z52HLU39TpNjX6OuEeBMzbaF51k0X73SRNTmf2v\ns51oRng7uac/K/XHtnmJKdL+rHGyn4XxxXu4iecsiD0somnmbUL5sb4jgqjLG8/OhMbx6StXcyzD\nlRDjkzNTOM8T3yYtY0GiLMv9oih+oCzLXlEUM5L+viiKv5L0Y5IeLcvy14ui+CVJH5P00aIovkPS\nhyW9VdKDkh4tiuLby4y6Zpejo6OjigAvLi6q0+mkbbh8ExHScImHI4jYgD5R62hYToP7b7nv/n+O\neUSNFQfCKSfnugnlWXbcwxeKeVujJkIYc174mZmZtIMRbxXDLh4Oq+sRIlBEbR81Nh+P7TMJYl2e\nsBS1bNSY1O/7aUZ/DO31LMm64uzOJ5ObG3VA5PXjR3AWwZjmGIFvEchqTPqe+7sfiT7BlOaTYxKe\nm5KTQTc1fAx3d3e1trZW21e5MpG5UZYlS8yad64pJX1Q0vvuHP8DSZ+V9FFJPyLpj8uyHEh6tiiK\nr0v6XklfiPUyQB6a81132ASVh3dhiSvviJGzc1KkX/YsWXvXJ3gEilHmSO56P+70MVJbJphvDecv\n5PHcAGcGno4snWwuK6kSyvN9EtkGjf0F2EmKc7kfOzTxPPS5h+Wc2TAeMeWYEs1AX07uiV2AhN/X\nx8Anp/uFIlvKsQkHdw+l+l9/LvrcZcz7wB2RDhSxXSg9QIJx8PMdRF3z42+jX9xX4yZZfH6Kg4PL\nO9sVnqVMBBJFUTQk/VdJb5T0f5Rl+Q9FUVwuy3LjTudfK4ri0p3TH5D0ebv8xTvHThXv7OiP4MOg\n+ADErEX8E/1+P23AEXMCuPZOe3PPmD0+CiBGgQN/ffCiYAGI7E3YbrfTdn4IFO8TQaP6btPUV97x\nV7BIiv50UHCNRjTJQ9AALWPiW8LBFJgkCLRPGs5xUwZNOTNz8pZ3WIOPY6TE0blKtMC1JM8ezTWo\nPeaBa22e04GbceF3dzYCphEkYjSEOqO5xDW5jWOiT8wBAlbM2+Di1oPOiFxxRPMl5wAuy+Pdqc5S\nJmUSQ0n/TVEUy5L+tCiKt+mYTVROO9OddbK7MrQ6aloGjfx9tI4LrO/ijID69RQG8c7znGIOdSAR\n+qHy95X6MJgc7ILMTsis/ZeO35mBwAN6CJ3vPo1vxicaQIJPBzBCqwG+vr6DPSPpC8+r8I9vmeYC\nCui5k5m+8ZWiMXQagcG3wYsvDHK73fuSfqdvfNdwNxGiTCAHHkVgKz1A0De0oT5np0xI3yYvJ0eR\nAfjKTelkpSegkDM1fAcx7u2My9vkvikvjUbj3jAJSlmW20VRfFbSf5S0AZsoiuKKpJfvnPaiJH+3\n+YN3jp0qZH8Nh0NdunRJq6urabDY85CBYxUgGhFhcNoehRMhzE3qcYAwYX+MrCdGOyhMACYtr473\nd4fim8CngN3LhIc1+H6UCB31Okj4y3h88xbaxjGE05duOyVHa7v/6OjoKPmRfLfm4XCYzCcWPLkf\nxiNTcak2gMGEcrrNZPFwq3Scd+O0HjmZBCRgbLRrMBhU3mOCQmJ3d9pGvyPHUSYiQ+F/clyQiv+T\n4QAAIABJREFUE2cSzh5iWnqc9Byr87lRtra2tL29fQpYJimTRDcuSjosy3KrKIqWpP8g6dck/bmk\nn5L0CUk/KenTdy75c0l/VBTFb+jYzHhE0uO5uh955JFEcRG2/f39pNHYM5H9JSSlN061Wq0KoOT8\nDO7YQpCi3UaJnTqpR3wcA3FK63W61zvarQjlcDhMAouQABpzc3OJtnsqNROYPnKnJUCBg
PvKRdZc\nuFddOnEgOlPjeVutVur3xcXF9EIYN33ch+ST26m3txutzacsy2RCMmk4HiMpgATPTR/QBvc3+Phg\nrrrJyn3cZMDpS98he7x5jH5zswgG4OPu19OemBfhpinyCxg4CLhM1UWCyrLU8vKyVlZWknJ94YUX\nJpJvaTIm8TpJf3DHL9GQ9MmyLP+foigek/QnRVF8RNJzOo5oqCzLp4qi+BNJT0k6lPQLZc0sYrMZ\nHhTWgLBCbX2z0mazWbHRECJfhEPxjLboqMohspfc5I+myTg/hp/j9A+6z8QDIBBqtC3P65qEYzMz\nM6n/Ynq7mxoIOhPcow+enutraJiAhKh9jwPA1k1AQILXBbrt7j4T9xO58xVNzFg5iDgY+CRiPJzl\nOEjAbHzbeR9HHxffnp8xoe0O5PQp0TVABRn09tBOEgbxMzh78rCp+0yQG+QfucEpHxeduUM3AgVO\nUspwOLz75kZZlk9Ielfm+C1J76+55uOSPj6ubuxTOgyQgGK699u9447QnMv7F9x77c4iCp3p9qBU\nzaCMgELJ0bpRQBFZhOcFcE93RgIWUFtYVE54iuJ4xSUgwnPBFDAzXBPRVkwXP87zt9vt1K+YCoCE\n75kJMKFZ4xuj8HkQMYEJuGPVJwyOQuQC2RgOj98Xyr4L/lo8nxAohKWlJbXb7fQyG3cG+4dxQY54\nb2qr1Uo+IPraX5JDf/f7/bR5i8sqcsJxZM3N4ZhPAfC6ucK9nQHBzDjXfWzRSY/8OnMqikKbm5tq\ntVr5CVlTpr6lPrkNILp0svCLj5sSvvelv6MhbnsXJ0csPhg+0WPH1jGH3O9efMAjSPB7tGFpS9T0\nXo90Aq4eXaBwPL7eLfaB943TdXw+DiQOtgAPrAJHKy+o9Y110KbNZrOy45iDBOML2+EZfOIAfP7S\nIA8JS0pt52W5MAlAwiM1PgaYDD72rn2dfTm7YVycIcYxh/G6g9dDqoyjyygKM+Z/uEzm5kWUozjO\nPNfh4aFWV1fjdBhZpg4S7XY7vVmbDvJMuyjcgAB2pHSCqu4xngQg3LEo5ddXRFYRbUI/7szB78O9\nOO7JX7zUl0903nl93k4HUb+XT4YcMMT/HUwjSDiLA5jRZgg4E3BxcfFUXoFPACIw7oB1tgCl51l5\nLu43Pz9fyRvwiJj7OWI+B+dRX9TaMZHL+5rrXRYAPnwXsCSPROQiHD5eETRy4xjzTqIi8RQAH1u/\nJleWlpayzHdUmTpI+CBA5z3lNSfYsA6nXg4UbgtTfMJGx090avo9o3MzBwreNmcWdfdEMPf29tTt\ndisJTx5OdFMp56iKCUfxe2xvBIVIv6UqE/GJQbsjeLt2dueqmzEwIhym3MMnh4MidTKuPqGic8/H\nMPoaYJtxjN3s4ryYU8D4+jmwAjal5Q3k7FbtbCnKhjvQHcijCRLHIo6p96vLQ3z+yEIoRMfOUqYK\nEh6zdRteOnnrURxcqbqIhf+jtqZO1+Qcc0cg9dSxjjpfBcLuHue6ktMq8f94b4TJwcXbS4nmUd09\n47HoOON3tDbf3QvvLAJ/AuPiYBb71oU2Cq/3n/dzpMyEVHHUup/Gn89lwvMpImP0kKL3MfXEJCb6\nQ1IKz3e73fSX1wW6QzGnjLy/3FyJYJFjF9QT63bg5/86Jo4z+ixlqiDBOwtxWPlDIZDuLPTJGH0B\n3mFMrpyd6L4BvybSXL/HqIk4CiByqE6J94ntR9vGuuP/8VnieX7fOmbh7XWQcOFzGz1ulkJb47PH\nZ/PffIz8u3R68Zb3AxPf3w4W63WqznjnZMjb6ddxDmwPM8fZKpENTA3yHqKfgDbQD6PMYO9v7uWM\nxsOkUXnk+tdNV8rh4aFu3bp16t6jylRBYmZmRleuXNGtW7fSQEbNJlUF3RESwXQmwe90pGep8UET\ngeCUKHD+vc68oJ25QY/nco7Tai+RIdQJkguFa5tcm
8a1ixJByj3rXm/Ono7tdQCKWtDrQujdoRdp\nuE9yGExRnOyEHQtt93BqNCfjc+cAFocmUQy2NfBsSGdZOUYSTVVnTTyrR+McMJ2RxZRtfz1CfK44\nPtHnkZPjcWWqIEGM2SdrTtgkVTQWD+5JMk4vOR5NlDoKHoXHqXPUvpF9uMaKwJLTWExCz5HwJKec\ndvd2jmItuftEeu/94X9jW+lfF/7ocff7jSp1TBCBBwCcYUTwjr/HcCj1ep2YKO6jGiUPHMOX0ev1\ntL29rc3NTXW73UoYNzo54/PmgCeyOh/TaJI58MAifD2Gn+vA4P4OjvmcIRnxLGWqIMEg5kpER1AR\nrziOL+nE/nSUdGoXfRe5EjuXunLaNNqvsd2AB3/9GcncW1xc1PLyclq7QeIPiVKetuz3ijQzZ275\n/zk25u3M9btPSEKZ7sdx51tOm7nw+n0iqPuYRAccIVafRDmTLGpf6vSQemQ7DhgOfGh2wJr+8whI\ndHDWmaGxRBPDmUXs/xyYuKmck8MYOfFQtIfESRQ7S5kqSEAbXfPnzomI6Qt3pJOB9NAYx0dpumii\neAcXRZHMEveN1IHMuIKgsgiJZJ+1tTVduHBBq6urKabvmY2j2j7ue67N3jeRHvO9zs/gkzQylPjJ\nXRdtdC8OpABSND1zgMJY8SHpySd6HTv167xNUYOPUgocy9Xv/T3qe67OCH4+Nh49yT1LDkQoV69e\n1fXr108dH1WmHt3o9/vpBaY5yuTedCgkNqfHw1kkRGJMDENFQc0xAAcK920wYDHDjRKdo3UTxU0n\n8gJYZ4DZ4TkitCtGOvwZcrkZueeLZZTwRkGvuz4n6JF95fqF6/xVCl6nv3nb2YRrUvcFeD+xh4Yz\nzdxE8v8dEHgGp/b+zDGiVdd3uf5z5uP9Rl8BjJEFeyTH5YE6HSxjnYRtMbne9ra3vbYcl4eHh8k+\n4uHdcx43aHUHl3eY0zGf4LHkhLdukKNNnAuzupauu1+cxE6FcYz1er1EA/0dkAi6a0u/b47mRlt3\nVMmd42ZCXd/F871ER2pOCzNOHsp2Rud7iUQ2wYSWlFaW+r2dWo+j8t4uHJVx93FPoXfTMcfQIuut\nA4pc8fBuNIsiSLhsulLz/qUecks456tf/are9ra3ZdtQV6YKEmTrkTnpvoe4ElA6nSsR7TPXNjkN\nVve7Fx9gzCAGFjSuEzT/HgGJvwygJ1Mh0CwAgmWwXoL/cxMmlkg16+hx7rv3bY4l1LGv3Dl1AMVx\nz7WIgB1BMddOFEhO08cJnHvG+Ly5EKPX6U7QWJyxRCYanz2nOPieqztewz347sAgVd8078dpzxe+\n8AV985vfPHWfUWWqIDEcDrW7u1tZjZgTRI7Tka4pPQzqFDGCgqNxLlzl96gDEBdKvkdmUgdSrjVx\nhPX7/TTgrMLEuRT3zfBdkrh/1F6ufeoAkr7Kgdg4ujzOBImTsQ7IKJHSO+1G2CMw1v31+3l9sQ2x\n3ziPLQrix/MgPOU612euXHwsYh95P7rp4kARFYu3Pz5LnQlDG5xZHB4e6plnnqkdk1yZeggUgPDJ\nNSo85eGf6HPITVjPzvN75MokLIHzcvfztkegcmT3dSeE27rdbjIv/v/2vi1GruvKbp1q9buqq9mk\nSJFsizLFkceSLVmQYFuWBcvRjCHI8OPLmJ/BTIz4Zz5iJMHElvMRBHDgx4+RzwCTAANlLI8RI7EH\nGMOmMR4BAjQUZdGiaFISJZKSLYpN8dWv6i6yq04+utetdXfvc+sWRalaSG2g0NW37uPcc/ZZe+19\n9jmHsQpdE4KrhutQsVotr/ye1bIg6r1/GZpuO6B1vZQqFzEe26H5mzcdPFVmPc8LdqrYIDXrhMvF\nERy8ORlFOqQs2LKZbnWg9eSBa6qd9DzbziwPjxF0mXbQi/QVJMbGxrC6upqbqGJFra89hyxCh0K1\nIXUM3bu/+s8an
LSBQj2H4gGEPdcqBxuLQ3PqcmiwbnR0FOPj49m05/Hx8SwzVdmETeP1OrntxHYI\nT+tZ60Mtnr2/BWCeo/eiaEDQPs/Wo9c2WvbU+SyXuqp2iJbnqZVXkCB48+OxB60b+2x9vmURCnop\nse9lY18aoNREMWug9MN65HeyVV0Ep4z0FSSYnOL52p7ls4EunmdBQtNjVeE9S8dn8nodxdAOoTTQ\n++hv1nrpM0PorMRECwbkMwV1arVm+XGtCc8a2mPaQbR+1S2j6H30HYqUXwN9wOZUaqW5mnikNDjF\nxiw70t8pep8QOqNeXAND68JOoLIxAw14ax15jEvBxbI5y+xUF7RuLGiqfnouCoP4uviPDpNrJib1\nRvVa9e+OO+7AiRMnNulnkfQVJFqtVjYjjQqlyBdjJ+bA9GpVGqWkKSTV39mo6u/yd4pSM/3fo+4e\nUBSJWmYNMGm031oQWiGul+GBhCqgzUjVDs56tuP/BAL+rqBIy6zZoDF2fHi6TRyR4XMJEjqnQevB\nArC+u2Uu2jb6XQGMwV0CBu9h07Nt3bXb66tMWepv9UhX21IrzTpT0NPAr3WXioLC9hp9Bw6P68pf\nVq/VxVAg0nu9+OKLXfXUSt83DB4ZGcmtNKxAwTUuqXBqZawVsOyCohWpxyxi285umUhRvETvASAH\nRHqMz/aEjWiHeXX9DI5+qBKpYqnF0cVvY4w5EFCQYJ3yO5mYgoQuOEyAazQaWFxcxPLyMiqVyqZV\nqkmFvTVIeR/6+brcPjuRzRfR+uX3EEIGTuy4fF+W3ebUWKAgm2MdsDwEwatXr2b1x4+yGI6IKPNK\nuV72mDU2Vl8UKJmExzb1mIQuB0m9seA4MjLy7iyp/26JgoQiuIqN9nqdnr+nLLttFOuTWkUkKOl9\n9XprbYooqSK87dxUUiaCMaHKW9maHc6yHH02FYnsjIBC5WZ5WEa+o4IS69yO12uHtTMhbV4Lv2sd\n2UCk1jPvp52NdWLpu7aTjiYoIHB0iMvN2SQ1y+h0NziWlYFMsgyuumUnTHEdS9VhC26WIagh0vfS\nUQplB7pYMtcR5WLIzINQZucBF+87Pj6Oj3zkI/jpT3+KstJXkIgxYnl5OUeZgM1WFUDOsvHFrdVN\njXZYYLFoz2P2r6K7Mgn78XxPDcZyWNNOsVb6ysVAOIqhFN/OGdByKJW3S8uRnnI+iDIuz7KTegMd\nhdXl2yzT0pgE0NlJjB2KlkzdGT6PZU5N91YGYIGFQpZDQOBGR1zCTrNZLTNR5tRur6+jqQv/sGwE\nN9apshKWlRPKVP808Kr/23Osa6K/2UlafE9u5ESgINBxnU5NBlNW0Wq1suS9XmRLTPAi3VOfWj/W\nV6XiaMOrD6YWNoXinstggUOV2d7Xdhh7H6v4bGR2OF3aPsaYWzGbSWZ8vq7fSaVWS6HPtFanWq1m\njEQBRoOJSp214wDIysnrNXVZ64Btx/fg0DatNDubuhraGTUuoR1D21jbU5+ne23q0v4ECQVJFbIz\nvV4pPRci1jZWcFCmpfWmoMjvVhetrni6Z4GCMzj1HbkQcbPZzFioZvNytI76cuXKFTzzzDPoRfq+\n6AwXPwXyQ5HA5gk8lUolt1cm/1o3oMjdsEyBz9HzeK69J/+3AJNqZHsen6+Wgd+5wrNduFWXe/dG\nPOiyMC6ge6jW6/VsgVqWT2MSKddOj9PtsYCp704ltkAHICsnO4/SdFpnG/8h80oxAHYguwmR7l2i\ngOzlL6iuKQNiB6xWq2g0GlmMQmM6ZA3KMr2AJqXd7qzbanXG0x1bv5ZRsE34AfLL41FnWP9s2xgj\npqenceXKleTzPOn76MbCwkK2xDcbjH61LjGv/qcqOimhWgr19Sxqe/EMK9bN6Ba09FwU+7+yIe0E\nTJyq1+ubpovTQiwtLeUWgiXNv+mmm7KgX6WyedtArhatwOqBhPriLLtOqGOsga4
BOx9XsCL9ZQdT\nkNBUZ42+a1Ba3TKO5bOjazq6Ai/jHrT+1BeNQSjAWMtPgIgxZoxkcnIStVpt0wY9q6urm9wfHQWy\n7NS6cvY8HrdMROuC92P9Wcame4zYOBWNC5m6xn7ojvQifQWJ7du3ZwEiWga+OH3LycnJHJugH6/7\nJtJdATpDpRq9p3gdnMctbVfAYQex13j3SR3X+6t/Sas1PT2dgQT9YkbeeT4DZPwwMYZBPoJEvV5H\nvV7H1NRUZo3pTtghR2UYGqy0s255fQjrc0xY/qGhoew9FCgITKrk2lkZ+yDlZ2dttVrZPW3QUWk7\ny0VXirEXjf3YEQK15MqMeB8uw0/GFkLA6OhoxijUtSXwaU4NQYd6qiDA8qjhsUDB+/CvDchqTKJa\nrWJqagoTExNZUp4GwvVdFWzq9TouXrzo9oOUbJm0bAaFxsfHsy3JpqenMTU1lZ2jdIoNubq6mgVi\nNDIObGYSFBuzUN+cjUKLR39aV0EqEi/mQZ/QjrOzY1er1YxJVKvVLAB57dq1DDCoJEwVpg/KwKWC\nBMGV1pWdi89Xiq8WSDP4OKqiwWG+Py03XQZNIdetBfmO/K4gYYcceS5BQqfPa1xE3RbeW903HQK2\nbkaR38/34OZE7MRjY2PZtgeaqKdxIdaT3t+CkwbhLSP12kJHNXRXeH70/dQg6uiMMk9+Lly4gJtv\nvhmvvPJKoR6r9D3jUgM9bCh2mpmZGWzbti2jnEql2FkajUYOOGy0vJt7oUCh5aByqRJ799KGpdBa\nKwDZacfs4OyQpNjsGEC+g2rCEN9VF2glZdZUblpWDayxzKqY/F3pvPrXGmBst9sZQIQQssCfPpMd\nmvVC8NGhQ96Pyq+MTZkBP+z4OqynfrgG+ayLoTENbT97L7ZBtVrNykPrzZGDRqORjWaoC6rvpixN\ny6CbLdlRMWV1bHu73ojGWlj3yqg5LM3P0tJSNjTK7xwJ6UX6PneDiqnWkoheq9UwPT2ds0ykp3z5\noaGhbJs29RmLLL5n7VOuglUyb0RDKbu9v35UocbGxnKgxo5Bywnk55awE9DF4Ln0L3m9WnTNs7DB\nwVTgjM/WrFSr/JqvwaxZ3c1bR0O0XizlHx0dzaVtK5PQmbB6Xy2/1rUXd0gBeup/vY/GXdQ9s5O9\nyMp0GFnZmLo/rBMNPntiQYuuHBkE0HGrCRILCwtYWVnB4uIiFhYWssV7ySi4DWYI4f21MlWj0cgl\nzNDdoI++Y8cO7Ny5M7d9PHMDVldXMypNdNSAnmUTFEXubmLBxAMRO9at19pOwr92+rEChabceoEs\nOzyoIKFRb/XL9d1VqYHN+SHWBVOA1ECxRvdp8RQgFOA8n5vvQDBgp2LZlFkpAClV14Bgu93O7umx\nCS2DxkeUTXgSQsgYDMvNjqtMjsxKQUxBn88gC9aNhL0hdHVRtB15Pjs8WfTi4iIajUYGDo1GI6dn\nrLNms4larea+a0r6vqQ+kZoKY4OWGrgkZVYftFKp5GgWN5e1kuroOmqiQSLNwAOQlZFKqj6oKof1\nD20ZCGhEeG1ILwjGMllay+MKEjYXw9Jsj3JrHbBNWFbPIrOdFBg1BdwqtYKldW00HsB3tSChzIhu\nmFp1ZjpSL2ywMhUYTDEqZTuMe2h5CJKMh+n/ql8ECbX+McYsmWlxcTEXQ7DM1LpF7XZnjokX7+Iu\nYgQHXSJB37vZbBa63570PeOSY9BKbfWjjaYIy7/0a71hr1RleEChAKHj65o7oH66WlKN0HvDpao4\njHGsrKxkW8XR12Xjajo1LaRG9rUeNMDIDmznKljLqaM1NsBmYzQsN/9vtVqZZeX5Giz0Uqn1/SkK\nzCy7Agg7GJmExldYh8o+eJ0HbN2OWYBQcLBls7qr2Y1WrwgsumbK4uJi9r/GElRPCHrK2sie7cgK\n9ZJ5NHRhNUOX7VypVFCr1TA6OoqTJ0+6fcO
TvoKEDvVZ3z9lCfQ3jQBbBU35pRqco6JqMg2tFnMD\nOOEMyPvW6kKwI1troIxCy8FG1KCTjp4oABIc7AiBul88pixIA47qLnjMwv7PMluXhAChAWSgwy6K\nRhL0PmwHfRdd3ZogoVF9ghDLZ8GtrN5QUjEr7750OdTNY/1r+1nmqEO0AHK6pOubqj6pjuhyhgrg\nBACCgo2VeMZJ3R2PaRdJ3xfC7Tb6oOIpGRtNG88ykCLRVGLN3GMQjZ2FndBjFpb+eZ3EKiXPUWuq\n1p9KoaMuNriqiszyaYYhy6N+t6XUWhatYwUYBQ92alVEW/c20FvUvmq9ddjZDn3ynjYYzHfUZ3YT\n+662TiyrYNk4usD4CMGUbEBdTDVgBBSO5jWbzazjW4bCOlP3gm3IeqdrQ6ChLloXyjvWarWwuLhY\nqp4ofV9Sn5H6otEF2/jauJayWpBgI6QClWxMOw7N7D87KsEy22AVAYvlsnSQCG87hbpO9l3pFmhH\nt4FIBRtVal7vWVJVzFTd2vN5Pyq8BcZUR009y9aDto8yIh2tYluwPssEn4vEAq4OM5Pm66Q5NSTM\nKGWn1YWOlKFqbGtlZQVra2toNBq5XAcFVJZFYwpsT82z4TmMaVkWaw2kGrT31a7ipHFqGco2vCqu\ndTE8a86Oxu96nEqp+ftMeWZDc+hxaGgoix0A+XwC3gvIj4XreDbQCfRZa+nRZC+mwPfX33iu9/4a\ngygCCq+D2zpkgNE+1wsWsh5Srof3YR3aAChBWf111ZWUkUmJx+zoHrCT67wI/qajRxQvBuC5QjHG\nHBNhHM2Wi0CocR2tYwUi1UONh3kTHlutFmZnZ7G0tFS6noAtkCfBDqQA0WuDU9gwtqEsOKToNxXT\ndlq9VpVBWYqORKiFIPrTb41xfciQO3hxWrOdFq3KoO+iYKHnWGAoYgisY/1exCAsUChIqAJrOfVe\nXof03kktsAZfCbRsS40B2E5o38v+9cBR3RYNBHNinI33aCCZboC9n/eOljl6o0Asi+YNMd1d1+gA\n1t0NZeGaw0JAteyLuUW9SGmQCCFUADwH4A8xxi+GELYB+HsA+wCcAfCVGOP8xrmPA/gqgDUAX48x\n/tK7p85r93zNEmXK/hZ1EO98PWaprzaclk+/s9JtYo1G5mkt1P0A1pVxcnIS9Xod27dvR71eR7Va\ndVfE9j78LaWMnjVP1UuRpe/2twhMUmJBQ+sf2ByP0GxP1qMCLpmouoT2HVLvad0mvU5ZmY52FMU+\nPEBMSUrPNe7D+SRMBWC6gLIopucTQHXeCUFDhTrbi/TCJL4O4DiAqY3/vwngVzHG74cQvgHgcQDf\nDCHcCeArAD4MYBbAr0IIfxSdGmGF24hsr0BhrZeKtSb6DOuqkJJpYMiunMy/zHXQZBVaB6b20ofV\nIS+CCTcMnp6ezoGEThO3nUm/q1Us0zn1vcuea+9/PewuJUWuhg6pKsBqQE/Xp1CLqfTdCwjyb8oA\nFB3r9nuZ+imKvQH5gLwmlNHNUbbA4X8vUM3RMuopAZEA24uUAokQwiyAxwD8VwD/fuPwlwB8ZuP7\n3wL4Z6wDxxcB/CjGuAbgTAjhJICPAzhk78vpuOPj47lIbFnxAEB/43GlaPyf55ARXL16NaOQRGrN\niCR4aGansgjej0xCp38z/Zy58+12OzdrUvcB9d7N+uEKqjYWUKaOLP22x1VS4KTHikDau5c9V602\n2RcZlQI3mRjrgtaSf3lukcvB754xupEg2E08Xdf60brQYX7qGPWVoySaZcyAq64loWullhn1UynL\nJH4A4K8B1OXYrhjj3MYLnwsh7Nw4vheALn3z5saxTTI8PIxqtZotkEG5nsbSRleFsb+pQvE3JqrQ\n99W8eAUNHRMnUtvgGdDJtiNdZN4FQaLVauUCVxrhViXXJBqdfagxkBT9VSX0LKj3NxW3KEOje20z\nZQ/s6N5
Qtk0q4rM090RHkaxBSL1bL+W39dYLe9N3LWoPPc660GX5+IkxZklVGtvQOqPONptNxBiz\n1bVijJnL0ot0BYkQwucBzMUYfxtCeLjg1J579tmzZzMliTFi9+7dPSub56YoWHi5DNoYAHLKpdZa\nwcF+eA8dogOQYxs67EnE528EJJ32zfUKtVzc3ctLybXxi6I6KkuFu93Lnlf2miIh0NlsV6AzP0PX\npLCskMe8UYZuLkeKPaleeu6WZxz0ev71gNcDHa9ObO6MTpXXUS6Cil2fs1LpLFEwPz+PS5cuZe5a\nL1KGSTwI4IshhMcAjAOohRCeAHAuhLArxjgXQrgFwPmN898E8AG5fnbj2Ca5+eabs9mQ9Xq9VAN6\nooBg6blaZYqCh0Vj+n70efmXiqpzLBhYs+xFXRJdTbrVamVzTEIImybfsKNQeP7CwkKWfecFWC39\nt3VTtg67BTqL6Llnacs8mx2S1FkDlgrcNo1cKbO2vwUJ7x3LsCJrXLz3s0OO+pvXNha8Um6P/s53\n11iZnmv1ls/nXCOu+bFjxw5Uq1UsLy+j1Wrh0qVLXduG0hUkYozfAvCtjRf+DID/EGP88xDC9wH8\nJYDvAfgLAFyj+2cA/i6E8AOsuxkHADxb4jkuHS57Hb8rWHgpqkCnsVQ5ddUfTuTh2pIaLVbk1k6l\nCTWc/LO4uJixAS4bxklo6pLonpOaehxjxNWrV7G4uJibHq4pzDb4162ueqnbspLqkPoelNTohroa\nNjdCA5WaqQh0ckCUPXpsIgUQZYyR9y7WEKneeXk6ei+b02B1XyfZeffVdUd53MsC1UlvFC7F14u8\nkzyJ7wL4cQjhqwBex/qIBmKMx0MIP8b6SMg1AH8VE63AqKum+RaJp2xFnzJBLAsSzFtgBfN8dmAi\ntM43IBDwfajYjUYjy7RjIEmZBGMW2hHsZCKCFd0QXaXLAwrPgpWVFPUuc56t316YhPo/qZ3TAAAg\nAElEQVTU6mbo4kIMuDHqr8/UulCQ4P/sTHyeZ73LsFfv/ewwuLaTZX022O0BBe+vIKduj8410q0T\ndOUub7oDv+vC02WlJ5CIMT4F4KmN75cA/EnivO8A+E63+3FIx6KpR8mA9AQdPa/M/ShsQJ2izgVk\n6csBnT0mCBaVSiXLaaAlW1lZQaVSyYJKzKkna2B2HIOQZBIc4WFn8NKqyWRoKVLZpanRA1tfqd9S\n19nj3eIfXpAu9d26e7y3LsHG+I4OD1o3T1md6oHNSu3VGNljnm7G2BlapJ4oWPP9Go0G5ufns2nd\nBA1PT60e6GQxzUr1Ar1qfFm/PA/oMJWy0vep4ktLS6hWqznULDqfkmISvIcChfqXtkPougVcCWtq\nairrmDrWzE550003ZfsfsGF0uI7KraMrQGfnKw5NccSDwUm7u7hO+9YEIzsq4n2KJGXlbf3q8TLM\nxAMG/d/eQzu9dTN0bUa2n7I+1jc7muaW9Dqc3u087308g2V1ThktgNwSeKojHpNgfVUqnbUsuLYK\njSsDlepScCpBjOuuKvU0xvXRsLm5uffX3A1mHlLKNuo7ETaGUsHR0dGMRRAk6NMxnsAxaqCzcxRX\ni2Y8gp3XbqSj9NL618wg5LRhXegUQG4olR/mXRCgLDDQehXVQZl68oDBPqcIEFK/2XKyM7BzKZgS\nIAiQTCzSGbl0RVgPRa6m/T/FMt+JeGzKGjV1h7xn810UGHV1LjJLNRRkvzHGnN5MTU1lQLxjxw7U\narWeNg7u+w5eIyMjm/IkykhRg3rJIh6lYyPoWhLMa7AJKjyXMQm7lqPm86ufahOgdLxfWZNOwNLO\nwLU+dRVsdhDtTLTI9IVTINFLRyjj5nW7NnUfDTzarEldG4GWdHh4OLdKlQYsdSaw53qkAK+bS+Sd\nb9mWfSf77NTzUgxY70XdJEDofiYKEkB+/1YaI+oPme7Q0BDuuusuLC8vJ
9/Rk75vzlMm+6ss3S3y\nyT3RoJIqmvqDfL6Cik5CIhuwFlGDVJZ+2gxNWkG1ELQA3FqAe3KQYTCWQVbC69rtzm5RGuDj8+w7\nsR56qWP9dLuuiGnYRDCtJ4Ie24HMjR2FlpOJaWR+mmei9e3pxvWyB5bP6g2NiLZxUX0oaHjn6+ib\n6ht/o0EA8gsise5o/Aikw8PDuHz5cgY0ZaXv7sbi4mLOYqekmzXw7l10TzsioABgRw30Glsee73n\nG9shOl5PZsEG1/kIdrRFOweDZKurq1mQimm8McZNIwVe+VlGOxqSih94dNirkzIuRtH/LI+uxqTu\nHV0xpdjsPKwHa9G98pQFxSLRTqxxML6XdXnKiuqUusQc0eA5DESqfml9MXhJ9tVoNLB9+3a89dZb\nPZWn7wvhctiPkmq4ouPdIvuqMNo5PKBQy26PWStg0V8Dm1R2G7y0ohmFmm5td2tiHILDgsvLy9ky\n6lQGXXXarjVZVAatm5TYGIOe3wuD89iMWnt9DgGbPrlmZKqbwWAuAZn14QFdynXS9+xFdISMf/Wd\n1Ci0220sLS3l9KOoTTQmoWuQKAO0SV/UP5aBAMZr3+s8iXcszWYz85kAf/px2Ya0NFjvpfMD9HwL\nAsokvGFG3sujsymLaZ9t/6dia/yCVoK+KDsIg5yrq6tYXl7G/Px8tgXd0NAQJiYmEELIrI5OAOpF\nrtfCdmMRHhBpfIb1rS6HAh3r21J6TVlm/XqMoux7dYtf2NgWy6B/9V56jAFq7u/arW3U3aDLkdI5\nZbcELAr1e/fu3e+v5esA5HZspqQayWME9joLDqxQ9btVAVURFSzsvVT0HC2fTeTxQM9aS/u+Xjk0\nsWhpaQnz8/O4cuUKrly5kiVshRBy4+PAOoshPffyRwC4FtxzOVIWT8/t5m54rgnLpe1hU7C1bnWk\nSIPAGqzVtlNgsfNsrNh20HbsJiljZtmqXWIxpeupmAbPZ/zD03cbY6pUKlldTU1NYceOHV3fR6Xv\nIJGiht2km69nfW0e006h5/HconJ4AUhVVqvEqhy8vsii8V4EhEajkRvyajQaWFhYwPz8PC5fvpzN\n6WBdsHPpPBG6KloOa720TARUPe65CNoGrAvOt/CAUK9nXauSa9n4DGVtWr98V90Ji3WmGwcRbDWR\nKAVaqXLa7/ac1Dt6ro5Xf159dnuuGjqtP9VLdY/1PKb49yJ9H93QQEwRsqrSdQOUlK/cDVi0wTzX\nhRaJihlCyFKq7doTqfJ4HQxAlo1JN4KrDa2urma5JCsrK1hYWMDi4iIWFxexvLyc5QhYH1c3Va5W\nq7ldvTzlVnCwQFGmznluUV3rMQ3qqXUkkFswBzoBXcssdP8KBoIZDKZ/7gGjWusyHTQFHClXs+z1\n3SQFMPo8ndrggTTrli5pL9JXkLC+o1UKS089BSxT2RZg9BrNn7eJLdYVYUwAQC5gyNRhTQBKWZmU\nhWF6Ln1VWtpGo4HJyUmEELJgJcGk0Wggxs5ohu5opdZYh3WLkqyKsl1vlPC9dGMby74IALb9NTCo\nI0ca/AU6O8MxplMUqEt1vCJ3o1t8w+qqunaey+cZEVu+FNvQerVBai0D239paQl/+MMfkmX3pO+r\nZTcaDVSr1ez/IimLvh7YKCWzQczUM7TxqIiaBszkFaZtMx/fW5DG3pNiLfba2lpusVJOFed33eOR\na0xowI4gxnfWVY2Kti6worENrRuPCZWJXejvTALjEC7fh4uj6CpUtq6YJ6BMRL/rkB+A5HJ2tm08\nZpV676K4RhlJuW9Ab/EPe51nbO05zz77LA4fPtxTefu+g5fnGpRB7iI24Z1PJqCAYYcIvfvpfXQB\nVlI8ggRTq3UVqSKx1oP/M5bA5+nmNGQKtEgEB3V92LHJHnQoldmivbhfWkYeS7lMGgvQe3htx/gL\nwaLRaGB1dTU7bsf+WZ+6zgR/01Ghk
ZERVKtV1Gq1jH2QUdiyWMZnrbZXH/wU1WERG/aYhCcpt7vI\nLUr1IZa3UqngwIED2L17N5566qnks630fQcvWjd9sVQFef/bBvZARjuinlMETAok2oGVIusoAq2j\nTZyygdFu0m63swAcp5Szw2tnUbADNu+5wbJb2m1HTmyddmsL2wE8RVdmo21kn6P1pMCheSOeG6i/\naQp8jOv7Wuj2g5y/QID0GESZTmulV5e3F/aQcr/tdSmASrGdEAI++MEPuiytSPoOEvV6PZtmnVLO\nXhE3dcxzQVIdV1NtbXScVF93Z6IrogpPK2fByNJBa800+swOr/kb3jCtRvJZFrpAXOhGF5kFsMni\n27qzbaHndKPc3ZiU7ZxFYK/1yXsrmChwt1rr+5XSldFhUu/5Rce1s5UBE/3N1o9lEXwPftf8Dl7f\ni6jBiDFuiqloynivrlJfQaJWq6HRaBQOQ3rxA3ZMIG2lUsLG80DJftQloX+vW/mpMml2nZUiZFd2\n4NFrlkP3oFCAYB3pO1gGRLDSzqSrW3kugd6P37XMfE6KfaSU3NaRsgXt+DrUaTuf1Re+q51Toxv8\n2HbwXAwPBLy6se5Gyg2wOuL9VfFAO/W/Hrf3tC5sCJ2V1HRotKz0fXSDmYRA+c12y1AuT6y7YTMt\nbfTfWlVaat0KjoDFRCYG/Dz3wCu7ByxqYagEmihk3RiCgLIJZULeWpwelU2Jd56n7LZjsKx8lrVs\nTLfWc3QZQAugfD9dyYtDn7SUOjtXk5a89+y1syggdtNJ21EtkygCizLlTbkeRUaSdder9H2q+Orq\nas+pw0XDU2VomlpCtbpWoazoEJxuRR9jxMrKCpaXl3MrGltWwNEHL5nFa1jNJlRgsOXm+3CFrcnJ\nyWzGJCeG2Ul0niXtVl9l6rTIWqtwlqLWK2dzMjhrryOj0twUuhUMGHcDCM8ApRhEmfoo6vCe66bn\npeq9V7e7G9ipe0iw7UX6ChKXLl3KhqqsFMUaPItVptGs5e4lRgF0AmGVSiWbkUhFX1payha1tcpI\nC9lsNnMjIDr5y360nBq8Y/xDfVcyII5kcNMf3QHbrmRl66pIbL3wmdYVtHVnwVx/J7hq+XWTZn1/\nZUjMQOXQKRcrZhyC9aDuRgoYrkdSboT+bs/1npsC5zLA4l3fDex5PDWiVyR9BQnSdg81+b9XURYg\n9Hw911aY0nhPioCJykw6yx24CHLMQ2AyE8uuw6SNRmPTbtT6Xlo2a4116JUBOnUnbL6FulEagFWf\ntIjyvlNJpYBb8QCS76+MSddHAJDb5Yt1xfN01XO7ungRZbe/q2jin3eNd0z1yboa9v8y90zVmQ2W\np+o0xpgl6PUifZ8qXpQBCGwOVKmkqJr+LXNdmWtYXgAZk5icnES1Ws1ZQo71s9xUUI6GAB2FYYDO\nUzyvzAQGsgmWxWNBNohIwLIBzaL6SNVZUceybK+b6MhQs9nMuRsaJ+L9FOi8IVKeQyahi8emytON\nWVj3IlU3ZTt1kXujsRuvbB7IeQDhBcOB/FSIstL3CV6aAORJN/oEFHdupcSeMnQbclIFYEfTBJ2J\niYksaKYb+bBj6sK27XY7Sx7iX6XkNrcg1XmtMmjMQ0cwmGvRbDYzX17Ty8tMrEv9XpR6buut6H4E\nSxtjUMtNtqRp1zqxS5OpCCo6wqHrNyhDKwMOWmYbMPbej8c8tlv0TC+fxnMlUsCgOlB0TbfUfE/6\nHrj0hn0s7bwesWiqYv1k61KkGskivF6v1Fh/0wCjHkuVuZd3o9Uhy9AVq+jza/yC76GMIqUwHjPR\nNrnegJpnCfldZ3XqsnxkZARfXVvDThcni2ASmcYlPHe2qPy2zEWMz2NXqTry9FwBqOjcFFgUnWuP\n9dqn+s4kRkdHc3sTppQI2OynX4+k5lOkKLaH3CGEjB5z+z1aQlpwojbvoxsR22f3IkUWhVmfXFj4\n6
tWrmzaIZSejr+7VReqYBQqPCpd9HwvIbAO+D9kB65FgQNbGkaIUSNjApd6fz/NofTeG4emjipal\nF/EYSlmASJXd+66ualnpK0iMjY1le02kxAONbiCR6vSA7154VsB7lvq/OnQ7NDSEpaUlLC0tZQvU\nUrlJd5VK61J13VBf30njF5qWDOQ3s2GGJUc7GEOZmJjYlCdRBAr2e+r/FOtT9yklKbZF0NM1MgBk\nGaV8d/1dA5y6J8X1stGUsP6UBVjxcllSupsCX23nFEjYe3vP0e/eUgbdpO/rSUxMTGQTlOyLU8p8\nTwXvUoBi3Qx7Tw+cdNlyfujnLy8vY3FxMZvRqD4yFZtsgkOg3SxBt+ArFVFBSTsPj4+OjroTz7QO\nLAvw6qeoLj3pBhCsG2v96TYpILJ8un6jMg0dBtY9U232aRGopepZv7NeWPfd3jF1Lz6/yKAB6aUM\nivSGz/KAZHx8/F3ZVfxdE+4NAKT9KP1L6cYkgOIIvXc/tcwMMDKrz84TUCUjaHDdQkbovUxLBQrd\n4avsO+n9vA6dovD2+jLBN/s8++wyHYwdUjuWLbM3R4Yf3UWbsRWCspZZWYQGK4tYRJG+lZVuANEN\njLy2BDYvH9ANGHp5D4JqL9JXkGg0GoXBs3ci3ZCVYjsxLbxNfNKgGs8HOjMSvSQpWpoYO0E5gg+t\nfRnFVCtv2RPLHWMnIElhXgcDeHbfSHsf+yxbT/bZ+n7e+d3AhL9bNuFlTCqQE5gB5PbE5JCntw2i\n92wtg3Uhysj1nG/rRoPdfE/LbPnuWg/6G7+nxLZbr6nZfQWJPXv2ZAu5WvE6tD1mkdj6t3pdSpl5\nT/r0uuGv7tuoG8ICncVMdCIS0LFqOr7P39RPtVmUWh5PLFCkGILmbHAnMN2ZzO5dkapL+92e44GV\nV68p4W+aEm/n0vBjp9/r8CiNjAZjNWBphz5T5bVS9hzWSRm3I6W7Fqhsvdt35/PKuBr2HZjR2ov0\nFSSWl5cxMjKSW8wV8KOy/K6i/5dNlrGdjQDBWAF94pWVlSwQSZDQdQuAjiLpjlm8PxVX4xh8lpbT\nKlfRO3r0lc/SjEQGLLky8szMDLZt24Z6vY7JyUl3STe1pN1ciTIsoZvYTuu5HAq49r2BTkxDV95S\nFqHXqcuTcgNSjKise+u1XS+ApOWxbqNXzpSuFAE09bsX6fsQKNOYge7jvCnxUNmj5xbJratBd4NL\nqTEmofSOIKGKDXQ2tFUaqatWMxBnGY73175b0bur9SVVZ6IXP2QQNm+gW73aOvWO2/iAVz6VVFyi\n3W5n7EfdDy70azuR1nGRq5Iqezem471rt3PKUn57nRUvZlBkOFOswTtfN6wuK32PSUxMTABIjypY\nsVagjDXz6Biv9RgIf/fonZ14REvGTmp9TbonmuWoNDr1DrYTq2JbJfc6gbXKRROe7LXdmIJHjW1b\npdwWb45JjJ15F5oMpfVlGRfrWK+xDKSMwSgLFDdCbGe2xk0l1QZlyltkXNvtdmHKgSd9BYnJycls\nLQCKp5gplCyD3inrbMfmbSxDRzu8pfJ1PQROx7ZbwQPru5QxFZr30qXfFXjUh2YZUvXBv148w/r1\nChZFHT8lFpBZTsYGrAtnr7WxDKvAlk0QINj5dWNkHQ5VJqFAYROoLGClfP+iOik6t4iVpFhyKq29\n23PsM736TB0DgPHx8feXuxHC+pZ0moJrJUWlVOFsEFAZgJ5v/7cAoTMlVflscC2Ezq7NOjV7bGws\nAzy6JiGEbC3PZrOZi1V4SS3WV+e9yuQcqHWlqzE6Opq5H7q6lXaW67WmKYUuE9fw7qV1PjIykhtR\n4tC0fU9NwdZRHM+luh7KXiQE8bLW3QKD/u1WXykW5AFFkRvba44EUBIkQghnAMwDaAO4FmP8eAhh\nG4C/B7APwBkAX4kxzm+c/ziArwJYA/D1GOMvvftSASz9KdtQ7Ij
2o7kMXudSRVOFY+CLjcnRDiqe\nWh8FiVqtlgEFOyFTpDm2z2xIVWL+taIJUdb18ayggt3w8DAmJiZQq9UwNTWFer2OqampLGBZFODl\nvbz/bSzFi67bc9WdSrlUygqADkPT3cKZjEbQtTEY3VDXulR26FABt1dQ8KTb9Z57qNd59Ze6T5Eb\nUXR/lVarlU23LytlmUQbwMMxxsty7JsAfhVj/H4I4RsAHgfwzRDCnQC+AuDDAGYB/CqE8EfRqc1K\npYJqtZrttwCkh/f4W1agjU7DoUkGHO0GOfZ5RH+dhkwFY5CPjKHZbGZrEtjhubGxMVSrVUxNTWF6\nejoDCboWDFRyPUmCirfxK9BZt1KBT9eQUNakQjeHbIFlqtVqOfAik7D12KvlZLlYRpv1SBDie3iB\nSp1cxmMsBxec0fU5CBr6HF1Lw7oamnvA6yzjTMVm3gmzsgxQ603PSU089O6l31MTuXoRzqDtRcqC\nRABgQ65fAvCZje9/C+CfsQ4cXwTwoxjjGoAzIYSTAD4O4JC9KRsr9aJFfiMVgBZaN3lJrZDs3Z8K\nR1YwPj6eWXd2LDsmr24GOyPzEELo7H/BxB9S4ZWVlVx8gFmEClp8N7sEvGc5dDUqPqNareZWpLJB\nvJTb5jETL2jWarWyeiY4c+IVQVA7qDIiC8p8lm0Ldce4RL6uOp7Kp7BzNcjmdK4MjQSBpZv02gn1\nfMu+unVq617qHJVUDMM+M8U2rtcwAOVBIgI4GEJoAfjvMca/AbArxji38dBzIYSdG+fuBfCMXPvm\nxjFXOGORkqpY+2JMndZl471l1FN+ve0UqmRUQO1cPE8z+7ieRK1Ww/j4eMY4dIcvLjajsQ3tEBqh\nVzZBAPRcJ56jlnRkZCRbsm5iYiJbOWtycjIDCyspgPDO03rX5fpXVlay9yVQEex0nYgYYy7BC0AO\nxHhvGg26fdeuXcPo6GhucpsXkLWxCHYum+HK9+F0gLIpyil3INVpLSCn3AWrp1rP3nwNT8qwHy3/\nuwUSD8YY3woh3AzglyGEl7EOHLly9PTkDdEX1Mqw9FDPjTFm7gUzI5kWTYXg+fY5KUpoy2QtPDt1\nCCGLXRAoNGBGEFhbW8uAwQIBFVwDoLTCAHLuCieLAflAmcYmlOFMTExkbIKuBsHrekXbheDFvUhX\nVlbQbDYRQsixFwIdQQLobMEHdPJK1PLTVbNAyrpjGRQctV51fRI+n8ZD16jgcwhcvcwUTYGqx/T0\nnFTGryaL6bVeID51DyvapzTLl/KuDIHGGN/a+Pt2COH/Yt19mAsh7IoxzoUQbgFwfuP0NwF8QC6f\n3Ti2Sc6ePZtbFGXnzp2b4gsM/mmH0HUTlpeXsw8V1k7C8lBcg5xK60iLbZqvTfX1PqrsNsmHbsHY\n2FgOxBjzINgA+f1AG41GjtKrv6/BT96Dw7EWwBiPKaKdNvBoY0C6mC8ntM3Pz2cxJT5fmQQ/tN4E\nWVV8HQqmO6MxD61TG3TWsvOYDl0zk5ZT+HkvdnadLWoNlq2DlCth9SwlZd0NPVeBQhP5yrIBtsWx\nY8fw4osvXle8pStIhBAmAFRijEshhEkAnwPwXwD8DMBfAvgegL8A8NONS34G4O9CCD/AuptxAMCz\n3r1HRkYwPT2NGCOq1WoGCtw1e2lpKYsR0BoTJEh1+VFXw6uIIsZiv3tuxltvvYWpqamchfP8YDaK\nuiXch1N3/ab1Z9yDcYRKpZLlCGiZdZiYZVRgqNfr2LZtG6anp1Gr1fDqq6/iU5/6VGkrqe+qHY3H\n2+125mLQzWs0GhlAt9ttjI2NZUvysZ0UeGkQxsbGclsLPP/887j77rszQOHydbpKlWWACjLaebTd\nNNBKhqlMROeEWD0p6ohHjhzBvffe27VeU+5GCpxVNBZhmUQ3FmwN5NGjR3H33XdndTw8PIwnn3yy\nsOwqZTjoLgBPhxCOAPgXAP8
Q14c0vwfgTzdcj0cAfBcAYozHAfwYwHEA/wjgr2Kitjm1GgAuXLiA\n1157Dc8//zzuv/9+PPDAAwgh4OzZs5llZ4esVquYmZnB8PAwLly4gGPHjuHMmTMIIeDixYu5fASg\nE9ElvVxZWcHly5dx4sQJPPfcc7jrrruwb98+bN++Peu84+PjmJ2dxb333osPfehDeOGFF3D06NHs\nmnvuuQf33nsvdu3albkKtIC05LVaDXv27MGhQ4dw/PhxvPzyy1hdXcWXv/xl7N69G6dOncIzzzyD\n+fl5DA0N4dFHH8UjjzyCW2+9FQsLC/j1r3+NCxcuoFKp4NZbb8XExASazSYuXLiAt99+G6dPn8ZL\nL72EZ555BseOHcOTTz6JH/7whxgeHsbvf/97bNu2LbeStrpQOqSoMYIQQgYElUoF09PTqNfrGSBV\nq1W88sorqFQqOHPmDM6ePZt16OPHj+Pw4cM4c+YMlpaWsH//fszNzeHQoUP4+c9/jjfeeAOzs7No\ntVoYHx/H3r17MT09jRMnTmB1dRW1Wg2zs7OYnZ1FtVpFo9HAiRMncNttt2H79u2ZrnDOz+LiIqan\npzE7O4uZmZmMrWzbtg2tVgvLy8uYmZnBa6+9hhdeeAGrq6s4duwYfvKTn+DUqVMYHR3FwsICxsbG\nshgX4yCMszCLlrGQGCOOHDmSsTkK3R5+11gDWeHs7Cw+9rGP4dq1a/jd736Hs2fPYmVlJWNPNBAA\nMD8/j5MnT+Lw4cOYnp7G3XffjT179uQyVBUUeRwAzp8/n206DayzCIKTTiUoK12ZRIzxNICPOccv\nAfiTxDXfAfCdbvdWSslVjdvtNl566SWcP38er7/+OoaHh9FutzE7O4upqSmsra2h0WhgaGgIx48f\nx5tvvonbb78dx48fx+XLl7PGCyFkFU7rXalUMsYRwvoybleuXMErr7ySKSYrndcSAGKMqNVqaLfb\nmT/OFaf4vNHR0ez5jA9cvXoVH/3oR1GtVvHGG2/g9OnTmJqawh133IFDhw5heHgYe/bswde+9rXM\nCj/22GO4//77ceTIEZw9exYPPfQQzp8/j4sXL2agUa1WM+X+5Cc/iYceeghPPPEEHnzwQZw+fTq7\nF603k7kAZIpJsGi325iYmMj2ZCUFjzGi0Wjg6aefxic+8QmMjY0hhIA777wTBw8ezIaI6/U63nzz\nTVy5ciWLV6yurqJaraJareKzn/0sjhw5gn379uH111/HxMREpvDDw8NYWVnBL37xC9x3333Yv38/\nLl26hIMHD6JareK5557Dnj178OEPfxhjY2M4f/48rl27huPHj2Nubg4f+MAHsG/fvmwlLrpzO3fu\nxFNPPYX77rsPDz/8MF577TUsLS3h3Llz+MIXvoCxsTEcPHgQn/70p7GyspJtKry8vJwtqUgg4rto\nbIWjUyGErIPHGHOrlTEvZmJiIsdiabyazWbO9eJEx2azmdsJnkzNDjuTDSk7ijHilltuyW2fSd3n\n3zIjfyp9zbhkZeriIkNDQzh27BhmZmYyf/3o0aPYu3cvXn755SxJiFHySqWC8+fPY8eOHXjrrbcw\nNjaGS5cu5ShlrVbD/Px8hr5TU1M5V2Hv3r04cOAAhoeHsbi4mHUeTjAiQ6Bfy5We2u02Ll68iJmZ\nmUxxlpaWsvvSjbnnnnswNDSE/fv344EHHsD27duxc+fODNG1AclILl26hNtvvx1nz57F3NwcxsfH\nsba2hpmZGbTbbYyPj2N5eRn79u3Do48+iptvvhnf/va3sW3bNpw8eRKnT5/O6DSBjIqqwb5Wq5WB\nLi08352jCk8++STGxsZw33334fTp07j11lvxyCOP4OjRozh16hR27dqFqamprOyjo6N4++23sW/f\nPlQqFZw6dQorKyuZS9lqtbC4uIjR0VHMzc3h7bffxu7du3H69Gm8/vrrOH78OG677Tasra1hY
mIC\nV65cwauvvoparYbR0VFcvHgR586dw/z8PI4ePYr9+/ejUllfoo85Fqurq/j85z+PEAKefvpprK2t\n4Te/+Q327duHXbt24cCBA9i/fz+q1SqATnCRM2QZ+AwhoF6v4+rVq7mhVZ1yzbgPO7rmxhAA6DLT\nyKytreVSpMfHx9FsNlGtVrPneiN/DIrTIBEwNM5FN5aBbg1eXk/GZbieQMaNkAwgVK4AAAOCSURB\nVBBCfx48kIEMBAAQYyw1pNM3kBjIQAby/pDrHzwfyEAG8v+FDEBiIAMZSKH0BSRCCI+GEF4KIbwS\n1ieH9V1CCP8jhDAXQjgqx7aFEH4ZQng5hPCLEEJdfns8hHAyhHAihPC597issyGEfwoh/C6E8GII\n4d9u1fKGEEZDCIdCCEc2yvqft2pZTbkrIYTnQwg/2+rlDSGcCSG8sFHHz97w8tokj3f7g3VgehXr\nU8yHAfwWwB+/1+VwyvVprA/1HpVj3wPwHze+fwPAdze+3wngCNZHh27beJ/wHpb1FgAf2/heBfAy\ngD/ewuWd2Pg7hPVcm49v1bJKmf8dgP8F4GdbWRc2ynAKwDZz7IaVtx9M4uMATsYYX48xXgPwI6zP\nKO2rxBifBnDZHP4S1me4YuPvlze+ZzNdY4xnAHCm63siMcZzMcbfbnxfAnAC6+nvW7W83Ot+FOvK\nGbdqWYF1pgbgMQB/I4e3bHmRnqV9Q8rbD5DYC+D38v8fUDBLtM+yM8pMVwA601XfoXCm67spIYTb\nsM6A/gVmZi62SHk3qPsRAOcAHIwxHt6qZd2QHwD4a+QnLW7l8nKW9uEQwr/ZOHbDytv31bLfZ7Kl\nxotDCFUA/xvrq38tObknW6K8McY2gHtDCFMA/k8I4S7coFnEN1pCCJ8HMBdj/G0I4eGCU7dEeTfk\nXZulDfSHSbwJ4Fb5PzlLdAvIXAhhFwCE65zp+m5JCOEmrAPEEzFGTq7bsuUFgBjjAtYXJ3oUW7es\nDwL4YgjhFIAnAfyrEMITAM5t0fIiyixtALlZ2sA7L28/QOIwgAMhhH0hhBEAf4b1maNbQcLGh8KZ\nrsDmma5/FkIYCSF8EAUzXd9F+Z8AjscY/5sc23LlDSHsYGQ9hDAO4E+xHkPZcmUFgBjjt2KMt8YY\n92NdN/8pxvjnAP5hK5Y3hDCxwSgROrO0X8SNrN/3Omq8EWF9FOsR+ZMAvtmPMjhl+iGAswCaAN4A\n8K8BbAPwq42y/hLAtJz/ONYjwycAfO49LuuDAFpYHxk6AuD5jTqd2WrlBfDRjfL9FsBRAP9p4/iW\nK6tT9s+gM7qxJcsL4IOiBy+yP93I8g7SsgcykIEUyiDjciADGUihDEBiIAMZSKEMQGIgAxlIoQxA\nYiADGUihDEBiIAMZSKEMQGIgAxlIoQxAYiADGUihDEBiIAMZSKH8P3cLym+IowimAAAAAElFTkSu\nQmCC\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import time\n", - "filt = np.ones((15,15),dtype='double')\n", - "filt /= filt.sum()\n", - "output = image.copy()\n", - "filter(image, filt, output)\n", - "gray()\n", - "imshow(output)\n", - "start = time.time()\n", - "filter(image[:100,:100], filt, output[:100,:100])\n", - "fast = time.time() - start\n", - "start = time.time()\n", - "filter.py_func(image[:100,:100], filt, 
output[:100,:100])\n", - "slow = time.time() - start\n", - "print(\"Python: %f s; Numba: %f ms; Speed up is %f\" % (slow, fast*1000, slow / fast))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can call Numba-created functions from other Numba-created functions and get even more amazing speed-ups." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEACAYAAACj0I2EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXeYFdX5xz9nd+9WYAGRqhQldgUbaFBRASOKQNQgJrao\nwdhiiwXbZm1oYtfYY0QTNUYsqERQsIJdwAL6QxELCgpI217m98eZs3Pu7My9M7fOvTuf57nP3p07\n5czcud/zznve877CMAxCQkJCQnKfgmw3ICQkJCQkNYSCHhISEpInhIIeEhISkieEgh4SEhKSJ4SC\nHhISEpInhIIeEhISkiekTdCFEIcKIT4TQvyfEOLidB0nJCQkJEQi0hGHLoQoAP4PGAV8D7wHTDYM\n47OUHywkJCQkBEifhT4MWGYYxteGYTQBjwMT0nSskJCQkBDSJ+j9gG+1/78zl4WEhISEpIlwUDQk\nJCQkTyhK035XAv21/7cyl7UhhAiTyISEhIQkgGEYwml5ugT9PWCwEGIA8AMwGTi2/WojgQPT1AQn\nIkluPxc5zhuLdF3SRHkJGJPtRvjAT3uT/T5TxWzg8AS3tZ9DAdaDs8C6nwrN/wFKgB5ABUOYxWJG\nApu07YvMdbZE/vy6m3+bzfUagWeAw2zHbrL935zYKXmmFWjxuG6i93G6zyEWXvQiES53/SQtLhfD\nMFqAs4A5wKfA44ZhLE3Hsbzj2KGlmMIMHCNEClZQxBwS/xk53S9u+9KFrxXYDKxhMaOo4mCgE1AM\ndKaKsfye84BvgBrkEFaNuU2zub0T9t9Iuo2TAtL/mwmagZVe0na2hmG8CGyfrv37J91frG5BhaSP\nIAk5JCdIdvG236OxOopG5LWo4x4eYTy30kgxw5lBNa/RmW+BBqTVrUS8mdgWawHtLeaiONskizpH\nr5Z6IhQhz9+tI8sfstx9DczQcVJ1moPSvP90sE22G+ATt/YWErwx/AJkJz44Bfvyaq3ry5qBFlbT\nl5lcC9TxIlOAjWyiHCn64CxkTraWk6Cr5ekUQy+inux9rL6rdHYcdtz0In1k+RcyMEPHSZXl7HRT\nBVnMAbbNdgN84tTeIrJ+q7ZDYLXpFwlsb3/S8HqP6sJvxRVcyW5I0TWItkbVX7uQbY/3e1f366eL\neO6XVNzHIs4xUk3mjamg/UrSQLq+wEKCL+b5QIRgurKSua/s943TfeTkWrJfhwhQTCcazE8EcjA0\nGSvU7VoHQdRTgT7QnH90AEFPxymG/vL0U0Dw/OWKZARBEH3vOAmY071VpB1XvS9mBM9xgRmtchkH\nmMudtlfWvP0z+/+xBFVF0KQT5RpJN/kp6nku6On40kIxTz+FBDdiKNl7qtD23n4vOYmmuh4FyGiW\nMmRnV8h8JvEBRwHwBDfSl4+Q0S6qrfEEOxEJyJfIlPwT9TwX9FQLbyjm6SeI/nJFskJmF1gna9ku\n+Moal+6Vvfgf2/EFlqh3ptCMQlnGvnzPDsj
rpz6vsB3L7nLQxwKc2uREJnzRmXo6yy9RD+ovJwWk\n+oYIxTz9BNVfDu6uDK/Yz80uJBEs8VYvJbYRTuMSetDCCvbnWM4BOtGNWqrYjaE8C0AVe9OFGqCU\nHqznDM7FEl9dgAXRvw+7DHgRa9UxpPP7Ci11v+TPmUQRWua5RSYGw5Ih2Z+J3biwW+GFSDdJMVCP\nda+p9QT38ne6UkM3fuZn+lHFMMcjDeZ9PmQ8k7mEO3kE2IAVjijQI2Nku9TsUP0zt/BFJwrN7dIR\nDqg6o0yEGqY73j4zpCUfuqcDC2FAVZr2nkrLIRTz9JLvYm6/F1W0iPKJy8iUPVnAB0wA1hMtLOr4\nZUjBL2cAn3MSJ8c86htMoQurWEtX3mAcMnyxCSu0Uf/dt2BNPFI029aJhwqXTIeeqPZlglwQ9csz\nnssly4Rinhuk+5E9WZLpaJR1afeZq0FPge6LXsgxHEkVT3El8DNSfNW2an0DKOS3nBn36K9zPM20\nIKf815pLdYtcUWS2YYNteSH+xE2dSzoEsZDMCXpuW+p56ENPlbUXinl6CbK/HJLvbNwiTNR9pYQ0\nAkRoZRP/xyj24lks90sENRiqLPQ9mEkxdXGP3pdlyJmiBtFzJiJRL0FfoBTLh6+wh1d6RQ+vTCWZ\nDGHNXTs3DwU9FacUinn6sA/IBZFEf9BKqJ3OT0XvFAOVQFegE+dyNtDbXGawBy9gCXoJBzKDkTxF\nFQcB5axguKeW1FJutqcUy8hRYlvOL3kDqMRom4yk2q8bRMkIWzqevjIptLkp6nkm6KmwzkMxTx9B\nn6WXzMQZ3ZXi9BlAEXvyDnvzKtCN/qxiBcO5iLEcyEMcxWU8y1/QLfNX+SO7MhuAI/kLZ3uo5PgN\nu9NMOVDJKB5FWvcl2n5LGcILXMQkiqjHyvmSahFOdf6dRJ8aEiXI96ozuddiV+zxtIkQinn6yAUx\nT3SSTazt1H6lGH3ACKCYvqxgGE/wAhfyDBdQyXpGch/d+Z7VDOZETmM1v+AnBrOFWc1xV2Z5alF/\nFnIO41nJLvRgOQYtzON3SN9wEQfxKD1ZQTUvEe1j189JRZY4+d39oM4/VdkOi5JsTyLHyx2feh5F\nuST7GB+KefrIRxeLl+gcp2IV6tUHGdGiXDRbUMVeCbQjPtW8SwGbaKUFiLA7bwAG37EDP9EZ2IiV\nZleJpS7ABqkTtVTtJ5OiDsESdfcolzxzuSRDKObpIchWeSJPDWoMwIt7T/95qeOo8MBV5t9GoJmB\nLPDZDu8cxRVcwSgG8jVQDrQynipGMJ0/8zugC11oiNH2VM4MTZVbJ9PSFeTQWosg/9p8kOyXmyeX\nIVA4he0FCb8i5Tde3u2easVyQTSb67VwDJf62Lc/djF98Cfyh7ZlSxnFM1zIgTwI9KEzn7KRHrYt\n9TzoqSxEkYrJSJkMZQTLpRvsIhl5YqGnMpVpSGoIcoy5n+RfKrQwFWKusItZKzfwNDO4xscxkuN/\nnA3U8yq/Ab5mJX0d1rLLQyongaUidUCmXXlBnwSXF4IeinnwCLLP3KuIKN+235+I3/tRdRgGSxjD\nBxztc/vE2I63kX7oBmS6ATfL0yk7Yyp/N8lm1sy0wAbVSJHkgaAneoFDMU89QSverOO1IIkK7Ut0\nUo3Tdu7XpJIaerAWKKcVg1I2J3Bcf7zKKXzAQTiXqLNfIyfXVKrnEiQTAZUNCQuudgS3ZZ5INC41\n2I9NuUlQ/eVefOWpeJT2+1OSETCD+ICx3EMLEf7KfHbmxSTbEZsZ3MAn7APUIUW5Mc4W4P69KlFP\nVcRJosWcM5XASyeY4Yw5bqEn8iPM9OSEjkAQCziDN6H26x93OkYsMXe6LqqTiVBMC8XUUcZGqtg1\niXZ4YyR3A6UMYAXd2QBsaX6iRNRtPkesa5TsNdRJpHPNVJUjO8EzDIP4K/SB3y8x00ViOwKJ+JnT\njRLZWO1
KhXvIS0fmNLAIIBjGq4ziPm7lf0m2wzvd+I4LOYST+CMt9KELX5uf6PNR3MrixStPZ88H\nkyiJuGCy8bsOnp4E7ZfogyCWzupoBM1froQg1r2RjI/cfpx4+3ArMQdg8C6jmMZsNrTlUkk/hTRT\nbmZWPIVj2Eh52yfRuBWpjie0yr+uXslIjB9Rz9ZTd7Ce9nNY0P2Kc44PFwSKoCXYcqrK47ROsm3W\nsyTGwymVQJHtbxNylmYj1bzBDG5Isn3eqeZtvma3OGu5XS8/vz0ro6TlmvHj9vRTkjBb92RwQnQ7\niMrlcL8VOILmL/cyGJvoD04JTyKhi/o2bnli1JT6AiYwjaEec7Ukw2a6cxMvAOuZwXnIkEVVochp\n4kyE9gUmkqkk5HQt9MIYTgOiBTE+c1o3G5N//FR5Sm8rchC/PXGOnmbgCJKYqzDEeEJtd68Uai97\nDU/7K5HztVuU9n3YOyApAn1Z6vM4idGJdRzAg2Yb9KpEeiUlO04hn6n0H6vjKmveyc70OliaLbdq\nKpIDJk/2W5B2OshDSNoJyuCnXkA53nqq41eDpHqlIPv2EZLz/dp9807tdLJOpQD9m9v5L9fRkgFB\nms/JyAlFqvgF2l83oXZyWaUrg6buh7cXt/byvWRL1LMVbRPdghzDzw2Ug6cXSILgL/dTCUcJq9sg\nqV6IIoLMFV4K9MU6V7vvN95L7ddtApPdwizSlsNGSljCfmxuCyNMD9W8TQs12hJ7h6OWOXWAutDq\n23kdV3BDv26lQHdkARD13en3nxfRzObvPruBF3mueHl+emnH/mPKBl5neCp0cdUFu6f5t5hoy1kJ\nbSegi/byc++oJwBlURbYXk6WJkRbxQDNbKa7j+MmQj3WhBir8IbVHqcnCMtFNZyXiS6RZ+8E/OSV\nV9dNHyhV30d3op+y9PswyK4XyKZXIAcVz8/oeEjiZDsRkVcfub6+LubFwACgG9CTMzgXWblH7bcE\nS+DLGMRSLmE0W7HanIpfTPy4a7WvcrZjCaoDKaMZq06n3bosJFoMo+t4PsA/qGaxx3P2ziucwdW8\ngTVgqIS00OF/p+suxfUdjudo7sUqbWfvcJWg6+JufxXi/OSk9tPM0VwLVCBL8xXbPre/dyLb0pad\n42f7rH3iJ0NeSOJkS8z1R28/vkinqBIBrKYQuIwJ3MX9WEJawklci6zlKYsvf8XBtFDEd2zDGrYk\n2mVjF7si7RjSTXAMNwC9qaSe8zmVo7jL3Lfuvy92WGY/51a8Tcf3xxv8Xov90NuuniAi5vUo1dpq\nH1MoppBm+vMpBe3CEJ0GM90E3em7jd72SS4ABiI7ZPWZ3Wcf7x7NplGXHV96jgm61+YGIyY0N0nl\nNG4/6I/efrCLuT7g2cw2LOdaZnI0N3AFR9KHH4ByHuJ++rICqOBQ/kUVe5kTbhqxQvgiLvtVKF+y\nwdW8QxUHcC4nUEQTCzieCuqw3BPlZrHncVzGb5CFJorpyk/IJwUl6vIaTONdn9fBnZuZixEl3LKu\naB++Qwl3FzbShy+5gl/Th1VEd14R+rCSA3iRFirpzFr2ZTbQnT6sJdp1ZH8q8YL+veu+9AbzbwXR\noq67eeLNPcgW2ZlFmmOC7oXQ1ZI4mfaX66KRCO5RIxKDZfQGNrKK/nzIOCZypylYpfyBkxnOLIbz\nHx7nFqp5DyvyQ/d/O03f1327pZzA6VFrTOEkjmYayvXShc3meyiikSmcBlQwiWup4kCGMpeJXMdw\nnmE4TzKVYQlek/aczyiO4DqG8xQXMAHozHhuZgqnsQcLgHI2si1TOIkCDI7meqAzerHqIorYhTlU\nsT8Ao7mXE5nKwDZXk9NM03jRQvbrq+6F7hzBncA6YCVEDeJiO5aXaKdskfm8UTlUUzTRqdYh8UlX\n+JkTfgbNYuEWI66fh3IDSNEtIEI5rVzAEe22quYt5IBhA3LQUE2mMZAzO
u2/E2kpTuBehvKyYwv/\nzW18we6AwYmcz0A+aPusjs6spx99+MzLyaaVOZxHJ9bwSx5pW/YTg7iLe5HXohAooR/fcSrHta2z\niInM5rfUA1YYZKzJNYb5cuoglfh1BnozmYt4nCpgtblv9R2o/esx9PGOm+n6o3ZSnZXRvaZoDgm6\nF+sxtM79kykxT2Vn6ybm9mPogq4GQSvYjycYxd/b1prGWzRSgFXswUBa0y38kSncw41YP0pDO4Yc\nED2D09iSrxxbWkNXbuQlBrGQE5iS0Nlmg5t4ic1EgFbO5zdEaHDM1d5CEdfwLLABS1SVcMeasWmP\n8AGIEKEzJUS4gEOoZhaykLbqLJqJTq+rC3UL7TtdRSvZncWZbLk9O0kIuhDiH8A4YLVhGLuZy7oB\n/0GGEawAJhmGscH8bCpwMvLqn2MYxhyX/aZY0EMx908mXCyp/l5izd60H0sKeldqWM/WlFLAxYyN\nufebeZZN9OA3VLGTaXnfyAyaaaaBIixBV8cqBcrZk9mMy2AJuXTzHpP5nu1YxHiq2MtxnWoW05Mv\n+JFSpFukCXcR190PTukFILrz3YKzGc8d3ILVyWIeo1l7rxPLEs4nK91d0L08+/4T+JVt2SXAy4Zh\nbA/MA6YCCCF2AiYBOwJjgbuEECkwy+IJT+hm8Uc6k2upQatkfONuJOKuKWRP3qSEEi5mLBvpxWIO\no5qPHe25800fcwU/ty37M0dxAlWoiBjrvIpRkSn5JOYAe/M4E7jKVcwBfs3F/EhnpFiqyCh9Upe6\nz4q5kDPY2hyQ/gVfowaFo0NNFQVczIHcwcNYYZ7qflITjpyIdb9lO/ItMwZn3F+HYRhvgnZ3SyYA\n083304GJ5vvxwOOGYTQbhrECWAYpHN1xJdtfVi5hj31O5X5VaF86xtrjZVN0PmY/vqKRSnbhZRop\n5Tku5hkuQ1bscaaK3Rig+buXcjD38w+scL4S5ESkcopMV87zXOn3hHKerqwmehBZj2nXwzIj3Me9\n7MbbnEQVE/krhVQyjFewxNqawLQHL1JKDcpi35V3kde8HFmQI5H7NwjxH+k3PBP9Zfc0DGM1gGEY\nq4QQPc3l/YC3tPVWmsvSSCjm3klHms9MDER76STsbZD3xVr6sZIhFAIfsj8GEeQjvPc2b8M7nM7x\n3M0/UI/+0g9/DwA/si1lbPS8v3xhCQdiWdgVwGakvxukz1jdGwVsYAteZgrjuIUBfMTljKaaV6Fd\nGoJCPmQCBzEdKORg/ss8zuF0JnM3j2KFlIK8n+2ujFhZIGWIafYoJN1l61JlqiV4lV7V3g80X3bi\nNTF0t3gj1UV9M9WRerX47feB/L+eImCzmfRKpYItwc8tW0INPVlOP75kJdsDxW1iDtCTLz3vK59Y\nxJFAhMs4gCIaeZNjmcuxWJFBoOd++QVvsYtZnamaRURYTRPlnMAFDOJDVrIjD3AX6l6t4gDqqWAe\np3I3twBfIecJqIlXTr/9WHpQRPZ96Ymk910OLoPudhIV9NVCiF6GYawWQvQGfjSXrwS21tbbylzm\nwoEJHl4RhMeooJNKF0imw0KTCXFU7dQjDFRKABm9cRUf+6zjWQKUMZj5CbYpv7jEjElX7MdjvMWv\nqaUUeZ0L6M23HMclQAGL+HXbulUMBeB2ZqKMgxJqGced7MkMAFawNy9yDvKJqhEryiUesUQz21Z6\nIoK+jflSvOK6pldBt0fIzwROAm4ATgSe1Zb/WwhxC9LVMhiSnfIWS0BCQY9NKqzyVMWN+8VPR2R/\nWrD/36rtq5ATuIhBvM8b/MFXi07leN7kVPbjAV/bdSSKaaVWy71SSS0VZsm7ETzYbv0/Mb7tfQ++\npkdbjVOYzh1UsQ9vcArzOBR/Sb/cRDP9bo/4JCLq3vccEyHEo8ACYDshxDdCiN8D1wNjhBCfA6PM\n/zEMYwnwBLAEmAWcYSQV6B7rsT4MU
3RHT1SVzD6cEihlAr9PFfZOP9a2LXTnGwD2536f7SIU8xis\npy/r2RE5eClDRnslMHHqFc4w/eubqOYVM3xUJeryKoRuhmAQXLTp+03FVUXDMH7r8tFol/WnAdOS\naVRIMiQ78Jnt2bZ+xdwtY58b3enCT/6aFOKJrnxPFUNopYCrmc/ePMlBCXSaI7mbD5hADc1ALSvY\nEZlOd5W5RrKTdGINnGYKpwHd5Am4z8LrBJIQK+Y3ETG2Z//LFon4+90Sc4HzubQyiwv9NizEBwW0\n0pOVLGGc72030YOf6UcNrcjQ0nqe50TkwODPxJ4RquM/xDUfCLAyul30IDwyBQkVceL3umQnG5w7\nifjq41nnTkmcfqaSFSzgd7zEJT4HRUO8cjpHJ7TdU1zLCrZFhjOqFALNROduSQXp82N7J/VPCgEW\ndDfyt3f1T6IWbdCuYaKROPo2Trm4nT+bywnIiJUNCRwzJJ1sxzxWMBgrxFRPJ+AU5RJLFGOJdhAE\nXT1Rpq6jCrCgO1mPbsnxOyLJ5JwOEomKub16jb02ZoHts+i0uvLH3MSj3EQ/ljFSiysPyR5zuIw/\ncwAqpcKN3E3iwhsvfDHbIYwg25g6Kz2g2RbdMgAGuP/JGH6FPMjXLNFIHL1zsldX0kug6SKvroNe\naUdVECqnit0SbEtIOriKNzBYj+U3VxOC9GyLiliThWINPKbajZMoXscFFMkl5woJBPYKOrHQS6cF\nkWSSg+lPabqYq3NWFYLUenruGrl+H75EhdYN5YVQzAPEj2wLwB7MAmpJr+AG5feRuifnoJyRDadm\nBWkAL5P4dUkE1bWiSKZeqb1UmV6nUv6/Dy+zJd/QQHfm8Dss66eACE3szhx25wU20o/HuIMJXJ3c\n6YSkjHmczhv8hh15n6UMRg6MesllHnRfeTxS18aACroTQRapdJBIoeSgX6NkwiLt51egLVP7lQWO\n92AOD3IP0lJXj9yCJsoZy+0A9OZLqhiSYFtC0sFHHAJsYikDiC6UrVvpTvdPPF95LCJkP78LpCri\nJUdcLjnU7ySNSvLvVfiCED8ej2Ri5MG9ElEpBzDH/L8LJfRgsZkvZB0DsfK2iLb1Q4JDLV2ppZJq\n3qSa2WxoE1c9uZcdvwm5coXUnEMAlTIfvhy/+C0DF7QYcjcSjZG3b6+j9teNKsbxMaOBnuzAQo7h\nnLa1WiglguBgHmI2Z6LSrn7HrmzFxwm2JySVPMadfMcgpDWuD3zasUejOFnkbhErXu69IES7QCra\nEcAoF/tgWRDjplOFOjevgqcG+YT2v32ZE6rYsXqfCZLN8uj0veuFFIqBrmzNl0zkerrzbdSa7zKZ\nYTze9v/feYb19OJIrmBHl6LOIZnnYf7OVwwBNmJlVNSLdPspN+fWIcSbYm94WCcTeK09mlNFou2C\nngu+Yb8kInb265CMm0XNwEt1PgtlTScj5LG+b6fakz25mEPMCjchucYGelPJKq5mNq1t9ULVS68f\n6iS69v8TFfRY22Yap7BMOzkdtpgvYq7CDiP4j1rRxTsCdMbyIyfiNVNWfcT2ShT9vBK9peKNBdjb\n1wSUciUj+A93JnjMkGxT2ZZwqwtWyGkf8y9Y95PTfZFKj3EOSKEHAn4WAW9eXNRgoJrI4he7wCkR\n3wrogeVyUTUZk8Uu8F5fyeClU9I/1++JGq5iFrWUA7CKHZJsS0j2KEaOixzGiZxNGXW0T9/sNLBe\naPvciUQqXmWL5DQvYIppF6WANc8TuuWbqAXhVMhZv1lXMYXzkbMclfshXhHlIKB3QF6fLmJZ7S1A\nHT/Snzf5Hf/mttQ0MyTjXMGeVLE3AAP5kAN5jujJYQp1v+uWeyrEOEhak/j5BCzKJV7q06Ci/L7J\nttkteiWifQ7Qwn3chfQhNwDrzeXlwE9YA0rZpMDlvR+8uGAM4CfmcgIlWc9xHZIqtmAFlmFjIO8D\n/
ftVBoyeuAuSyzMelGiXxKsqBVjQg9RjOpEqEbfvz47TV2QA65CPqT3N7UqQBQDUD6CA9jdnLMFL\n1rpPRwfsNonEjhzgtUe6hOQuT3Me1vevRFrd24oIsvDFJmTOl2QHNoNQni45AqyaQbTQdb9xKjM/\nulmisY7RZL7WcgKXI8O+9HaC1eE4uTrsL/u69pceYun0SjWxfPPOPvUf2I5qFlAdxpnnLG9xAtW8\nSQ2dib437fe09KcXUU/0WE4y92KQNCextgRI0O2VZ4KAinlOxeCfEyqCJdbnOnaLWz5uPsyNtP8q\nk/HhK9R4gHwSKKQEa/al32gdr8RK3GWPeFDtKMY6V8EUfpOGdoVkgn15mCr2Qw9N7cEmrAgYdf8V\nAMU0U8bvqEa6G9U9kA9jcYk9MQf0TLM5uKf82GqgMR2XyCm9q1M7vNCFKg7iEKbT3qL3OwNV306P\nzCliOHNooS/StaMnyEplIel4A6XRec57sp7TOJffcZXZrhJO5RT6JFCYOCRolCK/0womczU9aECG\n6xaby8sYwkLKEXzCIXThuxQdN+iBBbEJkKBn+0Lq7od0XhavnYSbsLXa1qmjmvnM4WSc48CVOHsV\ndruoyv/fYTR/4ExkyGSJbf/xnjS8HjdWJ6Y/ekvrrTNr6c2XzOdkpIVWTD+WJNmOkGBQxtmcwNkc\nRyfWcibHcgWHo6z0zjQygauppTuL2YONdNe2TeYJP0CSmIAmBmxQNBukw5XiRqout25BFzCFk7mP\nG8zPirESHLU4bOM2cOR0HQopo5kWulFAC134kd8zhX9SRftoArV/v9Oovcx4VTd2hAoa6EQzf+T3\nbZ+eyB98HC8kF1AhjDo38yqlbKaeCJvobC6tQxo5TvVjcx3/5xEQQbdPac/E8TJ56n5nUMb6IguA\nngziPQopYXde5HGuRYqpSjmqhNt+ji1IwY8XmmVZ3XX045c8zQqGcRP/BTYD9UTHyusVV3QfuD5t\n235ufp4YQF3D7VjEIdzrcduQfOLPjATgTmawK0+ad1ID8umsFFiRraYFhiA9X2QA5RsPsphD/Pat\npRM/8gXb818u4mRO5wBmEu0KcXIdqQHESqyBRL1km3qpGF8B1LEdb1AX1WHYJzEV4uyuUsv06CCv\nE4rsk6tkexYyilI2e9g+JF85i6MYyX0A7MOL7MMLbMk3trWSsdIDYucCft0uARP0dDVHCXmmTzfV\ng6pqXy18zL5Iga3lVp6kL58iz1EWeYgux6YEtxDoxR58iCXKTh2OtawYGMBifqY/ssOIaPtzctPo\nnYPf2au6P97eMSgaqOYVHuKfPvYbkq9syVLe5lf8lFHXaXAJiKBbIWepJZ3hdbFQLp1Un49+HirF\nqEEBTWzJCgTdkBJcTgHlQCf240UsIS5jGE/yISOxRNMuuPps1SIaMWihiCpGcDm/ooCeFNDVPEYZ\nsgNxyu9SQvSTgJeX0+Qqp0pFdXzNQKr5wNfVC8l9WiiihSKqeZcWiniO8ziYR0ht5tAg+eD9tSUg\ngq5I1YWMFcucbtym73vFz+NeExChFXiQv1FODQNZToRKjuU6BvI1BZTQm58oMEX9XSZRxRgzply5\nYPTYctURqXYYXMMsqpnPNSzgCvZmMpcDndmeZUQLtpz00ZuVQBf2YL6230SI5a4SQDOvcVqC+w7J\nRf7F37mG/wFr+YbdgTXMYwJWyt18xLtMB0zQk8VviF46jp+u8Eu3Ke81wFpqKGYA81jB9jRRw+Nc\nyUZ6cxChwIWhAAAgAElEQVT3MImraaUnffmOA3gCgBYqkNZ1Ob/gK6KF2bLQ5XFbsKrKwC9YwBSm\nMIk/sw+zsJ6CIkA5w3iVKg5lBI9R2JaHw++Tkv3pwZqlugcvcxljgFIGhFZ6h2Ic17A1nwCbeJhz\nkVEu6chlnu0w6sQIkPc/2b4lXZOA/Bw/FU8YbvvQz83Q1msxj93
EEoYio1AKaaGIdXTlY37Frsym\niuF8wQgGM5/X+QPyqy9lCG/wFbshXSR6QQHlC3e+pn1YCsB+/Ie3+TVWpEs5ffmMat5HJg1ThSdU\nO+1Jlpxw+i5Vh1HCIdxDEU1hkecOwHKGs4Bj+Yad6c+71FDKKvogo1uasYq1pJoguV1iFcFuv2ZA\nSPQCZmvA096GVNwAblZBPGtBiXArljXdBBSwjv5cx9sADGY+APsynQpTfAWwkd5YVnSZ+SrBuqYl\n5quIRluh5QgNHMnfsAZJi+jFMqrYi234hOjUvkqo3fK321Oj6ucfYSc+YioTKaE2zvUIyRcG8D5f\nsgdNNPElO7OKrYk2CNKVHTFIgg5e2xMAQRe2v37ItlUO6Rn8tGPff6x6iq3a//W8yvE0YVDNJ21r\nRmjkz4ykin2ZwF84kIfoxiaUv7sThfRnBUrchzCXKn5JFXtTTH3UUYupYy7nIt03xYCgmg/YRA+O\n51y25lugG1ZxAl207QOpbtkm5fIC6iimLs61CsknCmkBitiCVcj7XpVoi/eUF4Q0uJkn22pI4l6f\nbFvlkHqPldP5+PHl6UJfYP4vrfW9eNR1q5HcRx09kYJcym85h558y168CHRiMUfHPOqvuIW9eJEK\n6lEDrJ1ZA8DJnMbOvGYuLwMqGcJ7WOGVegZHtPd6vLr8+wkH8iTXUEtXLxcjJE+o4pccqBX8lth/\nF+nI/59tfdHxZjQGoEi0ssz8CFcQYk5TXbzaKYQwVpSHugb2dli5TqzwwXKuYD8K4lg11byL/GGU\nU8Vubcs/5jB2ZVbcM/iSfamlW7t1V7Mt9/AgUEoVw2mihOt4GqvKeyzUU1ghKiQTKqli17jtCckv\nqnkGOUtZpZdQFnsL0bOVof1AaSIRMH7TWKQb1Rb3ItEBGRT12hM6iV42SLWYg3NSrURrIVrXqIRm\n/szIuGIOcA4TqGAdLbYO04uYA2zLWy6fFADlTOQvAFzHq8AGLH+6249Gd2dJS/4SxlASul06FC9z\nJvOZiBxgLwTTDSNxinBJlbUeND96fALyTOHlwgVFzNPlM3eaUJPIPgqATnSmASjjAB6jqG3Kfmy6\n8gMRGlI+tb4Xy+jCBobwAgAXcDhltGK5XJyeuKJ96gJBF+pCMe+A1NEZKKUzNUQ/gfYC2yB9Ryeu\noAshthJCzBNCfCqE+FgI8SdzeTchxBwhxOdCiNlCiEptm6lCiGVCiKVCiENS08ygiHk6sH8NXo/j\nZNUDNHMo97Az79OFVck1LUWcx+i2951Yx+HcjHSfdEb61vXJTbrAy+gbg1LOY3zmGhwSGI7gevbl\nWc7nd2CKO1QgZyKX0/53kK8DovENSS8WejNwvmEYOwP7AmcKIXYALgFeNgxje2AeMBVACLETMAnY\nERgL3CWESMKkDYqYp7MN9gk08dAzEOr7UP7mRmZwEZ8yjBlcSTWLU9TO1NGHz9mRD6jiIKwUAXpR\nET2Rmrw+0scf0hE5hBvNd92BrlQxlr15Amd3XbYLpKeL+BoUVz0Mw1hlGMYi8/1mYCmyysEEYLq5\n2nRgovl+PPC4YRjNhmGsAJYBw/w23SIoYp4uf1qy5bL0CUCWZd9KK6ru6DiuSqaBaaE73zKJC8z/\nlE9UuYz0cFTr2qs4+pCOy8XsxzCeB+A9DoB2cxJSbZ0HZJjRI77UQwgxEBgKvA30MgxjNUjRR5af\nB+gHUeXXV5rLHIiXSjUI0SzpjjN3yyrohhI45UfU/1f7s/KwADzPZbQE+Ma8jAMRUblkIDo/DFSy\nht9xXnYaGBIYSqlhLNfTTDFyfkMh0iLvKBOMYuNZ0IUQnYAngXNMS91+BVN8RYMi5ulEP0evYq4X\nblYx3PaScXJ5b74BStmGtwmyX7GIRq5kJAfyHyzXi4pHl66XZiqy2cSQANBCIV8ynLX0ZzXbAKvA\nNtEtWGGGmceTYgkhipBi/oh
hGM+ai1cLIXoZhrFaCNEb+NFcvhLYWtt8K3OZA/Ow+pRtgG3N90Fx\ns6QTP2KujyMUImPLt2Qwb/EFOyPFWsas92UV3zMIKGRfnqSSu3ImgZVs5+9pX+GokJqompEhHYnP\n2Z8f2JHhPMYsprCOralkOTIxVyrT5gaVL4Hl5vvYhplXC/1BYIlhGLdpy2YCJ5nvTwSe1ZZPFkIU\nCyEGAYPBbTTrYGCM+dpWW57taEp95mI60MU8Vv4WvTCFvqwF2MQ4bqGCAqTAy0k3Q5nF8VwClPMO\nx+WMmAMM5EOO5xwKo3zp8vV7pmS3cSFZ4wN+w2scyV95hHV0AtawgVKswc98HQRVbIulk2Nirhl3\npqgQYgTwOvAxVmqzS5Ei/QTSGv8amGQYxnpzm6nAKchRuXMMw5jjsF8Drqa9ayXbrpZ0psDVB/uU\naNlD9GJtq9pWAJQhKOdKxlDNIi5jGM9wHUdyEQU5foPLTI36hJEiKlnDuYzNVpNCskw1s5ADoM1Y\nWUHVPaK7WdxS6SbjiklHet5EMYCpic8UNQxjPu4KN9ppoWEY04Bp8RtnP3y2XS3pEnO7n1tg+cDj\n4VSxB6AJgzqqmQ1sYC0DOJo/p6S12WZrlvItOyJ/hHI8oFfbI2dIR6Oa+ciZxUVIQdP95rGm++cj\nsT0H2fZt2Mhmc1It5roFbh+0LEbG06pqQfYvSW3rFmGjlhlI90sTj7XF6eY+J3M8uzAXOeFIXq9j\nOTPLrQrJFlWMoIwGoITtWI68/zP5FBrcCDE7ARL0bF60VIm58vlGT4ixjqEXTq4D+iBnuvXWtlHr\nOH01Bba/ihb68QnvcGwKziEYHMXFDGUmVmhmSEemghaO5q/swTPmEiXoHWFQ1DtZzrY4jeh0qdnC\n77H1LIheBk91t4nymRcDXTmLP3IndxCdRc4JdY30YytrXw6K7sJrHMXlfk4kJCRnuIlX2Mx6pPtF\n5Sfy4j+3r+eXoGVdvMTVh55lCz2Z4hapwouY2wsxKIGO124VpSK0/y0qWMcj3Iu00pXrRT+GvZ16\nSTh93QJO4UwqUpxUKyQkm7RQSA1dqUWmiTqDI7BKz0G0yAZJcLNHQJxD2XikjuVmSfaJwW3f0f1n\nDZ0Zx51sxWfcw+1YQUSq/qbaRxnRVoL6rJRzOJ5XOI1PGMuhXJ9Em0NCgkUhLdzIU8jolgbkU2wd\nmZ8klzuzRQPkQ88kToKrl0RLRszdkok53RQNPM+5LGQi0In29TdlIedCSpDZ5dQgqlrP4DZmYFDE\nodyQRJtDQoKHLGYuB/2lmDdhGT12izy4M6EzSQAEPdNx53Yx1wcik0FPkuV2XIU6lgFs5B1GEZ1G\ntgTLP16EFHv1mT0evZGP+VWSbQ8JCR4j+CfQmXE8gpWvJXStxCIAgp5J7MKt5wxJdr/xLqU94kWN\n0qubdA0TuZWe/IxMJ1uKFTFTT4QmrLqcus+9lJLwJg/JQwppZgJX8zzHI38jygoPI1vcCMigaLx1\n7K9EjlOkvbcXUUgUr752vc1O6zcBrTzDFZzOcfyaaUA5w5kNlDCYj7iUA1GW+mWMQQq7tNov4ICk\nziIkJGhUs4Bl7MVqeiFLz4FlndvdK6HAK7I8KBpr4NBrXxNvgoG+v1QOvvrJka7n+rYj99GPr9ie\nJwHYQF8O5iH6soQKNrI//wSgir15g1MooglppcuZlK1EwGOZuZCQoLOU0UArjzIVOQjahCXa2fKV\niywe2ztZjkO/kejQxQJSO6KsV/HRSbZH99MPquPbt1GDsOqzUqCcSxlNJBTnkBCqeQxpqKhiLZjv\n7UZci8MyO8m6JVsJzpOAexx6li10Xcz9WM+6+8QJZT2rArLNRH+h0QUgvH9RfisXKcvc6TKrAhXK\nBSRH769jPlXs7eMYISH5w1XMR0a0/IhyRUb/PnM78Vy6CcCgqFcx133fbr50Pf+JenUhduSJE
txY\nkS56BIvaxj7ZyP6S6Wyln1tvt/xsCG9RSiEqSdcOvEM5LfRmRfxLERKSp5zHOAzWYNCMFHKD+CIe\nirwiABa6FzH34v+2n4pekk3FdMd67NJrWNpx8387rae2jwD9gR+041qzTBdzCLszn48YRQuCzziE\niVzLEF7wcJyQkPzjFh5lI6XIQVA9CgyH9yFOZFnQvVa4j+fmcDsNtVz5qu3+N7ckWOqzcqQPT/no\n3CwBPb+K1eYdeJHP2B95g7Z3+SxkFBdzBBvow0ucy8AcKkYREpJK/sPlbKQYOQgKYfKtxAh42KIX\nMXez3EsBQQktyEk7nZDul1Lt5RZ1oo5bwVgeNpfbq9GrYzvFoMsBz88YwRmciuWCKcJywUjXzR28\nSC++4DjOopJVcc41JCT/+BuP8BlDcfaZh/ghy4KuBgadklGVEJ1SNroKvKraY02Fj5jblAGdOYbb\nge5UIKjicLrSYm5jL65cZPu/1NxXKV34kf9xFpYIq1mc3bAm9+h+8wpzXSspVzkbzHXVdl2AMvZk\nHhEK+FM4yzOkA2IgqGYB1TxHbVsIbry686HQxyMgPnTdf21ofwWy11ZRIEp4S7BugFZtW1AiPZML\nqeKQtiOdyOncxoPIEXQl4k1ED57WavsrYBuWsI4BfMPWZhvKkI+E3YBNyBtM3WRFyBmedVhT9LtQ\nwXomcRVPcD2/4E3K2chQnmMx47iUkUldvZCQXGQh45jJ+cDPWAOfkP1Y89wnq4K+OwuwxBosgVdC\nXaD91YW+GMuvrT9kKHEuppINUccqpo7deRlrkkIpUtzVgGcZMqNbAbKzKGQ8t/Etu7CQ0eY2PYA1\nSPdNA9bNqNod0dqk/oeurGJ3/sd+PEA3VrKW/kzgqmQuXUhITvItQ5jJZchxJS8RLCF+yPLEorvU\nf1jV7JvQxdBC/1+ll9Upsq3bnS34nrM4CYD19OI2/glsNNdp0tYFq/PQ99cFedMpa1w9JVQCm7Ge\nEtSg6JbAeqzwxDIuZDz38Dib2sqpRejCCjYymD14liO42vUahYTkK9+zHfdzO9Iwciv6bE/G5Ral\n5qWWaMeYWJRlQb8txhrK362jMq4p7AUkFGrwEWALOtPKJgSwFhm1olJwuu1T+dO3pJgNNNKMJfb2\nm8d+bDUeUASUU0YhdW0xtWrfLUi3USlQQRVDHa9ASEi+8ya/YS6TiR4QVcaTXdBVrQA7oaArAjCx\nyI1WrJBB/UvWURZ9s+2lEuG3sC+PsIk1wE/mPhvM/TbF2GeruZ9NNNJort/s0B6nKcdqvzLPSh1F\nWMn5N2P56RuABi5mBA1UsIrt/F+ikJAcZz/+SxVHIQMH7GUdc6ewRFDIsqDHezrw0iu6rSNF9S2O\nNN/XI90tuhg34dxZqH1uJlqgnfx9Svz1Nsj/f8lTWDHo+sCPekoo4U1O4kXO4l4eZjGHxznXkJD8\npIrxSFFXc0YUoaj7IcuC7uURRk/M40asxPf25D5O+1ei3IRVHaUW6Tuvx7LQvbRBiXsrCxhr7sP+\nFCE7hqE8x3wmsojRQBPfM4RNbBnnOCEh+ckfOZdhvIwVTgxZl6gcIwBXyyn+1AkltrHWtfu6W5HC\nHO8Yuhjb19VFuMnjS7lY1sZcZxH7mu1rAmp5l3HczJzYlyEkJE/pxXIE3Ygus+jFWg+AjAWEgBSJ\n1gcM46Fbyk6zRJWg61kO3eoP+gmbUk8TibZT2D5rxZo9quLhm+jPZx7bExKSfxzK3zjUfF/Nv6Bt\nQqA1PyQMdXQny4KuYrhBfnF+0+jGc9lswopNT1VxC3WDebUKlKvHLuqqM5Humd6sZlsWMoJ/e9rr\nm5xivmtlP7MARkhIvvAmxxM9RyUUcS8ExEJX6L5wfYq/3SLWhTyeu0aJeSveS8bFQ/ndvVrrTta9\nHjppsIoBrGIPRnM31SwCGhnO47zDMVQxHIBHuY1ljKCKv
ZjLaWYbStiV/4V5YELyhu/ZibmcjAwo\nKMBK2NVE9iz0oIQsxibLzqdYA41K8FSEid3HrQtiLHT/uIHlw04FXr/kWJnjDGQn08rhXAdACd8D\nm3ifg4FaruF1VrA7yxgK1HIDC7AmZIhQzEPyir4s4SLGU8U4JjAdmSNJx25EhT50RQe7EvqAZ7wB\nVj/7dIpnt6M+d+rEZOqBAbxPNXNpoDNQTwsCKdwwnb+bba6jnvVIq6UZ2Ew1C5M/jZCQAFHGJgD6\n8SnDeAsrkR+0d5+GoY2KAAh6ph+fdPFVoYpeBDkW6kki1rnonyk/PEA5pRQA5dzF3cjY99VYxXH1\n6dB6sVy9Iwlv6JD8Yx39uIsHqWuXsjrEjQAIejZ8U/Zjqjh0t8lDXlH7cftMoToPWcT6JM4B1hGd\nzwLU7NRW6oENWDNcrWPsxQtMZZ8k2hwSEky+YU+giY/Zl/ZhjHbpCo0aCNygaCZppn30i7K0leDb\nE4T52Te0v7y6NV3EEUxnJ17lbh5GCn4dThisc9lXCV+yO+/zMpVs5ty2gK+QkNxnEO+xAx9SSD0H\ncw8NlHIf1VhiHka+2AnI1P9kE+ckc/xYTwj2yUB+BmPBeZKS9f45TgEMNlKAdLWoNumDt/rLPgZQ\nw89UAo1soBPX8p7HdoWEBJ9KfuAYLuFo/kJ3VhEBJnI/mXe75E5+9gC4XCC7F8yvQKuXEtlW7eWE\nitJxOsaW3MQzSOvbHs3jdGz7RCjlt5fRLjvk2SzTdWzNEsawhDHZbkpIlvmSfenCaj7gKPSnXIsO\n7GzQCNBVaCZ7zfEzU9VpW/t7FTtfYPtM7V+WuIM1NNNAe9+5mwWi9qFcN9b+D+SfjOSBBNofTDbS\nkzt4AVXYpIpdst2kkCzyIqezhq3Yj8f5lv2J/h3kjgWdbuJa6EKIEiHEO0KIhUKIj4UQVebybkKI\nOUKIz4UQs4UQldo2U4UQy4QQS4UQh7jvXcePpZwOUjk4q2aARg9gWjHxMneLnMlaS3SOdj1u3m71\n20t1gSpIvZAjU9j+7HMLc4nOjRPSUbmfh1jDlsBm3udA5P0fJu9yIu7VMAyjATjIMIzdgaHAWCHE\nMOAS4GXDMLYH5gFTAYQQOwGTgB2BscBdQggX09cuotnypevHT3Wnovzh9nBJJeSxUFEz9tS+AIK9\nmEUB3ehCE+dyGK15EtJ1LW8jZwla4wa38lJ2GxWSNfryKfI30Eh9W3lHhS4tiQYxxCPbuqQTW588\ndW+GYdSab0uQXaMBTACmm8unAxPN9+OBxw3DaDYMYwWwDBjmvXHZHrlONibdDSXO+qxR+4Cnmz9e\n+dcturOeHqznOM5gIxGqeZOrWcB7TALgK/bM4aIZamzCuj5b8WF2mxSSNYbzBFZOJh09PUgIeLwS\nQogCIcRCYBXwkmEY7wG9DMNYDWAYxiqgp7l6P+BbbfOV5jKPBCFnQrpEXe1bCbZenEMPmdTF3u6j\nl370dWwFtPAipyCt2Y3AemqopJq3eJjbeYKb0nQO6aOad2luq+iknk6a+YK9stuwkKwxg2uBTuzL\nc8ixp1KsTKUQxqBbeBqFNAyjFdhdCNEFeFoIsTPtFS+FCthE+h6fvKKSb6UDffBUCbnbsZTFrhKL\nNSOvTQMvchJWST25v9c4GhUCeTQXcQdPcza/TsM5pJ5X+QMyFt8eyVNIQ7t8HiEdhdM4DoA6OrOY\nyQzkczZTwjf0JP4M7XwjtsHrK6zEMIyNQohXgUOB1UKIXoZhrBZC9AZ+NFdbCWytbbaVucyBeVgP\nCdsA29oank2fsBqcTFfkjR9RV+1RHZ19ApJefLoWab0I7ucuoJhqPuRyhlEYKF+gRTPFPMENfMme\nyA6qva1QyYYstCwkSJSxiQvNyXOPcB8wEOnR1QfNldGTT3wJLDffx+68vES59FARLEKIMmAMsBSY\nCZxkrnYi8Kz5fiYwW
QhRLIQYBAwG3nXe+0HAaHOX29o+C0qV7XS2QXfteC22ocIbVVk8PSMltvcy\nWuYEpgRWzAFWsy3LGEJrW/4avZQfQDMbqGQGV2etjSHBoYbudOE74Gc6RkHpbZEaOQYYFXNNLz70\nPsArQohFwDvAbMMwZgE3AGOEEJ+bR7kewDCMJcATwBJgFnCGYRgu7hh9mr0TrWQ/xjTebNJk0YXW\n6/mqNqmnCNU+vUNQLppCHuYfKWhn6lltDto+wP20Hy+A6Gvfwif8KnONCwkkLzCVG5nNBK5C+tLt\n8z1SLezBNYSciOtPMAzjY2APh+XrkOa10zbTgGlJtw6wfMbZxItLJFXoMbZe11F+dlUxvYBiihnH\ndTzFXzlSRpQGik8Zw5NcxS68jPwROoVoqk6pBYhwJFdkvqEhgeJ9JgLrmcFl7MJbfMLeRMdgdGwC\nNFM0FkEYJE2nT91+fvGOo08w0juZZqSg9+MCfsk05lHF7iltaepoBjbxCXsCa1zWUeMGRXTmB3Zl\ndsZaFxIcXuZM3uK3XMgY5L3SzCcMAyqRqabtrspCUvdUnW0PgU78p4UcCuBMVUGKZMmUX9/LcdwC\njb5hGjOQxS/eSG2zEqCFIqpZxGKOAOB7duBJLsU5AZn+UjSzia5U80pmGx4SCOroTCs13MB/kQEB\njVgVu+qz2ragERBB9yqSyeYrTwXp8qk7+Y8T6cD03O7N7MRLvMJptHrwLVazkJc4hxn8NYHjurOG\nAUAtK9kJgAUcjzV4G6scoL0Tb+JZLuGTME1wh+IIrmdnXscaZ2lBCvr35JqPO90ERND9EG8gNROk\nQ9SdOiovx7APhEYL9xIO4HWOoyBO5/A6U4BaFjCJJs39M4tLqOajmNuuahvcnE41HzKLy2xrGEAt\n7zGGWZzLr7gFq1iHjtP5qmUyo+QijmAV28dsT0j+0ZPvsQIB1H3jZtylStayrTP+CYgP3a8lqr7I\nfI5T14/jdeRezxjZhIxHl4Ok1SyiiqGuW77SNkkJtmce9/IwqxhgfhrbCvqZPtzLdOQPoJH3OJIe\nfMUwHgXgHh5Axse38B4H8R6jsGaC2tGXFZgvOcYwmHeZyN+oYH3M9oTkF/fyGJO4gFc4LMZaheS3\nte6tcwmAhR7rkTsWKtdHtv3qqbyJvFrpbp2IHsIV4VImcgBPAiVcbps630QJt/ECzUR4lsuQ6QPq\ngHo20plV9EH6JxvYhwd5lNvZ0JbdIRrpolG+zSa25QOG8SjNREy/d625ryZzHZV4y7C9nK6HdX2P\n4S+hmHdAVtGP23mAzNuf2Xbv+ke4hoin+8BCGFBl/meF2yWGXmswG9hL2SWDWzSPPZm/npioUFum\n3hcD5RzII4y0xaE/wU0sZSRQyyHczxz+gEwXoJJiYe6rWGtTMVDM7rzEeK5s29dDPMjX9EcKdQGC\nCBfya75nR/7F9UjxrseaOKX+ulkcbt9lEVAOVFDFSJdtQ/KNRRzBs0zFuo9Uqgs1BqPfS9EVwdyr\nhXklUWMzHejtvxzDMBwf2wNgoSuS6VhUMqts9aj6RJ9soDoTlbRIdZCRNjF/lTO5mf8BMIkLkAJe\nw2IOYCQPEz3gpH4sKkeMNRitiznAb7gAa9ZqAwbNlLGR79iNXZmLdV10/2esx0f1XTrRANSwisFt\n5xSS3wzlOar4JYfxD6o4gsnczNZ8gdXxp0vCgmSde9eVPBF0Rfs0s5kjVQOlbtdB37feOZcghbyM\nYbzJ8VTTjRqK6cxpnMIijuAr9uQ1JtJIHdXMB6CUtUAjq+nOaxyB5RKxZ3dUse3Rt8rf+Q8b6cmN\n/Bsrt3sTsJmFHMbX7MQwnka0dRDKReYV+7qqU2niXu7iIW7nNU7kNab42GdIrrI3jwOwPQv4ln2B\nMuAXWE+k+Yx3bQzYlfAzABhrH9mciJSugVKnL1X6zM/mTB7mNvrwMY+YE3QvYTTXMxN
BBQZrgA00\nUAw08SRVZvSuyptSj2WRRMxlugupjMsYzbVmSp5WClhDN+qpQAp5dA73mZwIlPAax2K0VWNKpLOz\nf49WgZCv2RUw2D+gaQ1C0oOMuFoHdKeIWprpjLx/N5PaCUVBinDx/rQQEB+67v9NdS6GbPnXk/Gr\nu3VGap8R833EfFVyDidwG9ORbgnlV1R+8Hpt+y3ZkVdYys64F6TWv49OVDGWat4z97uZA/gHr3MU\nltVtv/n189ZdUbF+JGobp+9fXY8i5HdZRjERpjpnngjpANzH4wgEJ3Ii03gaOWNUuf4gOR+6imAL\nCva2uPvQA2Khq9wkXvKYJLJvNUU+k8Kezvwv0YOfQ5nPa5yKNcNSWcwGMvrEwBo8XU0J64ntnlJP\nGS10Za25rB4VnfI6R2rbO4l0vDhhJ+x1Uu0FttV1jDCc2RzKrT72HZJvTGEygGlorKIrG1lPaXYb\nlRb8GdwB8qGnG6eizekm1T29+nK3YQrVyFwWXTBoZhEHYcV267NF1TbKmm5gEXsgBbpFW0/3dYMS\n2PVU8hJn0Jm1WJa/PboAh890MW/G/WnACXu5Pn1fxexj+lNDOiYfM5b/ciMAO/EiO/EO6xluWyvZ\nIIug4G9wNiCCnkm3j/KxZzIkya+ox2qbAXzNfVxDhBo68SMfcSTSelZfvl3Im20vFfqlwr/Ufg1t\nfXmNJnMdZdQwhrspbSspq5+T2qee+je6Hmh0rLlTrhY39I5B7mcqE5gZZl3skNTTiS8ZxlNcylfs\nBsAS9mYJuyGLQKSKbM9t0cl5Cz2TFrQuOOkmVXlolCg20kQTmynHaHOr6MfRhVbhdL6tLsvlvh7n\nT8zlMJ7iLEqoAbbQtrN/V17CEnW8XH/9OA1M4xG+oj/X8Tyb6Q7AJnqwlv7cw5MejxsSdKpZ5Lj8\nX1wH1FJHkXnXlBIdYtuxCYgPHVIT4ZIoujCl09euLE4vl73VpR3qGikXSQG7MY9GDD5jR9oPBql9\neXc5KGsAACAASURBVBFZdR30iUv1QCkD+IivGYWsJqhb9pB8HL46bqzIJHU9moE6mojwDBdzHBez\niAnM4xQCaZ+EJEgt1bwGFHAC59NIOZvohHwShf35Fx9zKDJ8sQvRKZiDFEOeDP6N2wD9Alpd3mca\n5WtPd5WieOfodny902sCComwgc8YhnPFo0TOxT7Y2cjX7EmE5chB1hbbuk6dSCLEuoF1334r0MiX\nDOF+7uA9DkPGwjdwt5k/JiTXqUN+p7V8xV48w+m8wMmolLlvMJFvGEQFP1LFOCr5OUXHDdLsUP8E\nJGwRrCgMRYAeHoD0xbXHOk+np4UiLAtazQodhLScpahJUjUgq6oGRWz7TOeNrz8h2NuipzkQyEdu\nFcop0xN0YTPnheXqcpLFHMF8JvETPbDmOOgRWdKIUXrRi9VspjM1FCA7AaeUzPZB+lgESdDdfr85\nMfU/6I9JToN8qSCWdWt3Y+jfoawXKgWtAeiqfeYm5vZkWPGSY6ltmpCdhSrinO6b3u2a6OMECitl\ngaCV/fhvKOY5zBCeYyP9iR5EtwbpJVbE2moGUUMvpGswVU+KQSAxYyxAgm4nSKFDdpSbIVX5Y5Q/\n3C3joEIX9GKk77AS+A4p7mVYYu4U3dIS52Vf38+r1fZKFrcb2u0HKzCIMCqMT895LmEkg1mI9ftS\n94J9sF9l7vyB9N5zuUPABF3/UnKlp9Xj25PphNTAon0fTgOaRezNG8BGpJjXI8O21hIdi55J7ILu\n1JH4xe0pA9v+KoES+vJ5AscICSI/sI/5Tlnq9ntaRWetJ3qQPpn7Piiak/g5BEzQ7QTlAntBxXAr\ncU/0S3FylyhRt/b5HTtgxZOrPOMqb0rQ0H+USuC9XB+9Oo0dtX0xv+IOqjiYRnol3dKQ7PAwD/AQ\nD7b934MPiH5q1LE/HdsnuOU6eSPoTnU1cxFlbSf
jb7bfyLqod+EHyrEK5aZ6Rmq84s3J+tH1CUxe\n2m2Pm1d0Avoym4sAOJPxSbQpJJt8zc58zSCu4mWWMJI19MS6TxSqg082z7kTQXbxeidooSQ23GKx\ncw0lfiqKx885qcyHAnk9GpBuFhV1k8yN6GTh+EEXddXGROYSqB+lW3I2lVtG/0wAzZzCiWzFpwkc\nMyRYNAK1GDTzXy5ADsLHE2/7//Ge5mIRFMs+uXbkgFrmqpXuhPIl+40NV4+euv9Y5S/3g/3JIZUF\nt1Ubkxko1nPB2NF/vAZS4A3+wZ18ZSuv58RCJvKumdApJDi8wuks4LcYbMZ6YqvX1lD3vFMoYqoI\nksYkd14Bt9AhPRkYg4AaPFRx3l5Q4qssWSXI8aziTN+wql1u8eTxcJtRq+dHL2QSN7Il3zCXs+nP\nQgq1zuka3md3nuZwruU/3MRy9qSRMr5kP47lrATaFJJqfmAHXud45FNnHdElCuMZK3bhyweXSfK/\n0wBNLNKxT+LJR0F3wu/kpVy5LslMyrKn0pUTSiIUE6GUvZhl5mYvZwqn0ceMdLmRF6ihM0XU0dyW\nO76IKvZPoi0hqaSaj1GzQaWgNxD9pAfOM5GdQnxjzV2IJ/ZBmUzkdTwg8PnQ46F8qPmOurG8Wrbx\nfM9BoQl/TyI6dmtdPtU00UgTBq8zGlUf9T5uo5RWLuYw5DVpolkLZythc3KnEZJSqtgVgKep5gd2\n4Ce6IePK1VNnrMluXpZ1PHLAh94R8euLtvvYg4iKxEk2gRdYFpcKEVWx9xsZyvMAnMMErJBOuV4z\ngg1aaOMiJibYlpBU8muqOINjOJVzgFIqqSf6qU43Vpx+E6kIDMg2qYnWCajLxZ7XBZIr6ZbrOF2P\nWATdYk/GBaNb+Wo/6t4oYEt+5CcGIWfS1pmfq0yeRUAJo3iItzmFGrpQ1a4wQki2+JCJLGdv9uBp\nVtOPOUyi/eC900zkWIZCPKHMNXcLxHK5BFTQ3R7PO4LbJRb55GP320npOIk6WMJuT2qm++GLUcIu\nty1hC1ZxFhMSbEtIOriRl6nhZ6R/XR8kdRL0WKKcC4LuN/V0TiTn0nE7uaA8HmULvwU59AGloLlj\n1ESpRNplT86l0Gej6tdJTyscPWu1gGY20DeBNoSkg1ozydyfGc2fOJvopzAnktGEoORuSd1vM8Am\nnFPBi3yZaJQsarKR12uhHlftESPZRllbibhg9HvBXoxb/d+M9bSnh7+qcNECrmBEAscOSRcPcR8/\n0Rv5/WxA5itS32GqrekgGDmpNVKD9Ou24XaiQfgSgoCaIOTneqjp9slUF0oHifxQ7Yma7ANjeq3U\naEtsMB8iM1N2SuC4Ielkd2YgB7nXI8dA9LGzApyNPDdiWeBBiVvv8IIelC8iKHjNh6KjYnOD8sgJ\niYm616RMusspwk/sShX7czKnJnDMEC+sZQBvcYLv7dYzkAF8iiVNKp2EeiK1C3qihkkQ3LepN6oC\n7HIJ8Y5KWpRIrLceyw7ZjY7RZ4J6QQm1arN9+/bzF67ksLa1t3YpRBySPHfyJFDEvjzsa7ux3ABA\nNW9jjYUUcjrnczc3I10wXp4w4xUeDwKpN049W+hCiAIhxIdCiJnm/92EEHOEEJ8LIWYLISq1dacK\nIZYJIZYKIQ5JvHluFz5IlmWQcMpG5xW9yEU23TGJuJF0Yt8bV/EcH3Go30aFeORr9uAa3kXVBK2J\nqqTljSe4BTmHIAJUMJlbeZSbgW7mGl6MjnhPbNkmPW3w43I5B1ii/X8J8LJhGNsD84CpAEKInYBJ\nwI7AWOAuIUSCZl+sLyUovWwQSbbYRqwkWZnAT4dtj+CJF9Ej6MKqhFoVEpvl7MND3EkLG5EJtuq5\nkZd972cS5zGCR4EKRvAsrZSxgQiygIsczI5PrFmmQdCO9LiOPQm6EGIr4DDgAW3xBGC6+X46tE27\nGw88bhhGs2E
YK4BlwLDEmpcLj01BRS/dlew+sjGI6senbm+b049F2RStzORSHuVvVPORrxbN4Xyu\nYqGvbToS2/A22/A60rqWYztHcWnMbT5iHKvZrt3y0dxDJ1rYhvd5gkuxirkka50HgfSNA3q10G8B\nLiT6l9PLMIzVAIZhrAJ6msv7Ad9q6600lyVIKOqJo1wwyV4nfRA1k+4ur6JuPz/7PaMKakt+pgvL\nGM5F7OerNd+wKwYNVPOur+3ymRt4I+r/5QzBmi9Rzwz+xPU8xfW8yAz+qm23gOXsxdNcSi2dAVjJ\nTlzD69zAPDbTnQs4mEe4EynmfiahJRr5kinSZxzFFXQhxOHAasMwFhG7e0ygla9qrxUu68T6AvQJ\nIyHupNrCdsqAly68+tTt66j/C4ieGVqEmqVaykZfLVnJL1BZAWva/Lkdl78yl3oMqpnHTwygmtlY\nVbRUHp16GiimgQI+YT/q6ALAeRzMI9wEbGIDvalmLg9wCy00UG+mrqjmQy5mFDuxiB7UAtsjC6PH\nkq1Y90oQtCIR63w5MFd7uePFQh8BjBdCLAceAw4WQjwCrBJC9AIQQvQGfjTXXwlsrW2/lbnMgQO1\n10APTXEitNK9oackTRWZ8rUnYlXJbYbwLj1opBOt7MUryNjzYqDQczzPUg42oy7qkNewjmeYygdM\nYBHjqOYDnufKBNqY2xzGNFQu87u4A9iE9J0rN53q9JsppoEjuIEysxP9lFEMZgHQwLOcioxekUnU\ntuF9iqgHNnEDT7CZLqxhK+BnYBBQbrbA6X6OJdrZts4TnbG9DTBKe7kTV9ANw7jUMIz+hmFsA0wG\n5hmGcTzwHHCSudqJwLPm+5nAZCFEsRBiEDAY0vmMGoReN5dIhQvGju5rTxfxOiPnYy9mBDvwNkN5\nicO5lclcgRSEYtc75xmujfq/L0vZlgVYk7nq+YKdeZ4zeJbzgHqG8oyfk8kLurEK67tXfnM78ppN\n4G98z/bcyQOsYhtmcjpfsCOyk2zEqsLVyHL2oJQaZGdRzzf0AL4H1iDLL9Y5HEcRZAMv/XNokplY\ndD0wRgjxObLbuB7AMIwlwBPIiJhZwBlG0hnA4vWs2e55cw0lTKlGz6WSju8kkTY3sh8P8CYTuZd/\nsT2vUcUwzmaCo4W+jn4sZgwNVLQtW8MgBvAulmip82xq+/t/jEygbcHndjMdsROPcAfyaUe3PJ2K\niTczh2P5gL1YSyfu5XpkDvt6bVv9fmngVp7lBC5E3qvKfdOIFHW3+yAXZoamF1+zUAzDeA14zXy/\nDhjtst40YFrSrbP26GEdez6PkPg0kXiZOC+kowCH3zYbXM90oIZVdGcxY9mRV+iujdu/wpm8zh/M\nfW8CamiihCaKERj8i8uwoiwU/9/emUdZUd15/PPrDZpFEAFDVBbjgqBH0AhxSTQawTUmzkTiMGPE\naDLHOdEzMw6CJunTmgQxGo86Go+7MWrUxIW4IQ4uOC6AihIUVFQgCggjotDQTXff+ePW7Xe7ut5W\nr+rVfY/6nNOnX1e/V/WrelXfuvW7v8WMhepppJWOHOdeM0sYxWLOqqDM1GaWoEU3+4h3Bt8G4HOG\ncC3X0bPvq2EHm+lL916h/u+vzvt8b6CR8ziTq7gP+Mxar5lPCbIpn+886ZF7eQadVZQpmrpewmGy\nKePMELWLg5lU7lJoJzijNFtnq0ytl0eYyQqO4wwu6vrvMJahxcb4yDu5uivDcTf0475dj7sGc77t\nz0LPjdOTdezL/fwGaGEEiwrfvTLzBUPYQSO7sbprWRPjaGY+0EYz89mbtziUxxhjxZWvY39WMo5n\nmIq+EUJhLd9sTME4XXBuXxYxjhfpyyagL1rQITMqt78Hm1zbTFrMy/d04HAtFz+FHJTU9RKOuP3f\nBiOsHZR+keVzv9i1QAAa6U0nU/g543iEuZagj+Y5hnRFWZlH/Da0yK+mu/vA2
K7dVis4mLmcz7KA\nyaonuYjPGQS0sIqDed+q7LiW/bmJe7v+NtEf5WAu0/mYAwFopZFrmMMcLu32ntu5AV2LXM8ZfMB4\ndtCr23s2MYyXORndNi7XPIqdhRzUsKK9a9l7jOFBZnIVj6EnQf3rCXqdLys0SUEvbyReBY3QOynM\npRJUdjclP+bEK8cpYfym5jsN+30F1X4xI2gj6NoXPpRNnMN5dFLLlTwEDGYyV9HM8zRxNFOYznZ2\n4VZmk7tBsb2NWqCVVziBr3I4Y30hZaN5htVewNeHjGI4+7IP/wvAw/yCDQzjbxzHy/wTnzCWRjaz\njcFAHSN4nbOZFvK4dOdNTuUpLmI7/RjKO3zKSJZzKBcyhSuYC2xhmJVk1Uof1jAC7WYCqONCzmAg\n67mF2zmPcwB4kX/mAn7CLG6l52DKjNSDxKzT+m3qD9m5DlsZwDq2MhwdleSvb2+LeT7BTnqQV96b\niaMdi7JRaA1w11uwuYypbFfu42dSusNs1+8yqvN+6oE9qKE3g1jFRgahw+PqqaEfnTQAX3Is97Ab\nazmAZ/mUkdzEVXSf/MxGpu3dSNayiCn8nEO5nif5goEo1tNdfPqQOb5mpGpuPvVkui010Egb0yOq\n1f4SP2IeF5B56vBnAGtX1dlczJ1ch65DbvbbzE3VoGP5G/kPpvA7/kCmobMddZLvmGXDfId6/7Uv\nfRQ6P8V0LjLYT2cudyQK6q4UBRXXsSgbhd58do4Z7XgotZlzKdsNWxzM/xlzEQ3iLC6kkzqGsoxM\nrHMLk7gaLSItzOcfeZBL2cYAbuJOMqN+s17jYjEx92b92v2ygwYGsgpYxa/4Mx1sYDiLyUTFGBfO\nF2hfs6l1YtZnuyIAJDIxBziCu9if5+h+kzKRTsaGNu7kcnQUiWm6bUJcjbuphdO5jEY28y3uwMSg\nazqt9YXBfE4f3714Dx2iWEv377Yj4DPZSFLMk3H1VNgIHQovr+pad55KJO7J0nwU+x3aT3C10BV6\nWMsEXmQhR9DTDaAzSY/hr0BfWqhnM7uygtHkFwwzwWvsLMQlaN5vRuPG5aDLE3yLB4B6vs1/F7Cu\n4ljAj5nPVDI3Gjt81dyosrlK9Oh5As+zkMno0bk9UekXr1yjU/u42Zhj0gj0BzZ4dpinCns7hYx+\nkxT0OF09FdckOhfFiEwFTRE4SynNnKOiGBeQba99rgiZ+RX//pjSAEOA9WQSXYohaL32//znovnb\nlCOoBxo4id9zGPcXue3CaeYFtPvCPDVA98iUQhO4gp7kwrgY/MetNzDQ+72KjIjb2ypkO0mWgY67\nmF3VuFyguDtf6nopnaD2buWmmO5Kdod4e1Kz0/ptR62Y920jk4Xo39+gZBl/w247qcqPKZJmY7sY\nQAt6L2piPtYzmcxJ3OCzzWzTtt2IqP9HkUn08e97GBeDPxltO7AO7Tu3XT5BLrV8602K5LZd5UNY\ncwFX4H3LKcyIKGkXTDGJSiYByXwuWwx8qY/ltgia45PNzqCkKH3xN3lJOnHTwDbG8jxP8K++/5hR\nZa448qAJz6hdC2Z9QbYUGuOe1ACk2Bj86KlQpStmJJBWZIyOpEPADIUWBbPjou2JTfPTmecnzOSs\nfYMIerLw26zFfT4/LXJb4dhGf37Lk2Qia2wXCgQLknnC8O9L1OJlBNFsK1vj73zrSCruPOkEpooV\n9GJPpKRdBtVEkhNNfvy+1WLfk0/QjSAXmwgV5F6xJ/NsW/RTwyv8gIVMKWIb4WjkS07nMvScga46\nqckWbpjNjRSFj9qsO1PzpfS2k0kNOsJWUoyWKne52GRLC08pHt24153xgLlhZ/t+bbEqpgm1/Xl7\n5FfIeeRPejKfNWnuZjJUR7ecz1QauhJ54uUg5rIXb/IIM1nFV8ktokEiFUY0ww4E4orljho3Bo2u\nXJEhCHNSJX8HrR6iSN+PGjtZJhthm2gHb
ScffhGzR+fCJP5EE6cAAxnIevrweYl2Fc5A1rGZ3dGj\ndOh5TLIdx0Kuu2yTz8VSrPskqadHV1yRFS3oYS7KJHpjVjMuirrxw+ZLBy+1LnyhCVhBQqmXr2QM\nf+M7NDGxBDuKYyOjuItb+DsHcSLXcgq3ef+xj0U2Ic0nXHZyUakj1mKjq5ISVXfEHCreBxGmZG4H\nFb/bTmEKLYVxZcSJHZmTDXODLyXO3k7fDyKoMqQW+ZWMYSVH8DVepZEveYJLOcnXXCN6OvmI0dzG\nNfRiI63Uk+nZacQpSKRy3fyivrFXytO3a4OZih6hQ/gD6tZdtTpwabLUJl80jO0aCCsKYfy8OtX/\nH/g1HdRzI/eziO+F3H4xW21EX/Y7PDH32x10DHK5PqLsgBW2DkwS517SVRyDqXBBh/AXYSrq0ROF\nfzoOComGgZ5V/YrdRi7RC0LxFy7gau5mA0OAHSzijBDbLpxhLOc0foPQL8s7gq6LINeHncAVBWFj\nuJMQ1eTjzbNRBYJeijC7KD6VTtiMwXJQqGsgrI89zEg9Ext/IM9wGA8U+fniGcfjTOU/0dUf89XL\nCbq+7EJipWIXCwtDEsLqpphDVQh6KaSTpPHg4mSpwUy2FdrWsFjxCiPqim/wKGN4tsjPhWdHVwz6\nLsBI9uTDgj8ZHXaji7CfLzduP9lXiaCXcpKloh4PcTWijoJczReCKLY0bC5RNyUITP1zgHomcwMH\nlEHQ/8DveYmz2Y1PgP5M53Rge5Z+Sf79jfL7jCIxqdyDBrfFHKpG0EvF3UeoysdVUYfiR4jZUuCD\n8Iu6+Yyp42JKxeoonGYWFGFHcVzOa7zLUTSzgA85kI3sxRA+4JccztU8BnzG23wtwH6bqL7HYgqt\n5aLc55WrT5zdqSJBL/Ukcf/uW7m4OlkKxY/WzWeMKyZf2YGguuK9+T63sgtb0TXbGxjg9c/8nGE0\ns4ROathOX56PoMZLX9ZyH01keoTW8Dznchnz6WATOmwxV1u3KMQsysYp5b5W3YxoCaKKBL0ST5Sd\niSTrU+cjjKhD91Zu2bCLXwmjWQb0o4Vd+D7XMZZXGMf/YEbu1/I4sJXLeYHZPMFzTOMLdi/SLnib\n42nmTdYwjsG8jxbyNqCFoSzlaG5lIg+RidPPJuCl+rnN+qKcRC3neeRC+ejCqSJBh2gOfCrq8eGy\nqEN48TI+9nyFpTpYznhG8yJz+Rl30cxE7qeNQezBu7zKmXyd+zHCC62MZS67sB6A2SxgCafltWY9\n+/EgvwY2cx+z+JB90XXGdVOLuZzDG5zK/zGCWoaguwOZ/fcLZqkj0yijnsLGqYelUurIZKjAjkX5\niCpjMc0mjRfXMkttcnUfKoSgfTPrtDsUQT+2soUhQCMH8hID+ZQXOQUjqg200kEf+tHCZoYAfWli\nfF4L7uAmVrM3+kbTil+Y6minnToaaWcbuwJr6Vl3pZQbcByx2uX0m7ss5lXVsSgfUX0J6Ug9Xlw+\nvqX6TIPmDMzI14jmNqCNLTRgBPdUZnEID2G7R9pQdLCDzfQCtjGOOQVZsJpDrO1CZjJSb7vd+70N\nOJkrCR5JhxHzUuPKc623XFSOz9xPFQ5DOwjuTBOGtORufJgLv5h+oeXELncbhnZ69hI1tYdMfZfM\n3wPYwDzOZTGT0WJvd9pqQ9cvVyzheBrYxolckXPrw1nKQFazlXpWsh8ZkfK7VVp5nKn0FOEwghxX\nBmU5/dguj8zzU4Uj9KhxeSRZ6ZTbJ1ospQpJ0P4ZsTA+d+23H84iFvM9YKv1HrvZBpib3wgW59zq\nY/yCafyYtziJlYzvtp3uDTvs+Hq/iIXJeI1LdFMxL5QqHX4GVbgrdX1VeqicYAc9R7OuYEadYX3q\nxs1Sb/1tzicjgoqljEf7sW3MeKse6M3POIt6WunPRm7kEQawjqm+3qAL+QGvMYlJ/NZb0ub9NhE5\nNraPv
JQwxThvyuXym1e+mIObV1BE6DCx6EhFPV5KFc44iaLZuP/8sc9PI4h2A+lauvc23ZvruYVa\n6pnOqWxgEOcwle30o57t1NLOFbxMKx3AVmbxFHq0X4eOmPGPcrOJeTHEXaSqXH7zyvWZ+6lil0sc\nowaX3QPVgHEBuBjaWOoIzu9+CRJCW2TtRtjtwNtACx0Is5gLtDCbh5nNU2xhMACt1JJJEtoEbCbj\nwrGPqT/m3r9fhYh0nC4WCH6iiGs7lRNnno8qFnSI54tKRT1+XD3GUfjUg8oB4FvmF9tMRUb4DC3W\n2gd/LDfTwgAALuGbjOEFMn7yNnqKtd+GoGicfMRd/6hcIlsdbhabKvchdBLPI3zqfokfnaLungum\nVNeQPworyJXjnwMyUSlG5Nq6tj+fc6mlhmGs4B6uZxUHkfHF26IrBE/SFiuc5Shml4p5WKp8hA7x\nfWmuZz1WA65edFE0mTYE1XuBnpOBdkli85lWYCvzmMYGRrGK0Wg3S6m+/lz/i/ucL8ckqMs1+0tj\nJxD0OEcUaend+HG1DG+p4ub3aReK/7161L2O/ciM6u3YfnuSNZcNuShXeGm5xLx6KUjQReQjEXlT\nRN4QkYXesl1F5GkRWSEic0VkgPX+mSLynoi8IyKT4jK+cOK8G6eiXh6i7F0ZFaW4BoJCCAvBPtcy\nAr6WAzmcBzich73lvci4drJFexVqfzlcIKmYR0GhI/RO4Bil1Hil1ARv2QzgGaXU/sB8YCaAiIwB\nzgAOAE4EbhSRLGfUR6ENL46ohOCDLMujqvEcByuTNqBIctnr4nHuAN6LaF3+8zRoX/2Dh178hJ/y\nGsfzKpPZxCjOYBbDWYNuMWdqrgvd282tCFh3of1Eo6YQMS/lPE4qgS2bXsRHoYJuzgab04C7vNd3\nQVfL8u8Cf1JKtSulPkKf7RMI5KPCLS2ZKEYA+dp0uSY2kMRJVRr57DWJOq6ggHdDftYvlsUMPEYA\n/RnLAv7I72ijjU5aWc7XeYxL2Mje6PZyfYA9MXXXcwu6/2ZRjqfPQr/LsOdxkg2dC23rFx2FCroC\n5onIIhE511u2u1JqPYBSah0w1Fu+B7DG+uzH3jIHKIdrJJ0sLQ+uxauHsSVIwP2NJgy2D7wG2EAT\nx3IqVzKKBZjSuLCJFrbQwg4GsZE6BtPEUfSnnfP4JdBIcBZ1ucW8HDfmOIqEuU2hsXdHKqXWisgQ\n4GkRWUG4ANaE6aA84YYmrM3FolPVhCmA5cKxDvud+8MWTYiifz325GYNk7mFNzgZoZ1lTKR7iVzd\nt/QzBgEbuZnb+JIB3ML16Bj2WnqO5fw3krgv57ifZl18Wo6fouuhi0gTsAU4F+1XXy8iXwGeVUod\nICIzAKWUmu29/ymgSSn1qm89FXADSElJSXGPbPXQ8wq6iPQBapRSW0SkL/A00AwcB3ymlJotIhcD\nuyqlZniTovcAE9GulnnAviqpThopKSkpOwmF+B92Bx72RtR1wD1KqadFZDHwgIicA6xCR7aglHpb\nRB5AF5/YAZyfinlKSkpK/CTWgi4lJSUlJVoSyRQVkRNEZLmIvOu5a5xARG4TkfUi8pa1zNkEKhHZ\nU0Tmi8gyEVkqIhdUgM29RORVL0ltqTcn47TNng01IvK6iMypEHsrLhlQRAaIyIOeDctEZKKrNovI\nft6xfd37vVlELkjcXqVUWX/QN5H30YG09cASYHS57chi21HAOOAta9lsYLr3+mLgCu/1GOANtBtq\npLdPUmZ7vwKM8173QwcXj3bZZs+OPt7vWuAVdJ6C6zb/O/BHYI7r54VnxwfoeS17mes23wlM817X\nAQNct9mzpQb4BNgraXuT2PlvAE9af88ALk7ii8hi3wi6C/pydMy9EdDlQXYDTwITE7b9EeA7lWIz\nOutlMXCYyzajM3PmAcdYgu6svd52PwR28y1z1mZ0FtTKgOXO2mxtexK
wwAV7k3C5+BOP/o4ziUeB\nDFUVkEAlIiPRTxev4HjSl+e+eANYB8xTSi3CbZuvAf6L7sHZLtsLlZcMOArYKCJ3eG6Mm70IO5dt\nNkwB7vVeJ2rvTlBtMXKcm0UWkX7An4ELlVJbcDzpSynVqZQajx75ThCRsThqs4icDKxXSi0hd9aQ\nE/ZaHKmUOgQ4Cfg3Efkmjh5jjzrgEOAGz+6t6FGtyzYjIvXocicPeosStTcJQf8YGG79vae3oXh8\nrQAAAbBJREFUzFXWi8juAF4C1afe8o/RPjNDIvshInVoMb9bKfWot9hpmw1KqS+A54ATcNfmI4Hv\nisgHwH3AsSJyN7DOUXsBUEqt9X5vQLviJuDuMQb9pL5GKbXY+/svaIF32WbQBQhfU0pt9P5O1N4k\nBH0RsI+IjBCRBuCHwJwE7MiGv97oHOBs7/WPgEet5T8UkQYRGQXsAywsl5EWtwNvK6WutZY5a7OI\nDDYz/yLSCBwPvOOqzUqpS5RSw5VSe6PP1flKqX8B/uqivaCTAb2nNkQnA04CluLoMQbw3BRrRGQ/\nb9FxwDIcttnjTPSN3pCsvQlNIpyAjsh4D5iRhA1Z7LoXPVvdCqwGpgG7As949j4NDLTePxM9W/0O\nMCkBe49EF+FYgp5Bf907toMctvkgz84lwFvApd5yZ2227DiazKSos/ai/dHmnFhqrjGXbfZsOBg9\n4FsCPISOcnHWZvSk/gagv7UsUXvTxKKUlJSUKiGdFE1JSUmpElJBT0lJSakSUkFPSUlJqRJSQU9J\nSUmpElJBT0lJSakSUkFPSUlJqRJSQU9JSUmpElJBT0lJSakS/h9safitAAVtfQAAAABJRU5ErkJg\ngg==\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "@jit\n", - "def mandel(x, y, max_iters):\n", - " \"\"\"\n", - " Given the real and imaginary parts of a complex number,\n", - " determine if it is a candidate for membership in the Mandelbrot\n", - " set given a fixed number of iterations.\n", - " \"\"\"\n", - " i = 0\n", - " c = complex(x, y)\n", - " z = 0.0j\n", - " for i in range(max_iters):\n", - " z = z*z + c\n", - " if (z.real*z.real + z.imag*z.imag) >= 4:\n", - " return i\n", - "\n", - " return 255\n", - "\n", - "@jit\n", - "def create_fractal(min_x, max_x, min_y, max_y, image, iters):\n", - " height = image.shape[0]\n", - " width = image.shape[1]\n", - "\n", - " pixel_size_x = (max_x - min_x) / width\n", - " pixel_size_y = (max_y - min_y) / height\n", - " for x in range(width):\n", - " real = min_x + x * pixel_size_x\n", - " for y in range(height):\n", - " imag = min_y + y * pixel_size_y\n", - " color = mandel(real, imag, iters)\n", - " image[y, x] = color\n", - "\n", - " return image\n", - "\n", - "image = np.zeros((500, 750), 
dtype=np.uint8)\n", - "imshow(create_fractal(-2.0, 1.0, -1.0, 1.0, image, 20))\n", - "jet()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "100 loops, best of 3: 12.5 ms per loop\n" - ] - } - ], - "source": [ - "%timeit create_fractal(-2.0, 1.0, -1.0, 1.0, image, 20)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10 loops, best of 3: 175 ms per loop\n" - ] - } - ], - "source": [ - "%timeit create_fractal.py_func(-2.0, 1.0, -1.0, 1.0, image, 20)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Basic complex support is available as well. Some functions are still being implemented, however." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "((-12-16j), (-12-16j))" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "@jit\n", - "def complex_support(real, imag):\n", - " c = complex(real, imag)\n", - " return (c ** 2).conjugate()\n", - "\n", - "c = 2.0 + 4.0j\n", - "complex_support(c.real, c.imag), (c**2).conjugate()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can even create a function that takes a structured array as input." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[ 2.23606798 5. 
]\n", - "[(array(Record([('x', ' 1 - -@numba.njit(parallel=run_parallel) -def calc_pi(n): - x = 2*np.random.ranf(n)-1 - y = 2*np.random.ranf(n)-1 - return 4*np.sum(x**2+y**2<1)/n - -def main(): - parser = argparse.ArgumentParser(description='Calculate Pi.') - parser.add_argument('--points', dest='points', type=int, default=20000000) - args = parser.parse_args() - points = args.points - np.random.seed(0) - - t1 = time.time() - pi = calc_pi(points) - selftimed = time.time()-t1 - print("SELFTIMED ", selftimed) - print("result: ", pi) - -if __name__ == '__main__': - main() diff --git a/numba/examples/ra24.py b/numba/examples/ra24.py deleted file mode 100755 index 687ab7002..000000000 --- a/numba/examples/ra24.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python -from numba import jit -import numpy as np -import math -import time - - -@jit -def ra_numba(doy, lat): - ra = np.zeros_like(lat) - Gsc = 0.0820 - - pi = math.pi - - dr = 1 + 0.033 * math.cos( 2 * pi / 365 * doy) - decl = 0.409 * math.sin( 2 * pi / 365 * doy - 1.39 ) - tan_decl = math.tan(decl) - cos_decl = math.cos(decl) - sin_decl = math.sin(decl) - - for idx, latval in np.ndenumerate(lat): - ws = math.acos(-math.tan(latval) * tan_decl) - ra[idx] = 24 * 60 / pi * Gsc * dr * ( ws * math.sin(latval) * sin_decl + math.cos(latval) * cos_decl * math.sin(ws)) * 11.6 - - return ra - - -def ra_numpy(doy, lat): - Gsc = 0.0820 - - pi = math.pi - - dr = 1 + 0.033 * np.cos( 2 * pi / 365 * doy) - decl = 0.409 * np.sin( 2 * pi / 365 * doy - 1.39 ) - ws = np.arccos(-np.tan(lat) * np.tan(decl)) - - ra = 24 * 60 / pi * Gsc * dr * ( ws * np.sin(lat) * np.sin(decl) + np.cos(lat) * np.cos(decl) * np.sin(ws)) * 11.6 - - return ra - - -ra_python = ra_numba.py_func - -doy = 120 # day of year - -py = [] -nump = [] -numb = [] - -for dim in [50, 100, 400, 1600]: - lat = np.deg2rad(np.ones((dim,dim), dtype=np.float32) * 45.) 
# array of 45 degrees latitude converted to rad - - # JIT warmup - ra_numba(doy, lat) - - tic = time.clock() - ra_nb = ra_numba(doy, lat) - numb.append(time.clock() - tic) - - tic = time.clock() - ra_np = ra_numpy(doy, lat) - nump.append(time.clock() - tic) - - tic = time.clock() - ra_py = ra_python(doy, lat) - py.append(time.clock() - tic) - - -print("pure Python times:", py) -print("Numpy times:", nump) -print("Numba times:", numb) diff --git a/numba/examples/stack.py b/numba/examples/stack.py deleted file mode 100755 index 6b44e4e8d..000000000 --- a/numba/examples/stack.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" -This example demonstrates jitclasses and deferred type. -This is an extension to the simpler singly-linked-list example in -``linkedlist.py``. -Here, we make a better interface in the Stack class that encapsuate the -underlying linked-list. -""" - -from __future__ import print_function, absolute_import -from collections import OrderedDict -from numba import njit -from numba import jitclass -from numba import deferred_type, intp, optional -from numba.runtime import rtsys - - -linkednode_spec = OrderedDict() -linkednode_type = deferred_type() -linkednode_spec['data'] = data_type = deferred_type() -linkednode_spec['next'] = optional(linkednode_type) - - -@jitclass(linkednode_spec) -class LinkedNode(object): - def __init__(self, data): - self.data = data - self.next = None - - -linkednode_type.define(LinkedNode.class_type.instance_type) - -stack_spec = OrderedDict() -stack_spec['head'] = optional(linkednode_type) -stack_spec['size'] = intp - - -@jitclass(stack_spec) -class Stack(object): - def __init__(self): - self.head = None - self.size = 0 - - def push(self, data): - new = LinkedNode(data) - new.next = self.head - self.head = new - self.size += 1 - - def pop(self): - old = self.head - if old is None: - raise ValueError("empty") - else: - self.head = old.next - self.size -= 1 - return old.data - - 
-data_type.define(intp) - - -@njit -def pushpop(size): - """ - Creates a list of decending numbers from size-1 to 0. - """ - stack = Stack() - - for i in range(size): - stack.push(i) - - out = [] - while stack.size > 0: - out.append(stack.pop()) - - return out - - -def test_pushpop(size): - """ - Test basic push pop operation on a Stack object - """ - result = pushpop(size) - print("== Result ==") - print(result) - assert result == list(reversed(range(size))) - - -def test_exception(): - """ - Test exception raised from a jit method - """ - stack = Stack() - stack.push(1) - assert 1 == stack.pop() - try: - # Unfortunately, numba will leak when an exception is thrown. - stack.pop() - except ValueError as e: - assert 'empty' == str(e) - - -def runme(): - size = 24 - test_pushpop(size) - test_exception() - - -if __name__ == '__main__': - runme() - print("== Print memory allocation information == ") - print(rtsys.get_allocation_stats()) diff --git a/numba/examples/structures.py b/numba/examples/structures.py deleted file mode 100755 index fd6f19c85..000000000 --- a/numba/examples/structures.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function, division, absolute_import - -import numpy as np - -from numba import jit - - -record_type = np.dtype([('x', np.double), ('y', np.double)]) -a = np.array([(1.0, 2.0), (3.0, 4.0)], dtype=record_type) - -@jit -def hypot(data): - result = np.empty_like(data, dtype=np.float64) - # notice access to structure elements 'x' and 'y' via attribute access - for i in range(data.shape[0]): - result[i] = np.sqrt(data[i].x * data[i].x + data[i].y * data[i].y) - - return result - - -print(hypot(a)) diff --git a/numba/examples/sum.py b/numba/examples/sum.py deleted file mode 100755 index db78c31ee..000000000 --- a/numba/examples/sum.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -from __future__ import print_function, division, absolute_import -from numba import double 
-from numba.decorators import jit as jit - -def sum2d(arr): - M, N = arr.shape - result = 0.0 - for i in range(M): - for j in range(N): - result += arr[i,j] - return result - -jitsum2d = jit(sum2d) -csum2d = jitsum2d.compile(double(double[:,::1])) - -from numpy import random -arr = random.randn(100, 100) - -import time -start = time.time() -res = sum2d(arr) -duration = time.time() - start -print("Result from python is %s in %s (msec)" % (res, duration*1000)) - -csum2d(arr) # warm up - -start = time.time() -res = csum2d(arr) -duration2 = time.time() - start -print("Result from compiled is %s in %s (msec)" % (res, duration2*1000)) - -print("Speed up is %s" % (duration / duration2)) diff --git a/numba/examples/tests/__init__.py b/numba/examples/tests/__init__.py deleted file mode 100644 index 831180145..000000000 --- a/numba/examples/tests/__init__.py +++ /dev/null @@ -1,107 +0,0 @@ -import os -from os.path import dirname, join, abspath -from unittest.case import TestCase -from unittest.suite import TestSuite -from subprocess import STDOUT, check_output, CalledProcessError - -from numba.testing.ddt import ddt, data -from numba.testing.notebook import NotebookTest -from numba import cuda - -# setup coverage -default_config_file = abspath(join(dirname(dirname(__file__)), '.coveragerc')) -print('using coveragerc:', default_config_file) -os.environ['COVERAGE_PROCESS_START'] = default_config_file - - -test_scripts = [ - 'binarytree.py', - 'bubblesort.py', - 'cffi_example.py', - 'compile_with_pycc.py', - 'ctypes_example.py', - 'fbcorr.py', - 'jitclass.py', - 'linkedlist.py', - 'movemean.py', - 'nogil.py', - 'objects.py', - 'ra24.py', - 'stack.py', - 'structures.py', - 'sum.py', - 'ufuncs.py', - 'blackscholes/blackscholes.py', - 'blackscholes/blackscholes_numba.py', - 'laplace2d/laplace2d.py', - 'laplace2d/laplace2d-numba.py', - 'blur_image.py', - 'mergesort.py', - 'mandel/mandel_vectorize.py', - 'mandel/mandel_jit.py', - 'nbody/nbody.py', - 
'nbody/nbody_modified_by_MarkHarris.py', - 'vectorize/sum.py', - 'vectorize/polynomial.py', -] - -if cuda.is_available(): - test_scripts.extend([ - 'blackscholes/blackscholes_cuda.py', - 'cudajit/matmul.py', - 'cudajit/matmul_smem.py', - 'cudajit/sum.py', - 'laplace2d/laplace2d-numba-cuda.py', - 'laplace2d/laplace2d-numba-cuda-improve.py', - 'laplace2d/laplace2d-numba-cuda-smem.py', - 'vectorize/cuda_polynomial.py', - # 'cuda_mpi.py', - ]) - -notebooks = ['j0 in Numba.ipynb', - 'LinearRegr.ipynb', - 'numba.ipynb', - 'Using Numba.ipynb'] - - -@ddt -class TestExample(TestCase): - """Test adapter to validate example applets.""" - - def setUp(self): - # to pick up sitecustomize.py - basedir = dirname(__file__) - os.environ['PYTHONPATH'] = basedir - # matplotlibrc to suppress display - os.environ['MATPLOTLIBRC'] = basedir - - @data(*test_scripts) - def test(self, script): - script = abspath(join(dirname(dirname(__file__)), script)) - status = 0 - try: - print(script) - out = check_output(script, stderr=STDOUT, shell=True) - except CalledProcessError as e: - status = e.returncode - out = e.output - - print(out.decode()) - self.assertEqual(status, 0) - - -@ddt -class NBTest(NotebookTest): - - @data(*notebooks) - def test(self, nb): - test = 'check_error' # This is the only currently supported test type - notebook = join(dirname(dirname(__file__)), 'notebooks', nb) - self._test_notebook(notebook, test) - - -def load_tests(loader, tests, pattern): - notebooks = loader.loadTestsFromTestCase(NBTest) - examples = loader.loadTestsFromTestCase(TestExample) - return TestSuite([notebooks, examples]) - diff --git a/numba/examples/tests/matplotlibrc b/numba/examples/tests/matplotlibrc deleted file mode 100644 index ef4b278f3..000000000 --- a/numba/examples/tests/matplotlibrc +++ /dev/null @@ -1 +0,0 @@ -backend: agg diff --git a/numba/examples/tests/sitecustomize.py b/numba/examples/tests/sitecustomize.py deleted file mode 100644 index e391c856e..000000000 --- 
a/numba/examples/tests/sitecustomize.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import print_function -import os -import coverage - -coverage.process_startup() -print(os.environ['COVERAGE_PROCESS_START']) - - diff --git a/numba/examples/ufuncs.py b/numba/examples/ufuncs.py deleted file mode 100755 index 6cc6abf38..000000000 --- a/numba/examples/ufuncs.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python -from numba import vectorize -from numba import autojit, double, jit -import math -import numpy as np - -@vectorize(['f8(f8)','f4(f4)']) -def sinc(x): - if x == 0: - return 1.0 - else: - return math.sin(x*math.pi) / (x*math.pi) - -@vectorize(['int8(int8,int8)', - 'int16(int16,int16)', - 'int32(int32,int32)', - 'int64(int64,int64)', - 'f4(f4,f4)', - 'f8(f8,f8)']) -def add(x,y): - return x + y - -@vectorize(['f8(f8)','f4(f4)']) -def logit(x): - return math.log(x / (1-x)) - -@vectorize(['f8(f8)','f4(f4)']) -def expit(x): - if x > 0: - x = math.exp(x) - return x / (1 + x) - else: - return 1 / (1 + math.exp(-x)) - -@jit('f8(f8,f8[:])') -def polevl(x, coef): - N = len(coef) - ans = coef[0] - i = 1 - while i < N: - ans = ans * x + coef[i] - i += 1 - return ans - -@jit('f8(f8,f8[:])') -def p1evl(x, coef): - N = len(coef) - ans = x + coef[0] - i = 1 - while i < N: - ans = ans * x + coef[i] - i += 1 - return ans - - -PP = np.array([ - 7.96936729297347051624E-4, - 8.28352392107440799803E-2, - 1.23953371646414299388E0, - 5.44725003058768775090E0, - 8.74716500199817011941E0, - 5.30324038235394892183E0, - 9.99999999999999997821E-1], 'd') - -PQ = np.array([ - 9.24408810558863637013E-4, - 8.56288474354474431428E-2, - 1.25352743901058953537E0, - 5.47097740330417105182E0, - 8.76190883237069594232E0, - 5.30605288235394617618E0, - 1.00000000000000000218E0], 'd') - -DR1 = 5.783185962946784521175995758455807035071 -DR2 = 30.47126234366208639907816317502275584842 - -RP = np.array([ --4.79443220978201773821E9, - 1.95617491946556577543E12, --2.49248344360967716204E14, - 
9.70862251047306323952E15], 'd') - -RQ = np.array([ - # 1.00000000000000000000E0, - 4.99563147152651017219E2, - 1.73785401676374683123E5, - 4.84409658339962045305E7, - 1.11855537045356834862E10, - 2.11277520115489217587E12, - 3.10518229857422583814E14, - 3.18121955943204943306E16, - 1.71086294081043136091E18], 'd') - -QP = np.array([ --1.13663838898469149931E-2, --1.28252718670509318512E0, --1.95539544257735972385E1, --9.32060152123768231369E1, --1.77681167980488050595E2, --1.47077505154951170175E2, --5.14105326766599330220E1, --6.05014350600728481186E0], 'd') - -QQ = np.array([ - # 1.00000000000000000000E0, - 6.43178256118178023184E1, - 8.56430025976980587198E2, - 3.88240183605401609683E3, - 7.24046774195652478189E3, - 5.93072701187316984827E3, - 2.06209331660327847417E3, - 2.42005740240291393179E2], 'd') - -NPY_PI_4 = .78539816339744830962 -SQ2OPI = .79788456080286535587989 - -@jit('f8(f8)') -def j0(x): - if (x < 0): - x = -x - - if (x <= 5.0): - z = x * x - if (x < 1.0e-5): - return (1.0 - z / 4.0) - p = (z-DR1) * (z-DR2) - p = p * polevl(z, RP) / polevl(z, RQ) - return p - - w = 5.0 / x - q = 25.0 / (x*x) - p = polevl(q, PP) / polevl(q, PQ) - q = polevl(q, QP) / p1evl(q, QQ) - xn = x - NPY_PI_4 - p = p*math.cos(xn) - w * q * math.sin(xn) - return p * SQ2OPI / math.sqrt(x) - - -x = np.arange(10000, dtype='i8') -y = np.arange(10000, dtype='i8') -print(sum(x, y)) diff --git a/numba/examples/vectorize/cuda_polynomial.py b/numba/examples/vectorize/cuda_polynomial.py deleted file mode 100755 index 5f8393f5e..000000000 --- a/numba/examples/vectorize/cuda_polynomial.py +++ /dev/null @@ -1,86 +0,0 @@ -#! 
/usr/bin/env python -from __future__ import print_function - -import sys -from timeit import default_timer as time - -import numpy as np - -from numba import vectorize, cuda - -import polynomial as poly - - -def main(): - cu_discriminant = vectorize(['f4(f4, f4, f4)', 'f8(f8, f8, f8)'], - target='cuda')(poly.discriminant) - - N = 1e+8 // 2 - - print('Data size', N) - - A, B, C = poly.generate_input(N, dtype=np.float32) - D = np.empty(A.shape, dtype=A.dtype) - - stream = cuda.stream() - - print('== One') - - ts = time() - - with stream.auto_synchronize(): - dA = cuda.to_device(A, stream) - dB = cuda.to_device(B, stream) - dC = cuda.to_device(C, stream) - dD = cuda.to_device(D, stream, copy=False) - cu_discriminant(dA, dB, dC, out=dD, stream=stream) - dD.to_host(stream) - - te = time() - - - total_time = (te - ts) - - print('Execution time %.4f' % total_time) - print('Throughput %.2f' % (N / total_time)) - - print('== Chunked') - - chunksize = 1e+7 - chunkcount = N // chunksize - - print('Chunk size', chunksize) - - sA = np.split(A, chunkcount) - sB = np.split(B, chunkcount) - sC = np.split(C, chunkcount) - sD = np.split(D, chunkcount) - - device_ptrs = [] - - ts = time() - - with stream.auto_synchronize(): - for a, b, c, d in zip(sA, sB, sC, sD): - dA = cuda.to_device(a, stream) - dB = cuda.to_device(b, stream) - dC = cuda.to_device(c, stream) - dD = cuda.to_device(d, stream, copy=False) - cu_discriminant(dA, dB, dC, out=dD, stream=stream) - dD.to_host(stream) - device_ptrs.extend([dA, dB, dC, dD]) - - te = time() - - total_time = (te - ts) - - print('Execution time %.4f' % total_time) - print('Throughput %.2f' % (N / total_time)) - - - if '-verify' in sys.argv[1:]: - poly.check_answer(D, A, B, C) - - -if __name__ == '__main__': - main() diff --git a/numba/examples/vectorize/perfstat.ods b/numba/examples/vectorize/perfstat.ods deleted file mode 100644 index 803d1762664216f403cc6c02ba04d030dbb17e66..0000000000000000000000000000000000000000 GIT binary patch literal 0 
HcmV?d00001 literal 18964 zcma&M19WBEwk{mowv&o&Cl%Yay<*!oE4C`OZQHCk6(=<0_Oy~iIPL@ve0DDtA6M(Uct*M-?OLvF14!`cEsIzw{_L%jyrw23}JEw+Q#z?-Yt8cYe0i5)}O*6XWwDog1 zTx+Ha0Gb<7w%S+g*ZF+MopoLhWbpdydNv#1*chYKxmG|eWE5w6Ruc%tfS ztGt`5Xy(1yyu#;~by3asaI5I&EiAvKk2Lk#xRE~MAnNPqY89TJFQnQL2!vT$XC21) zl+8+dmCZ(YUC>VEw4A8Nbf+gh=Q8-&*{4je-`3l9RpXT26g(JXRBfFnk(G-!VWJJ^ zL`fqV74ENEzTev2rLj&IEL!sWwmfrY#hw$$n`LrOH4cz27-9J|#n;4)x(+%{fSagf zRpT;z(FVK==zqMe+&HnqW)9z64iC6@PxgB(u#~IzFMl~KWYr?GRCAj&_Byt16t!B* zX;^daIIlIYERjDosjn+X4vUS(WfJ3k!x3lr{Q126c#|jYQHay(z*_aa$WEJI?%o!; zxXvfh-YS4Gi%zx_+ny{+`}hnb>M&)xRm{$}45ALCaY9knoi%9fnh8&QK$10w-tVc~ zUX0B{0Z*HYf9K5$C#*y{I=2jGa{tu&s~ba;d8qu{CX}6hhj&Dpbj=XS^zd+fPw0`> z27DpLcSwO5V-|k#AO^oJqM@GPD3j-!b9mUZ*Zwa01`F44XMvEJ2+?=kj5E|QM?%{L zbxB0^48!E1ABMiUS_#kk^2-i^)@H@YTu3qJxz%QOnP*aIoH|%#qLvuq6Me>xukm7g zH3MOzoDfc5C)(SM*WSo;G+JUnilO6c`P!7LBi^qMzGt9aq)oGWl zvb?RQ$jEOsLNBGg;fl=7VX5Sx4PM%KC8&yo53TuU&_g1 z?lA2M+VXOF6fx}D$vo371Q#u!5PRFHyZv^(p;hr#%ddr5Dyj}GK77c@u0ArnfPjd) z!e=e^U}A@vykuG5rQXtBWRb}dSMm_v38g$iA+Q=MvY>5UO-*R2CpF?~9u1h*OI@IRFny?PZOSke*6N*Q zD(^X;PJ+N)T~onh&RF{_Z{FXy@EOe)HB zf8;k|*xGvI$o4AmBL*gMf{E}0_Ld|Dch|jz+B^W4ex`Kq&|RNb8+cn5k?*C~OKWHS z!}lR6Z9s9(nTytjf*Do~<{3@9EwNf>${7GE&Euh$G`$H(vT^OKQd`|~Mu%@$2@#bv zBf2)ZGSsi9AOAGVeov!ps*(SB9@EFEsmWnTglD$=wjfl;soQBbGv;j&k3p*kJHN{l z4RLDK{pS4oUeeGrQB`k13MdfD>x z!+Ep7fZTb%KU^R>`Z`YjvambS^Tzws0t}D!68HJ0oi^<^qQd>Fm(bj7^$AC|A`U(1 zC)Xr|B`b?agN1u6#4;hmtvbvz>~0^cB*xXauVd8_$}xrmW$#_Y1Vn7y0@>sniy2Jp ztv)Fv0#f@YJ_=!lB?N#|7jEl{?N`p1jkn+lQO;a1y&fBboMEo0%*%J2mgZ>@pf` z4!$K3Xkh9)2j@}0M{A^Goqfj81UZ@tqj93+$W$^IrJWK{wXPi(a5(OWu$8%|8VWrU zC88xKp#b8ZI7Mu22~Nktz&Ef#MaO9LF9>4RC`jV@A!LCaGa3o>m^|@%V$d;-_7KmB zKx`uRV`cG|NQw&nPliS5$p|{s)OQal>vl!CJ&!je_^wJKIVsi zp}%JsRE%_9TgNK6N-5H+F(4}!1Qy#XQVlvXPmjduVPf#{L1@(^Qtr4S^$RHy_4pEO zm_XPe@zRE0pi|K1_tuiA*XIB}B4!`_xr2aUan!IFvQh1Zlphh^%;jpGx2hRL5I^D3mbIs6D>jrCm%9J3o@# zR6q8BH!9cE4btErmsQ(=6@RXLAMi?62i9W`cV&YY*ddE+$^GPbQv76Q_FH1W_39k^ zwT4C`jAp7OEe9sFJ&2ZZcmdcm>7Fp{J||;APm~x$GE0Qr54l!=ro8i^zw_b9KsOh5 
zDlV&^L(iFPPnt9j@O?EjQo#kugEmOs@ZZ+w2quaoQ;T$nXvDqv3L-)*n-~9F9W904n`F+<#;r1*Z2bbExp{@bF zT*A{3;;Y^14qq2=anN+w3gM)SbXwI|DRZQxMW7zt)f!=gryU&GSr6A~PqJtu1A*>! za~Y+=U&$K^1c#83^2>1_G!*GE7~^jilNk$X%;)q`@>-!weEwPrEYlM)zZjq~p^|@? z+9HYCWaStBwFpop)9v+TmdGe$QJ_V!oVZepCWa6!mP5%R+fVbzfi1!X1Jf;=)0+NT z1hkBEc7uA7Z6b|==C-Rv(3jvUX=p?ij^EK8?#^gnkOYK&VuGw-NR5LBf^%DcP4Fj) ziO6hHT*$i?zJvD<2}_TJ2+a`Ou*FOc`XO1X8}mN7&_PNv{u(DAoxkUHSpmfw^9Iq=7~76)Pv-02Ek zpu;&Sj#518%uNTfS2@XPPz1;W+eW>NAahgambGO-VOEO*CRC91_6Qr+5e}Rav$pTboU~jBim>&msjt78;wHM%j!V)vrjmqxr41QUBl0%_s$lV+7rqbV^W*UESpEQ__36t%y`hY&lXXhGX9(1VjldYozpoUz~M zZ0I~CLlbNq+8Qqk@QDYFm~hM5AYTckuS>Bv42?qxxI?8K0rV2caAfvJm``l3kxUsB zRpJ}c$QVpnMicvtEf!!Q#;7Oc*T4dgDYLxGK3-E9f80x_9qT;0iM%s6e%Li2+aWA# z%NI~V6RuzIP350*$!rSMSBv_!B4XnDJh=^Va?#F{3&c#`0DRxzOMASn;>b}fzSs8F zKeZ_~3AJ3@X-oO4OZcv6I%Zl@hG)EC2naydEgXlYChlM+?zp99L5zMc;Pl!!{x#VK zpW`J*8fS!7&j>TGDD)hN=`?h{qm_cL&=DV^i$}s+s=+330li}h*|c0j47$ph=&|97wqej_dom4EAYnaZ`Ea-0E@X6G9shQTGzQJ$s{776MlAc%9sl+L zG}d;I>;a;`59xB8t!)XDm)aYp2;!m!yUf03Sy)O^YYfIP9R5{AU1>CC0JrSqG34YC zGF;9^Dbu{`fN~&E`o>OctA?8K4N+Uq-7SA-`Y@y>#P<=1@gj_)Ct0}X9Se`4z28n= z|MuhS#p?XyWzTTt&sYwwwzT%;F23*uUl)LUwe2;$V(=oI^o3fBm4zTL8xt%^N41mt zGAm8_M;kBe{T)K|)3Fl-uoB^+v$_#~ll>HcCCj1{sx^Fqd^Vkh~6ItSt zxPG>Li&JR6uQ=vEdCqdJ=kx&JTPnjq3M(||xxLgGuh03UJEhtAtk^J|1@>Pd&b8B@ zjIlYoYV+yz6zKO95MG8&7hIRjnq`v|sYogR6z>VN>#>*ElPyVEH1ioka?%zhA~Xbsl*Jdw3oaQ1VWLJ~X#S|` z*o9&|!GM8Go=uj3)t(Bq_zCWyo#B@J&dc>}rV6K|Rx>mhMi$z!P?PbxnFvyKAUTl6 z1sGO?UMchr$Owshj?q z-XOq@j>7|~#%Dq{M`vd?U_Ln$8>;1D1~&&DKD=~sJ#+xYTy}ld-<>*tN+wFtyvWw# zeHh65J6@8EfgGa?>-?;NyW?Q|QDple2@VdHypz{d(@aCr?-@1%oje?DbU{9*f=1+< zWmu5T^GQb~_y)#AqwVHgNqL;-GiomxFzKkzjjxR2-)cw=?z*V=yP^0)y%UfMp1;E_ zpjFn$dQ^huV12YimIvsVq!2UQ^WCqK5X7_HMpUzt)05?f#qtl8vKC5rwCN!OdNwG+ zQr+fhPXrl6+_>nD$V^jI@~n2>Rj`bXzq$J66-gF>{2g?vY>fU+lugcJgu$`2=H)Dr-N&#p_IO?`_2oeC8J z7VFcqi6oTJ%UB4AH;8x_1987@RDtnI5^NgaMA$W%+&VNt2smO7ZzI+5T?McWK+5Ys zY{K&C2bDf%AmWp|uWN$=SNS!zfbuK~9$)a!-jQE2joI2;K 
z&R3qNH>ZD`zhx};km@+mHz9pPE`I~E8GV`=INMPiBYmg*^Gu~cQ}2lV5QTBUOmB%fwT0*}SH(Z-7 z$B{?bz6;1niKx%li>GZ$e8(S;x?P^ZFt#I_6+e|bksLvv=%qNXy7o38H>*lHu0iK< zJ=bS8LORU&byx_iuhjU5Rq+#_YjpBcHTXr~2H!xuz8@>1 z%|@VMWq>>A?QPLm#SFe*A95)ueY-{knHx*LG>I^(f% ziz>k+v!!5#Oawj}2V=RT)BaYrIac#!Jq)hUOa$czFrI9!|If>euTM;S7ootDam@K` zDD(p3uQktwA=mMe zEQ~{&Ym+AwXgEXBhz&Ueo!;Q!k+#RK4=Nz13yGv61f67=JPN(?o`ed$(Ch~jZYkci z`s-NN#evQGY3=#m3$tnEtNl*yBS3)q(?7L;gn|33h2voYZN9_!5S&5IWb9gZNZErz zEYKOC`vu!Kta#+RFxn}#Pk8IWG4aLmh0Nr(o^`r$LrxaE)4Y6KO}rw9Zkn|_D|{P* zbVhd&ppD1RMHd9yy)626l@alT4AOdaI~og*EQg3STXiLn$J0@2>J8*%Zu;?o!2M-~ z)OmRL(0%Sg-VvU&HP1hYWlcI4B_iTT!||#M&zX7z*y|Lv+y>f?nsOxCgCs}{im*0~ ze=Zn?ocxCj%9;7(Nsf3|?v&n3qSN=L-pJXeH)jI-(co~-)ECMB1*v6 zdw)|fBcX;l%_S@@0yWFBnRD@`%$w8rjl!u4%GAOo0X5QSwfHWu2!r>)nWwlJw#DH2 z@$AM?NI8oGmU%e$LYNqCX zpOxNvICm%Km_8{t%K8g&eh5BB1DJYmcwEQu4!D*+pozv_Bb1 zE)T!=d2t0uneTyb|t;B%+?Q1eAuk>>qOFj`*Qr1BMQ*I`#=+ zH&#tBQ#M~YE$vOo%Y=$0!>N{|#u)Vm^9L|A^4s26n`I+ZhxjlWiiKd{3$$Qpfk8Hw zJh)~QH%gN9U%g#jFYcf-lJHR)?k4>^${jPNWMz-`zJ5h!< zn^>DkRY@ecyfQJinvb(MRGI?#euiE$M^+2JZVD4l_ag>U(rR5F(DDkgTB02frCaz% zeH8?AuTkF4581viIzG|1mVm61n?Bi6tq99gH4 z6i|8G)qA%g&@p`7+$e6I(~*6yn_bDTGYVfXmiww2pGd+NU@LF-JD&N+b(#0)VK6h1 zn~NB_NEh3m!F^V@6t`YPHNMnPuu#gJkR8bfB9feSX8FjuF>ysGAX;rdR+Su_AJ`o` zR-IrD}fB#Tx{BromSNrn*}%qJ?SZaL0U?~hy*1HqYP}PemrM)tm#}= zu*OsnZ=)1-Jhml1DL-4!Cpw;3(AU!jGIXh3C>ys3HPm~&1)w*Apq1GuWfc#y8+wV* zyxp%Q>2$5Ljs=Y;E_*^`A8@0>TXRMCUH$5PirbNk(_t(HLfL|H}7Xqq%ZfG(aS1EswyW96)>hrrZ^&oFv*XhH{I&9IKx^o;#iefHkYKcvDSm zx1UTM`nt z2n3-IkC7M6yOd{I>${W-X<|urK-fD6m)U9V;D(wrYV-U^T&*qe z@tjGSuVI`c(bCmlyQI!CQ(br_kjY2KD_$!`Loi|@4W0PZgnQWJ!z}->2ZLa!(L*}Q z=-BEApG*r#rfeeY1*Y)C=J}fJJrAB)d@0-`LGr#8z9=ln6aPbK9FG0X_+Zgby?Mlm zH>H?ds zb0*J{IN^y4BC%+)RP(ERR1&aQZ|2$@w(DA6^$&DfLVl+WvCsQqJl|a@u+LTa(=qS+ zIfSF`GHX5q+J={vLnP&T=<2aKC^0vUOXwumSrx(*dd|;=JTGg(fYXb;F5<{EAv{QF zPCN%Z`a*@cMOs$-*QjGC&{s|^eX;YCc4EF%eqG-ky3)>CcvgV+#!>}*qw%NGWyp_G zKD^J7@n3+ywtU{8iwfSH-4u_^bzv1Pj7AhaRTZ&5oZ?tgXh 
zk+?Y8asP!vbK4o(nmTbi8*~3xIGX#va&!M>=wB3f8%sNDJ`xLOXM1i21~)f1dN&q& zfTKAB6Bid3!(Y85f15Hf{tutMi=)k7Tqed0zd`3efVUF^6Fn2dzj^<7`2W|bKfM3d zB>({MA47j)_*aO3nPX;TWM%l*?|)k|cQi4v`49hp3E(#`3xhd>iJ`M0ovWp(8z~9V z-+BBm31=qZ|95u(xWUi+movt{q5D7BKHYC&Q+{SfCT2P&CORfA6($yL7G`c%MqY-0 z;Nvwh=KgO2%#12bjNB}Y+>D&O4FB%@mjzQ3OXuH%OlR^p?$58F!X&{a%dGsj=KoLs ze-|_MF#i3`z|X?;KiL1K|BKn&)b3y90Q~a5vnc=99QrGZEcC4OOk}EdmhMFVy+{8y z{BC7x?99Nz%4Ew#ChTHqV?xKp#>Mitng6TtUl#r|A^+4YSUNdd8Wa7M7iU8w8&f)C zfQy|o9|;S|zlgu96KwuXV`XFgn+h=cWANW}MiK^o-oFQ!`(Fd{?_dA1^%wbnFOB$v z)~i^!*c#axTG}`9rOMy#&A!ZXR$nn*oBbjb!;E zP+gX!E}C$iR$}J#uC9e)lX_@lJ;J^-=LeXXval4DMs|@Ta63McFDL$mJ-b0;f3USx z^?CSkdTJS(`R)B#@+>Jzvq9dreCc7U{vqfcJH0=qEIEEh7e8q>bc;QE`1I|zkuK;6 zouee~N}P;cVY__d3V#u~CnxAbjvZvh->+uQFIT&mD1In?d@(+$7mJL<;|}8?Eveh| z+bK>&>Zg|z?-73_TtPT2e>(^!dGWXVt^;XNN0M{vT~@8Szy+PD4qZ)-R4xvzwTEnu zF3jZX%rEFo3^g10NzPx6wWN(;Ib${{UcU7yKd^iAr%7Cg4Co9E_uIvaxMcE=lsVa8 z9y@HrVPB(#JFCRi2I??@C5x?lfRznwNFRw<^_*g%u;QguOATou7zxxb;cU=ZKF3;t z=ory7op8n!NI;_E9eIS@yKt4MEJi3#!LN31W$5yr)RSW!xJ|ZlK>;q!`14Uyzqs~m zOpGhb{ZQ<{kqgm81eU8?D!e^Zjqfy#{iY4xPYhZu6P@uX9wj$&96nV9k#i+b45%zU zJY>|89XOI5zmJm#r#5?5npmf^lL=&y<=_%srSeo$sDt9 zi(>B|REHq}&}!q9dsZVoIA8p~ig|mIkjt&N9=y14dM%|3#te1tJeh7jDz&B4*F8(# z5n>lJ1DjvuOPy@K0Uf+i4X>8=mmZL{r@Y7cFYK%u_m?HrOoZm%yqkAC!`fIcl2r5i(3`_{W|~GQU7i{DDFLx+^W1*2b z9VL3%;l6$dQ6Yr39DoFKzYyf{FDcqyILuSx+9gu3C9wkUx$A-3*!ya; z9kl#9DReG-U|_GZEU|8cR`_;-5JKjTEzu2bZ9M@$=BnS#-NT44)z;|XtYa7((=TTh z_Tpms4}SjC+Yp31fIWTfovFqVGHrCFiD?O>tnT3|ja(45*tRQ1$-pp98lOGSvR9^{ zHLZ#LhH1NF#J}98yM+;XoQ--~vUAo!zQET$`LWsp$&?r%auV{844Dj&VzRIl)E}xB zDlXz%DDV=pYpyoMmbWg~Nz5i5X8O+LG~_48?w=y}ywuUya|m?=BRIResY5tfXx+Ly zjbx&1FLa2|q%rQ0om-83+jf$z9{}U=A?<0~_x&mcRF&4fWfcx%B_eqCRKz*o9&JSR zi!?p)G!OkArcmy$NK=!${5yKvuN$ODSQmqaW&{Fs_p78C=A@}$4j$iMX}S*93o#v2 zZkOT|*}D5W#iJ?6Dg7g&^*b`s?bRiYd`~@mx77Dmp4X1Jh=-$Mp6(a~+_TA7Rr_YY z=-8=5=eIU;&(I_oDGN6v*fnzWz%AWfnne#Dl_1g$Q;5)-M?#AV>l7USf z#;C@fAh?3sM#I!Q^G7fZvy~FHpkFkR@%(ri>{~k#f4nuc$hMW9B~}lVWf+q@K@@A- 
zt1k8Wq<&Mp1gw@;w7(E!LpJj!$v{_Ay_J3)@2aZ&)o(H0?YX4Y(rjxX_6d_PTAY%*L!FoUM9<5 zm2qUjHd_d69Mzvpo^eS63fBc&6ior44-FaAp{CXw(Pb$ah1Wtur60EbK;{b!dEcMp zPSD5sMKiZ&JcPFH8dSHvA6xfC2m`(jqAllcB~I{UtB6C9`I&8x(R(yJKU2j{%n25@ zn{%|rIWAZa+UTnwD1S-#0GH)7=!UhyxdCS)zBu;nZ3ye0bd-@JFDU)Wb_YZA$)($* zVbi3ih*apDFSExS1^KzCyKz)7=1xUN^SVc*AsD4 zqymiP&_ruV@vk7a6PT37%o#|cVQ)(wdMbuXX~a$F;Lpv$Ey@LYyZl|JnDV^tiupRZ zzK~0|*kNw7$36%E%w{yuqKyV#<`fw>ZGgb|z4!Y=7iLvlMTpQkfX^y&=)M;W=H9ud`Lz>6Qh zF?0Hh2?WEHjse^;asyG&)d5 zoa_QJkwceISW0YX*{b8|z!)?7w1TmO&TUhF-T;UQ;p4*GLL2Qf*?F~-ZvBvN4Yp5w zAZS}=dih%EewjUeS6P{}7vx=BWG26IL2(A@LZg@bwI3XX0hygaB4JCg?U(Isbob)C z_i>5wO<}*3qd41@JA4pcY1W&743hhDZ9+E89OQoK5-YA2`8HgJrQi6Y1Y* z3^47!ni?)#8pYvLI=)I5ZbiRRRz8isJ{<1Ee2`{La?A}?)mzgW9z9sa1uER6N1nfB z1F6?`R=MRNNcV&sO{=L|K6_PKVlWsb(O2-1c4S63O!DIP;b!hAhJMK)u%s`C&K|ll zmRcVxk)iQpx*QuVwF~mdX*Ud#_~aKYBGqfcE-@q#m2rTWAUjJCd3z7OCo;3sNX?oTOZ7R~A+@ zEvDH9>3vxgLY?J6)yjwVzmKZ9N>ZTOrCnFU(eROiLDfS~_~Z)=NL?{PE&261Ix0}4 z2ho$BcX46ShJ=}Q8JHhY-(-_dfaaxHZ`dH4Fp)=#eK3)2<-&nHIHbNarcc6(&7yRpyYMh76{?U2^^F%~_TTP-M@q3}+C4+;TSoPs=M3E=i4<)+tzj?+HXXY{!}#a9}dL@MHTHL=08l_4jg`ClXpeb4+m(O9K}G8~q=Sowsl zH_od#m8UCt9e_rK*Y6h(yPB{HwH5e`l*z=u2F=%FPA9uL|CIsr9@8@c(!60Z^SB2R9>Vs=17jgIq!PGSir z%5wFV0qkI20`5f!8Y@4I_gTjx!KHy0 zWsuHrt~aB(ENv2u%FJx1sdDW4 zR||U$bk;?Q7DbgBz@H$J7RoV0Hi9e?etRo(El zD1(r%d*|mlP$eKvj}M8V-&eY?m_nP`N{_U91{$4y`Wdl|Hv}wxHZB}$B337a*NdOT zTtC{97`k@Ct92u08H?FfMp+>5e#0%_m->D+&dvx_lRhzBAhYKciGb8$my0ZLsopQ5 z3YR{lv_7)X_M zH@;m(jd_w<>cR{#eEwsNLA~pxAI0W8pz%J@@)JKH6Ln(y778Ho*D@7qTXJT#Cj`wo zg7p{Z?Gr082%M6MjXRuqjN0{Sud?u%#5uxK{Lu1qap70-{bfpcn+`r|UxxHdIN zB3PWZ=-J}J!omoR9$K^x8(r2`3^YmSaMDQDgx{26>=$kgQR~gXEH!$Q+y#9WW6bs> z8(%6v&O=rv))Cs>m>QSJ{FRSL2wc$i->Iw zUV*Tuu|(42`jvT)Xa7y4QHQJzYz}h6!rHyQ;n1RACGJDkHP-*O$e97x@q^zG-Pr=L%n{pG1+?*cc4&Acw>5 zaEDJx+;FR;R0CcU$-hFiv23XAk7~C8fl`K>Eub@l@$TZ2f!!%M1JtZ?w~2IID#mBQ10v7eA=WF>UIrEsHR)7_vR%Ir&w#D@C~iCC<{-5hJvgPo!`d@-shjx? zYwv-NRu)aVw#?jcdzWO^cg5*)WiGqD)=(89_zHI?FpM0(KhJC!Jb!yaG|U0KJiNJ! 
zXIxj8-`n7V5+Og@Pb24O;}l2mgV=l;cLJLhx?WbcH|`J)0^U_u7(!EmZEb5~35CDesAiOS=dGa-4?9U^)9B5KX7v6mlycw$(lvMv|S6tJ|i&DZ!1El`x1g5EiiWHhvFcm-=v&K54lTVnA?_pU)Q-SPJi zo42!V`ev~Nap&R@VaQ2J0&;AXM}$oQ^*gsEZo-{)3J=GKwFTHiHGI-QzOf zCSu^(%M62J!^>EYGin*ne4uhXu?*r_*1!S&saB8u8rtTIs>I6sjju3UO5Ns_Xj zzz^$2FmMH$(u3J5rW87JNO?%UYzu@PZdgqLEdl7Ytnqb_Jp(gHJh7TD@}lhD?ht|y zb~mEM#Dot0gdzYJ;mE87;7l#-Dkmk^Jc+iiNpVtg6+yFuQ0-*-(5$3K5w1bO4QFVJ zR)KqS4+_W28Xs%#&d1osnKTaHF-RV_4N&_7O&u=*Q!Y`Zuv>{Jqt3_+qkl)(=<)O zfjgyO>L_F_!Jr_xg8ER3cV6JjyWi`tC@`&b)k;_h3%y5(_#Hv0d1xDxXxU~2gQ7!! zA&M4tmh$fd46Q*_+%=+1@9TtHiJnJF+L%gV(qrlDza-6;EFmWos=lD))lEuTE_WSfMZR5G`5C%31Z@47i@rV{`zjH zYY`yUz+E&{(*5Xvs97R#bJZJc-UTtvYD%J9J%ZWs(4-}VN7S1*{EOSO$x#wNmL21Y z_RF`r2Y;hw_U%lI)PUGC;1@s=+O8Skm$6Em+&CeLCw(41r^5B7jzcJDwc_G)hh`R8 zy31mpN7^9!tUCO9CBw87G3BRZ#dhc~Gk-_IDewq|Dwg{vo@7HmsOwwgqK4=BN8`#^ z=yb8Z&(VGvxD}wA++sBOG8VgpNYfU*^~6Mi5Bf?cz{dTCFn#>MIG;x_%oGN?U15Yx}ngJYgd!e7Tw~$cPWmHDEzneW;8!+SLk{-hnsDUb6~c<2%3?-?ofxa09}&eaU@!F|v$81` z?;_e#()mxM@Sm~{hQo0$Yeg7Ax40L?w5pl1$@GWX+$Roq_PkF-(b3PjXAB@LjVZnq zUGa~Hds!b=KYc2QLn<4RRnTdcDtp}y6|+9(>8XCo5i}%{e`44-OEu@hZld{}gq-Z? 
z6|Dap1ki4{F^o@=1KW2sx^F=_R1j5Kz@`f0o|gpRf&|}#ufNx^3Dv=c-b(-hA&Oqn zEnxXa>5VvlLzOh}aa$|9D7<(j=0_9s{=o*(n%7WJ7uky-xc7P_T@JJws$Q3GfGZw> z>J{69KA>_J)!<~<;-MOZG_Q@jak3=ejD0ia`QUAZId~o@?z@Hlp1eSKbVvuF=)xz% zhW+AqgDUQ-UOAXiV7mHM+-)Uxd0dhQ98dRT1DFRb$|a>YTyJv@>>5ud8+ zfxa1tD!dRy`JB*Ajt~1HqN3B``ZU4{Zjtf_XI}P88d>$8VCZ7G6hl>p-T&`>Q% z&QtgJO*e)^m^s_J93|JY%^@xreG%Jyi1=$^#g(qIw!|(_{3oNMvd#Gk{9Iy%l1lWF zwuG>4h81nbVAsT5;o^?U)4Nq3dIa}7A@>OfXhgZZ<}gL6c)e2G*gR_rZqB|60nW>z zj`konya<;{XV>uW;p;ej8cix_kw!Qy+M1q!#T8B3)|wM!g4XFGHcjZ$W%Kt zmU#?&xQuh$Gq`!VB1?fzbK;k&Aa+|^iD6V8@OFgz%1jOjy&nkjxSmty46+RzUgBxo z_;p4YhF@LG!q(Pvm&5NrmE$2L-SzEUrC2GKFF5q zm20E6;gwRnkbjLr*EC;}TdT??YX02!dehOvFkST}KKFDYCq!K|(^y~MuL%qem9tX< zXwtnWH2;D{J#z9InDx980Xe|?e#Vos0wc-L&DC=yy=b2^syTk1G*er7AdELyh~Ox)f_sbg97M^R zP2iM#uFZ_2F-lV*p9nQSkq+-^10Ca$u4^#JPSTIjdL!S{Rp*epc`n4qJXWHPnEZ+Jb*neZCO}%TzBix=ur)JAlTLr%JI~#<*TE zR@4CqWvUhI+bg=xVv0wVBP9usKyt#ma~28elxKCw&*Z9pPMoX=nn{32tSOM664kZY z#J5wgt8uI~5D>7;?B`wxrEwuiCL6p3e)P>+Jcn0Z(5r*fkoekjo8jfESOpwwDr^#m zfnir%fe0lP>;FwGAge# z#}cAuMei7qiPlmBqfYvDt_jv41}+ddEOhy4JC|;Uc_n>sSW>h$fVbl+xhu;A7DC}? 
zeo2~$8q1St(f2DzwNJ2AePti-93zsyyncq_P9wbUhtLTU9$Fp5_v_^asN=QD4zE1@IbAo(DCzDBT z{d^(@&RQEa-;~U7|Ks6H?=%@03P_n-_D#|VluyTZwcKu9&S(4MhGmH0qET_Rt1{~* z|Be&IZXC^(yM|w0=L1hIs5{Sm$mX0Pa)2$|k2j zpt@B~x>t|wS34;~fLAgL<;y;NcTH`i!x|-K@#D1B)Elej`|qs@3eQW_DoS)sxOt-_Ki+h9A6qT~d9CoKKy;Ctn*tzLe(CDnXCzlW<^ z?Xyeoc`Yidz`R2Q^pL(d#mrlXjpT6zhiER|^PZ^10BZ_O*31+j!t^@P^Us`%noxiJ z{G2WbnjhH<5JP13%$pPMxP9s^6o?Z4JmnZDDrWn=cJ42P;tA64dyAIRo!t%Qab&bu zvl_w1Fa3)&6=q{3pjz35T0yY2krY=yPJ4J%C}xiOq<5!r3Rio}fgHWai7rV@uQ9{n zpjw8d`^i)|?(o4VJ|;*&rGnY61qzwL-^23Onzcbkna&zNIngWd?HGwpu|PNu*CJ;A({F2g$9wRs0;J{A0BU^5s#Ajvp_od8o0 zyEQm16~J&%ooPGS4_BmjWt5_m5z%nnrqo8?iAvAgo?ma6@CnAd^ITD$)GvQo=G?rg z3wv-6sX|l`X=Lz7LHY>+UGX+iaW70M(b-$$VeLXJ5U`6X3EczJRwXwE8bfNqO--g)s+}{oL;-RZHQ~s*xuGUIQ2wbVYfZfAbu49Tt;2VVZdt^Ji8b>zFKCylDVY%*5%}i#ft7yz6_Ug zN_`q?x2Q5~`gd!+{}S)A)C9e0o70}~o^y_V{yFgt-v#T-pA;R{eZ1~P9xjb~yJ5OI z_orDpnyCi1xr{%|pG3%Yea|)8^R`9km74Ztt@VQJ+ckftOni1Kf?F<2YV#)5=ZgR$`V+gv!D%DiExb4gUrUfqYx zy`Pu-kU7;d`?ws_jMvq1ej5Yq_Z9!-1cm7@$Ktnwj0_AnfXnOxycwB97;vBD0t{Fr z06W(OT^EuP2GF4+2oM5fq92uld~yrKcH}kRu;W)iXSX0g50Hs&4vv#t@Hq~&>InfJ z1DWWCBF=F^4H$$j1~e~90)rCW6y$SVaGL`<*#!Xtv6+K%&I@jnPy@Xii%H1mzTh?o zHGB?ZF$ZxD46&yC#9|8KTo~M@fKG-%0DWM(Ko4b%b7CNd!BP|Q-U84`F$j self.last: - print("plot_step", x) - self.step() - self.last = x - -def wave2d(): - speed = 10 # Propagation speed - s = 512 # Array size (spatial resolution of the simulation) - - fig = plt.figure() - state = State(speed, s) - ani = FuncAnimation(fig, state.plot_step) - plt.show() - - return True - -def main(*args): - wave2d() - -if __name__ == "__main__": - main(*sys.argv[1:]) - diff --git a/numba/numba/__init__.py b/numba/numba/__init__.py deleted file mode 100644 index a251b49e7..000000000 --- a/numba/numba/__init__.py +++ /dev/null @@ -1,169 +0,0 @@ -""" -Expose top-level symbols that are safe for import * -""" -from __future__ import print_function, division, absolute_import - -import platform -import re -import sys -import warnings - -from . 
import config, errors, runtests, types - -# Re-export typeof -from .special import typeof, prange, pndindex - -# Re-export error classes -from .errors import * - -# Re-export all type names -from .types import * - -from .smartarray import SmartArray - -# Re-export decorators -from .decorators import autojit, cfunc, generated_jit, jit, njit, stencil - -# Re-export vectorize decorators -from .npyufunc import vectorize, guvectorize - -# Re-export Numpy helpers -from .numpy_support import carray, farray, from_dtype - -# Re-export jitclass -from .jitclass import jitclass - -# Keep this for backward compatibility. -test = runtests.main - - -__all__ = """ - autojit - cfunc - from_dtype - guvectorize - jit - jitclass - njit - stencil - typeof - prange - stencil - vectorize - """.split() + types.__all__ + errors.__all__ - - -_min_llvmlite_version = (0, 24, 0) -_min_llvm_version = (6, 0, 0) - -def _ensure_llvm(): - """ - Make sure llvmlite is operational. - """ - import warnings - import llvmlite - - # Only look at the the major, minor and bugfix version numbers. - # Ignore other stuffs - regex = re.compile(r'(\d+)\.(\d+).(\d+)') - m = regex.match(llvmlite.__version__) - if m: - ver = tuple(map(int, m.groups())) - if ver < _min_llvmlite_version: - msg = ("Numba requires at least version %d.%d.%d of llvmlite.\n" - "Installed version is %s.\n" - "Please update llvmlite." % - (_min_llvmlite_version + (llvmlite.__version__,))) - raise ImportError(msg) - else: - # Not matching? - warnings.warn("llvmlite version format not recognized!") - - from llvmlite.binding import llvm_version_info, check_jit_execution - - if llvm_version_info < _min_llvm_version: - msg = ("Numba requires at least version %d.%d.%d of LLVM.\n" - "Installed llvmlite is built against version %d.%d.%d.\n" - "Please update llvmlite." % - (_min_llvm_version + llvm_version_info)) - raise ImportError(msg) - - check_jit_execution() - -def _ensure_pynumpy(): - """ - Make sure Python and Numpy have supported versions. 
- """ - import warnings - from . import numpy_support - - pyver = sys.version_info[:2] - if pyver < (2, 7) or ((3,) <= pyver < (3, 4)): - raise ImportError("Numba needs Python 2.7 or greater, or 3.4 or greater") - - np_version = numpy_support.version[:2] - if np_version < (1, 7): - raise ImportError("Numba needs Numpy 1.7 or greater") - -def _try_enable_svml(): - """ - Tries to enable SVML if configuration permits use and the library is found. - """ - if not config.DISABLE_INTEL_SVML: - try: - if sys.platform.startswith('linux'): - llvmlite.binding.load_library_permanently("libsvml.so") - elif sys.platform.startswith('darwin'): - llvmlite.binding.load_library_permanently("libsvml.dylib") - elif sys.platform.startswith('win'): - llvmlite.binding.load_library_permanently("svml_dispmd") - else: - return False - # The SVML library is loaded, therefore SVML *could* be supported. - # Now see if LLVM has been compiled with the SVML support patch. - # If llvmlite has the checking function `has_svml` and it returns - # True, then LLVM was compiled with SVML support and the the setup - # for SVML can proceed. We err on the side of caution and if the - # checking function is missing, regardless of that being fine for - # most 0.23.{0,1} llvmlite instances (i.e. conda or pip installed), - # we assume that SVML was not compiled in. llvmlite 0.23.2 is a - # bugfix release with the checking function present that will always - # produce correct behaviour. For context see: #3006. - try: - if not getattr(llvmlite.binding.targets, "has_svml")(): - # has detection function, but no svml compiled in, therefore - # disable SVML - return False - except AttributeError: - if platform.machine() == 'x86_64' and config.DEBUG: - msg = ("SVML was found but llvmlite >= 0.23.2 is " - "needed to support it.") - warnings.warn(msg) - # does not have detection function, cannot detect reliably, - # disable SVML. 
- return False - - # All is well, detection function present and reports SVML is - # compiled in, set the vector library to SVML. - llvmlite.binding.set_option('SVML', '-vector-library=SVML') - return True - except: - if platform.machine() == 'x86_64' and config.DEBUG: - warnings.warn("SVML was not found/could not be loaded.") - return False - -_ensure_llvm() -_ensure_pynumpy() - -# we know llvmlite is working as the above tests passed, import it now as SVML -# needs to mutate runtime options (sets the `-vector-library`). -import llvmlite - -""" -Is set to True if Intel SVML is in use. -""" -config.USING_SVML = _try_enable_svml() - -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions diff --git a/numba/numba/_arraystruct.h b/numba/numba/_arraystruct.h deleted file mode 100644 index 0db07cbbf..000000000 --- a/numba/numba/_arraystruct.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef NUMBA_ARYSTRUCT_H_ -#define NUMBA_ARYSTRUCT_H_ -/* - * Fill in the *arystruct* with information from the Numpy array *obj*. - * *arystruct*'s layout is defined in numba.targets.arrayobj (look - * for the ArrayTemplate class). 
- */ - -typedef struct { - void *meminfo; /* see _nrt_python.c and nrt.h in numba/runtime */ - PyObject *parent; - npy_intp nitems; - npy_intp itemsize; - void *data; - - npy_intp shape_and_strides[]; -} arystruct_t; - - -#endif /* NUMBA_ARYSTRUCT_H_ */ - diff --git a/numba/numba/_dispatcher.c b/numba/numba/_dispatcher.c deleted file mode 100644 index 7086c97f1..000000000 --- a/numba/numba/_dispatcher.c +++ /dev/null @@ -1,678 +0,0 @@ -#include "_pymodule.h" - -#include -#include -#include - -#include "_dispatcher.h" -#include "_typeof.h" -#include "frameobject.h" - -/* - * The following call_trace and call_trace_protected functions - * as well as the C_TRACE macro are taken from ceval.c - * - */ - -static int -call_trace(Py_tracefunc func, PyObject *obj, - PyThreadState *tstate, PyFrameObject *frame, - int what, PyObject *arg) -{ - int result; - if (tstate->tracing) - return 0; - tstate->tracing++; - tstate->use_tracing = 0; - result = func(obj, frame, what, arg); - tstate->use_tracing = ((tstate->c_tracefunc != NULL) - || (tstate->c_profilefunc != NULL)); - tstate->tracing--; - return result; -} - -static int -call_trace_protected(Py_tracefunc func, PyObject *obj, - PyThreadState *tstate, PyFrameObject *frame, - int what, PyObject *arg) -{ - PyObject *type, *value, *traceback; - int err; - PyErr_Fetch(&type, &value, &traceback); - err = call_trace(func, obj, tstate, frame, what, arg); - if (err == 0) - { - PyErr_Restore(type, value, traceback); - return 0; - } - else - { - Py_XDECREF(type); - Py_XDECREF(value); - Py_XDECREF(traceback); - return -1; - } -} - -/* - * The original C_TRACE macro (from ceval.c) would call - * PyTrace_C_CALL et al., for which the frame argument wouldn't - * be usable. Since we explicitly synthesize a frame using the - * original Python code object, we call PyTrace_CALL instead so - * the profiler can report the correct source location. 
- * - * Likewise, while ceval.c would call PyTrace_C_EXCEPTION in case - * of error, the profiler would simply expect a RETURN in case of - * a Python function, so we generate that here (making sure the - * exception state is preserved correctly). - */ -#define C_TRACE(x, call) \ -if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \ - tstate, tstate->frame, PyTrace_CALL, cfunc)) \ - x = NULL; \ -else \ -{ \ - x = call; \ - if (tstate->c_profilefunc != NULL) \ - { \ - if (x == NULL) \ - { \ - call_trace_protected(tstate->c_profilefunc, \ - tstate->c_profileobj, \ - tstate, tstate->frame, \ - PyTrace_RETURN, cfunc); \ - /* XXX should pass (type, value, tb) */ \ - } \ - else \ - { \ - if (call_trace(tstate->c_profilefunc, \ - tstate->c_profileobj, \ - tstate, tstate->frame, \ - PyTrace_RETURN, cfunc)) \ - { \ - Py_DECREF(x); \ - x = NULL; \ - } \ - } \ - } \ -} - - -typedef struct DispatcherObject{ - PyObject_HEAD - /* Holds borrowed references to PyCFunction objects */ - dispatcher_t *dispatcher; - char can_compile; /* Can auto compile */ - char can_fallback; /* Can fallback */ - /* Borrowed references */ - PyObject *firstdef, *fallbackdef, *interpdef; - /* Whether to fold named arguments and default values (false for lifted loops)*/ - int fold_args; - /* Whether the last positional argument is a stararg */ - int has_stararg; - /* Tuple of argument names */ - PyObject *argnames; - /* Tuple of default values */ - PyObject *defargs; -} DispatcherObject; - - -static int -Dispatcher_traverse(DispatcherObject *self, visitproc visit, void *arg) -{ - Py_VISIT(self->defargs); - return 0; -} - -static void -Dispatcher_dealloc(DispatcherObject *self) -{ - Py_XDECREF(self->argnames); - Py_XDECREF(self->defargs); - dispatcher_del(self->dispatcher); - Py_TYPE(self)->tp_free((PyObject*)self); -} - - -static int -Dispatcher_init(DispatcherObject *self, PyObject *args, PyObject *kwds) -{ - PyObject *tmaddrobj; - void *tmaddr; - int argct; - int can_fallback; - int 
has_stararg = 0; - - if (!PyArg_ParseTuple(args, "OiiO!O!i|i", &tmaddrobj, &argct, - &self->fold_args, - &PyTuple_Type, &self->argnames, - &PyTuple_Type, &self->defargs, - &can_fallback, - &has_stararg)) { - return -1; - } - Py_INCREF(self->argnames); - Py_INCREF(self->defargs); - tmaddr = PyLong_AsVoidPtr(tmaddrobj); - self->dispatcher = dispatcher_new(tmaddr, argct); - self->can_compile = 1; - self->can_fallback = can_fallback; - self->firstdef = NULL; - self->fallbackdef = NULL; - self->interpdef = NULL; - self->has_stararg = has_stararg; - return 0; -} - -static PyObject * -Dispatcher_clear(DispatcherObject *self, PyObject *args) -{ - dispatcher_clear(self->dispatcher); - Py_RETURN_NONE; -} - -static -PyObject* -Dispatcher_Insert(DispatcherObject *self, PyObject *args) -{ - PyObject *sigtup, *cfunc; - int i, sigsz; - int *sig; - int objectmode = 0; - int interpmode = 0; - - if (!PyArg_ParseTuple(args, "OO|ii", &sigtup, - &cfunc, &objectmode, &interpmode)) { - return NULL; - } - - if (!interpmode && !PyObject_TypeCheck(cfunc, &PyCFunction_Type) ) { - PyErr_SetString(PyExc_TypeError, "must be builtin_function_or_method"); - return NULL; - } - - sigsz = PySequence_Fast_GET_SIZE(sigtup); - sig = malloc(sigsz * sizeof(int)); - - for (i = 0; i < sigsz; ++i) { - sig[i] = PyLong_AsLong(PySequence_Fast_GET_ITEM(sigtup, i)); - } - - if (!interpmode) { - /* The reference to cfunc is borrowed; this only works because the - derived Python class also stores an (owned) reference to cfunc. 
*/ - dispatcher_add_defn(self->dispatcher, sig, (void*) cfunc); - - /* Add first definition */ - if (!self->firstdef) { - self->firstdef = cfunc; - } - } - /* Add pure python fallback */ - if (!self->fallbackdef && objectmode){ - self->fallbackdef = cfunc; - } - /* Add interpeter fallback */ - if (!self->interpdef && interpmode) { - self->interpdef = cfunc; - } - - free(sig); - - Py_RETURN_NONE; -} - - -static -void explain_issue(PyObject *dispatcher, PyObject *args, PyObject *kws, - const char *method_name, const char *default_msg) -{ - PyObject *callback, *result; - callback = PyObject_GetAttrString(dispatcher, method_name); - if (!callback) { - PyErr_SetString(PyExc_TypeError, default_msg); - return; - } - result = PyObject_Call(callback, args, kws); - Py_DECREF(callback); - if (result != NULL) { - PyErr_Format(PyExc_RuntimeError, "%s must raise an exception", - method_name); - Py_DECREF(result); - } -} - -static -void explain_ambiguous(PyObject *dispatcher, PyObject *args, PyObject *kws) -{ - explain_issue(dispatcher, args, kws, "_explain_ambiguous", - "Ambigous overloading"); -} - -static -void explain_matching_error(PyObject *dispatcher, PyObject *args, PyObject *kws) -{ - explain_issue(dispatcher, args, kws, "_explain_matching_error", - "No matching definition"); -} - -static -int search_new_conversions(PyObject *dispatcher, PyObject *args, PyObject *kws) -{ - PyObject *callback, *result; - int res; - - callback = PyObject_GetAttrString(dispatcher, - "_search_new_conversions"); - if (!callback) { - return -1; - } - result = PyObject_Call(callback, args, kws); - Py_DECREF(callback); - if (result == NULL) { - return -1; - } - if (!PyBool_Check(result)) { - Py_DECREF(result); - PyErr_SetString(PyExc_TypeError, - "_search_new_conversions() should return a boolean"); - return -1; - } - res = (result == Py_True) ? 
1 : 0; - Py_DECREF(result); - return res; -} - -/* A custom, fast, inlinable version of PyCFunction_Call() */ -static PyObject * -call_cfunc(DispatcherObject *self, PyObject *cfunc, PyObject *args, PyObject *kws, PyObject *locals) -{ - PyCFunctionWithKeywords fn; - PyThreadState *tstate; - assert(PyCFunction_Check(cfunc)); - assert(PyCFunction_GET_FLAGS(cfunc) == METH_VARARGS | METH_KEYWORDS); - fn = (PyCFunctionWithKeywords) PyCFunction_GET_FUNCTION(cfunc); - tstate = PyThreadState_GET(); - if (tstate->use_tracing && tstate->c_profilefunc) - { - /* - * The following code requires some explaining: - * - * We want the jit-compiled function to be visible to the profiler, so we - * need to synthesize a frame for it. - * The PyFrame_New() constructor doesn't do anything with the 'locals' value if the 'code's - * 'CO_NEWLOCALS' flag is set (which is always the case nowadays). - * So, to get local variables into the frame, we have to manually set the 'f_locals' - * member, then call `PyFrame_LocalsToFast`, where a subsequent call to the `frame.f_locals` - * property (by virtue of the `frame_getlocals` function in frameobject.c) will find them. 
- */ - PyCodeObject *code = (PyCodeObject*)PyObject_GetAttrString((PyObject*)self, "__code__"); - PyObject *globals = PyDict_New(); - PyObject *builtins = PyEval_GetBuiltins(); - PyFrameObject *frame = NULL; - PyObject *result = NULL; - - if (!code) { - PyErr_Format(PyExc_RuntimeError, "No __code__ attribute found."); - goto error; - } - /* Populate builtins, which is required by some JITted functions */ - if (PyDict_SetItemString(globals, "__builtins__", builtins)) { - goto error; - } - frame = PyFrame_New(tstate, code, globals, NULL); - if (frame == NULL) { - goto error; - } - /* Populate the 'fast locals' in `frame` */ - Py_XDECREF(frame->f_locals); - frame->f_locals = locals; - Py_XINCREF(frame->f_locals); - PyFrame_LocalsToFast(frame, 0); - tstate->frame = frame; - C_TRACE(result, fn(PyCFunction_GET_SELF(cfunc), args, kws)); - tstate->frame = frame->f_back; - - error: - Py_XDECREF(frame); - Py_XDECREF(globals); - Py_XDECREF(code); - return result; - } - else - return fn(PyCFunction_GET_SELF(cfunc), args, kws); -} - -static -PyObject* -compile_and_invoke(DispatcherObject *self, PyObject *args, PyObject *kws, PyObject *locals) -{ - /* Compile a new one */ - PyObject *cfa, *cfunc, *retval; - cfa = PyObject_GetAttrString((PyObject*)self, "_compile_for_args"); - if (cfa == NULL) - return NULL; - - /* NOTE: we call the compiled function ourselves instead of - letting the Python derived class do it. This is for proper - behaviour of globals() in jitted functions (issue #476). 
*/ - cfunc = PyObject_Call(cfa, args, kws); - Py_DECREF(cfa); - - if (cfunc == NULL) - return NULL; - - if (PyObject_TypeCheck(cfunc, &PyCFunction_Type)) { - retval = call_cfunc(self, cfunc, args, kws, locals); - } else { - /* Re-enter interpreter */ - retval = PyObject_Call(cfunc, args, kws); - } - Py_DECREF(cfunc); - - return retval; -} - -static int -find_named_args(DispatcherObject *self, PyObject **pargs, PyObject **pkws) -{ - PyObject *oldargs = *pargs, *newargs; - PyObject *kws = *pkws; - Py_ssize_t pos_args = PyTuple_GET_SIZE(oldargs); - Py_ssize_t named_args, total_args, i; - Py_ssize_t func_args = PyTuple_GET_SIZE(self->argnames); - Py_ssize_t defaults = PyTuple_GET_SIZE(self->defargs); - /* Last parameter with a default value */ - Py_ssize_t last_def = (self->has_stararg) - ? func_args - 2 - : func_args - 1; - /* First parameter with a default value */ - Py_ssize_t first_def = last_def - defaults + 1; - /* Minimum number of required arguments */ - Py_ssize_t minargs = first_def; - - if (kws != NULL) - named_args = PyDict_Size(kws); - else - named_args = 0; - total_args = pos_args + named_args; - if (!self->has_stararg && total_args > func_args) { - PyErr_Format(PyExc_TypeError, - "too many arguments: expected %d, got %d", - (int) func_args, (int) total_args); - return -1; - } - else if (total_args < minargs) { - if (minargs == func_args) - PyErr_Format(PyExc_TypeError, - "not enough arguments: expected %d, got %d", - (int) minargs, (int) total_args); - else - PyErr_Format(PyExc_TypeError, - "not enough arguments: expected at least %d, got %d", - (int) minargs, (int) total_args); - return -1; - } - newargs = PyTuple_New(func_args); - if (!newargs) - return -1; - /* First pack the stararg */ - if (self->has_stararg) { - Py_ssize_t stararg_size = Py_MAX(0, pos_args - func_args + 1); - PyObject *stararg = PyTuple_New(stararg_size); - if (!stararg) { - Py_DECREF(newargs); - return -1; - } - for (i = 0; i < stararg_size; i++) { - PyObject *value = 
PyTuple_GET_ITEM(oldargs, func_args - 1 + i); - Py_INCREF(value); - PyTuple_SET_ITEM(stararg, i, value); - } - /* Put it in last position */ - PyTuple_SET_ITEM(newargs, func_args - 1, stararg); - - } - for (i = 0; i < pos_args; i++) { - PyObject *value = PyTuple_GET_ITEM(oldargs, i); - if (self->has_stararg && i >= func_args - 1) { - /* Skip stararg */ - break; - } - Py_INCREF(value); - PyTuple_SET_ITEM(newargs, i, value); - } - - /* Iterate over missing positional arguments, try to find them in - named arguments or default values. */ - for (i = pos_args; i < func_args; i++) { - PyObject *name = PyTuple_GET_ITEM(self->argnames, i); - if (self->has_stararg && i >= func_args - 1) { - /* Skip stararg */ - break; - } - if (kws != NULL) { - /* Named argument? */ - PyObject *value = PyDict_GetItem(kws, name); - if (value != NULL) { - Py_INCREF(value); - PyTuple_SET_ITEM(newargs, i, value); - named_args--; - continue; - } - } - if (i >= first_def && i <= last_def) { - /* Argument has a default value? 
*/ - PyObject *value = PyTuple_GET_ITEM(self->defargs, i - first_def); - Py_INCREF(value); - PyTuple_SET_ITEM(newargs, i, value); - continue; - } - else if (i < func_args - 1 || !self->has_stararg) { - PyErr_Format(PyExc_TypeError, - "missing argument '%s'", - PyString_AsString(name)); - Py_DECREF(newargs); - return -1; - } - } - if (named_args) { - PyErr_Format(PyExc_TypeError, - "some keyword arguments unexpected"); - Py_DECREF(newargs); - return -1; - } - *pargs = newargs; - *pkws = NULL; - return 0; -} - -static PyObject* -Dispatcher_call(DispatcherObject *self, PyObject *args, PyObject *kws) -{ - PyObject *tmptype, *retval = NULL; - int *tys; - int argct; - int i; - int prealloc[24]; - int matches; - PyObject *cfunc; - PyThreadState *ts = PyThreadState_Get(); - PyObject *locals = NULL; - if (ts->use_tracing && ts->c_profilefunc) - locals = PyEval_GetLocals(); - if (self->fold_args) { - if (find_named_args(self, &args, &kws)) - return NULL; - } - else - Py_INCREF(args); - /* Now we own a reference to args */ - - argct = PySequence_Fast_GET_SIZE(args); - - if (argct < (Py_ssize_t) (sizeof(prealloc) / sizeof(int))) - tys = prealloc; - else - tys = malloc(argct * sizeof(int)); - - for (i = 0; i < argct; ++i) { - tmptype = PySequence_Fast_GET_ITEM(args, i); - tys[i] = typeof_typecode((PyObject *) self, tmptype); - if (tys[i] == -1) { - if (self->can_fallback){ - /* We will clear the exception if fallback is allowed. */ - PyErr_Clear(); - } else { - goto CLEANUP; - } - } - } - - /* We only allow unsafe conversions if compilation of new specializations - has been disabled. */ - cfunc = dispatcher_resolve(self->dispatcher, tys, &matches, - !self->can_compile); - - if (matches == 0 && !self->can_compile) { - /* - * If we can't compile a new specialization, look for - * matching signatures for which conversions haven't been - * registered on the C++ TypeManager. 
- */ - int res = search_new_conversions((PyObject *) self, args, kws); - if (res < 0) { - retval = NULL; - goto CLEANUP; - } - if (res > 0) { - /* Retry with the newly registered conversions */ - cfunc = dispatcher_resolve(self->dispatcher, tys, &matches, - !self->can_compile); - } - } - - if (matches == 1) { - /* Definition is found */ - retval = call_cfunc(self, cfunc, args, kws, locals); - } else if (matches == 0) { - /* No matching definition */ - if (self->can_compile) { - retval = compile_and_invoke(self, args, kws, locals); - } else if (self->fallbackdef) { - /* Have object fallback */ - retval = call_cfunc(self, self->fallbackdef, args, kws, locals); - } else { - /* Raise TypeError */ - explain_matching_error((PyObject *) self, args, kws); - retval = NULL; - } - } else if (self->can_compile) { - /* Ambiguous, but are allowed to compile */ - retval = compile_and_invoke(self, args, kws, locals); - } else { - /* Ambiguous */ - explain_ambiguous((PyObject *) self, args, kws); - retval = NULL; - } - -CLEANUP: - if (tys != prealloc) - free(tys); - Py_DECREF(args); - - return retval; -} - -static PyMethodDef Dispatcher_methods[] = { - { "_clear", (PyCFunction)Dispatcher_clear, METH_NOARGS, NULL }, - { "_insert", (PyCFunction)Dispatcher_Insert, METH_VARARGS, - "insert new definition"}, - { NULL }, -}; - -static PyMemberDef Dispatcher_members[] = { - {"_can_compile", T_BOOL, offsetof(DispatcherObject, can_compile), 0}, - {NULL} /* Sentinel */ -}; - - -static PyTypeObject DispatcherType = { -#if (PY_MAJOR_VERSION < 3) - PyObject_HEAD_INIT(NULL) - 0, /* ob_size */ -#else - PyVarObject_HEAD_INIT(NULL, 0) -#endif - "_dispatcher.Dispatcher", /* tp_name */ - sizeof(DispatcherObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor)Dispatcher_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 
(PyCFunctionWithKeywords)Dispatcher_call, /* tp_call*/ - 0, /* tp_str*/ - 0, /* tp_getattro*/ - 0, /* tp_setattro*/ - 0, /* tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags*/ - "Dispatcher object", /* tp_doc */ - (traverseproc) Dispatcher_traverse, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - Dispatcher_methods, /* tp_methods */ - Dispatcher_members, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)Dispatcher_init, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ -}; - - -static PyObject *compute_fingerprint(PyObject *self, PyObject *args) -{ - PyObject *val; - if (!PyArg_ParseTuple(args, "O:compute_fingerprint", &val)) - return NULL; - return typeof_compute_fingerprint(val); -} - -static PyMethodDef ext_methods[] = { -#define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL } - declmethod(typeof_init), - declmethod(compute_fingerprint), - { NULL }, -#undef declmethod -}; - - -MOD_INIT(_dispatcher) { - PyObject *m; - MOD_DEF(m, "_dispatcher", "No docs", ext_methods) - if (m == NULL) - return MOD_ERROR_VAL; - - DispatcherType.tp_new = PyType_GenericNew; - if (PyType_Ready(&DispatcherType) < 0) { - return MOD_ERROR_VAL; - } - Py_INCREF(&DispatcherType); - PyModule_AddObject(m, "Dispatcher", (PyObject*)(&DispatcherType)); - - return MOD_SUCCESS_VAL(m); -} diff --git a/numba/numba/_dispatcher.h b/numba/numba/_dispatcher.h deleted file mode 100644 index 71ecfde0a..000000000 --- a/numba/numba/_dispatcher.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef NUMBA_DISPATCHER_H_ -#define NUMBA_DISPATCHER_H_ - -#ifdef __cplusplus - extern "C" { -#endif - -typedef struct _opaque_dispatcher dispatcher_t; - -dispatcher_t * -dispatcher_new(void *tm, int argct); - -void -dispatcher_clear(dispatcher_t *obj); - -void 
-dispatcher_del(dispatcher_t *obj); - -void -dispatcher_add_defn(dispatcher_t *obj, int tys[], void* callable); - -void* -dispatcher_resolve(dispatcher_t *obj, int sig[], int *matches, - int allow_unsafe); - -int -dispatcher_count(dispatcher_t *obj); - -#ifdef __cplusplus - } -#endif - -#endif /* NUMBA_DISPATCHER_H_ */ diff --git a/numba/numba/_dispatcherimpl.cpp b/numba/numba/_dispatcherimpl.cpp deleted file mode 100644 index b3ac936b3..000000000 --- a/numba/numba/_dispatcherimpl.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include "typeconv/typeconv.hpp" -#include -#include - -typedef std::vector TypeTable; -typedef std::vector Functions; - -struct _opaque_dispatcher {}; - -class Dispatcher: public _opaque_dispatcher { -public: - Dispatcher(TypeManager *tm, int argct): argct(argct), tm(tm) { } - - void addDefinition(Type args[], void *callable) { - overloads.reserve(argct + overloads.size()); - for (int i=0; iselectOverload(sig, &overloads[0], selected, argct, - ovct, allow_unsafe); - } - if (matches == 1) { - return functions[selected]; - } - return NULL; - } - - int count() const { return functions.size(); } - - void clear() { - functions.clear(); - overloads.clear(); - } - -private: - const int argct; - TypeManager *tm; - // An array of overloads - Functions functions; - // A flattened array of argument types to all overloads - // (invariant: sizeof(overloads) == argct * sizeof(functions)) - TypeTable overloads; -}; - - -#include "_dispatcher.h" - -dispatcher_t * -dispatcher_new(void *tm, int argct){ - return new Dispatcher(static_cast(tm), argct); -} - -void -dispatcher_clear(dispatcher_t *obj) { - Dispatcher *disp = static_cast(obj); - disp->clear(); -} - -void -dispatcher_del(dispatcher_t *obj) { - Dispatcher *disp = static_cast(obj); - delete disp; -} - -void -dispatcher_add_defn(dispatcher_t *obj, int tys[], void* callable) { - assert(sizeof(int) == sizeof(Type) && - "Type should be representable by an int"); - - Dispatcher *disp = static_cast(obj); - Type *args 
= reinterpret_cast(tys); - disp->addDefinition(args, callable); -} - -void* -dispatcher_resolve(dispatcher_t *obj, int sig[], int *count, int allow_unsafe) { - Dispatcher *disp = static_cast(obj); - Type *args = reinterpret_cast(sig); - void *callable = disp->resolve(args, *count, (bool) allow_unsafe); - return callable; -} - -int -dispatcher_count(dispatcher_t *obj) { - Dispatcher *disp = static_cast(obj); - return disp->count(); -} diff --git a/numba/numba/_dynfunc.c b/numba/numba/_dynfunc.c deleted file mode 100644 index d0330d87f..000000000 --- a/numba/numba/_dynfunc.c +++ /dev/null @@ -1,457 +0,0 @@ -/* - * Definition of Environment and Closure objects. - * This module is included by _dynfuncmod.c and by pycc-compiled modules. - */ - -#include "_pymodule.h" - -#include - -/* NOTE: EnvironmentObject and ClosureObject must be kept in sync with - * the definitions in numba/targets/base.py (EnvBody and ClosureBody). - */ - -/* - * EnvironmentObject hosts data needed for execution of compiled functions. - */ -typedef struct { - PyObject_HEAD - PyObject *globals; - /* Assorted "constants" that are needed at runtime to execute - the compiled function. This can include frozen closure variables, - lifted loops, etc. 
*/ - PyObject *consts; -} EnvironmentObject; - - -static PyMemberDef env_members[] = { - {"globals", T_OBJECT, offsetof(EnvironmentObject, globals), READONLY}, - {"consts", T_OBJECT, offsetof(EnvironmentObject, consts), READONLY}, - {NULL} /* Sentinel */ -}; - -static int -env_traverse(EnvironmentObject *env, visitproc visit, void *arg) -{ - Py_VISIT(env->globals); - Py_VISIT(env->consts); - return 0; -} - -static int -env_clear(EnvironmentObject *env) -{ - Py_CLEAR(env->globals); - Py_CLEAR(env->consts); - return 0; -} - -static void -env_dealloc(EnvironmentObject *env) -{ - _PyObject_GC_UNTRACK((PyObject *) env); - env_clear(env); - Py_TYPE(env)->tp_free((PyObject *) env); -} - -static EnvironmentObject * -env_new_empty(PyTypeObject* type) -{ - return (EnvironmentObject *) PyType_GenericNew(type, NULL, NULL); -} - -static PyObject * -env_new(PyTypeObject* type, PyObject* args, PyObject* kwds) -{ - PyObject *globals; - EnvironmentObject *env; - static char *kwlist[] = {"globals", 0}; - - if (!PyArg_ParseTupleAndKeywords( - args, kwds, "O!:function", kwlist, - &PyDict_Type, &globals)) - return NULL; - - env = env_new_empty(type); - if (env == NULL) - return NULL; - Py_INCREF(globals); - env->globals = globals; - env->consts = PyList_New(0); - if (!env->consts) { - Py_DECREF(env); - return NULL; - } - return (PyObject *) env; -} - - -static PyTypeObject EnvironmentType = { -#if (PY_MAJOR_VERSION < 3) - PyObject_HEAD_INIT(NULL) - 0, /*ob_size*/ -#else - PyVarObject_HEAD_INIT(NULL, 0) -#endif - "_dynfunc.Environment", /*tp_name*/ - sizeof(EnvironmentObject), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor) env_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | 
Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - 0, /* tp_doc */ - (traverseproc) env_traverse, /* tp_traverse */ - (inquiry) env_clear, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - env_members, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - env_new, /* tp_new */ -}; - -/* A closure object is created for each call to make_function(), and stored - as the resulting PyCFunction object's "self" pointer. It points to an - EnvironmentObject which is constructed during compilation. This allows - for two things: - - lifetime management of dependent data (e.g. lifted loop dispatchers) - - access to the execution environment by the compiled function - (for example the globals module) - */ - -/* Closure is a variable-sized object for binary compatibility with - Generator (see below). */ -#define CLOSURE_HEAD \ - PyObject_VAR_HEAD \ - EnvironmentObject *env; - -typedef struct { - CLOSURE_HEAD - /* The dynamically-filled method definition for the PyCFunction object - using this closure. */ - PyMethodDef def; - /* Arbitrary object to keep alive during the closure's lifetime. - (put a tuple to put several objects alive). - In practice, this helps keep the LLVM module and its generated - code alive. 
*/ - PyObject *keepalive; - PyObject *weakreflist; -} ClosureObject; - - -static int -closure_traverse(ClosureObject *clo, visitproc visit, void *arg) -{ - Py_VISIT(clo->env); - Py_VISIT(clo->keepalive); - return 0; -} - -static void -closure_dealloc(ClosureObject *clo) -{ - _PyObject_GC_UNTRACK((PyObject *) clo); - if (clo->weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *) clo); - PyObject_Free((void *) clo->def.ml_name); - PyObject_Free((void *) clo->def.ml_doc); - Py_XDECREF(clo->env); - Py_XDECREF(clo->keepalive); - Py_TYPE(clo)->tp_free((PyObject *) clo); -} - -static PyTypeObject ClosureType = { -#if (PY_MAJOR_VERSION < 3) - PyObject_HEAD_INIT(NULL) - 0, /*ob_size*/ -#else - PyVarObject_HEAD_INIT(NULL, 0) -#endif - "_dynfunc._Closure", /*tp_name*/ - sizeof(ClosureObject), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor) closure_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - 0, /* tp_doc */ - (traverseproc) closure_traverse, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - offsetof(ClosureObject, weakreflist), /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ -}; - - -/* Return an owned piece of character data duplicating a Python string - object's value. */ -static char * -dup_string(PyObject *strobj) -{ - char *tmp, *str; - tmp = PyString_AsString(strobj); - if (tmp == NULL) - return NULL; - /* Using PyObject_Malloc allows this memory to be tracked for - leaks. 
*/ - str = PyObject_Malloc(strlen(tmp) + 1); - if (str == NULL) { - PyErr_NoMemory(); - return NULL; - } - strcpy(str, tmp); - return str; -} - -/* Create and initialize a new Closure object */ -static ClosureObject * -closure_new(PyObject *module, PyObject *name, PyObject *doc, - PyCFunction fnaddr, EnvironmentObject *env, PyObject *keepalive) -{ - ClosureObject *clo = (ClosureObject *) PyType_GenericAlloc(&ClosureType, 0); - if (clo == NULL) - return NULL; - - clo->def.ml_name = dup_string(name); - if (!clo->def.ml_name) { - Py_DECREF(clo); - return NULL; - } - clo->def.ml_meth = fnaddr; - clo->def.ml_flags = METH_VARARGS | METH_KEYWORDS; - clo->def.ml_doc = dup_string(doc); - if (!clo->def.ml_doc) { - Py_DECREF(clo); - return NULL; - } - Py_INCREF(env); - clo->env = env; - Py_XINCREF(keepalive); - clo->keepalive = keepalive; - return clo; -} - -/* Create a new PyCFunction object wrapping a closure defined by - the given arguments. */ -static PyObject * -pycfunction_new(PyObject *module, PyObject *name, PyObject *doc, - PyCFunction fnaddr, EnvironmentObject *env, PyObject *keepalive) -{ - PyObject *funcobj; - PyObject *modname; - ClosureObject *closure; - - closure = closure_new(module, name, doc, fnaddr, env, keepalive); - if (closure == NULL) - return NULL; - - modname = PyString_FromString(PyModule_GetName(module)); - funcobj = PyCFunction_NewEx(&closure->def, (PyObject *) closure, modname); - Py_DECREF(closure); - Py_DECREF(modname); - return funcobj; -} - -/* - * Python-facing wrapper for Numba-compiled generator. - * Note the Environment's offset inside the struct is the same as in the - * Closure object. This is required to simplify generation of Python wrappers. 
- */ - -typedef void (*gen_finalizer_t)(void *); - -typedef struct { - CLOSURE_HEAD - PyCFunctionWithKeywords nextfunc; - gen_finalizer_t finalizer; - PyObject *weakreflist; - union { - double dummy; /* Force alignment */ - char state[0]; - }; -} GeneratorObject; - -static int -generator_traverse(GeneratorObject *gen, visitproc visit, void *arg) -{ - /* XXX this doesn't traverse the state, which can own references to - PyObjects */ - Py_VISIT(gen->env); - return 0; -} - -static int -generator_clear(GeneratorObject *gen) -{ - if (gen->finalizer != NULL) { - gen->finalizer(gen->state); - gen->finalizer = NULL; - } - Py_CLEAR(gen->env); - gen->nextfunc = NULL; - return 0; -} - -static void -generator_dealloc(GeneratorObject *gen) -{ - _PyObject_GC_UNTRACK((PyObject *) gen); - if (gen->weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *) gen); - /* XXX The finalizer may be called after the LLVM module has been - destroyed (typically at interpreter shutdown) */ -#if PY_MAJOR_VERSION >= 3 -#if PY_MINOR_VERSION >= 7 - if (!_Py_IsFinalizing()) -#else - if (!_Py_Finalizing) -#endif -#endif - if (gen->finalizer != NULL) - gen->finalizer(gen->state); - Py_XDECREF(gen->env); - Py_TYPE(gen)->tp_free((PyObject *) gen); -} - -static PyObject * -generator_iternext(GeneratorObject *gen) -{ - PyObject *res, *args; - if (gen->nextfunc == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "cannot call next() on finalized generator"); - return NULL; - } - args = PyTuple_Pack(1, (PyObject *) gen); - if (args == NULL) - return NULL; - res = (*gen->nextfunc)((PyObject *) gen, args, NULL); - Py_DECREF(args); - return res; -} - -static PyTypeObject GeneratorType = { -#if (PY_MAJOR_VERSION < 3) - PyObject_HEAD_INIT(NULL) - 0, /* ob_size*/ -#else - PyVarObject_HEAD_INIT(NULL, 0) -#endif - "_dynfunc._Generator", /* tp_name*/ - offsetof(GeneratorObject, state), /* tp_basicsize*/ - 1, /* tp_itemsize*/ - (destructor) generator_dealloc, /* tp_dealloc*/ - 0, /* tp_print*/ - 0, /* tp_getattr*/ - 
0, /* tp_setattr*/ - 0, /* tp_compare*/ - 0, /* tp_repr*/ - 0, /* tp_as_number*/ - 0, /* tp_as_sequence*/ - 0, /* tp_as_mapping*/ - 0, /* tp_hash */ - 0, /* tp_call*/ - 0, /* tp_str*/ - 0, /* tp_getattro*/ - 0, /* tp_setattro*/ - 0, /* tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC - | Py_TPFLAGS_BASETYPE, /* tp_flags*/ - 0, /* tp_doc */ - (traverseproc) generator_traverse, /* tp_traverse */ - (inquiry) generator_clear, /* tp_clear */ - 0, /* tp_richcompare */ - offsetof(GeneratorObject, weakreflist), /* tp_weaklistoffset */ - PyObject_SelfIter, /* tp_iter */ - (iternextfunc) generator_iternext, /* tp_iternext */ - 0, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ -}; - -/* Dynamically create a new generator object */ -static PyObject * -Numba_make_generator(Py_ssize_t gen_state_size, - void *initial_state, - PyCFunctionWithKeywords nextfunc, - gen_finalizer_t finalizer, - EnvironmentObject *env) -{ - GeneratorObject *gen; - gen = (GeneratorObject *) PyType_GenericAlloc(&GeneratorType, gen_state_size); - if (gen == NULL) - return NULL; - memcpy(gen->state, initial_state, gen_state_size); - gen->nextfunc = nextfunc; - Py_XINCREF(env); - gen->env = env; - gen->finalizer = finalizer; - return (PyObject *) gen; -} - -/* Initialization subroutine for use by modules including this */ -static int -init_dynfunc_module(PyObject *module) -{ - if (PyType_Ready(&ClosureType)) - return -1; - if (PyType_Ready(&EnvironmentType)) - return -1; - if (PyType_Ready(&GeneratorType)) - return -1; - return 0; -} diff --git a/numba/numba/_dynfuncmod.c b/numba/numba/_dynfuncmod.c deleted file mode 100644 index 5d80529c0..000000000 --- a/numba/numba/_dynfuncmod.c +++ /dev/null @@ -1,93 +0,0 @@ -#include "_dynfunc.c" - -/* Python-facing function to dynamically create a new C function object */ -static 
PyObject* -make_function(PyObject *self, PyObject *args) -{ - PyObject *module, *fname, *fdoc, *fnaddrobj; - void *fnaddr; - EnvironmentObject *env; - PyObject *keepalive; - - if (!PyArg_ParseTuple(args, "OOOOO!|O", - &module, &fname, &fdoc, &fnaddrobj, &EnvironmentType, &env, - &keepalive)) { - return NULL; - } - - fnaddr = PyLong_AsVoidPtr(fnaddrobj); - if (fnaddr == NULL && PyErr_Occurred()) - return NULL; - - return pycfunction_new(module, fname, fdoc, fnaddr, env, keepalive); -} - -static PyMethodDef ext_methods[] = { -#define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL } - declmethod(make_function), - { NULL }, -#undef declmethod -}; - - -static PyObject * -build_c_helpers_dict(void) -{ - PyObject *dct = PyDict_New(); - if (dct == NULL) - goto error; - -#define _declpointer(name, value) do { \ - PyObject *o = PyLong_FromVoidPtr(value); \ - if (o == NULL) goto error; \ - if (PyDict_SetItemString(dct, name, o)) { \ - Py_DECREF(o); \ - goto error; \ - } \ - Py_DECREF(o); \ -} while (0) - -#define declmethod(func) _declpointer(#func, &Numba_##func) - -#define declpointer(ptr) _declpointer(#ptr, &ptr) - - declmethod(make_generator); - -#undef declmethod - return dct; -error: - Py_XDECREF(dct); - return NULL; -} - -MOD_INIT(_dynfunc) { - PyObject *m, *impl_info; - - MOD_DEF(m, "_dynfunc", "No docs", ext_methods) - if (m == NULL) - return MOD_ERROR_VAL; - - if (init_dynfunc_module(m)) - return MOD_ERROR_VAL; - - impl_info = Py_BuildValue( - "{snsnsn}", - "offsetof_closure_body", offsetof(ClosureObject, env), - "offsetof_env_body", offsetof(EnvironmentObject, globals), - "offsetof_generator_state", offsetof(GeneratorObject, state) - ); - if (impl_info == NULL) - return MOD_ERROR_VAL; - PyModule_AddObject(m, "_impl_info", impl_info); - - Py_INCREF(&ClosureType); - PyModule_AddObject(m, "_Closure", (PyObject *) (&ClosureType)); - Py_INCREF(&EnvironmentType); - PyModule_AddObject(m, "Environment", (PyObject *) (&EnvironmentType)); - 
Py_INCREF(&GeneratorType); - PyModule_AddObject(m, "_Generator", (PyObject *) (&GeneratorType)); - - PyModule_AddObject(m, "c_helpers", build_c_helpers_dict()); - - return MOD_SUCCESS_VAL(m); -} diff --git a/numba/numba/_hashtable.c b/numba/numba/_hashtable.c deleted file mode 100644 index 76392f79e..000000000 --- a/numba/numba/_hashtable.c +++ /dev/null @@ -1,530 +0,0 @@ -/* - * This file and _hashtable.h are from CPython 3.5. The symbols have been - * renamed from _Py_hashxxx to _Numba_hashxxx to avoid name clashes with - * the CPython definitions (including at runtime through dynamic linking). - * Those CPython APIs are private and can change in incompatible ways at - * any time. - * - * Command line used for renaming: - * $ sed -i -r 's/\b_Py_(has[h]table)/_Numba_\1/ig' numba/_hashtable.h numba/_hashtable.c - */ - -/* The implementation of the hash table (_Numba_hashtable_t) is based on the cfuhash - project: - http://sourceforge.net/projects/libcfu/ - - Copyright of cfuhash: - ---------------------------------- - Creation date: 2005-06-24 21:22:40 - Authors: Don - Change log: - - Copyright (c) 2005 Don Owens - All rights reserved. - - This code is released under the BSD license: - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the author nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - OF THE POSSIBILITY OF SUCH DAMAGE. - ---------------------------------- -*/ - -#include "_pymodule.h" -#include "_hashtable.h" - -#define HASHTABLE_MIN_SIZE 16 -#define HASHTABLE_HIGH 0.50 -#define HASHTABLE_LOW 0.10 -#define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH) - -#define BUCKETS_HEAD(SLIST) \ - ((_Numba_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST))) -#define TABLE_HEAD(HT, BUCKET) \ - ((_Numba_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET])) -#define ENTRY_NEXT(ENTRY) \ - ((_Numba_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) -#define HASHTABLE_ITEM_SIZE(HT) \ - (sizeof(_Numba_hashtable_entry_t) + (HT)->data_size) - -/* Forward declaration */ -static void hashtable_rehash(_Numba_hashtable_t *ht); - -static void -_Py_slist_init(_Py_slist_t *list) -{ - list->head = NULL; -} - -static void -_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) -{ - item->next = list->head; - list->head = item; -} - -static void -_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, - _Py_slist_item_t *item) -{ - if (previous != NULL) - previous->next = item->next; - else - list->head = item->next; -} - -Py_uhash_t -_Numba_hashtable_hash_int(const void *key) -{ - return (Py_uhash_t)key; -} - 
-Py_uhash_t -_Numba_hashtable_hash_ptr(const void *key) -{ - return (Py_uhash_t)_Py_HashPointer((void *)key); -} - -int -_Numba_hashtable_compare_direct(const void *key, const _Numba_hashtable_entry_t *entry) -{ - return entry->key == key; -} - -/* makes sure the real size of the buckets array is a power of 2 */ -static size_t -round_size(size_t s) -{ - size_t i; - if (s < HASHTABLE_MIN_SIZE) - return HASHTABLE_MIN_SIZE; - i = 1; - while (i < s) - i <<= 1; - return i; -} - -_Numba_hashtable_t * -_Numba_hashtable_new_full(size_t data_size, size_t init_size, - _Numba_hashtable_hash_func hash_func, - _Numba_hashtable_compare_func compare_func, - _Numba_hashtable_copy_data_func copy_data_func, - _Numba_hashtable_free_data_func free_data_func, - _Numba_hashtable_get_data_size_func get_data_size_func, - _Numba_hashtable_allocator_t *allocator) -{ - _Numba_hashtable_t *ht; - size_t buckets_size; - _Numba_hashtable_allocator_t alloc; - - if (allocator == NULL) { - alloc.malloc = PyMem_RawMalloc; - alloc.free = PyMem_RawFree; - } - else - alloc = *allocator; - - ht = (_Numba_hashtable_t *)alloc.malloc(sizeof(_Numba_hashtable_t)); - if (ht == NULL) - return ht; - - ht->num_buckets = round_size(init_size); - ht->entries = 0; - ht->data_size = data_size; - - buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); - ht->buckets = alloc.malloc(buckets_size); - if (ht->buckets == NULL) { - alloc.free(ht); - return NULL; - } - memset(ht->buckets, 0, buckets_size); - - ht->hash_func = hash_func; - ht->compare_func = compare_func; - ht->copy_data_func = copy_data_func; - ht->free_data_func = free_data_func; - ht->get_data_size_func = get_data_size_func; - ht->alloc = alloc; - return ht; -} - -_Numba_hashtable_t * -_Numba_hashtable_new(size_t data_size, - _Numba_hashtable_hash_func hash_func, - _Numba_hashtable_compare_func compare_func) -{ - return _Numba_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE, - hash_func, compare_func, - NULL, NULL, NULL, NULL); -} - -size_t 
-_Numba_hashtable_size(_Numba_hashtable_t *ht) -{ - size_t size; - size_t hv; - - size = sizeof(_Numba_hashtable_t); - - /* buckets */ - size += ht->num_buckets * sizeof(_Numba_hashtable_entry_t *); - - /* entries */ - size += ht->entries * HASHTABLE_ITEM_SIZE(ht); - - /* data linked from entries */ - if (ht->get_data_size_func) { - for (hv = 0; hv < ht->num_buckets; hv++) { - _Numba_hashtable_entry_t *entry; - - for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { - void *data; - - data = _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); - size += ht->get_data_size_func(data); - } - } - } - return size; -} - -#ifdef Py_DEBUG -void -_Numba_hashtable_print_stats(_Numba_hashtable_t *ht) -{ - size_t size; - size_t chain_len, max_chain_len, total_chain_len, nchains; - _Numba_hashtable_entry_t *entry; - size_t hv; - double load; - - size = _Numba_hashtable_size(ht); - - load = (double)ht->entries / ht->num_buckets; - - max_chain_len = 0; - total_chain_len = 0; - nchains = 0; - for (hv = 0; hv < ht->num_buckets; hv++) { - entry = TABLE_HEAD(ht, hv); - if (entry != NULL) { - chain_len = 0; - for (; entry; entry = ENTRY_NEXT(entry)) { - chain_len++; - } - if (chain_len > max_chain_len) - max_chain_len = chain_len; - total_chain_len += chain_len; - nchains++; - } - } - printf("hash table %p: entries=%" - PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ", - ht, ht->entries, ht->num_buckets, load * 100.0); - if (nchains) - printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains); - printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u kB\n", - max_chain_len, size / 1024); -} -#endif - -/* Get an entry. Return NULL if the key does not exist. 
*/ -_Numba_hashtable_entry_t * -_Numba_hashtable_get_entry(_Numba_hashtable_t *ht, const void *key) -{ - Py_uhash_t key_hash; - size_t index; - _Numba_hashtable_entry_t *entry; - - key_hash = ht->hash_func(key); - index = key_hash & (ht->num_buckets - 1); - - for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { - if (entry->key_hash == key_hash && ht->compare_func(key, entry)) - break; - } - - return entry; -} - -static int -_hashtable_pop_entry(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size) -{ - Py_uhash_t key_hash; - size_t index; - _Numba_hashtable_entry_t *entry, *previous; - - key_hash = ht->hash_func(key); - index = key_hash & (ht->num_buckets - 1); - - previous = NULL; - for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { - if (entry->key_hash == key_hash && ht->compare_func(key, entry)) - break; - previous = entry; - } - - if (entry == NULL) - return 0; - - _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous, - (_Py_slist_item_t *)entry); - ht->entries--; - - if (data != NULL) - _Numba_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); - ht->alloc.free(entry); - - if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) - hashtable_rehash(ht); - return 1; -} - -/* Add a new entry to the hash. The key must not be present in the hash table. - Return 0 on success, -1 on memory error. */ -int -_Numba_hashtable_set(_Numba_hashtable_t *ht, const void *key, - void *data, size_t data_size) -{ - Py_uhash_t key_hash; - size_t index; - _Numba_hashtable_entry_t *entry; - - assert(data != NULL || data_size == 0); -#ifndef NDEBUG - /* Don't write the assertion on a single line because it is interesting - to know the duplicated entry if the assertion failed. The entry can - be read using a debugger. 
*/ - entry = _Numba_hashtable_get_entry(ht, key); - assert(entry == NULL); -#endif - - key_hash = ht->hash_func(key); - index = key_hash & (ht->num_buckets - 1); - - entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht)); - if (entry == NULL) { - /* memory allocation failed */ - return -1; - } - - entry->key = (void *)key; - entry->key_hash = key_hash; - - assert(data_size == ht->data_size); - memcpy(_Numba_HASHTABLE_ENTRY_DATA(entry), data, data_size); - - _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); - ht->entries++; - - if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH) - hashtable_rehash(ht); - return 0; -} - -/* Get data from an entry. Copy entry data into data and return 1 if the entry - exists, return 0 if the entry does not exist. */ -int -_Numba_hashtable_get(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size) -{ - _Numba_hashtable_entry_t *entry; - - assert(data != NULL); - - entry = _Numba_hashtable_get_entry(ht, key); - if (entry == NULL) - return 0; - _Numba_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); - return 1; -} - -int -_Numba_hashtable_pop(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size) -{ - assert(data != NULL); - assert(ht->free_data_func == NULL); - return _hashtable_pop_entry(ht, key, data, data_size); -} - -/* Delete an entry. The entry must exist. */ -void -_Numba_hashtable_delete(_Numba_hashtable_t *ht, const void *key) -{ -#ifndef NDEBUG - int found = _hashtable_pop_entry(ht, key, NULL, 0); - assert(found); -#else - (void)_hashtable_pop_entry(ht, key, NULL, 0); -#endif -} - -/* Prototype for a pointer to a function to be called foreach - key/value pair in the hash by hashtable_foreach(). Iteration - stops if a non-zero value is returned. 
*/ -int -_Numba_hashtable_foreach(_Numba_hashtable_t *ht, - int (*func) (_Numba_hashtable_entry_t *entry, void *arg), - void *arg) -{ - _Numba_hashtable_entry_t *entry; - size_t hv; - - for (hv = 0; hv < ht->num_buckets; hv++) { - for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { - int res = func(entry, arg); - if (res) - return res; - } - } - return 0; -} - -static void -hashtable_rehash(_Numba_hashtable_t *ht) -{ - size_t buckets_size, new_size, bucket; - _Py_slist_t *old_buckets = NULL; - size_t old_num_buckets; - - new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR)); - if (new_size == ht->num_buckets) - return; - - old_num_buckets = ht->num_buckets; - - buckets_size = new_size * sizeof(ht->buckets[0]); - old_buckets = ht->buckets; - ht->buckets = ht->alloc.malloc(buckets_size); - if (ht->buckets == NULL) { - /* cancel rehash on memory allocation failure */ - ht->buckets = old_buckets ; - /* memory allocation failed */ - return; - } - memset(ht->buckets, 0, buckets_size); - - ht->num_buckets = new_size; - - for (bucket = 0; bucket < old_num_buckets; bucket++) { - _Numba_hashtable_entry_t *entry, *next; - for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) { - size_t entry_index; - - assert(ht->hash_func(entry->key) == entry->key_hash); - next = ENTRY_NEXT(entry); - entry_index = entry->key_hash & (new_size - 1); - - _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry); - } - } - - ht->alloc.free(old_buckets); -} - -void -_Numba_hashtable_clear(_Numba_hashtable_t *ht) -{ - _Numba_hashtable_entry_t *entry, *next; - size_t i; - - for (i=0; i < ht->num_buckets; i++) { - for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) { - next = ENTRY_NEXT(entry); - if (ht->free_data_func) - ht->free_data_func(_Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); - ht->alloc.free(entry); - } - _Py_slist_init(&ht->buckets[i]); - } - ht->entries = 0; - hashtable_rehash(ht); -} - -void 
-_Numba_hashtable_destroy(_Numba_hashtable_t *ht) -{ - size_t i; - - for (i = 0; i < ht->num_buckets; i++) { - _Py_slist_item_t *entry = ht->buckets[i].head; - while (entry) { - _Py_slist_item_t *entry_next = entry->next; - if (ht->free_data_func) - ht->free_data_func(_Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); - ht->alloc.free(entry); - entry = entry_next; - } - } - - ht->alloc.free(ht->buckets); - ht->alloc.free(ht); -} - -/* Return a copy of the hash table */ -_Numba_hashtable_t * -_Numba_hashtable_copy(_Numba_hashtable_t *src) -{ - _Numba_hashtable_t *dst; - _Numba_hashtable_entry_t *entry; - size_t bucket; - int err; - void *data, *new_data; - - dst = _Numba_hashtable_new_full(src->data_size, src->num_buckets, - src->hash_func, src->compare_func, - src->copy_data_func, src->free_data_func, - src->get_data_size_func, &src->alloc); - if (dst == NULL) - return NULL; - - for (bucket=0; bucket < src->num_buckets; bucket++) { - entry = TABLE_HEAD(src, bucket); - for (; entry; entry = ENTRY_NEXT(entry)) { - if (src->copy_data_func) { - data = _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); - new_data = src->copy_data_func(data); - if (new_data != NULL) - err = _Numba_hashtable_set(dst, entry->key, - &new_data, src->data_size); - else - err = 1; - } - else { - data = _Numba_HASHTABLE_ENTRY_DATA(entry); - err = _Numba_hashtable_set(dst, entry->key, data, src->data_size); - } - if (err) { - _Numba_hashtable_destroy(dst); - return NULL; - } - } - } - return dst; -} - diff --git a/numba/numba/_hashtable.h b/numba/numba/_hashtable.h deleted file mode 100644 index 37430429d..000000000 --- a/numba/numba/_hashtable.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * See _hashtable.c for more information about this file. 
- */ - -#ifndef Py_HASHTABLE_H -#define Py_HASHTABLE_H - -/* The whole API is private */ -#ifndef Py_LIMITED_API - -typedef struct _Py_slist_item_s { - struct _Py_slist_item_s *next; -} _Py_slist_item_t; - -typedef struct { - _Py_slist_item_t *head; -} _Py_slist_t; - -#define _Py_SLIST_ITEM_NEXT(ITEM) (((_Py_slist_item_t *)ITEM)->next) - -#define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)SLIST)->head) - -typedef struct { - /* used by _Numba_hashtable_t.buckets to link entries */ - _Py_slist_item_t _Py_slist_item; - - const void *key; - Py_uhash_t key_hash; - - /* data follows */ -} _Numba_hashtable_entry_t; - -#define _Numba_HASHTABLE_ENTRY_DATA(ENTRY) \ - ((char *)(ENTRY) + sizeof(_Numba_hashtable_entry_t)) - -#define _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \ - (*(void **)_Numba_HASHTABLE_ENTRY_DATA(ENTRY)) - -#define _Numba_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \ - do { \ - assert((DATA_SIZE) == (TABLE)->data_size); \ - memcpy(DATA, _Numba_HASHTABLE_ENTRY_DATA(ENTRY), DATA_SIZE); \ - } while (0) - -typedef Py_uhash_t (*_Numba_hashtable_hash_func) (const void *key); -typedef int (*_Numba_hashtable_compare_func) (const void *key, const _Numba_hashtable_entry_t *he); -typedef void* (*_Numba_hashtable_copy_data_func)(void *data); -typedef void (*_Numba_hashtable_free_data_func)(void *data); -typedef size_t (*_Numba_hashtable_get_data_size_func)(void *data); - -typedef struct { - /* allocate a memory block */ - void* (*malloc) (size_t size); - - /* release a memory block */ - void (*free) (void *ptr); -} _Numba_hashtable_allocator_t; - -typedef struct { - size_t num_buckets; - size_t entries; /* Total number of entries in the table. 
*/ - _Py_slist_t *buckets; - size_t data_size; - - _Numba_hashtable_hash_func hash_func; - _Numba_hashtable_compare_func compare_func; - _Numba_hashtable_copy_data_func copy_data_func; - _Numba_hashtable_free_data_func free_data_func; - _Numba_hashtable_get_data_size_func get_data_size_func; - _Numba_hashtable_allocator_t alloc; -} _Numba_hashtable_t; - -/* hash and compare functions for integers and pointers */ -PyAPI_FUNC(Py_uhash_t) _Numba_hashtable_hash_ptr(const void *key); -PyAPI_FUNC(Py_uhash_t) _Numba_hashtable_hash_int(const void *key); -PyAPI_FUNC(int) _Numba_hashtable_compare_direct(const void *key, const _Numba_hashtable_entry_t *entry); - -PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_new( - size_t data_size, - _Numba_hashtable_hash_func hash_func, - _Numba_hashtable_compare_func compare_func); -PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_new_full( - size_t data_size, - size_t init_size, - _Numba_hashtable_hash_func hash_func, - _Numba_hashtable_compare_func compare_func, - _Numba_hashtable_copy_data_func copy_data_func, - _Numba_hashtable_free_data_func free_data_func, - _Numba_hashtable_get_data_size_func get_data_size_func, - _Numba_hashtable_allocator_t *allocator); -PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_copy(_Numba_hashtable_t *src); -PyAPI_FUNC(void) _Numba_hashtable_clear(_Numba_hashtable_t *ht); -PyAPI_FUNC(void) _Numba_hashtable_destroy(_Numba_hashtable_t *ht); - -typedef int (*_Numba_hashtable_foreach_func) (_Numba_hashtable_entry_t *entry, void *arg); - -PyAPI_FUNC(int) _Numba_hashtable_foreach( - _Numba_hashtable_t *ht, - _Numba_hashtable_foreach_func func, void *arg); -PyAPI_FUNC(size_t) _Numba_hashtable_size(_Numba_hashtable_t *ht); - -PyAPI_FUNC(_Numba_hashtable_entry_t*) _Numba_hashtable_get_entry( - _Numba_hashtable_t *ht, - const void *key); -PyAPI_FUNC(int) _Numba_hashtable_set( - _Numba_hashtable_t *ht, - const void *key, - void *data, - size_t data_size); -PyAPI_FUNC(int) _Numba_hashtable_get( - 
_Numba_hashtable_t *ht, - const void *key, - void *data, - size_t data_size); -PyAPI_FUNC(int) _Numba_hashtable_pop( - _Numba_hashtable_t *ht, - const void *key, - void *data, - size_t data_size); -PyAPI_FUNC(void) _Numba_hashtable_delete( - _Numba_hashtable_t *ht, - const void *key); - -#define _Numba_HASHTABLE_SET(TABLE, KEY, DATA) \ - _Numba_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA)) - -#define _Numba_HASHTABLE_GET(TABLE, KEY, DATA) \ - _Numba_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA)) - -#endif /* Py_LIMITED_API */ - -#endif diff --git a/numba/numba/_helperlib.c b/numba/numba/_helperlib.c deleted file mode 100644 index c25a1059c..000000000 --- a/numba/numba/_helperlib.c +++ /dev/null @@ -1,969 +0,0 @@ -/* - * Helper functions used by Numba at runtime. - * This C file is meant to be included after defining the - * NUMBA_EXPORT_FUNC() and NUMBA_EXPORT_DATA() macros. - */ - -#include "_pymodule.h" -#include -#include -#include "_math_c99.h" -#ifdef _MSC_VER - #define int64_t signed __int64 - #define uint64_t unsigned __int64 -#else - #include -#endif -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#include -#include -#include - -#include "_arraystruct.h" - -/* - * Other helpers. 
- */ - - -/* Fix fmod() and fmodf() for windows x64 VC 9.0 (VS 2008) -https://support.microsoft.com/en-us/kb/982107 -*/ -static void (*fnclex)(void) = NULL; - -NUMBA_EXPORT_FUNC(double) -numba_fixed_fmod(double x, double y){ - fnclex(); /* no inline asm in x64 =( */ - return fmod(x, y); -} - -NUMBA_EXPORT_FUNC(float) -numba_fixed_fmodf(float x, float y) { - fnclex(); /* no inline asm in x64 =( */ - return fmodf(x, y); -} - -NUMBA_EXPORT_FUNC(void) -numba_set_fnclex(void *fn){ - fnclex = fn; -} - -/* provide 64-bit division function to 32-bit platforms */ -NUMBA_EXPORT_FUNC(int64_t) -numba_sdiv(int64_t a, int64_t b) { - return a / b; -} - -NUMBA_EXPORT_FUNC(uint64_t) -numba_udiv(uint64_t a, uint64_t b) { - return a / b; -} - -/* provide 64-bit remainder function to 32-bit platforms */ -NUMBA_EXPORT_FUNC(int64_t) -numba_srem(int64_t a, int64_t b) { - return a % b; -} - -NUMBA_EXPORT_FUNC(uint64_t) -numba_urem(uint64_t a, uint64_t b) { - return a % b; -} - -/* provide frexp and ldexp; these wrappers deal with special cases - * (zero, nan, infinity) directly, to sidestep platform differences. 
- */ -NUMBA_EXPORT_FUNC(double) -numba_frexp(double x, int *exp) -{ - if (!Py_IS_FINITE(x) || !x) - *exp = 0; - else - x = frexp(x, exp); - return x; -} - -NUMBA_EXPORT_FUNC(float) -numba_frexpf(float x, int *exp) -{ - if (Py_IS_NAN(x) || Py_IS_INFINITY(x) || !x) - *exp = 0; - else - x = frexpf(x, exp); - return x; -} - -NUMBA_EXPORT_FUNC(double) -numba_ldexp(double x, int exp) -{ - if (Py_IS_FINITE(x) && x && exp) - x = ldexp(x, exp); - return x; -} - -NUMBA_EXPORT_FUNC(float) -numba_ldexpf(float x, int exp) -{ - if (Py_IS_FINITE(x) && x && exp) - x = ldexpf(x, exp); - return x; -} - -/* provide complex power */ -NUMBA_EXPORT_FUNC(void) -numba_cpow(Py_complex *a, Py_complex *b, Py_complex *out) { - errno = 0; - *out = _Py_c_pow(*a, *b); - if (errno == EDOM) { - /* _Py_c_pow() doesn't bother returning the right value - in this case, as Python raises ZeroDivisionError */ - out->real = out->imag = Py_NAN; - } -} - -NUMBA_EXPORT_FUNC(void) -numba_cpowf(npy_cfloat *a, npy_cfloat *b, npy_cfloat *out) { - Py_complex _a, _b, _out; - _a.real = npy_crealf(*a); - _a.imag = npy_cimagf(*a); - _b.real = npy_crealf(*b); - _b.imag = npy_cimagf(*b); - numba_cpow(&_a, &_b, &_out); - *out = npy_cpackf((float) _out.real, (float) _out.imag); -} - -/* C99 math functions: redirect to system implementations - (but see _math_c99.h for Windows) */ - -NUMBA_EXPORT_FUNC(double) -numba_gamma(double x) -{ - return tgamma(x); -} - -NUMBA_EXPORT_FUNC(float) -numba_gammaf(float x) -{ - return tgammaf(x); -} - -NUMBA_EXPORT_FUNC(double) -numba_lgamma(double x) -{ - return lgamma(x); -} - -NUMBA_EXPORT_FUNC(float) -numba_lgammaf(float x) -{ - return lgammaf(x); -} - -NUMBA_EXPORT_FUNC(double) -numba_erf(double x) -{ - return erf(x); -} - -NUMBA_EXPORT_FUNC(float) -numba_erff(float x) -{ - return erff(x); -} - -NUMBA_EXPORT_FUNC(double) -numba_erfc(double x) -{ - return erfc(x); -} - -NUMBA_EXPORT_FUNC(float) -numba_erfcf(float x) -{ - return erfcf(x); -} - -/* Note npy_signbit() is actually a 
polymorphic macro */ -NUMBA_EXPORT_FUNC(int) -numba_signbitf(float a) -{ - return npy_signbit(a); -} - -NUMBA_EXPORT_FUNC(int) -numba_signbit(npy_double a) -{ - return npy_signbit(a); -} - - -/* Unpack any Python complex-like object into a Py_complex structure */ -NUMBA_EXPORT_FUNC(int) -numba_complex_adaptor(PyObject* obj, Py_complex *out) { - PyObject* fobj; - PyArray_Descr *dtype; - double val[2]; - - // Convert from python complex or numpy complex128 - if (PyComplex_Check(obj)) { - out->real = PyComplex_RealAsDouble(obj); - out->imag = PyComplex_ImagAsDouble(obj); - } - // Convert from numpy complex64 - else if (PyArray_IsScalar(obj, ComplexFloating)) { - dtype = PyArray_DescrFromScalar(obj); - if (dtype == NULL) { - return 0; - } - if (PyArray_CastScalarDirect(obj, dtype, &val[0], NPY_CDOUBLE) < 0) { - Py_DECREF(dtype); - return 0; - } - out->real = val[0]; - out->imag = val[1]; - Py_DECREF(dtype); - } else { - fobj = PyNumber_Float(obj); - if (!fobj) return 0; - out->real = PyFloat_AsDouble(fobj); - out->imag = 0.; - Py_DECREF(fobj); - } - return 1; -} - -/* Minimum PyBufferObject structure to hack inside it */ -typedef struct { - PyObject_HEAD - PyObject *b_base; - void *b_ptr; - Py_ssize_t b_size; - Py_ssize_t b_offset; -} PyBufferObject_Hack; - -/* -Get data address of record data buffer -*/ -NUMBA_EXPORT_FUNC(void *) -numba_extract_record_data(PyObject *recordobj, Py_buffer *pbuf) { - PyObject *attrdata; - void *ptr; - - attrdata = PyObject_GetAttrString(recordobj, "data"); - if (!attrdata) return NULL; - - if (-1 == PyObject_GetBuffer(attrdata, pbuf, 0)){ - #if PY_MAJOR_VERSION >= 3 - Py_DECREF(attrdata); - return NULL; - #else - /* HACK!!! 
*/ - /* In Python 2.6, it will report no buffer interface for record - even though it should */ - PyBufferObject_Hack *hack; - - /* Clear the error */ - PyErr_Clear(); - - hack = (PyBufferObject_Hack*) attrdata; - - if (hack->b_base == NULL) { - ptr = hack->b_ptr; - } else { - PyBufferProcs *bp; - readbufferproc proc = NULL; - - bp = hack->b_base->ob_type->tp_as_buffer; - /* FIXME Ignoring any flag. Just give me the pointer */ - proc = (readbufferproc)bp->bf_getreadbuffer; - if ((*proc)(hack->b_base, 0, &ptr) <= 0) { - Py_DECREF(attrdata); - return NULL; - } - ptr = (char*)ptr + hack->b_offset; - } - #endif - } else { - ptr = pbuf->buf; - } - Py_DECREF(attrdata); - return ptr; -} - -/* - * Return a record instance with dtype as the record type, and backed - * by a copy of the memory area pointed to by (pdata, size). - */ -NUMBA_EXPORT_FUNC(PyObject *) -numba_recreate_record(void *pdata, int size, PyObject *dtype) { - PyObject *numpy = NULL; - PyObject *numpy_record = NULL; - PyObject *aryobj = NULL; - PyObject *dtypearg = NULL; - PyObject *record = NULL; - PyArray_Descr *descr = NULL; - - numpy = PyImport_ImportModuleNoBlock("numpy"); - if (!numpy) goto CLEANUP; - - numpy_record = PyObject_GetAttrString(numpy, "record"); - if (!numpy_record) goto CLEANUP; - - dtypearg = PyTuple_Pack(2, numpy_record, dtype); - if (!dtypearg || !PyArray_DescrConverter(dtypearg, &descr)) - goto CLEANUP; - - /* This steals a reference to descr, so we don't have to DECREF it */ - aryobj = PyArray_FromString(pdata, size, descr, 1, NULL); - if (!aryobj) goto CLEANUP; - - record = PySequence_GetItem(aryobj, 0); - -CLEANUP: - Py_XDECREF(numpy); - Py_XDECREF(numpy_record); - Py_XDECREF(aryobj); - Py_XDECREF(dtypearg); - - return record; -} - -NUMBA_EXPORT_FUNC(int) -numba_adapt_ndarray(PyObject *obj, arystruct_t* arystruct) { - PyArrayObject *ndary; - int i, ndim; - npy_intp *p; - - if (!PyArray_Check(obj)) { - return -1; - } - - ndary = (PyArrayObject*)obj; - ndim = PyArray_NDIM(ndary); - - 
arystruct->data = PyArray_DATA(ndary); - arystruct->nitems = PyArray_SIZE(ndary); - arystruct->itemsize = PyArray_ITEMSIZE(ndary); - arystruct->parent = obj; - p = arystruct->shape_and_strides; - for (i = 0; i < ndim; i++, p++) { - *p = PyArray_DIM(ndary, i); - } - for (i = 0; i < ndim; i++, p++) { - *p = PyArray_STRIDE(ndary, i); - } - arystruct->meminfo = NULL; - return 0; -} - -NUMBA_EXPORT_FUNC(int) -numba_get_buffer(PyObject *obj, Py_buffer *buf) -{ - /* Ask for shape and strides, but no suboffsets */ - return PyObject_GetBuffer(obj, buf, PyBUF_RECORDS_RO); -} - -NUMBA_EXPORT_FUNC(void) -numba_adapt_buffer(Py_buffer *buf, arystruct_t *arystruct) -{ - int i; - npy_intp *p; - - arystruct->data = buf->buf; - arystruct->itemsize = buf->itemsize; - arystruct->parent = buf->obj; - arystruct->nitems = 1; - p = arystruct->shape_and_strides; - for (i = 0; i < buf->ndim; i++, p++) { - *p = buf->shape[i]; - arystruct->nitems *= buf->shape[i]; - } - for (i = 0; i < buf->ndim; i++, p++) { - *p = buf->strides[i]; - } - arystruct->meminfo = NULL; -} - -NUMBA_EXPORT_FUNC(void) -numba_release_buffer(Py_buffer *buf) -{ - PyBuffer_Release(buf); -} - -NUMBA_EXPORT_FUNC(PyObject *) -numba_ndarray_new(int nd, - npy_intp *dims, /* shape */ - npy_intp *strides, - void* data, - int type_num, - int itemsize) -{ - PyObject *ndary; - int flags = NPY_ARRAY_BEHAVED; - ndary = PyArray_New((PyTypeObject*)&PyArray_Type, nd, dims, type_num, - strides, data, 0, flags, NULL); - return ndary; -} - - -/* - * Handle reshaping of zero-sized array. - * See numba_attempt_nocopy_reshape() below. - */ -static int -nocopy_empty_reshape(npy_intp nd, const npy_intp *dims, const npy_intp *strides, - npy_intp newnd, const npy_intp *newdims, - npy_intp *newstrides, npy_intp itemsize, - int is_f_order) -{ - int i; - /* Just make the strides vaguely reasonable - * (they can have any value in theory). 
- */ - for (i = 0; i < newnd; i++) - newstrides[i] = itemsize; - return 1; /* reshape successful */ -} - -/* - * Straight from Numpy's _attempt_nocopy_reshape() - * (np/core/src/multiarray/shape.c). - * Attempt to reshape an array without copying data - * - * This function should correctly handle all reshapes, including - * axes of length 1. Zero strides should work but are untested. - * - * If a copy is needed, returns 0 - * If no copy is needed, returns 1 and fills `npy_intp *newstrides` - * with appropriate strides - */ - -NUMBA_EXPORT_FUNC(int) -numba_attempt_nocopy_reshape(npy_intp nd, const npy_intp *dims, const npy_intp *strides, - npy_intp newnd, const npy_intp *newdims, - npy_intp *newstrides, npy_intp itemsize, - int is_f_order) -{ - int oldnd; - npy_intp olddims[NPY_MAXDIMS]; - npy_intp oldstrides[NPY_MAXDIMS]; - npy_intp np, op, last_stride; - int oi, oj, ok, ni, nj, nk; - - oldnd = 0; - /* - * Remove axes with dimension 1 from the old array. They have no effect - * but would need special cases since their strides do not matter. 
- */ - for (oi = 0; oi < nd; oi++) { - if (dims[oi]!= 1) { - olddims[oldnd] = dims[oi]; - oldstrides[oldnd] = strides[oi]; - oldnd++; - } - } - - np = 1; - for (ni = 0; ni < newnd; ni++) { - np *= newdims[ni]; - } - op = 1; - for (oi = 0; oi < oldnd; oi++) { - op *= olddims[oi]; - } - if (np != op) { - /* different total sizes; no hope */ - return 0; - } - - if (np == 0) { - /* the Numpy code does not handle 0-sized arrays */ - return nocopy_empty_reshape(nd, dims, strides, - newnd, newdims, newstrides, - itemsize, is_f_order); - } - - /* oi to oj and ni to nj give the axis ranges currently worked with */ - oi = 0; - oj = 1; - ni = 0; - nj = 1; - while (ni < newnd && oi < oldnd) { - np = newdims[ni]; - op = olddims[oi]; - - while (np != op) { - if (np < op) { - /* Misses trailing 1s, these are handled later */ - np *= newdims[nj++]; - } else { - op *= olddims[oj++]; - } - } - - /* Check whether the original axes can be combined */ - for (ok = oi; ok < oj - 1; ok++) { - if (is_f_order) { - if (oldstrides[ok+1] != olddims[ok]*oldstrides[ok]) { - /* not contiguous enough */ - return 0; - } - } - else { - /* C order */ - if (oldstrides[ok] != olddims[ok+1]*oldstrides[ok+1]) { - /* not contiguous enough */ - return 0; - } - } - } - - /* Calculate new strides for all axes currently worked with */ - if (is_f_order) { - newstrides[ni] = oldstrides[oi]; - for (nk = ni + 1; nk < nj; nk++) { - newstrides[nk] = newstrides[nk - 1]*newdims[nk - 1]; - } - } - else { - /* C order */ - newstrides[nj - 1] = oldstrides[oj - 1]; - for (nk = nj - 1; nk > ni; nk--) { - newstrides[nk - 1] = newstrides[nk]*newdims[nk]; - } - } - ni = nj++; - oi = oj++; - } - - /* - * Set strides corresponding to trailing 1s of the new shape. 
- */ - if (ni >= 1) { - last_stride = newstrides[ni - 1]; - } - else { - last_stride = itemsize; - } - if (is_f_order) { - last_stride *= newdims[ni - 1]; - } - for (nk = ni; nk < newnd; nk++) { - newstrides[nk] = last_stride; - } - - return 1; -} - -/* - * Cython utilities. - */ - -/* Fetch the address of the given function, as exposed by - a cython module */ -static void * -import_cython_function(const char *module_name, const char *function_name) -{ - PyObject *module, *capi, *cobj; - void *res = NULL; - const char *capsule_name; - - module = PyImport_ImportModule(module_name); - if (module == NULL) - return NULL; - capi = PyObject_GetAttrString(module, "__pyx_capi__"); - Py_DECREF(module); - if (capi == NULL) - return NULL; - cobj = PyMapping_GetItemString(capi, function_name); - Py_DECREF(capi); - if (cobj == NULL) { - PyErr_Clear(); - PyErr_Format(PyExc_ValueError, - "No function '%s' found in __pyx_capi__ of '%s'", - function_name, module_name); - return NULL; - } - /* 2.7+ => Cython exports a PyCapsule */ - capsule_name = PyCapsule_GetName(cobj); - if (capsule_name != NULL) { - res = PyCapsule_GetPointer(cobj, capsule_name); - } - Py_DECREF(cobj); - return res; -} - -NUMBA_EXPORT_FUNC(PyObject *) -_numba_import_cython_function(PyObject *self, PyObject *args) -{ - const char *module_name; - const char *function_name; - void *p = NULL; - PyObject *res; - - if (!PyArg_ParseTuple(args, "ss", &module_name, &function_name)) { - return NULL; - } - p = import_cython_function(module_name, function_name); - if (p == NULL) { - return NULL; - } - res = PyLong_FromVoidPtr(p); - if (res == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "Could not convert function address to int"); - return NULL; - } - return res; -} - -/* We use separate functions for datetime64 and timedelta64, to ensure - * proper type checking. 
- */ -NUMBA_EXPORT_FUNC(npy_int64) -numba_extract_np_datetime(PyObject *td) -{ - if (!PyArray_IsScalar(td, Datetime)) { - PyErr_SetString(PyExc_TypeError, - "expected a numpy.datetime64 object"); - return -1; - } - return PyArrayScalar_VAL(td, Timedelta); -} - -NUMBA_EXPORT_FUNC(npy_int64) -numba_extract_np_timedelta(PyObject *td) -{ - if (!PyArray_IsScalar(td, Timedelta)) { - PyErr_SetString(PyExc_TypeError, - "expected a numpy.timedelta64 object"); - return -1; - } - return PyArrayScalar_VAL(td, Timedelta); -} - -NUMBA_EXPORT_FUNC(PyObject *) -numba_create_np_datetime(npy_int64 value, int unit_code) -{ - PyDatetimeScalarObject *obj = (PyDatetimeScalarObject *) - PyArrayScalar_New(Datetime); - if (obj != NULL) { - obj->obval = value; - obj->obmeta.base = unit_code; - obj->obmeta.num = 1; - } - return (PyObject *) obj; -} - -NUMBA_EXPORT_FUNC(PyObject *) -numba_create_np_timedelta(npy_int64 value, int unit_code) -{ - PyTimedeltaScalarObject *obj = (PyTimedeltaScalarObject *) - PyArrayScalar_New(Timedelta); - if (obj != NULL) { - obj->obval = value; - obj->obmeta.base = unit_code; - obj->obmeta.num = 1; - } - return (PyObject *) obj; -} - -NUMBA_EXPORT_FUNC(uint64_t) -numba_fptoui(double x) { - /* First cast to signed int of the full width to make sure sign extension - happens (this can make a difference on some platforms...). */ - return (uint64_t) (int64_t) x; -} - -NUMBA_EXPORT_FUNC(uint64_t) -numba_fptouif(float x) { - return (uint64_t) (int64_t) x; -} - -NUMBA_EXPORT_FUNC(void) -numba_gil_ensure(PyGILState_STATE *state) { - *state = PyGILState_Ensure(); -} - -NUMBA_EXPORT_FUNC(void) -numba_gil_release(PyGILState_STATE *state) { - PyGILState_Release(*state); -} - -NUMBA_EXPORT_FUNC(PyObject *) -numba_py_type(PyObject *obj) { - return (PyObject *) Py_TYPE(obj); -} - - -/* - * Functions for tagging an arbitrary Python object with an arbitrary pointer. - * These functions make strong lifetime assumptions, see below. 
- */ - -static PyObject *private_data_dict = NULL; - -static PyObject * -_get_private_data_dict(void) -{ - if (private_data_dict == NULL) - private_data_dict = PyDict_New(); - return private_data_dict; -} - -NUMBA_EXPORT_FUNC(void) -numba_set_pyobject_private_data(PyObject *obj, void *ptr) -{ - PyObject *dct = _get_private_data_dict(); - /* This assumes the reference to setobj is kept alive until the - call to numba_reset_set_private_data()! */ - PyObject *key = PyLong_FromVoidPtr((void *) obj); - PyObject *value = PyLong_FromVoidPtr(ptr); - - if (!dct || !value || !key) - goto error; - if (PyDict_SetItem(dct, key, value)) - goto error; - Py_DECREF(key); - Py_DECREF(value); - return; - -error: - Py_FatalError("unable to set private data"); -} - -NUMBA_EXPORT_FUNC(void *) -numba_get_pyobject_private_data(PyObject *obj) -{ - PyObject *dct = _get_private_data_dict(); - PyObject *value, *key = PyLong_FromVoidPtr((void *) obj); - void *ptr; - if (!dct || !key) - goto error; - - value = PyDict_GetItem(dct, key); - Py_DECREF(key); - if (!value) - return NULL; - else { - ptr = PyLong_AsVoidPtr(value); - if (ptr == NULL && PyErr_Occurred()) - goto error; - return ptr; - } - -error: - Py_FatalError("unable to get private data"); - return NULL; -} - -NUMBA_EXPORT_FUNC(void) -numba_reset_pyobject_private_data(PyObject *obj) -{ - PyObject *dct = _get_private_data_dict(); - PyObject *key = PyLong_FromVoidPtr((void *) obj); - - if (!key) - goto error; - if (PyDict_DelItem(dct, key)) - PyErr_Clear(); - Py_DECREF(key); - return; - -error: - Py_FatalError("unable to reset private data"); -} - -NUMBA_EXPORT_FUNC(int) -numba_unpack_slice(PyObject *obj, - Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step) -{ - PySliceObject *slice = (PySliceObject *) obj; - if (!PySlice_Check(obj)) { - PyErr_Format(PyExc_TypeError, - "Expected a slice object, got '%s'", - Py_TYPE(slice)->tp_name); - return -1; - } -#define FETCH_MEMBER(NAME, DEFAULT) \ - if (slice->NAME != Py_None) { \ - Py_ssize_t 
v = PyNumber_AsSsize_t(slice->NAME, \ - PyExc_OverflowError); \ - if (v == -1 && PyErr_Occurred()) \ - return -1; \ - *NAME = v; \ - } \ - else { \ - *NAME = DEFAULT; \ - } - FETCH_MEMBER(step, 1) - FETCH_MEMBER(stop, (*step > 0) ? PY_SSIZE_T_MAX : PY_SSIZE_T_MIN) - FETCH_MEMBER(start, (*step > 0) ? 0 : PY_SSIZE_T_MAX) - return 0; - -#undef FETCH_MEMBER -} - -NUMBA_EXPORT_FUNC(int) -numba_fatal_error(void) -{ - PyGILState_Ensure(); -#if PY_MAJOR_VERSION < 3 - /* Py_FatalError doesn't print the current error on Python 2, do it - ourselves. */ - if (PyErr_Occurred()) - PyErr_Print(); -#endif - Py_FatalError("in Numba-compiled function"); - return 0; /* unreachable */ -} - -/* Logic for raising an arbitrary object. Adapted from CPython's ceval.c. - This *consumes* a reference count to its argument. */ -NUMBA_EXPORT_FUNC(int) -numba_do_raise(PyObject *exc) -{ - PyObject *type = NULL, *value = NULL; - - /* We support the following forms of raise: - raise - raise - raise */ - - if (exc == Py_None) { - /* Reraise */ - PyThreadState *tstate = PyThreadState_GET(); - PyObject *tb; -#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 7) - _PyErr_StackItem *tstate_exc = tstate->exc_info; -#else - PyThreadState *tstate_exc = tstate; -#endif - Py_DECREF(exc); - type = tstate_exc->exc_type; - value = tstate_exc->exc_value; - tb = tstate_exc->exc_traceback; - if (type == Py_None) { - PyErr_SetString(PyExc_RuntimeError, - "No active exception to reraise"); - return 0; - } - Py_XINCREF(type); - Py_XINCREF(value); - Py_XINCREF(tb); - PyErr_Restore(type, value, tb); - return 1; - } - - if (PyTuple_CheckExact(exc)) { - /* A (callable, arguments) tuple. 
*/ - if (!PyArg_ParseTuple(exc, "OO", &type, &value)) { - Py_DECREF(exc); - goto raise_error; - } - value = PyObject_CallObject(type, value); - Py_DECREF(exc); - type = NULL; - if (value == NULL) - goto raise_error; - if (!PyExceptionInstance_Check(value)) { - PyErr_SetString(PyExc_TypeError, - "exceptions must derive from BaseException"); - goto raise_error; - } - type = PyExceptionInstance_Class(value); - Py_INCREF(type); - } - else if (PyExceptionClass_Check(exc)) { - type = exc; - value = PyObject_CallObject(exc, NULL); - if (value == NULL) - goto raise_error; - if (!PyExceptionInstance_Check(value)) { - PyErr_SetString(PyExc_TypeError, - "exceptions must derive from BaseException"); - goto raise_error; - } - } - else if (PyExceptionInstance_Check(exc)) { - value = exc; - type = PyExceptionInstance_Class(exc); - Py_INCREF(type); - } - else { - /* Not something you can raise. You get an exception - anyway, just not what you specified :-) */ - Py_DECREF(exc); - PyErr_SetString(PyExc_TypeError, - "exceptions must derive from BaseException"); - goto raise_error; - } - - PyErr_SetObject(type, value); - /* PyErr_SetObject incref's its arguments */ - Py_XDECREF(value); - Py_XDECREF(type); - return 0; - -raise_error: - Py_XDECREF(value); - Py_XDECREF(type); - return 0; -} - -NUMBA_EXPORT_FUNC(PyObject *) -numba_unpickle(const char *data, int n) -{ - PyObject *buf, *obj; - static PyObject *loads; - - /* Caching the pickle.loads function shaves a couple µs here. 
*/ - if (loads == NULL) { - PyObject *picklemod; -#if PY_MAJOR_VERSION >= 3 - picklemod = PyImport_ImportModule("pickle"); -#else - picklemod = PyImport_ImportModule("cPickle"); -#endif - if (picklemod == NULL) - return NULL; - loads = PyObject_GetAttrString(picklemod, "loads"); - Py_DECREF(picklemod); - if (loads == NULL) - return NULL; - } - - buf = PyBytes_FromStringAndSize(data, n); - if (buf == NULL) - return NULL; - obj = PyObject_CallFunctionObjArgs(loads, buf, NULL); - Py_DECREF(buf); - return obj; -} - - -/* - * Define bridge for all math functions - */ - -#define MATH_UNARY(F, R, A) \ - NUMBA_EXPORT_FUNC(R) numba_##F(A a) { return F(a); } -#define MATH_BINARY(F, R, A, B) \ - NUMBA_EXPORT_FUNC(R) numba_##F(A a, B b) { return F(a, b); } - -#include "mathnames.h" - -#undef MATH_UNARY -#undef MATH_BINARY - -/* - * BLAS and LAPACK wrappers - */ - -#include "_lapack.c" - -/* - * PRNG support - */ - -#include "_random.c" diff --git a/numba/numba/_helpermod.c b/numba/numba/_helpermod.c deleted file mode 100644 index b2821b188..000000000 --- a/numba/numba/_helpermod.c +++ /dev/null @@ -1,243 +0,0 @@ -/* -Expose all functions as pointers in a dedicated C extension. 
-*/ - -#define NUMBA_EXPORT_FUNC(_rettype) static _rettype -#define NUMBA_EXPORT_DATA(_vartype) static _vartype - -/* Import _pymodule.h first, for a recent _POSIX_C_SOURCE */ -#include "_pymodule.h" -#include -#ifdef _MSC_VER - #define false 0 - #define true 1 - #define bool int -#else - #include -#endif -/* Numba C helpers */ -#include "_helperlib.c" - -/* Numpy C math function exports */ -#include "_npymath_exports.c" - -static PyObject * -build_c_helpers_dict(void) -{ - PyObject *dct = PyDict_New(); - if (dct == NULL) - goto error; - -#define _declpointer(name, value) do { \ - PyObject *o = PyLong_FromVoidPtr(value); \ - if (o == NULL) goto error; \ - if (PyDict_SetItemString(dct, name, o)) { \ - Py_DECREF(o); \ - goto error; \ - } \ - Py_DECREF(o); \ -} while (0) - -#define declmethod(func) _declpointer(#func, &numba_##func) - -#define declpointer(ptr) _declpointer(#ptr, &numba_##ptr) - - declmethod(fixed_fmod); - declmethod(fixed_fmodf); - declmethod(set_fnclex); - - declmethod(sdiv); - declmethod(srem); - declmethod(udiv); - declmethod(urem); - declmethod(frexp); - declmethod(frexpf); - declmethod(ldexp); - declmethod(ldexpf); - declmethod(cpow); - declmethod(cpowf); - declmethod(erf); - declmethod(erff); - declmethod(erfc); - declmethod(erfcf); - declmethod(gamma); - declmethod(gammaf); - declmethod(lgamma); - declmethod(lgammaf); - declmethod(signbit); - declmethod(signbitf); - declmethod(complex_adaptor); - declmethod(adapt_ndarray); - declmethod(ndarray_new); - declmethod(extract_record_data); - declmethod(get_buffer); - declmethod(adapt_buffer); - declmethod(release_buffer); - declmethod(extract_np_datetime); - declmethod(create_np_datetime); - declmethod(extract_np_timedelta); - declmethod(create_np_timedelta); - declmethod(recreate_record); - declmethod(fptoui); - declmethod(fptouif); - declmethod(gil_ensure); - declmethod(gil_release); - declmethod(fatal_error); - declmethod(py_type); - declmethod(unpack_slice); - declmethod(do_raise); - 
declmethod(unpickle); - declmethod(attempt_nocopy_reshape); - declmethod(get_pyobject_private_data); - declmethod(set_pyobject_private_data); - declmethod(reset_pyobject_private_data); - - /* BLAS / LAPACK */ - declmethod(xxgemm); - declmethod(xxgemv); - declmethod(xxdot); - declmethod(xxgetrf); - declmethod(ez_xxgetri); - declmethod(xxpotrf); - declmethod(ez_rgeev); - declmethod(ez_cgeev); - declmethod(ez_xxxevd); - declmethod(ez_gesdd); - declmethod(ez_geqrf); - declmethod(ez_xxgqr); - declmethod(ez_gelsd); - declmethod(xgesv); - declmethod(xxnrm2); - - /* PRNG support */ - declmethod(get_py_random_state); - declmethod(get_np_random_state); - declmethod(rnd_shuffle); - declmethod(rnd_init); - declmethod(poisson_ptrs); - -#define MATH_UNARY(F, R, A) declmethod(F); -#define MATH_BINARY(F, R, A, B) declmethod(F); - #include "mathnames.h" -#undef MATH_UNARY -#undef MATH_BINARY - -#undef declmethod - return dct; -error: - Py_XDECREF(dct); - return NULL; -} - -static int -register_npymath_exports(PyObject *dct) -{ - size_t count = sizeof(npymath_exports) / sizeof(npymath_exports[0]); - size_t i; - - for (i = 0; i < count; ++i) { - PyObject *ptr = PyLong_FromVoidPtr(npymath_exports[i].func); - if (ptr == NULL) - return -1; - if (PyDict_SetItemString(dct, npymath_exports[i].name, ptr) < 0) { - Py_DECREF(ptr); - return -1; - } - Py_DECREF(ptr); - } - - return 0; -} - -static PyObject * -build_npymath_exports_dict(void) -{ - PyObject *dct = PyDict_New(); - if (dct != NULL) { - if (register_npymath_exports(dct) < 0) - Py_CLEAR(dct); - } - return dct; -} - -static PyMethodDef ext_methods[] = { - { "rnd_get_state", (PyCFunction) _numba_rnd_get_state, METH_O, NULL }, - { "rnd_get_py_state_ptr", (PyCFunction) _numba_rnd_get_py_state_ptr, METH_NOARGS, NULL }, - { "rnd_get_np_state_ptr", (PyCFunction) _numba_rnd_get_np_state_ptr, METH_NOARGS, NULL }, - { "rnd_seed", (PyCFunction) _numba_rnd_seed, METH_VARARGS, NULL }, - { "rnd_set_state", (PyCFunction) _numba_rnd_set_state, 
METH_VARARGS, NULL }, - { "rnd_shuffle", (PyCFunction) _numba_rnd_shuffle, METH_O, NULL }, - { "_import_cython_function", (PyCFunction) _numba_import_cython_function, METH_VARARGS, NULL }, - { NULL }, -}; - -/* - * These functions are exported by the module's DLL, to exercise ctypes / cffi - * without relying on libc availability (see https://bugs.python.org/issue23606) - */ - -PyAPI_FUNC(double) _numba_test_sin(double x); -PyAPI_FUNC(double) _numba_test_cos(double x); -PyAPI_FUNC(double) _numba_test_exp(double x); -PyAPI_FUNC(void) _numba_test_vsquare(int n, double *x, double *out); -PyAPI_FUNC(double) _numba_test_funcptr(double (*func)(double)); -PyAPI_FUNC(bool) _numba_test_boolean(void); - -double _numba_test_sin(double x) -{ - return sin(x); -} - -double _numba_test_cos(double x) -{ - return cos(x); -} - -double _numba_test_exp(double x) -{ - return exp(x); -} - -void _numba_test_vsquare(int n, double *x, double *out) -{ - int i; - for (i = 0; i < n; i++) - out[i] = pow(x[i], 2.0); -} - -void _numba_test_vcube(int n, double *x, double *out) -{ - int i; - for (i = 0; i < n; i++) - out[i] = pow(x[i], 3.0); -} - -double _numba_test_funcptr(double (*func)(double)) -{ - return func(1.5); -} - -bool _numba_test_boolean() -{ - return true; -} - -MOD_INIT(_helperlib) { - PyObject *m; - MOD_DEF(m, "_helperlib", "No docs", ext_methods) - if (m == NULL) - return MOD_ERROR_VAL; - - import_array(); - - PyModule_AddObject(m, "c_helpers", build_c_helpers_dict()); - PyModule_AddObject(m, "npymath_exports", build_npymath_exports_dict()); - PyModule_AddIntConstant(m, "long_min", LONG_MIN); - PyModule_AddIntConstant(m, "long_max", LONG_MAX); - PyModule_AddIntConstant(m, "py_buffer_size", sizeof(Py_buffer)); - PyModule_AddIntConstant(m, "py_gil_state_size", sizeof(PyGILState_STATE)); - - numba_rnd_ensure_global_init(); - - return MOD_SUCCESS_VAL(m); -} diff --git a/numba/numba/_lapack.c b/numba/numba/_lapack.c deleted file mode 100644 index fdc6e8cfa..000000000 --- 
a/numba/numba/_lapack.c +++ /dev/null @@ -1,1946 +0,0 @@ -/* - * This file contains wrappers of BLAS and LAPACK functions - */ -/* - * BLAS calling helpers. The helpers can be called without the GIL held. - * The caller is responsible for checking arguments (especially dimensions). - */ - -/* Fast getters caching the value of a function's address after - the first call to import_cblas_function(). */ - -#define EMIT_GET_CBLAS_FUNC(name) \ - static void *cblas_ ## name = NULL; \ - static void *get_cblas_ ## name(void) { \ - if (cblas_ ## name == NULL) { \ - PyGILState_STATE st = PyGILState_Ensure(); \ - const char *mod = "scipy.linalg.cython_blas"; \ - cblas_ ## name = import_cython_function(mod, # name); \ - PyGILState_Release(st); \ - } \ - return cblas_ ## name; \ - } - -EMIT_GET_CBLAS_FUNC(dgemm) -EMIT_GET_CBLAS_FUNC(sgemm) -EMIT_GET_CBLAS_FUNC(cgemm) -EMIT_GET_CBLAS_FUNC(zgemm) -EMIT_GET_CBLAS_FUNC(dgemv) -EMIT_GET_CBLAS_FUNC(sgemv) -EMIT_GET_CBLAS_FUNC(cgemv) -EMIT_GET_CBLAS_FUNC(zgemv) -EMIT_GET_CBLAS_FUNC(ddot) -EMIT_GET_CBLAS_FUNC(sdot) -EMIT_GET_CBLAS_FUNC(cdotu) -EMIT_GET_CBLAS_FUNC(zdotu) -EMIT_GET_CBLAS_FUNC(cdotc) -EMIT_GET_CBLAS_FUNC(zdotc) -EMIT_GET_CBLAS_FUNC(snrm2) -EMIT_GET_CBLAS_FUNC(dnrm2) -EMIT_GET_CBLAS_FUNC(scnrm2) -EMIT_GET_CBLAS_FUNC(dznrm2) - - -#undef EMIT_GET_CBLAS_FUNC - -/* - * NOTE: On return value convention. - * For LAPACK wrapper development the following conventions are followed: - * Publically exposed wrapper functions must return:- - * STATUS_ERROR : For an unrecoverable error e.g. caught by xerbla, this is so - * a Py_FatalError can be raised. - * STATUS_SUCCESS: For successful execution - * +n : Where n is an integer for a routine specific error - * (typically derived from an `info` argument). - * - * The caller is responsible for checking and handling the error status. 
- */ - -/* return STATUS_SUCCESS if everything went ok */ -#define STATUS_SUCCESS (0) - -/* return STATUS_ERROR if an unrecoverable error is encountered */ -#define STATUS_ERROR (-1) - -/* - * A union of all the types accepted by BLAS/LAPACK for use in cases where - * stack based allocation is needed (typically for work space query args length - * 1). - */ -typedef union all_dtypes_ -{ - float s; - double d; - npy_complex64 c; - npy_complex128 z; -} all_dtypes; - -/* - * A checked PyMem_RawMalloc, ensures that the var is either NULL - * and an exception is raised, or that the allocation was successful. - * Returns zero on success for status checking. - */ -static int checked_PyMem_RawMalloc(void** var, size_t bytes) -{ - *var = NULL; - *var = PyMem_RawMalloc(bytes); - if (!(*var)) - { - { - PyGILState_STATE st = PyGILState_Ensure(); - - PyErr_SetString(PyExc_MemoryError, - "Insufficient memory for buffer allocation\ - required by LAPACK."); - PyGILState_Release(st); - } - return 1; - } - return 0; -} - -/* - * Checks that the char kind is valid (one of [s,d,c,z]) for use in blas/lapack. - * Returns zero on success for status checking. - */ -static int check_kind(char kind) -{ - switch (kind) - { - case 's': - case 'd': - case 'c': - case 'z': - break; - default: - { - PyGILState_STATE st = PyGILState_Ensure(); - PyErr_SetString(PyExc_ValueError, - "invalid data type (kind) found"); - PyGILState_Release(st); - } - return 1; - } - return 0; -} - -/* - * Guard macro for ensuring a valid data "kind" is being used. - * Place at the top of all routines with switches on "kind" that accept - * one of [s,d,c,z]. - */ -#define ENSURE_VALID_KIND(__KIND) \ -if (check_kind( __KIND )) \ -{ \ - return STATUS_ERROR; \ -} \ - -/* - * Checks that the char kind is valid for the real domain (one of [s,d]) - * for use in blas/lapack. - * Returns zero on success for status checking. 
- */ -static int check_real_kind(char kind) -{ - switch (kind) - { - case 's': - case 'd': - break; - default: - { - PyGILState_STATE st = PyGILState_Ensure(); - PyErr_SetString(PyExc_ValueError, - "invalid data type (kind) found"); - PyGILState_Release(st); - } - return 1; - } - return 0; -} - -/* - * Guard macro for ensuring a valid data "kind" is being used for the - * real domain routines. - * Place at the top of all routines with switches on "kind" that accept - * one of [s,d]. - */ -#define ENSURE_VALID_REAL_KIND(__KIND) \ -if (check_real_kind( __KIND )) \ -{ \ - return STATUS_ERROR; \ -} \ - - -/* - * Checks that the char kind is valid for the complex domain (one of [c,z]) - * for use in blas/lapack. - * Returns zero on success for status checking. - */ -static int check_complex_kind(char kind) -{ - switch (kind) - { - case 'c': - case 'z': - break; - default: - { - PyGILState_STATE st = PyGILState_Ensure(); - PyErr_SetString(PyExc_ValueError, - "invalid data type (kind) found"); - PyGILState_Release(st); - } - return 1; - } - return 0; -} - -/* - * Guard macro for ensuring a valid data "kind" is being used for the - * real domain routines. - * Place at the top of all routines with switches on "kind" that accept - * one of [c,z]. - */ -#define ENSURE_VALID_COMPLEX_KIND(__KIND) \ -if (check_complex_kind( __KIND )) \ -{ \ - return STATUS_ERROR; \ -} \ - - -/* - * Checks that a function is found (i.e. not null) - * Returns zero on success for status checking. - */ -static int check_func(void *func) -{ - if (func == NULL) - { - PyGILState_STATE st = PyGILState_Ensure(); - PyErr_SetString(PyExc_RuntimeError, - "Specified LAPACK function could not be found."); - PyGILState_Release(st); - return STATUS_ERROR; - } - return STATUS_SUCCESS; -} - - -/* - * Guard macro for ensuring a valid function is found. 
- */ -#define ENSURE_VALID_FUNC(__FUNC) \ -if (check_func(__FUNC)) \ -{ \ - return STATUS_ERROR; \ -} \ - - -/* - * Define what a Fortran "int" is, some LAPACKs have 64 bit integer support - * numba presently opts for a 32 bit C int. - * This definition allows scope for later configuration time magic to adjust - * the size of int at all the call sites. - */ -#define F_INT int - - -typedef float (*sdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, F_INT *incy); -typedef double (*ddot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, F_INT - *incy); -typedef npy_complex64 (*cdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, - F_INT *incy); -typedef npy_complex128 (*zdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, - F_INT *incy); - -typedef void (*xxgemv_t)(char *trans, F_INT *m, F_INT *n, - void *alpha, void *a, F_INT *lda, - void *x, F_INT *incx, void *beta, - void *y, F_INT *incy); - -typedef void (*xxgemm_t)(char *transa, char *transb, - F_INT *m, F_INT *n, F_INT *k, - void *alpha, void *a, F_INT *lda, - void *b, F_INT *ldb, void *beta, - void *c, F_INT *ldc); - -typedef float (*sxnrm2_t) (F_INT *n, void *x, F_INT *incx); -typedef double (*dxnrm2_t) (F_INT *n, void *x, F_INT *incx); - -/* Vector * vector: result = dx * dy */ -NUMBA_EXPORT_FUNC(int) -numba_xxdot(char kind, char conjugate, Py_ssize_t n, void *dx, void *dy, - void *result) -{ - void *raw_func = NULL; - F_INT _n; - F_INT inc = 1; - - ENSURE_VALID_KIND(kind) - - switch (kind) - { - case 's': - raw_func = get_cblas_sdot(); - break; - case 'd': - raw_func = get_cblas_ddot(); - break; - case 'c': - raw_func = conjugate ? get_cblas_cdotc() : get_cblas_cdotu(); - break; - case 'z': - raw_func = conjugate ? 
get_cblas_zdotc() : get_cblas_zdotu(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - _n = (F_INT) n; - - switch (kind) - { - case 's': - *(float *) result = (*(sdot_t) raw_func)(&_n, dx, &inc, dy, &inc);; - break; - case 'd': - *(double *) result = (*(ddot_t) raw_func)(&_n, dx, &inc, dy, &inc);; - break; - case 'c': - *(npy_complex64 *) result = (*(cdot_t) raw_func)(&_n, dx, &inc, dy,\ - &inc);; - break; - case 'z': - *(npy_complex128 *) result = (*(zdot_t) raw_func)(&_n, dx, &inc,\ - dy, &inc);; - break; - } - - return 0; -} - -/* Matrix * vector: y = alpha * a * x + beta * y */ -NUMBA_EXPORT_FUNC(int) -numba_xxgemv(char kind, char trans, Py_ssize_t m, Py_ssize_t n, - void *alpha, void *a, Py_ssize_t lda, - void *x, void *beta, void *y) -{ - void *raw_func = NULL; - F_INT _m, _n; - F_INT _lda; - F_INT inc = 1; - - ENSURE_VALID_KIND(kind) - - switch (kind) - { - case 's': - raw_func = get_cblas_sgemv(); - break; - case 'd': - raw_func = get_cblas_dgemv(); - break; - case 'c': - raw_func = get_cblas_cgemv(); - break; - case 'z': - raw_func = get_cblas_zgemv(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - _m = (F_INT) m; - _n = (F_INT) n; - _lda = (F_INT) lda; - - (*(xxgemv_t) raw_func)(&trans, &_m, &_n, alpha, a, &_lda, - x, &inc, beta, y, &inc); - return 0; -} - -/* Matrix * matrix: c = alpha * a * b + beta * c */ -NUMBA_EXPORT_FUNC(int) -numba_xxgemm(char kind, char transa, char transb, - Py_ssize_t m, Py_ssize_t n, Py_ssize_t k, - void *alpha, void *a, Py_ssize_t lda, - void *b, Py_ssize_t ldb, void *beta, - void *c, Py_ssize_t ldc) -{ - void *raw_func = NULL; - F_INT _m, _n, _k; - F_INT _lda, _ldb, _ldc; - - ENSURE_VALID_KIND(kind) - - switch (kind) - { - case 's': - raw_func = get_cblas_sgemm(); - break; - case 'd': - raw_func = get_cblas_dgemm(); - break; - case 'c': - raw_func = get_cblas_cgemm(); - break; - case 'z': - raw_func = get_cblas_zgemm(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - _m = (F_INT) m; - _n = (F_INT) n; - _k = (F_INT) k; - _lda = 
(F_INT) lda; - _ldb = (F_INT) ldb; - _ldc = (F_INT) ldc; - - (*(xxgemm_t) raw_func)(&transa, &transb, &_m, &_n, &_k, alpha, a, &_lda, - b, &_ldb, beta, c, &_ldc); - return 0; -} - - -/* L2-norms */ -NUMBA_EXPORT_FUNC(F_INT) -numba_xxnrm2(char kind, Py_ssize_t n, void * x, Py_ssize_t incx, void * result) -{ - void *raw_func = NULL; - F_INT _incx; - F_INT _n; - - ENSURE_VALID_KIND(kind) - - switch (kind) - { - case 's': - raw_func = get_cblas_snrm2(); - break; - case 'd': - raw_func = get_cblas_dnrm2(); - break; - case 'c': - raw_func = get_cblas_scnrm2(); - break; - case 'z': - raw_func = get_cblas_dznrm2(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - _n = (F_INT) n; - _incx = (F_INT) incx; - - switch (kind) - { - case 's': - *(float *) result = (*(sxnrm2_t) raw_func)(&_n, x, &_incx);; - break; - case 'd': - *(double *) result = (*(dxnrm2_t) raw_func)(&_n, x, &_incx);; - break; - case 'c': - *(float *) result = (*(sxnrm2_t) raw_func)(&_n, x, &_incx);; - break; - case 'z': - *(double *) result = (*(dxnrm2_t) raw_func)(&_n, x, &_incx);; - break; - } - - return 0; -} - - -/* - * LAPACK calling helpers. The helpers can be called without the GIL held. - * The caller is responsible for checking arguments (especially dimensions). - */ - -/* Fast getters caching the value of a function's address after - the first call to import_clapack_function(). */ - -#define EMIT_GET_CLAPACK_FUNC(name) \ - static void *clapack_ ## name = NULL; \ - static void *get_clapack_ ## name(void) { \ - if (clapack_ ## name == NULL) { \ - PyGILState_STATE st = PyGILState_Ensure(); \ - const char *mod = "scipy.linalg.cython_lapack"; \ - clapack_ ## name = import_cython_function(mod, # name); \ - PyGILState_Release(st); \ - } \ - return clapack_ ## name; \ - } - -/* Computes an LU factorization of a general M-by-N matrix A - * using partial pivoting with row interchanges. 
- */ -EMIT_GET_CLAPACK_FUNC(sgetrf) -EMIT_GET_CLAPACK_FUNC(dgetrf) -EMIT_GET_CLAPACK_FUNC(cgetrf) -EMIT_GET_CLAPACK_FUNC(zgetrf) - -/* Computes the inverse of a matrix using the LU factorization - * computed by xGETRF. - */ -EMIT_GET_CLAPACK_FUNC(sgetri) -EMIT_GET_CLAPACK_FUNC(dgetri) -EMIT_GET_CLAPACK_FUNC(cgetri) -EMIT_GET_CLAPACK_FUNC(zgetri) - -/* Compute Cholesky factorizations */ -EMIT_GET_CLAPACK_FUNC(spotrf) -EMIT_GET_CLAPACK_FUNC(dpotrf) -EMIT_GET_CLAPACK_FUNC(cpotrf) -EMIT_GET_CLAPACK_FUNC(zpotrf) - -/* Computes for an N-by-N real nonsymmetric matrix A, the - * eigenvalues and, optionally, the left and/or right eigenvectors. - */ -EMIT_GET_CLAPACK_FUNC(sgeev) -EMIT_GET_CLAPACK_FUNC(dgeev) -EMIT_GET_CLAPACK_FUNC(cgeev) -EMIT_GET_CLAPACK_FUNC(zgeev) - -/* Computes for an N-by-N Hermitian matrix A, the - * eigenvalues and, optionally, the left and/or right eigenvectors. - */ -EMIT_GET_CLAPACK_FUNC(ssyevd) -EMIT_GET_CLAPACK_FUNC(dsyevd) -EMIT_GET_CLAPACK_FUNC(cheevd) -EMIT_GET_CLAPACK_FUNC(zheevd) - -/* Computes generalised SVD */ -EMIT_GET_CLAPACK_FUNC(sgesdd) -EMIT_GET_CLAPACK_FUNC(dgesdd) -EMIT_GET_CLAPACK_FUNC(cgesdd) -EMIT_GET_CLAPACK_FUNC(zgesdd) - -/* Computes QR decompositions */ -EMIT_GET_CLAPACK_FUNC(sgeqrf) -EMIT_GET_CLAPACK_FUNC(dgeqrf) -EMIT_GET_CLAPACK_FUNC(cgeqrf) -EMIT_GET_CLAPACK_FUNC(zgeqrf) - -/* Computes columns of Q from elementary reflectors produced by xgeqrf() (QR). 
- */ -EMIT_GET_CLAPACK_FUNC(sorgqr) -EMIT_GET_CLAPACK_FUNC(dorgqr) -EMIT_GET_CLAPACK_FUNC(cungqr) -EMIT_GET_CLAPACK_FUNC(zungqr) - -/* Computes the minimum norm solution to linear least squares problems */ -EMIT_GET_CLAPACK_FUNC(sgelsd) -EMIT_GET_CLAPACK_FUNC(dgelsd) -EMIT_GET_CLAPACK_FUNC(cgelsd) -EMIT_GET_CLAPACK_FUNC(zgelsd) - -// Computes the solution to a system of linear equations -EMIT_GET_CLAPACK_FUNC(sgesv) -EMIT_GET_CLAPACK_FUNC(dgesv) -EMIT_GET_CLAPACK_FUNC(cgesv) -EMIT_GET_CLAPACK_FUNC(zgesv) - - -#undef EMIT_GET_CLAPACK_FUNC - -typedef void (*xxgetrf_t)(F_INT *m, F_INT *n, void *a, F_INT *lda, F_INT *ipiv, - F_INT *info); - -typedef void (*xxgetri_t)(F_INT *n, void *a, F_INT *lda, F_INT *ipiv, void - *work, F_INT *lwork, F_INT *info); - -typedef void (*xxpotrf_t)(char *uplo, F_INT *n, void *a, F_INT *lda, F_INT - *info); - -typedef void (*rgeev_t)(char *jobvl, char *jobvr, F_INT *n, void *a, F_INT *lda, - void *wr, void *wi, void *vl, F_INT *ldvl, void *vr, - F_INT *ldvr, void *work, F_INT *lwork, F_INT *info); - -typedef void (*cgeev_t)(char *jobvl, char *jobvr, F_INT *n, void *a, F_INT - *lda, void *w, void *vl, F_INT *ldvl, void *vr, - F_INT *ldvr, void *work, F_INT *lwork, void *rwork, - F_INT *info); - -typedef void (*rgesdd_t)(char *jobz, F_INT *m, F_INT *n, void *a, F_INT *lda, - void *s, void *u, F_INT *ldu, void *vt, F_INT *ldvt, - void *work, F_INT *lwork, F_INT *iwork, F_INT *info); - -typedef void (*cgesdd_t)(char *jobz, F_INT *m, F_INT *n, void *a, F_INT *lda, - void *s, void * u, F_INT *ldu, void * vt, F_INT *ldvt, - void *work, F_INT *lwork, void *rwork, F_INT *iwork, - F_INT *info); - -typedef void (*xsyevd_t)(char *jobz, char *uplo, F_INT *n, void *a, F_INT *lda, - void *w, void *work, F_INT *lwork, F_INT *iwork, - F_INT *liwork, F_INT *info); - -typedef void (*xheevd_t)(char *jobz, char *uplo, F_INT *n, void *a, F_INT *lda, - void *w, void *work, F_INT *lwork, void *rwork, - F_INT *lrwork, F_INT *iwork, F_INT *liwork, - F_INT *info); 
- -typedef void (*xgeqrf_t)(F_INT *m, F_INT *n, void *a, F_INT *lda, void *tau, - void *work, F_INT *lwork, F_INT *info); - -typedef void (*xxxgqr_t)(F_INT *m, F_INT *n, F_INT *k, void *a, F_INT *lda, - void *tau, void *work, F_INT *lwork, F_INT *info); - -typedef void (*rgelsd_t)(F_INT *m, F_INT *n, F_INT *nrhs, void *a, F_INT *lda, - void *b, F_INT *ldb, void *s, void *rcond, F_INT *rank, - void *work, F_INT *lwork, F_INT *iwork, F_INT *info); - -typedef void (*cgelsd_t)(F_INT *m, F_INT *n, F_INT *nrhs, void *a, F_INT *lda, - void *b, F_INT *ldb, void *s, void *rcond, F_INT *rank, - void *work, F_INT *lwork, void *rwork, F_INT *iwork, - F_INT *info); - -typedef void (*xgesv_t)(F_INT *n, F_INT *nrhs, void *a, F_INT *lda, F_INT *ipiv, - void *b, F_INT *ldb, F_INT *info); - - - -/* - * kind_size() - * gets the data size appropriate for a specified kind. - * - * Input: - * kind - the kind, one of: - * (s, d, c, z) = (float, double, complex, double complex). - * - * Returns: - * data_size - the appropriate data size. - * - */ -static size_t kind_size(char kind) -{ - size_t data_size = 0; - switch (kind) - { - case 's': - data_size = sizeof(float); - break; - case 'd': - data_size = sizeof(double); - break; - case 'c': - data_size = sizeof(npy_complex64); - break; - case 'z': - data_size = sizeof(npy_complex128); - break; - } - return data_size; - -} - -/* - * underlying_float_kind() - * gets the underlying float kind for a given kind. - * - * Input: - * kind - the kind, one of: - * (s, d, c, z) = (float, double, complex, double complex). - * - * Returns: - * underlying_float_kind - the underlying float kind, one of: - * (s, d) = (float, double). - * - * This function essentially provides a map between the char kind - * of a type and the char kind of the underlying float used in the - * type. 
Essentially: - * --------------- - * Input -> Output - * --------------- - * s -> s - * d -> d - * c -> s - * z -> d - * --------------- - * - */ -static char underlying_float_kind(char kind) -{ - switch(kind) - { - case 's': - case 'c': - return 's'; - case 'd': - case 'z': - return 'd'; - default: - { - PyGILState_STATE st = PyGILState_Ensure(); - PyErr_SetString(PyExc_ValueError, - "invalid kind in underlying_float_kind()"); - PyGILState_Release(st); - } - } - return -1; -} - -/* - * cast_from_X() - * cast from a kind (s, d, c, z) = (float, double, complex, double complex) - * to a Fortran integer. - * - * Parameters: - * kind the kind of val - * val a pointer to the value to cast - * - * Returns: - * A Fortran int from a cast of val (in complex case, takes the real part). - * - * Struct access via non c99 (python only) cmplx types, used for compatibility. - */ -static F_INT -cast_from_X(char kind, void *val) -{ - switch(kind) - { - case 's': - return (F_INT)(*((float *) val)); - case 'd': - return (F_INT)(*((double *) val)); - case 'c': - return (F_INT)(*((npy_complex64 *)val)).real; - case 'z': - return (F_INT)(*((npy_complex128 *)val)).real; - default: - { - PyGILState_STATE st = PyGILState_Ensure(); - PyErr_SetString(PyExc_ValueError, - "invalid kind in cast"); - PyGILState_Release(st); - } - } - return -1; -} - - -#define CATCH_LAPACK_INVALID_ARG(__routine, info) \ - do { \ - if (info < 0) { \ - PyGILState_STATE st = PyGILState_Ensure(); \ - PyErr_Format(PyExc_RuntimeError, \ - "LAPACK Error: Routine " #__routine ". On input %d\n",\ - -(int) info); \ - PyGILState_Release(st); \ - return STATUS_ERROR; \ - } \ - } while(0) - -/* Compute LU decomposition of A - * NOTE: ipiv is an array of Fortran integers allocated by the caller, - * which is therefore expected to use the right dtype. 
- */ -NUMBA_EXPORT_FUNC(int) -numba_xxgetrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t lda, - F_INT *ipiv) -{ - void *raw_func = NULL; - F_INT _m, _n, _lda, info; - - ENSURE_VALID_KIND(kind) - - switch (kind) - { - case 's': - raw_func = get_clapack_sgetrf(); - break; - case 'd': - raw_func = get_clapack_dgetrf(); - break; - case 'c': - raw_func = get_clapack_cgetrf(); - break; - case 'z': - raw_func = get_clapack_zgetrf(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - _m = (F_INT) m; - _n = (F_INT) n; - _lda = (F_INT) lda; - - (*(xxgetrf_t) raw_func)(&_m, &_n, a, &_lda, ipiv, &info); - CATCH_LAPACK_INVALID_ARG("xxgetrf", info); - - return (int)info; -} - -/* Compute the inverse of a matrix given its LU decomposition - * Args are as per LAPACK. - */ -static int -numba_raw_xxgetri(char kind, F_INT n, void *a, F_INT lda, - F_INT *ipiv, void *work, F_INT *lwork, F_INT *info) -{ - void *raw_func = NULL; - - ENSURE_VALID_KIND(kind) - - switch (kind) - { - case 's': - raw_func = get_clapack_sgetri(); - break; - case 'd': - raw_func = get_clapack_dgetri(); - break; - case 'c': - raw_func = get_clapack_cgetri(); - break; - case 'z': - raw_func = get_clapack_zgetri(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - (*(xxgetri_t) raw_func)(&n, a, &lda, ipiv, work, lwork, info); - - return 0; -} - -/* Compute the inverse of a matrix from the factorization provided by - * xxgetrf. (see numba_xxgetrf() about ipiv) - * Args are as per LAPACK. 
- */ -NUMBA_EXPORT_FUNC(int) -numba_ez_xxgetri(char kind, Py_ssize_t n, void *a, Py_ssize_t lda, - F_INT *ipiv) -{ - F_INT _n, _lda; - F_INT lwork = -1; - F_INT info = 0; - size_t base_size = -1; - void * work = NULL; - all_dtypes stack_slot; - - ENSURE_VALID_KIND(kind) - - _n = (F_INT)n; - _lda = (F_INT)lda; - - base_size = kind_size(kind); - - work = &stack_slot; - - numba_raw_xxgetri(kind, _n, a, _lda, ipiv, work, &lwork, &info); - CATCH_LAPACK_INVALID_ARG("xxgetri", info); - - lwork = cast_from_X(kind, work); - - if (checked_PyMem_RawMalloc(&work, base_size * lwork)) - { - return STATUS_ERROR; - } - - numba_raw_xxgetri(kind, _n, a, _lda, ipiv, work, &lwork, &info); - PyMem_RawFree(work); - CATCH_LAPACK_INVALID_ARG("xxgetri", info); - - return (int)info; -} - -/* Compute the Cholesky factorization of a matrix. */ -NUMBA_EXPORT_FUNC(int) -numba_xxpotrf(char kind, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda) -{ - void *raw_func = NULL; - F_INT _n, _lda, info; - - ENSURE_VALID_KIND(kind) - - switch (kind) - { - case 's': - raw_func = get_clapack_spotrf(); - break; - case 'd': - raw_func = get_clapack_dpotrf(); - break; - case 'c': - raw_func = get_clapack_cpotrf(); - break; - case 'z': - raw_func = get_clapack_zpotrf(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - _n = (F_INT) n; - _lda = (F_INT) lda; - - (*(xxpotrf_t) raw_func)(&uplo, &_n, a, &_lda, &info); - CATCH_LAPACK_INVALID_ARG("xxpotrf", info); - return (int)info; -} - - -/* real space eigen systems info from dgeev/sgeev */ -static int -numba_raw_rgeev(char kind, char jobvl, char jobvr, - Py_ssize_t n, void *a, Py_ssize_t lda, void *wr, void *wi, - void *vl, Py_ssize_t ldvl, void *vr, Py_ssize_t ldvr, - void *work, Py_ssize_t lwork, F_INT *info) -{ - void *raw_func = NULL; - F_INT _n, _lda, _ldvl, _ldvr, _lwork; - - ENSURE_VALID_REAL_KIND(kind) - - switch (kind) - { - case 's': - raw_func = get_clapack_sgeev(); - break; - case 'd': - raw_func = get_clapack_dgeev(); - break; - } - 
ENSURE_VALID_FUNC(raw_func) - - _n = (F_INT) n; - _lda = (F_INT) lda; - _ldvl = (F_INT) ldvl; - _ldvr = (F_INT) ldvr; - _lwork = (F_INT) lwork; - - (*(rgeev_t) raw_func)(&jobvl, &jobvr, &_n, a, &_lda, wr, wi, vl, &_ldvl, vr, - &_ldvr, work, &_lwork, info); - return 0; -} - -/* Real space eigen systems info from dgeev/sgeev - * as numba_raw_rgeev but the allocation and error handling is done for the user. - * Args are as per LAPACK. - */ -NUMBA_EXPORT_FUNC(int) -numba_ez_rgeev(char kind, char jobvl, char jobvr, Py_ssize_t n, void *a, - Py_ssize_t lda, void *wr, void *wi, void *vl, Py_ssize_t ldvl, - void *vr, Py_ssize_t ldvr) -{ - F_INT info = 0; - F_INT lwork = -1; - F_INT _n, _lda, _ldvl, _ldvr; - size_t base_size = -1; - void * work = NULL; - all_dtypes stack_slot; - - ENSURE_VALID_REAL_KIND(kind) - - _n = (F_INT) n; - _lda = (F_INT) lda; - _ldvl = (F_INT) ldvl; - _ldvr = (F_INT) ldvr; - - base_size = kind_size(kind); - - work = &stack_slot; - numba_raw_rgeev(kind, jobvl, jobvr, _n, a, _lda, wr, wi, vl, _ldvl, - vr, _ldvr, work, lwork, &info); - CATCH_LAPACK_INVALID_ARG("numba_raw_rgeev", info); - - lwork = cast_from_X(kind, work); - if (checked_PyMem_RawMalloc(&work, base_size * lwork)) - { - return STATUS_ERROR; - } - numba_raw_rgeev(kind, jobvl, jobvr, _n, a, _lda, wr, wi, vl, _ldvl, - vr, _ldvr, work, lwork, &info); - PyMem_RawFree(work); - - CATCH_LAPACK_INVALID_ARG("numba_raw_rgeev", info); - - return (int)info; -} - -/* Complex space eigen systems info from cgeev/zgeev - * Args are as per LAPACK. 
- */ -static int -numba_raw_cgeev(char kind, char jobvl, char jobvr, - Py_ssize_t n, void *a, Py_ssize_t lda, void *w, void *vl, - Py_ssize_t ldvl, void *vr, Py_ssize_t ldvr, void *work, - Py_ssize_t lwork, void *rwork, F_INT *info) -{ - void *raw_func = NULL; - F_INT _n, _lda, _ldvl, _ldvr, _lwork; - - ENSURE_VALID_COMPLEX_KIND(kind) - - _n = (F_INT) n; - _lda = (F_INT) lda; - _ldvl = (F_INT) ldvl; - _ldvr = (F_INT) ldvr; - _lwork = (F_INT) lwork; - - switch (kind) - { - case 'c': - raw_func = get_clapack_cgeev(); - break; - case 'z': - raw_func = get_clapack_zgeev(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - (*(cgeev_t) raw_func)(&jobvl, &jobvr, &_n, a, &_lda, w, vl, &_ldvl, vr, - &_ldvr, work, &_lwork, rwork, info); - return 0; -} - - -/* Complex space eigen systems info from cgeev/zgeev - * as numba_raw_cgeev but the allocation and error handling is done for the user. - * Args are as per LAPACK. - */ -NUMBA_EXPORT_FUNC(int) -numba_ez_cgeev(char kind, char jobvl, char jobvr, Py_ssize_t n, void *a, - Py_ssize_t lda, void *w, void *vl, Py_ssize_t ldvl, void *vr, - Py_ssize_t ldvr) -{ - F_INT info = 0; - F_INT lwork = -1; - F_INT _n, _lda, _ldvl, _ldvr; - size_t base_size = -1; - all_dtypes stack_slot, wk; - void * work = NULL; - void * rwork = (void *)&wk; - - ENSURE_VALID_COMPLEX_KIND(kind) - - _n = (F_INT) n; - _lda = (F_INT) lda; - _ldvl = (F_INT) ldvl; - _ldvr = (F_INT) ldvr; - - base_size = kind_size(kind); - - work = &stack_slot; - numba_raw_cgeev(kind, jobvl, jobvr, n, a, lda, w, vl, ldvl, - vr, ldvr, work, lwork, rwork, &info); - CATCH_LAPACK_INVALID_ARG("numba_raw_cgeev", info); - - lwork = cast_from_X(kind, work); - if (checked_PyMem_RawMalloc((void**)&rwork, 2*n*base_size)) - { - return STATUS_ERROR; - } - if (checked_PyMem_RawMalloc(&work, base_size * lwork)) - { - PyMem_RawFree(rwork); - return STATUS_ERROR; - } - numba_raw_cgeev(kind, jobvl, jobvr, _n, a, _lda, w, vl, _ldvl, - vr, _ldvr, work, lwork, rwork, &info); - PyMem_RawFree(work); - 
PyMem_RawFree(rwork); - CATCH_LAPACK_INVALID_ARG("numba_raw_cgeev", info); - - return (int)info; -} - -/* real space symmetric eigen systems info from ssyevd/dsyevd */ -static int -numba_raw_rsyevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, - Py_ssize_t lda, void *w, void *work, Py_ssize_t lwork, - F_INT *iwork, Py_ssize_t liwork, F_INT *info) -{ - void *raw_func = NULL; - F_INT _n, _lda, _lwork, _liwork; - - ENSURE_VALID_REAL_KIND(kind) - - switch (kind) - { - case 's': - raw_func = get_clapack_ssyevd(); - break; - case 'd': - raw_func = get_clapack_dsyevd(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - _n = (F_INT) n; - _lda = (F_INT) lda; - _lwork = (F_INT) lwork; - _liwork = (F_INT) liwork; - - (*(xsyevd_t) raw_func)(&jobz, &uplo, &_n, a, &_lda, w, work, &_lwork, iwork, &_liwork, info); - return 0; -} - -/* Real space eigen systems info from dsyevd/ssyevd - * as numba_raw_rsyevd but the allocation and error handling is done for the user. - * Args are as per LAPACK. - */ -static int -numba_ez_rsyevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w) -{ - F_INT info = 0; - F_INT lwork = -1, liwork=-1; - F_INT _n, _lda; - size_t base_size = -1; - void *work = NULL; - F_INT *iwork = NULL; - all_dtypes stack_slot; - int stack_int = -1; - - ENSURE_VALID_REAL_KIND(kind) - - _n = (F_INT) n; - _lda = (F_INT) lda; - - base_size = kind_size(kind); - - work = &stack_slot; - iwork = &stack_int; - numba_raw_rsyevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, iwork, liwork, &info); - CATCH_LAPACK_INVALID_ARG("numba_raw_rsyevd", info); - - lwork = cast_from_X(kind, work); - if (checked_PyMem_RawMalloc(&work, base_size * lwork)) - { - return STATUS_ERROR; - } - liwork = *iwork; - if (checked_PyMem_RawMalloc((void**)&iwork, base_size * liwork)) - { - PyMem_RawFree(work); - return STATUS_ERROR; - } - numba_raw_rsyevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, iwork, liwork, &info); - PyMem_RawFree(work); - PyMem_RawFree(iwork); - 
- CATCH_LAPACK_INVALID_ARG("numba_raw_rsyevd", info); - - return (int)info; -} - - -/* complex space symmetric eigen systems info from cheevd/zheevd*/ -static int -numba_raw_cheevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, - Py_ssize_t lda, void *w, void *work, Py_ssize_t lwork, - void *rwork, Py_ssize_t lrwork, F_INT *iwork, - Py_ssize_t liwork, F_INT *info) -{ - void *raw_func = NULL; - F_INT _n, _lda, _lwork, _lrwork, _liwork; - - ENSURE_VALID_COMPLEX_KIND(kind) - - switch (kind) - { - case 'c': - raw_func = get_clapack_cheevd(); - break; - case 'z': - raw_func = get_clapack_zheevd(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - _n = (F_INT) n; - _lda = (F_INT) lda; - _lwork = (F_INT) lwork; - _lrwork = (F_INT) lrwork; - _liwork = (F_INT) liwork; - - (*(xheevd_t) raw_func)(&jobz, &uplo, &_n, a, &_lda, w, work, &_lwork, rwork, &_lrwork, iwork, &_liwork, info); - return 0; -} - -/* complex space eigen systems info from cheevd/zheevd - * as numba_raw_cheevd but the allocation and error handling is done for the user. - * Args are as per LAPACK. 
- */ -static int -numba_ez_cheevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w) -{ - F_INT info = 0; - F_INT lwork = -1, lrwork = -1, liwork=-1; - F_INT _n, _lda; - size_t base_size = -1, underlying_float_size = -1; - void *work = NULL, *rwork = NULL; - F_INT *iwork = NULL; - all_dtypes stack_slot1, stack_slot2; - char uf_kind; - int stack_int = -1; - - ENSURE_VALID_COMPLEX_KIND(kind) - - _n = (F_INT) n; - _lda = (F_INT) lda; - - base_size = kind_size(kind); - uf_kind = underlying_float_kind(kind); - underlying_float_size = kind_size(uf_kind); - - work = &stack_slot1; - rwork = &stack_slot2; - iwork = &stack_int; - numba_raw_cheevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, rwork, lrwork, iwork, liwork, &info); - CATCH_LAPACK_INVALID_ARG("numba_raw_cheevd", info); - - lwork = cast_from_X(uf_kind, work); - if (checked_PyMem_RawMalloc(&work, base_size * lwork)) - { - return STATUS_ERROR; - } - - lrwork = cast_from_X(uf_kind, rwork); - if (checked_PyMem_RawMalloc(&rwork, underlying_float_size * lrwork)) - { - PyMem_RawFree(work); - return STATUS_ERROR; - } - - liwork = *iwork; - if (checked_PyMem_RawMalloc((void**)&iwork, base_size * liwork)) - { - PyMem_RawFree(work); - PyMem_RawFree(rwork); - return STATUS_ERROR; - } - numba_raw_cheevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, rwork, lrwork, iwork, liwork, &info); - PyMem_RawFree(work); - PyMem_RawFree(rwork); - PyMem_RawFree(iwork); - - CATCH_LAPACK_INVALID_ARG("numba_raw_cheevd", info); - - return (int)info; -} - -/* Hermitian eigenvalue systems info from *syevd and *heevd. - * This routine hides the type and general complexity involved with making the - * calls. The work space computation and error handling etc is hidden. - * Args are as per LAPACK. 
- */ -NUMBA_EXPORT_FUNC(int) -numba_ez_xxxevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w) -{ - ENSURE_VALID_KIND(kind) - - switch (kind) - { - case 's': - case 'd': - return numba_ez_rsyevd(kind, jobz, uplo, n, a, lda, w); - case 'c': - case 'z': - return numba_ez_cheevd(kind, jobz, uplo, n, a, lda, w); - } - return STATUS_ERROR; /* unreachable */ -} - -/* Real space svd systems info from dgesdd/sgesdd - * Args are as per LAPACK. - */ -static int -numba_raw_rgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, - Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, - Py_ssize_t ldvt, void *work, Py_ssize_t lwork, - F_INT *iwork, F_INT *info) -{ - void *raw_func = NULL; - F_INT _m, _n, _lda, _ldu, _ldvt, _lwork; - - ENSURE_VALID_REAL_KIND(kind) - - _m = (F_INT) m; - _n = (F_INT) n; - _lda = (F_INT) lda; - _ldu = (F_INT) ldu; - _ldvt = (F_INT) ldvt; - _lwork = (F_INT) lwork; - - switch (kind) - { - case 's': - raw_func = get_clapack_sgesdd(); - break; - case 'd': - raw_func = get_clapack_dgesdd(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - (*(rgesdd_t) raw_func)(&jobz, &_m, &_n, a, &_lda, s, u, &_ldu, vt, &_ldvt, - work, &_lwork, iwork, info); - return 0; -} - -/* Real space svd info from dgesdd/sgesdd. - * As numba_raw_rgesdd but the allocation and error handling is done for the - * user. - * Args are as per LAPACK. 
- */ -static int -numba_ez_rgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, - Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, - Py_ssize_t ldvt) -{ - F_INT info = 0; - Py_ssize_t minmn = -1; - Py_ssize_t lwork = -1; - all_dtypes stack_slot, wk; - size_t base_size = -1; - F_INT *iwork = (F_INT *)&wk; - void *work = NULL; - - ENSURE_VALID_REAL_KIND(kind) - - base_size = kind_size(kind); - - work = &stack_slot; - - /* Compute optimal work size (lwork) */ - numba_raw_rgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt, ldvt, work, - lwork, iwork, &info); - CATCH_LAPACK_INVALID_ARG("numba_raw_rgesdd", info); - - /* Allocate work array */ - lwork = cast_from_X(kind, work); - if (checked_PyMem_RawMalloc(&work, base_size * lwork)) - return -1; - minmn = m > n ? n : m; - if (checked_PyMem_RawMalloc((void**) &iwork, 8 * minmn * sizeof(F_INT))) - { - PyMem_RawFree(work); - return STATUS_ERROR; - } - numba_raw_rgesdd(kind, jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork, - iwork, &info); - PyMem_RawFree(work); - PyMem_RawFree(iwork); - CATCH_LAPACK_INVALID_ARG("numba_raw_rgesdd", info); - - return (int)info; -} - -/* Complex space svd systems info from cgesdd/zgesdd - * Args are as per LAPACK. 
- */ -static int -numba_raw_cgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, - Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, - Py_ssize_t ldvt, void *work, Py_ssize_t lwork, void *rwork, - F_INT *iwork, F_INT *info) -{ - void *raw_func = NULL; - F_INT _m, _n, _lda, _ldu, _ldvt, _lwork; - - ENSURE_VALID_COMPLEX_KIND(kind) - - _m = (F_INT) m; - _n = (F_INT) n; - _lda = (F_INT) lda; - _ldu = (F_INT) ldu; - _ldvt = (F_INT) ldvt; - _lwork = (F_INT) lwork; - - switch (kind) - { - case 'c': - raw_func = get_clapack_cgesdd(); - break; - case 'z': - raw_func = get_clapack_zgesdd(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - (*(cgesdd_t) raw_func)(&jobz, &_m, &_n, a, &_lda, s, u, &_ldu, vt, &_ldvt, - work, &_lwork, rwork, iwork, info); - return 0; -} - -/* complex space svd info from cgesdd/zgesdd. - * As numba_raw_cgesdd but the allocation and error handling is done for the - * user. - * Args are as per LAPACK. - */ -static int -numba_ez_cgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, - Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, - Py_ssize_t ldvt) -{ - F_INT info = 0; - Py_ssize_t lwork = -1; - Py_ssize_t lrwork = -1; - Py_ssize_t minmn = -1; - Py_ssize_t tmp1, tmp2; - Py_ssize_t maxmn = -1; - size_t real_base_size = -1; - size_t complex_base_size = -1; - all_dtypes stack_slot, wk1, wk2; - void *work = NULL; - void *rwork = (void *)&wk1; - F_INT *iwork = (F_INT *)&wk2; - - ENSURE_VALID_COMPLEX_KIND(kind) - - switch (kind) - { - case 'c': - real_base_size = sizeof(float); - complex_base_size = sizeof(npy_complex64); - break; - case 'z': - real_base_size = sizeof(double); - complex_base_size = sizeof(npy_complex128); - break; - default: - { - PyGILState_STATE st = PyGILState_Ensure(); - PyErr_SetString(PyExc_ValueError,\ - "Invalid kind in numba_ez_rgesdd"); - PyGILState_Release(st); - } - return STATUS_ERROR; - } - - work = &stack_slot; - - /* Compute optimal work size (lwork) */ - numba_raw_cgesdd(kind, 
jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork, - rwork, iwork, &info); - CATCH_LAPACK_INVALID_ARG("numba_raw_cgesdd", info); - - /* Allocate work array */ - lwork = cast_from_X(kind, work); - if (checked_PyMem_RawMalloc(&work, complex_base_size * lwork)) - return STATUS_ERROR; - - minmn = m > n ? n : m; - if (jobz == 'n') - { - lrwork = 7 * minmn; - } - else - { - maxmn = m > n ? m : n; - tmp1 = 5 * minmn + 7; - tmp2 = 2 * maxmn + 2 * minmn + 1; - lrwork = minmn * (tmp1 > tmp2 ? tmp1: tmp2); - } - - if (checked_PyMem_RawMalloc(&rwork, - real_base_size * (lrwork > 1 ? lrwork : 1))) - { - PyMem_RawFree(work); - return STATUS_ERROR; - } - if (checked_PyMem_RawMalloc((void **) &iwork, - 8 * minmn * sizeof(F_INT))) - { - PyMem_RawFree(work); - PyMem_RawFree(rwork); - return STATUS_ERROR; - } - numba_raw_cgesdd(kind, jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork, - rwork, iwork, &info); - PyMem_RawFree(work); - PyMem_RawFree(rwork); - PyMem_RawFree(iwork); - CATCH_LAPACK_INVALID_ARG("numba_raw_cgesdd", info); - - return (int)info; -} - - -/* SVD systems info from *gesdd. - * This routine hides the type and general complexity involved with making the - * calls to *gesdd. The work space computation and error handling etc is hidden. - * Args are as per LAPACK. - */ -NUMBA_EXPORT_FUNC(int) -numba_ez_gesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, - Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, - Py_ssize_t ldvt) -{ - ENSURE_VALID_KIND(kind) - - switch (kind) - { - case 's': - case 'd': - return numba_ez_rgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt, - ldvt); - case 'c': - case 'z': - return numba_ez_cgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt, - ldvt); - } - return STATUS_ERROR; /* unreachable */ -} - - -/* - * Compute the QR factorization of a matrix. - * Return -1 on internal error, 0 on success, > 0 on failure. 
- */ -static int -numba_raw_xgeqrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t - lda, void *tau, void *work, Py_ssize_t lwork, F_INT *info) -{ - void *raw_func = NULL; - F_INT _m, _n, _lda, _lwork; - - ENSURE_VALID_KIND(kind) - - switch (kind) - { - case 's': - raw_func = get_clapack_sgeqrf(); - break; - case 'd': - raw_func = get_clapack_dgeqrf(); - break; - case 'c': - raw_func = get_clapack_cgeqrf(); - break; - case 'z': - raw_func = get_clapack_zgeqrf(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - _m = (F_INT) m; - _n = (F_INT) n; - _lda = (F_INT) lda; - _lwork = (F_INT) lwork; - - (*(xgeqrf_t) raw_func)(&_m, &_n, a, &_lda, tau, work, &_lwork, info); - return 0; -} - -/* - * Compute the QR factorization of a matrix. - * This routine hides the type and general complexity involved with making the - * xgeqrf calls. The work space computation and error handling etc is hidden. - * Args are as per LAPACK. - */ -NUMBA_EXPORT_FUNC(int) -numba_ez_geqrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t - lda, void *tau) -{ - F_INT info = 0; - Py_ssize_t lwork = -1; - size_t base_size = -1; - all_dtypes stack_slot; - void *work = NULL; - - base_size = kind_size(kind); - - work = &stack_slot; - - /* Compute optimal work size (lwork) */ - numba_raw_xgeqrf(kind, m, n, a, lda, tau, work, lwork, &info); - CATCH_LAPACK_INVALID_ARG("numba_raw_xgeqrf", info); - - /* Allocate work array */ - lwork = cast_from_X(kind, work); - if (checked_PyMem_RawMalloc(&work, base_size * lwork)) - return STATUS_ERROR; - - numba_raw_xgeqrf(kind, m, n, a, lda, tau, work, lwork, &info); - PyMem_RawFree(work); - CATCH_LAPACK_INVALID_ARG("numba_raw_xgeqrf", info); - - return 0; /* info cannot be >0 */ - -} - - -/* - * Compute the orthogonal Q matrix (in QR) from elementary relectors. 
- */ -static int -numba_raw_xxxgqr(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t k, void *a, - Py_ssize_t lda, void *tau, void * work, Py_ssize_t lwork, F_INT *info) -{ - void *raw_func = NULL; - F_INT _m, _n, _k, _lda, _lwork; - - ENSURE_VALID_KIND(kind) - - switch (kind) - { - case 's': - raw_func = get_clapack_sorgqr(); - break; - case 'd': - raw_func = get_clapack_dorgqr(); - break; - case 'c': - raw_func = get_clapack_cungqr(); - break; - case 'z': - raw_func = get_clapack_zungqr(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - _m = (F_INT) m; - _n = (F_INT) n; - _k = (F_INT) k; - _lda = (F_INT) lda; - _lwork = (F_INT) lwork; - - (*(xxxgqr_t) raw_func)(&_m, &_n, &_k, a, &_lda, tau, work, &_lwork, info); - return 0; -} - - -/* - * Compute the orthogonal Q matrix (in QR) from elementary reflectors. - * This routine hides the type and general complexity involved with making the - * x{or,un}qrf calls. The work space computation and error handling etc is - * hidden. Args are as per LAPACK. - */ -NUMBA_EXPORT_FUNC(int) -numba_ez_xxgqr(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t k, void *a, - Py_ssize_t lda, void *tau) -{ - F_INT info = 0; - Py_ssize_t lwork = -1; - size_t base_size = -1; - all_dtypes stack_slot; - void *work = NULL; - - work = &stack_slot; - - /* Compute optimal work size (lwork) */ - numba_raw_xxxgqr(kind, m, n, k, a, lda, tau, work, lwork, &info); - CATCH_LAPACK_INVALID_ARG("numba_raw_xxxgqr", info); - - base_size = kind_size(kind); - - /* Allocate work array */ - lwork = cast_from_X(kind, work); - if (checked_PyMem_RawMalloc(&work, base_size * lwork)) - return STATUS_ERROR; - - numba_raw_xxxgqr(kind, m, n, k, a, lda, tau, work, lwork, &info); - PyMem_RawFree(work); - CATCH_LAPACK_INVALID_ARG("numba_raw_xxxgqr", info); - - return 0; /* info cannot be >0 */ - -} - - -/* - * Compute the minimum-norm solution to a real linear least squares problem. 
- */ -static int -numba_raw_rgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs, - void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S, - void * rcond, Py_ssize_t * rank, void * work, - Py_ssize_t lwork, F_INT *iwork, F_INT *info) -{ - void *raw_func = NULL; - F_INT _m, _n, _nrhs, _lda, _ldb, _rank, _lwork; - - ENSURE_VALID_REAL_KIND(kind) - - switch (kind) - { - case 's': - raw_func = get_clapack_sgelsd(); - break; - case 'd': - raw_func = get_clapack_dgelsd(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - _m = (F_INT) m; - _n = (F_INT) n; - _nrhs = (F_INT) nrhs; - _lda = (F_INT) lda; - _ldb = (F_INT) ldb; - _lwork = (F_INT) lwork; - - (*(rgelsd_t) raw_func)(&_m, &_n, &_nrhs, a, &_lda, b, &_ldb, S, rcond, - &_rank, work, &_lwork, iwork, info); - *rank = (Py_ssize_t) _rank; - return 0; -} - -/* - * Compute the minimum-norm solution to a real linear least squares problem. - * This routine hides the type and general complexity involved with making the - * {s,d}gelsd calls. The work space computation and error handling etc is - * hidden. Args are as per LAPACK. 
- */ -static int -numba_ez_rgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs, - void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S, - double rcond, Py_ssize_t * rank) -{ - F_INT info = 0; - Py_ssize_t lwork = -1; - size_t base_size = -1; - all_dtypes stack_slot; - void *work = NULL, *rcond_cast = NULL; - F_INT *iwork = NULL; - F_INT iwork_tmp; - float tmpf; - - ENSURE_VALID_REAL_KIND(kind) - - base_size = kind_size(kind); - - work = &stack_slot; - rcond_cast = work; /* stop checks on null ptr complaining */ - - /* Compute optimal work size (lwork) */ - numba_raw_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank, - work, lwork, &iwork_tmp, &info); - CATCH_LAPACK_INVALID_ARG("numba_raw_rgelsd", info); - - /* Allocate work array */ - lwork = cast_from_X(kind, work); - if (checked_PyMem_RawMalloc(&work, base_size * lwork)) - return STATUS_ERROR; - - /* Allocate iwork array */ - if (checked_PyMem_RawMalloc((void **)&iwork, sizeof(F_INT) * iwork_tmp)) - { - PyMem_RawFree(work); - return STATUS_ERROR; - } - - /* cast rcond to the right type */ - switch (kind) - { - case 's': - tmpf = (float)rcond; - rcond_cast = (void * )&tmpf; - break; - case 'd': - rcond_cast = (void * )&rcond; - break; - } - - numba_raw_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank, - work, lwork, iwork, &info); - PyMem_RawFree(work); - PyMem_RawFree(iwork); - CATCH_LAPACK_INVALID_ARG("numba_raw_rgelsd", info); - - return (int)info; -} - - -/* - * Compute the minimum-norm solution to a complex linear least squares problem. 
- */ -static int -numba_raw_cgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs, - void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S, - void *rcond, Py_ssize_t * rank, void * work, - Py_ssize_t lwork, void * rwork, F_INT *iwork, F_INT *info) -{ - void *raw_func = NULL; - F_INT _m, _n, _nrhs, _lda, _ldb, _rank, _lwork; - - ENSURE_VALID_COMPLEX_KIND(kind) - - switch (kind) - { - case 'c': - raw_func = get_clapack_cgelsd(); - break; - case 'z': - raw_func = get_clapack_zgelsd(); - break; - } - ENSURE_VALID_FUNC(raw_func) - - _m = (F_INT) m; - _n = (F_INT) n; - _nrhs = (F_INT) nrhs; - _lda = (F_INT) lda; - _ldb = (F_INT) ldb; - _lwork = (F_INT) lwork; - - (*(cgelsd_t) raw_func)(&_m, &_n, &_nrhs, a, &_lda, b, &_ldb, S, rcond, - &_rank, work, &_lwork, rwork, iwork, info); - *rank = (Py_ssize_t) _rank; - return 0; -} - - -/* - * Compute the minimum-norm solution to a complex linear least squares problem. - * This routine hides the type and general complexity involved with making the - * {c,z}gelsd calls. The work space computation and error handling etc is - * hidden. Args are as per LAPACK. 
- */ -static int -numba_ez_cgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs, - void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S, - double rcond, Py_ssize_t * rank) -{ - F_INT info = 0; - Py_ssize_t lwork = -1; - size_t base_size = -1; - all_dtypes stack_slot1, stack_slot2; - size_t real_base_size = 0; - void *work = NULL, *rwork = NULL, *rcond_cast = NULL; - Py_ssize_t lrwork; - F_INT *iwork = NULL; - F_INT iwork_tmp; - char real_kind = '-'; - float tmpf; - - ENSURE_VALID_COMPLEX_KIND(kind) - - base_size = kind_size(kind); - - work = &stack_slot1; - rwork = &stack_slot2; - rcond_cast = work; /* stop checks on null ptr complaining */ - - /* Compute optimal work size */ - numba_raw_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank, - work, lwork, rwork, &iwork_tmp, &info); - CATCH_LAPACK_INVALID_ARG("numba_raw_cgelsd", info); - - /* Allocate work array */ - lwork = cast_from_X(kind, work); - if (checked_PyMem_RawMalloc(&work, base_size * lwork)) - return STATUS_ERROR; - - /* Allocate iwork array */ - if (checked_PyMem_RawMalloc((void **)&iwork, sizeof(F_INT) * iwork_tmp)) - { - PyMem_RawFree(work); - return STATUS_ERROR; - } - - switch (kind) - { - case 'c': - real_kind = 's'; - tmpf = (float)rcond; - rcond_cast = (void * )&tmpf; - break; - case 'z': - real_kind = 'd'; - rcond_cast = (void * )&rcond; - break; - } - - real_base_size = kind_size(real_kind); - - lrwork = cast_from_X(real_kind, rwork); - if (checked_PyMem_RawMalloc((void **)&rwork, real_base_size * lrwork)) - { - PyMem_RawFree(work); - PyMem_RawFree(iwork); - return STATUS_ERROR; - } - - numba_raw_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank, - work, lwork, rwork, iwork, &info); - PyMem_RawFree(work); - PyMem_RawFree(rwork); - PyMem_RawFree(iwork); - CATCH_LAPACK_INVALID_ARG("numba_raw_cgelsd", info); - - return (int)info; -} - - -/* - * Compute the minimum-norm solution to a linear least squares problems. 
- * This routine hides the type and general complexity involved with making the - * calls to *gelsd. The work space computation and error handling etc is hidden. - * Args are as per LAPACK. - */ -NUMBA_EXPORT_FUNC(int) -numba_ez_gelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs, - void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S, - double rcond, Py_ssize_t * rank) -{ - ENSURE_VALID_KIND(kind) - - switch (kind) - { - case 's': - case 'd': - return numba_ez_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond, - rank); - case 'c': - case 'z': - return numba_ez_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond, - rank); - } - return STATUS_ERROR; /* unreachable */ -} - - -/* - * Compute the solution to a system of linear equations - */ -NUMBA_EXPORT_FUNC(int) -numba_xgesv(char kind, Py_ssize_t n, Py_ssize_t nrhs, void *a, Py_ssize_t lda, - F_INT *ipiv, void *b, Py_ssize_t ldb) -{ - void *raw_func = NULL; - F_INT _n, _nrhs, _lda, _ldb, info; - - ENSURE_VALID_KIND(kind) - - switch (kind) - { - case 's': - raw_func = get_clapack_sgesv(); - break; - case 'd': - raw_func = get_clapack_dgesv(); - break; - case 'c': - raw_func = get_clapack_cgesv(); - break; - case 'z': - raw_func = get_clapack_zgesv(); - break; - } - - ENSURE_VALID_FUNC(raw_func) - - _n = (F_INT) n; - _nrhs = (F_INT) nrhs; - _lda = (F_INT) lda; - _ldb = (F_INT) ldb; - - (*(xgesv_t) raw_func)(&_n, &_nrhs, a, &_lda, ipiv, b, &_ldb, &info); - CATCH_LAPACK_INVALID_ARG("xgesv", info); - - return (int)info; -} - -/* undef defines and macros */ -#undef STATUS_SUCCESS -#undef STATUS_ERROR -#undef ENSURE_VALID_KIND -#undef ENSURE_VALID_REAL_KIND -#undef ENSURE_VALID_COMPLEX_KIND -#undef ENSURE_VALID_FUNC -#undef F_INT -#undef EMIT_GET_CLAPACK_FUNC -#undef CATCH_LAPACK_INVALID_ARG diff --git a/numba/numba/_math_c99.c b/numba/numba/_math_c99.c deleted file mode 100644 index 8988446dd..000000000 --- a/numba/numba/_math_c99.c +++ /dev/null @@ -1,781 +0,0 @@ -#include "Python.h" -#include -#include 
"_math_c99.h" - - -/* Copied from Python Module/_math.c with modification to symbol name */ - - -/* The following copyright notice applies to the original - implementations of acosh, asinh and atanh. */ - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -static const double ln2 = 6.93147180559945286227E-01; -static const double two_pow_m28 = 3.7252902984619141E-09; /* 2**-28 */ -static const double two_pow_p28 = 268435456.0; /* 2**28 */ -#ifndef Py_NAN -static const double zero = 0.0; // used only if no NaN is available -#endif - -/* acosh(x) - * Method : - * Based on - * acosh(x) = log [ x + sqrt(x*x-1) ] - * we have - * acosh(x) := log(x)+ln2, if x is large; else - * acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else - * acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1. - * - * Special cases: - * acosh(x) is NaN with signal if x<1. - * acosh(NaN) is NaN without signal. - */ - -double -m_acosh(double x) -{ - if (Py_IS_NAN(x)) { - return x+x; - } - if (x < 1.) { /* x < 1; return a signaling NaN */ - errno = EDOM; -#ifdef Py_NAN - return Py_NAN; -#else - return (x-x)/(x-x); -#endif - } - else if (x >= two_pow_p28) { /* x > 2**28 */ - if (Py_IS_INFINITY(x)) { - return x+x; - } - else { - return log(x)+ln2; /* acosh(huge)=log(2x) */ - } - } - else if (x == 1.) { - return 0.0; /* acosh(1) = 0 */ - } - else if (x > 2.) 
{ /* 2 < x < 2**28 */ - double t = x*x; - return log(2.0*x - 1.0 / (x + sqrt(t - 1.0))); - } - else { /* 1 < x <= 2 */ - double t = x - 1.0; - return m_log1p(t + sqrt(2.0*t + t*t)); - } -} - - -/* asinh(x) - * Method : - * Based on - * asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ] - * we have - * asinh(x) := x if 1+x*x=1, - * := sign(x)*(log(x)+ln2)) for large |x|, else - * := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else - * := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2))) - */ - -double -m_asinh(double x) -{ - double w; - double absx = fabs(x); - - if (Py_IS_NAN(x) || Py_IS_INFINITY(x)) { - return x+x; - } - if (absx < two_pow_m28) { /* |x| < 2**-28 */ - return x; /* return x inexact except 0 */ - } - if (absx > two_pow_p28) { /* |x| > 2**28 */ - w = log(absx)+ln2; - } - else if (absx > 2.0) { /* 2 < |x| < 2**28 */ - w = log(2.0*absx + 1.0 / (sqrt(x*x + 1.0) + absx)); - } - else { /* 2**-28 <= |x| < 2= */ - double t = x*x; - w = m_log1p(absx + t / (1.0 + sqrt(1.0 + t))); - } - return copysign(w, x); - -} - -/* atanh(x) - * Method : - * 1.Reduced x to positive by atanh(-x) = -atanh(x) - * 2.For x>=0.5 - * 1 2x x - * atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * -------) - * 2 1 - x 1 - x - * - * For x<0.5 - * atanh(x) = 0.5*log1p(2x+2x*x/(1-x)) - * - * Special cases: - * atanh(x) is NaN if |x| >= 1 with signal; - * atanh(NaN) is that NaN with no signal; - * - */ - -double -m_atanh(double x) -{ - double absx; - double t; - - if (Py_IS_NAN(x)) { - return x+x; - } - absx = fabs(x); - if (absx >= 1.) { /* |x| >= 1 */ - errno = EDOM; -#ifdef Py_NAN - return Py_NAN; -#else - return x/zero; -#endif - } - if (absx < two_pow_m28) { /* |x| < 2**-28 */ - return x; - } - if (absx < 0.5) { /* |x| < 0.5 */ - t = absx+absx; - t = 0.5 * m_log1p(t + t*absx / (1.0 - absx)); - } - else { /* 0.5 <= |x| <= 1.0 */ - t = 0.5 * m_log1p((absx + absx) / (1.0 - absx)); - } - return copysign(t, x); -} - -/* Mathematically, expm1(x) = exp(x) - 1. 
The expm1 function is designed - to avoid the significant loss of precision that arises from direct - evaluation of the expression exp(x) - 1, for x near 0. */ - -double -m_expm1(double x) -{ - /* For abs(x) >= log(2), it's safe to evaluate exp(x) - 1 directly; this - also works fine for infinities and nans. - - For smaller x, we can use a method due to Kahan that achieves close to - full accuracy. - */ - - if (fabs(x) < 0.7) { - double u; - u = exp(x); - if (u == 1.0) - return x; - else - return (u - 1.0) * x / log(u); - } - else - return exp(x) - 1.0; -} - -/* log1p(x) = log(1+x). The log1p function is designed to avoid the - significant loss of precision that arises from direct evaluation when x is - small. */ - -double -m_log1p(double x) -{ - /* For x small, we use the following approach. Let y be the nearest float - to 1+x, then - - 1+x = y * (1 - (y-1-x)/y) - - so log(1+x) = log(y) + log(1-(y-1-x)/y). Since (y-1-x)/y is tiny, the - second term is well approximated by (y-1-x)/y. If abs(x) >= - DBL_EPSILON/2 or the rounding-mode is some form of round-to-nearest - then y-1-x will be exactly representable, and is computed exactly by - (y-1)-x. - - If abs(x) < DBL_EPSILON/2 and the rounding mode is not known to be - round-to-nearest then this method is slightly dangerous: 1+x could be - rounded up to 1+DBL_EPSILON instead of down to 1, and in that case - y-1-x will not be exactly representable any more and the result can be - off by many ulps. But this is easily fixed: for a floating-point - number |x| < DBL_EPSILON/2., the closest floating-point number to - log(1+x) is exactly x. - */ - - double y; - if (fabs(x) < DBL_EPSILON/2.) { - return x; - } - else if (-0.5 <= x && x <= 1.) { - /* WARNING: it's possible than an overeager compiler - will incorrectly optimize the following two lines - to the equivalent of "return log(1.+x)". If this - happens, then results from log1p will be inaccurate - for small x. 
*/ - y = 1.+x; - return log(y)-((y-1.)-x)/y; - } - else { - /* NaNs and infinities should end up here */ - return log(1.+x); - } -} - -/* Hand written */ -double m_trunc(double x) -{ - double integral; - (void)modf(x, &integral); - return integral; -} - - -/* Hand written */ -double m_round(double x) { - if (x < 0.0) { - return ceil(x - 0.5); - } else { - return floor(x + 0.5); - } -} - -/* Hand written */ -float m_roundf(float x) { - if (x < 0.0) { - return (float) ceilf(x - 0.5f); - } else { - return (float) floorf(x + 0.5f); - } -} - -/* - CPython implementation for atan2(): - - wrapper for atan2 that deals directly with special cases before - delegating to the platform libm for the remaining cases. This - is necessary to get consistent behaviour across platforms. - Windows, FreeBSD and alpha Tru64 are amongst platforms that don't - always follow C99. -*/ - -double m_atan2(double y, double x) -{ - if (Py_IS_NAN(x) || Py_IS_NAN(y)) - return Py_NAN; - if (Py_IS_INFINITY(y)) { - if (Py_IS_INFINITY(x)) { - if (copysign(1., x) == 1.) - /* atan2(+-inf, +inf) == +-pi/4 */ - return copysign(0.25*Py_MATH_PI, y); - else - /* atan2(+-inf, -inf) == +-pi*3/4 */ - return copysign(0.75*Py_MATH_PI, y); - } - /* atan2(+-inf, x) == +-pi/2 for finite x */ - return copysign(0.5*Py_MATH_PI, y); - } - if (Py_IS_INFINITY(x) || y == 0.) { - if (copysign(1., x) == 1.) - /* atan2(+-y, +inf) = atan2(+-0, +x) = +-0. */ - return copysign(0., y); - else - /* atan2(+-y, -inf) = atan2(+-0., -x) = +-pi. */ - return copysign(Py_MATH_PI, y); - } - return atan2(y, x); -} - -/* Map to double version directly */ -float m_atan2f(float y, float x) { - return (float) m_atan2(y, x); -} - - -/* provide gamma() and lgamma(); code borrowed from CPython */ - -/* - sin(pi*x), giving accurate results for all finite x (especially x - integral or close to an integer). This is here for use in the - reflection formula for the gamma function. 
It conforms to IEEE - 754-2008 for finite arguments, but not for infinities or nans. -*/ - -static const double pi = 3.141592653589793238462643383279502884197; -static const double sqrtpi = 1.772453850905516027298167483341145182798; -static const double logpi = 1.144729885849400174143427351353058711647; - -static double -sinpi(double x) -{ - double y, r; - int n; - /* this function should only ever be called for finite arguments */ - assert(Py_IS_FINITE(x)); - y = fmod(fabs(x), 2.0); - n = (int)round(2.0*y); - assert(0 <= n && n <= 4); - switch (n) { - case 0: - r = sin(pi*y); - break; - case 1: - r = cos(pi*(y-0.5)); - break; - case 2: - /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give - -0.0 instead of 0.0 when y == 1.0. */ - r = sin(pi*(1.0-y)); - break; - case 3: - r = -cos(pi*(y-1.5)); - break; - case 4: - r = sin(pi*(y-2.0)); - break; - default: - assert(0); /* should never get here */ - r = -1.23e200; /* silence gcc warning */ - } - return copysign(1.0, x)*r; -} - -/* Implementation of the real gamma function. In extensive but non-exhaustive - random tests, this function proved accurate to within <= 10 ulps across the - entire float domain. Note that accuracy may depend on the quality of the - system math functions, the pow function in particular. Special cases - follow C99 annex F. The parameters and method are tailored to platforms - whose double format is the IEEE 754 binary64 format. - - Method: for x > 0.0 we use the Lanczos approximation with parameters N=13 - and g=6.024680040776729583740234375; these parameters are amongst those - used by the Boost library. Following Boost (again), we re-express the - Lanczos sum as a rational function, and compute it that way. The - coefficients below were computed independently using MPFR, and have been - double-checked against the coefficients in the Boost source code. - - For x < 0.0 we use the reflection formula. 
- - There's one minor tweak that deserves explanation: Lanczos' formula for - Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5). For many x - values, x+g-0.5 can be represented exactly. However, in cases where it - can't be represented exactly the small error in x+g-0.5 can be magnified - significantly by the pow and exp calls, especially for large x. A cheap - correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error - involved in the computation of x+g-0.5 (that is, e = computed value of - x+g-0.5 - exact value of x+g-0.5). Here's the proof: - - Correction factor - ----------------- - Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754 - double, and e is tiny. Then: - - pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e) - = pow(y, x-0.5)/exp(y) * C, - - where the correction_factor C is given by - - C = pow(1-e/y, x-0.5) * exp(e) - - Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(x) ~ 1+e, so: - - C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y - - But y-(x-0.5) = g+e, and g+e ~ g. So we get C ~ 1 + e*g/y, and - - pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y), - - Note that for accuracy, when computing r*C it's better to do - - r + e*g/y*r; - - than - - r * (1 + e*g/y); - - since the addition in the latter throws away most of the bits of - information in e*g/y. 
-*/ - -#define LANCZOS_N 13 -static const double lanczos_g = 6.024680040776729583740234375; -static const double lanczos_g_minus_half = 5.524680040776729583740234375; -static const double lanczos_num_coeffs[LANCZOS_N] = { - 23531376880.410759688572007674451636754734846804940, - 42919803642.649098768957899047001988850926355848959, - 35711959237.355668049440185451547166705960488635843, - 17921034426.037209699919755754458931112671403265390, - 6039542586.3520280050642916443072979210699388420708, - 1439720407.3117216736632230727949123939715485786772, - 248874557.86205415651146038641322942321632125127801, - 31426415.585400194380614231628318205362874684987640, - 2876370.6289353724412254090516208496135991145378768, - 186056.26539522349504029498971604569928220784236328, - 8071.6720023658162106380029022722506138218516325024, - 210.82427775157934587250973392071336271166969580291, - 2.5066282746310002701649081771338373386264310793408 -}; - -/* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */ -static const double lanczos_den_coeffs[LANCZOS_N] = { - 0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0, - 13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0}; - -/* gamma values for small positive integers, 1 though NGAMMA_INTEGRAL */ -#define NGAMMA_INTEGRAL 23 -static const double gamma_integral[NGAMMA_INTEGRAL] = { - 1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0, - 3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0, - 1307674368000.0, 20922789888000.0, 355687428096000.0, - 6402373705728000.0, 121645100408832000.0, 2432902008176640000.0, - 51090942171709440000.0, 1124000727777607680000.0, -}; - -/* Lanczos' sum L_g(x), for positive x */ - -static double -lanczos_sum(double x) -{ - double num = 0.0, den = 0.0; - int i; - assert(x > 0.0); - /* evaluate the rational function lanczos_sum(x). 
For large - x, the obvious algorithm risks overflow, so we instead - rescale the denominator and numerator of the rational - function by x**(1-LANCZOS_N) and treat this as a - rational function in 1/x. This also reduces the error for - larger x values. The choice of cutoff point (5.0 below) is - somewhat arbitrary; in tests, smaller cutoff values than - this resulted in lower accuracy. */ - if (x < 5.0) { - for (i = LANCZOS_N; --i >= 0; ) { - num = num * x + lanczos_num_coeffs[i]; - den = den * x + lanczos_den_coeffs[i]; - } - } - else { - for (i = 0; i < LANCZOS_N; i++) { - num = num / x + lanczos_num_coeffs[i]; - den = den / x + lanczos_den_coeffs[i]; - } - } - return num/den; -} - -double -m_gamma(double x) -{ - double absx, r, y, z, sqrtpow; - - /* special cases */ - if (!Py_IS_FINITE(x)) { - if (Py_IS_NAN(x) || x > 0.0) - return x; /* tgamma(nan) = nan, tgamma(inf) = inf */ - else { - /*errno = EDOM;*/ - return Py_NAN; /* tgamma(-inf) = nan, invalid */ - } - } - if (x == 0.0) { - /*errno = EDOM;*/ - /* tgamma(+-0.0) = +-inf, divide-by-zero */ - return copysign(Py_HUGE_VAL, x); - } - - /* integer arguments */ - if (x == floor(x)) { - if (x < 0.0) { - /*errno = EDOM;*/ /* tgamma(n) = nan, invalid for */ - return Py_NAN; /* negative integers n */ - } - if (x <= NGAMMA_INTEGRAL) - return gamma_integral[(int)x - 1]; - } - absx = fabs(x); - - /* tiny arguments: tgamma(x) ~ 1/x for x near 0 */ - if (absx < 1e-20) { - r = 1.0/x; - /*if (Py_IS_INFINITY(r)) - errno = ERANGE;*/ - return r; - } - - /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for - x > 200, and underflows to +-0.0 for x < -200, not a negative - integer. 
*/ - if (absx > 200.0) { - if (x < 0.0) { - return 0.0/sinpi(x); - } - else { - /*errno = ERANGE;*/ - return Py_HUGE_VAL; - } - } - - y = absx + lanczos_g_minus_half; - /* compute error in sum */ - if (absx > lanczos_g_minus_half) { - /* note: the correction can be foiled by an optimizing - compiler that (incorrectly) thinks that an expression like - a + b - a - b can be optimized to 0.0. This shouldn't - happen in a standards-conforming compiler. */ - double q = y - absx; - z = q - lanczos_g_minus_half; - } - else { - double q = y - lanczos_g_minus_half; - z = q - absx; - } - z = z * lanczos_g / y; - if (x < 0.0) { - r = -pi / sinpi(absx) / absx * exp(y) / lanczos_sum(absx); - r -= z * r; - if (absx < 140.0) { - r /= pow(y, absx - 0.5); - } - else { - sqrtpow = pow(y, absx / 2.0 - 0.25); - r /= sqrtpow; - r /= sqrtpow; - } - } - else { - r = lanczos_sum(absx) / exp(y); - r += z * r; - if (absx < 140.0) { - r *= pow(y, absx - 0.5); - } - else { - sqrtpow = pow(y, absx / 2.0 - 0.25); - r *= sqrtpow; - r *= sqrtpow; - } - } - /*if (Py_IS_INFINITY(r)) - errno = ERANGE;*/ - return r; -} - -/* - lgamma: natural log of the absolute value of the Gamma function. - For large arguments, Lanczos' formula works extremely well here. -*/ - -double -m_lgamma(double x) -{ - double r, absx; - - /* special cases */ - if (!Py_IS_FINITE(x)) { - if (Py_IS_NAN(x)) - return x; /* lgamma(nan) = nan */ - else - return Py_HUGE_VAL; /* lgamma(+-inf) = +inf */ - } - - /* integer arguments */ - if (x == floor(x) && x <= 2.0) { - if (x <= 0.0) { - /*errno = EDOM;*/ /* lgamma(n) = inf, divide-by-zero for */ - return Py_HUGE_VAL; /* integers n <= 0 */ - } - else { - return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */ - } - } - - absx = fabs(x); - /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */ - if (absx < 1e-20) - return -log(absx); - - /* Lanczos' formula. 
We could save a fraction of a ulp in accuracy by - having a second set of numerator coefficients for lanczos_sum that - absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g - subtraction below; it's probably not worth it. */ - r = log(lanczos_sum(absx)) - lanczos_g; - r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1); - if (x < 0.0) - /* Use reflection formula to get value for negative x. */ - r = logpi - log(fabs(sinpi(absx))) - log(absx) - r; - /*if (Py_IS_INFINITY(r)) - errno = ERANGE;*/ - return r; -} - -/* provide erf() and erfc(); code borrowed from CPython */ - -/* - Implementations of the error function erf(x) and the complementary error - function erfc(x). - - Method: following 'Numerical Recipes' by Flannery, Press et. al. (2nd ed., - Cambridge University Press), we use a series approximation for erf for - small x, and a continued fraction approximation for erfc(x) for larger x; - combined with the relations erf(-x) = -erf(x) and erfc(x) = 1.0 - erf(x), - this gives us erf(x) and erfc(x) for all x. - - The series expansion used is: - - erf(x) = x*exp(-x*x)/sqrt(pi) * [ - 2/1 + 4/3 x**2 + 8/15 x**4 + 16/105 x**6 + ...] - - The coefficient of x**(2k-2) here is 4**k*factorial(k)/factorial(2*k). - This series converges well for smallish x, but slowly for larger x. - - The continued fraction expansion used is: - - erfc(x) = x*exp(-x*x)/sqrt(pi) * [1/(0.5 + x**2 -) 0.5/(2.5 + x**2 - ) - 3.0/(4.5 + x**2 - ) 7.5/(6.5 + x**2 - ) ...] - - after the first term, the general term has the form: - - k*(k-0.5)/(2*k+0.5 + x**2 - ...). - - This expansion converges fast for larger x, but convergence becomes - infinitely slow as x approaches 0.0. The (somewhat naive) continued - fraction evaluation algorithm used below also risks overflow for large x; - but for large x, erfc(x) == 0.0 to within machine precision. (For - example, erfc(30.0) is approximately 2.56e-393). 
- - Parameters: use series expansion for abs(x) < ERF_SERIES_CUTOFF and - continued fraction expansion for ERF_SERIES_CUTOFF <= abs(x) < - ERFC_CONTFRAC_CUTOFF. ERFC_SERIES_TERMS and ERFC_CONTFRAC_TERMS are the - numbers of terms to use for the relevant expansions. */ - -#define ERF_SERIES_CUTOFF 1.5 -#define ERF_SERIES_TERMS 25 -#define ERFC_CONTFRAC_CUTOFF 30.0 -#define ERFC_CONTFRAC_TERMS 50 - -/* - Error function, via power series. - - Given a finite float x, return an approximation to erf(x). - Converges reasonably fast for small x. -*/ - -static double -m_erf_series(double x) -{ - double x2, acc, fk, result; - int i, saved_errno; - - x2 = x * x; - acc = 0.0; - fk = (double)ERF_SERIES_TERMS + 0.5; - for (i = 0; i < ERF_SERIES_TERMS; i++) { - acc = 2.0 + x2 * acc / fk; - fk -= 1.0; - } - /* Make sure the exp call doesn't affect errno; - see m_erfc_contfrac for more. */ - saved_errno = errno; - result = acc * x * exp(-x2) / sqrtpi; - errno = saved_errno; - return result; -} - -/* - Complementary error function, via continued fraction expansion. - - Given a positive float x, return an approximation to erfc(x). Converges - reasonably fast for x large (say, x > 2.0), and should be safe from - overflow if x and nterms are not too large. On an IEEE 754 machine, with x - <= 30.0, we're safe up to nterms = 100. For x >= 30.0, erfc(x) is smaller - than the smallest representable nonzero float. 
*/ - -static double -m_erfc_contfrac(double x) -{ - double x2, a, da, p, p_last, q, q_last, b, result; - int i, saved_errno; - - if (x >= ERFC_CONTFRAC_CUTOFF) - return 0.0; - - x2 = x*x; - a = 0.0; - da = 0.5; - p = 1.0; p_last = 0.0; - q = da + x2; q_last = 1.0; - for (i = 0; i < ERFC_CONTFRAC_TERMS; i++) { - double temp; - a += da; - da += 2.0; - b = da + x2; - temp = p; p = b*p - a*p_last; p_last = temp; - temp = q; q = b*q - a*q_last; q_last = temp; - } - /* Issue #8986: On some platforms, exp sets errno on underflow to zero; - save the current errno value so that we can restore it later. */ - saved_errno = errno; - result = p / q * x * exp(-x2) / sqrtpi; - errno = saved_errno; - return result; -} - -/* Error function erf(x), for general x */ - -double -m_erf(double x) -{ - double absx, cf; - - if (Py_IS_NAN(x)) - return x; - absx = fabs(x); - if (absx < ERF_SERIES_CUTOFF) - return m_erf_series(x); - else { - cf = m_erfc_contfrac(absx); - return x > 0.0 ? 1.0 - cf : cf - 1.0; - } -} - -/* Complementary error function erfc(x), for general x. */ - -double -m_erfc(double x) -{ - double absx, cf; - - if (Py_IS_NAN(x)) - return x; - absx = fabs(x); - if (absx < ERF_SERIES_CUTOFF) - return 1.0 - m_erf_series(x); - else { - cf = m_erfc_contfrac(absx); - return x > 0.0 ? 
cf : 2.0 - cf; - } -} - -#define FLOATVER(Fn) float Fn##f(float x) { return (float)Fn(x); } - -FLOATVER(m_acosh); -FLOATVER(m_asinh); -FLOATVER(m_atanh); -FLOATVER(m_erf); -FLOATVER(m_erfc); -FLOATVER(m_expm1); -FLOATVER(m_gamma); -FLOATVER(m_lgamma); -FLOATVER(m_log1p); -FLOATVER(m_trunc); - diff --git a/numba/numba/_math_c99.h b/numba/numba/_math_c99.h deleted file mode 100644 index fec8ac08a..000000000 --- a/numba/numba/_math_c99.h +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef NUMBA_MATH_C99_H_ -#define NUMBA_MATH_C99_H_ - -#include "_numba_common.h" - -/* We require C99 on POSIX, but have to be tolerant on Windows since - Python < 3.5 is compiled with old MSVC versions */ - -#if !defined(_MSC_VER) || _MSC_VER >= 1800 /* Visual Studio 2013 */ -#define HAVE_C99_MATH 1 -#else -#define HAVE_C99_MATH 0 -#endif - - -VISIBILITY_HIDDEN double m_acosh(double x); -VISIBILITY_HIDDEN float m_acoshf(float x); - -VISIBILITY_HIDDEN double m_asinh(double x); -VISIBILITY_HIDDEN float m_asinhf(float x); - -VISIBILITY_HIDDEN double m_atanh(double x); -VISIBILITY_HIDDEN float m_atanhf(float x); - -VISIBILITY_HIDDEN double m_erf(double x); -VISIBILITY_HIDDEN float m_erff(float x); - -VISIBILITY_HIDDEN double m_erfc(double x); -VISIBILITY_HIDDEN float m_erfcf(float x); - -VISIBILITY_HIDDEN double m_expm1(double x); -VISIBILITY_HIDDEN float m_expm1f(float x); - -VISIBILITY_HIDDEN double m_gamma(double x); -VISIBILITY_HIDDEN float m_gammaf(float x); - -VISIBILITY_HIDDEN double m_lgamma(double x); -VISIBILITY_HIDDEN float m_lgammaf(float x); - -VISIBILITY_HIDDEN double m_log1p(double x); -VISIBILITY_HIDDEN float m_log1pf(float x); - -VISIBILITY_HIDDEN double m_round(double x); -VISIBILITY_HIDDEN float m_roundf(float x); - -VISIBILITY_HIDDEN double m_trunc(double x); -VISIBILITY_HIDDEN float m_truncf(float x); - -VISIBILITY_HIDDEN double m_atan2(double y, double x); -VISIBILITY_HIDDEN float m_atan2f(float y, float x); - - -#if !HAVE_C99_MATH - -/* Define missing math functions */ - -#define 
asinh(x) m_asinh(x) -#define asinhf(x) m_asinhf(x) -#define acosh(x) m_acosh(x) -#define acoshf(x) m_acoshf(x) -#define atanh(x) m_atanh(x) -#define atanhf(x) m_atanhf(x) - -#define erf(x) m_erf(x) -#define erfc(x) m_erfc(x) -#define erfcf(x) m_erfcf(x) -#define erff(x) m_erff(x) - -#define expm1(x) m_expm1(x) -#define expm1f(x) m_expm1f(x) -#define log1p(x) m_log1p(x) -#define log1pf(x) m_log1pf(x) - -#define lgamma(x) m_lgamma(x) -#define lgammaf(x) m_lgammaf(x) -#define tgamma(x) m_gamma(x) -#define tgammaf(x) m_gammaf(x) - -#define round(x) m_round(x) -#define roundf(x) m_roundf(x) -#define trunc(x) m_trunc(x) -#define truncf(x) m_truncf(x) - -#define atan2f(x, y) m_atan2f(x, y) - -#endif /* !HAVE_C99_MATH */ - -#define atan2_fixed(x, y) m_atan2(x, y) - -#endif /* NUMBA_MATH_C99_H_ */ diff --git a/numba/numba/_npymath_exports.c b/numba/numba/_npymath_exports.c deleted file mode 100644 index 881b56c91..000000000 --- a/numba/numba/_npymath_exports.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * This file contains exports of Numpy math functions needed by numba. - */ - -#include "_pymodule.h" -#include -#include - - -/* - * Map Numpy C function symbols to their addresses. 
- */ - -struct npymath_entry { - const char *name; - void *func; -}; - -#define NPYMATH_SYMBOL(name) \ - { "npy_" #name, (void*) npy_##name } - -static struct npymath_entry npymath_exports[] = { - /* double functions */ - NPYMATH_SYMBOL(exp2), - NPYMATH_SYMBOL(log2), - - NPYMATH_SYMBOL(logaddexp), - NPYMATH_SYMBOL(logaddexp2), - NPYMATH_SYMBOL(nextafter), - NPYMATH_SYMBOL(spacing), - - NPYMATH_SYMBOL(modf), - - /* float functions */ - NPYMATH_SYMBOL(exp2f), - NPYMATH_SYMBOL(log2f), - - NPYMATH_SYMBOL(logaddexpf), - NPYMATH_SYMBOL(logaddexp2f), - NPYMATH_SYMBOL(nextafterf), - NPYMATH_SYMBOL(spacingf), - - NPYMATH_SYMBOL(modff), -}; - -#undef NPYMATH_SYMBOL diff --git a/numba/numba/_numba_common.h b/numba/numba/_numba_common.h deleted file mode 100644 index 6d25fb13d..000000000 --- a/numba/numba/_numba_common.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef NUMBA_COMMON_H_ -#define NUMBA_COMMON_H_ - -/* __has_attribute() is a clang / gcc-5 macro */ -#ifndef __has_attribute -# define __has_attribute(x) 0 -#endif - -/* This attribute marks symbols that can be shared accross C objects - * but are not exposed outside of a shared library or executable. - * Note this is default behaviour for global symbols under Windows. 
- */ -#if (__has_attribute(visibility) || \ - (defined(__GNUC__) && __GNUC__ >= 4)) -#define VISIBILITY_HIDDEN __attribute__ ((visibility("hidden"))) -#else -#define VISIBILITY_HIDDEN -#endif - -#endif /* NUMBA_COMMON_H_ */ diff --git a/numba/numba/_pymodule.h b/numba/numba/_pymodule.h deleted file mode 100644 index 31f18f61c..000000000 --- a/numba/numba/_pymodule.h +++ /dev/null @@ -1,56 +0,0 @@ -#ifndef NUMBA_PY_MODULE_H_ -#define NUMBA_PY_MODULE_H_ - -#define PY_SSIZE_T_CLEAN - -#include -#include - -#if PY_MAJOR_VERSION >= 3 - #define MOD_ERROR_VAL NULL - #define MOD_SUCCESS_VAL(val) val - #define MOD_INIT(name) PyMODINIT_FUNC PyInit_##name(void) - #define MOD_DEF(ob, name, doc, methods) { \ - static struct PyModuleDef moduledef = { \ - PyModuleDef_HEAD_INIT, name, doc, -1, methods, }; \ - ob = PyModule_Create(&moduledef); } - #define MOD_INIT_EXEC(name) PyInit_##name(); -#else - #define MOD_ERROR_VAL - #define MOD_SUCCESS_VAL(val) - #define MOD_INIT(name) PyMODINIT_FUNC init##name(void) - #define MOD_DEF(ob, name, doc, methods) \ - ob = Py_InitModule3(name, methods, doc); - #define MOD_INIT_EXEC(name) init##name(); -#endif - - -#if PY_MAJOR_VERSION >= 3 - #define PyString_AsString PyUnicode_AsUTF8 - #define PyString_Check PyUnicode_Check - #define PyString_FromFormat PyUnicode_FromFormat - #define PyString_FromString PyUnicode_FromString - #define PyString_InternFromString PyUnicode_InternFromString - #define PyInt_Type PyLong_Type - #define PyInt_Check PyLong_Check - #define PyInt_CheckExact PyLong_CheckExact -#else - #define Py_hash_t long - #define Py_uhash_t unsigned long -#endif - -#if PY_MAJOR_VERSION < 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 4) - #define PyMem_RawMalloc malloc - #define PyMem_RawRealloc realloc - #define PyMem_RawFree free -#endif - -#ifndef Py_MIN -#define Py_MIN(x, y) (((x) > (y)) ? (y) : (x)) -#endif - -#ifndef Py_MAX -#define Py_MAX(x, y) (((x) < (y)) ? 
(y) : (x)) -#endif - -#endif /* NUMBA_PY_MODULE_H_ */ diff --git a/numba/numba/_random.c b/numba/numba/_random.c deleted file mode 100644 index b3d1c2370..000000000 --- a/numba/numba/_random.c +++ /dev/null @@ -1,478 +0,0 @@ -/* - * PRNG support. - */ - -#ifdef _MSC_VER -#define HAVE_PTHREAD_ATFORK 0 -#else -#define HAVE_PTHREAD_ATFORK 1 -#include -#endif - - -/* Magic Mersenne Twister constants */ -#define MT_N 624 -#define MT_M 397 -#define MT_MATRIX_A 0x9908b0dfU -#define MT_UPPER_MASK 0x80000000U -#define MT_LOWER_MASK 0x7fffffffU - -/* - * Note this structure is accessed in numba.targets.randomimpl, - * any changes here should be reflected there too. - */ -typedef struct { - int index; - /* unsigned int is sufficient on modern machines as we only need 32 bits */ - unsigned int mt[MT_N]; - int has_gauss; - double gauss; - int is_initialized; -} rnd_state_t; - -/* Some code portions below from CPython's _randommodule.c, some others - from Numpy's and Jean-Sebastien Roy's randomkit.c. 
*/ - -NUMBA_EXPORT_FUNC(void) -numba_rnd_shuffle(rnd_state_t *state) -{ - int i; - unsigned int y; - - for (i = 0; i < MT_N - MT_M; i++) { - y = (state->mt[i] & MT_UPPER_MASK) | (state->mt[i+1] & MT_LOWER_MASK); - state->mt[i] = state->mt[i+MT_M] ^ (y >> 1) ^ - (-(int) (y & 1) & MT_MATRIX_A); - } - for (; i < MT_N - 1; i++) { - y = (state->mt[i] & MT_UPPER_MASK) | (state->mt[i+1] & MT_LOWER_MASK); - state->mt[i] = state->mt[i+(MT_M-MT_N)] ^ (y >> 1) ^ - (-(int) (y & 1) & MT_MATRIX_A); - } - y = (state->mt[MT_N - 1] & MT_UPPER_MASK) | (state->mt[0] & MT_LOWER_MASK); - state->mt[MT_N - 1] = state->mt[MT_M - 1] ^ (y >> 1) ^ - (-(int) (y & 1) & MT_MATRIX_A); -} - -/* Initialize mt[] with an integer seed */ -NUMBA_EXPORT_FUNC(void) -numba_rnd_init(rnd_state_t *state, unsigned int seed) -{ - unsigned int pos; - seed &= 0xffffffffU; - - /* Knuth's PRNG as used in the Mersenne Twister reference implementation */ - for (pos = 0; pos < MT_N; pos++) { - state->mt[pos] = seed; - seed = (1812433253U * (seed ^ (seed >> 30)) + pos + 1) & 0xffffffffU; - } - state->index = MT_N; - state->has_gauss = 0; - state->gauss = 0.0; - state->is_initialized = 1; -} - -/* Perturb mt[] with a key array */ -static void -rnd_init_by_array(rnd_state_t *state, unsigned int init_key[], size_t key_length) -{ - size_t i, j, k; - unsigned int *mt = state->mt; - - numba_rnd_init(state, 19650218U); - i = 1; j = 0; - k = (MT_N > key_length ? 
MT_N : key_length); - for (; k; k--) { - mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525U)) - + init_key[j] + (unsigned int) j; /* non linear */ - mt[i] &= 0xffffffffU; - i++; j++; - if (i >= MT_N) { mt[0] = mt[MT_N - 1]; i = 1; } - if (j >= key_length) j = 0; - } - for (k = MT_N - 1; k; k--) { - mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941U)) - - (unsigned int) i; /* non linear */ - mt[i] &= 0xffffffffU; - i++; - if (i >= MT_N) { mt[0] = mt[MT_N - 1]; i=1; } - } - - mt[0] = 0x80000000U; /* MSB is 1; ensuring non-zero initial array */ - state->index = MT_N; - state->has_gauss = 0; - state->gauss = 0.0; - state->is_initialized = 1; -} - -/* - * Management of thread-local random state. - */ - -static int rnd_globally_initialized; - -#ifdef _MSC_VER -#define THREAD_LOCAL(ty) __declspec(thread) ty -#else -/* Non-standard C99 extension that's understood by gcc and clang */ -#define THREAD_LOCAL(ty) __thread ty -#endif - -static THREAD_LOCAL(rnd_state_t) numba_py_random_state; -static THREAD_LOCAL(rnd_state_t) numba_np_random_state; - -/* Seed the state with random bytes */ -static int -rnd_seed_with_bytes(rnd_state_t *state, Py_buffer *buf) -{ - unsigned int *keys; - unsigned char *bytes; - size_t i, nkeys; - - nkeys = buf->len / sizeof(unsigned int); - keys = (unsigned int *) PyMem_Malloc(nkeys * sizeof(unsigned int)); - if (keys == NULL) { - PyBuffer_Release(buf); - return -1; - } - bytes = (unsigned char *) buf->buf; - /* Convert input bytes to int32 keys, without violating alignment - * constraints. - */ - for (i = 0; i < nkeys; i++, bytes += 4) { - keys[i] = (bytes[3] << 24) + (bytes[2] << 16) + - (bytes[1] << 8) + (bytes[0] << 0); - } - PyBuffer_Release(buf); - rnd_init_by_array(state, keys, nkeys); - PyMem_Free(keys); - return 0; -} - -#if HAVE_PTHREAD_ATFORK -/* After a fork(), the child should reseed its random states. 
- * Since only the main thread survives in the child, it's enough to mark - * the current thread-local states as uninitialized. - */ -static void -rnd_atfork_child(void) -{ - numba_py_random_state.is_initialized = 0; - numba_np_random_state.is_initialized = 0; -} -#endif - -/* Global initialization routine. It must be called as early as possible. - */ -NUMBA_EXPORT_FUNC(void) -numba_rnd_ensure_global_init(void) -{ - if (!rnd_globally_initialized) { -#if HAVE_PTHREAD_ATFORK - pthread_atfork(NULL, NULL, rnd_atfork_child); -#endif - numba_py_random_state.is_initialized = 0; - numba_np_random_state.is_initialized = 0; - rnd_globally_initialized = 1; - } -} - -/* First-time init a random state */ -static void -rnd_implicit_init(rnd_state_t *state) -{ - /* Initialize with random bytes. The easiest way to get good-quality - * cross-platform random bytes is still to call os.urandom() - * using the Python interpreter... - */ - PyObject *module, *bufobj; - Py_buffer buf; - PyGILState_STATE gilstate = PyGILState_Ensure(); - - module = PyImport_ImportModuleNoBlock("os"); - if (module == NULL) - goto error; - /* Read as many bytes as necessary to get the full entropy - * exploitable by the MT generator. - */ - bufobj = PyObject_CallMethod(module, "urandom", "i", - (int) (MT_N * sizeof(unsigned int))); - Py_DECREF(module); - if (bufobj == NULL) - goto error; - if (PyObject_GetBuffer(bufobj, &buf, PyBUF_SIMPLE)) - goto error; - Py_DECREF(bufobj); - if (rnd_seed_with_bytes(state, &buf)) - goto error; - /* state->is_initialized is set now */ - - PyGILState_Release(gilstate); - return; - -error: - /* In normal conditions, os.urandom() and PyMem_Malloc() shouldn't fail, - * and we don't want the caller to deal with errors, so just bail out. - */ - if (PyErr_Occurred()) - PyErr_Print(); - Py_FatalError(NULL); -} - -/* Functions returning the thread-local random state pointer. - * The LLVM JIT doesn't support thread-local variables so we rely - * on the C compiler instead. 
- */ - -NUMBA_EXPORT_FUNC(rnd_state_t *) -numba_get_py_random_state(void) -{ - rnd_state_t *state = &numba_py_random_state; - if (!state->is_initialized) - rnd_implicit_init(state); - return state; -} - -NUMBA_EXPORT_FUNC(rnd_state_t *) -numba_get_np_random_state(void) -{ - rnd_state_t *state = &numba_np_random_state; - if (!state->is_initialized) - rnd_implicit_init(state); - return state; -} - - -/* - * Python-exposed helpers for state management and testing. - */ -static int -rnd_state_converter(PyObject *obj, rnd_state_t **state) -{ - *state = (rnd_state_t *) PyLong_AsVoidPtr(obj); - return (*state != NULL || !PyErr_Occurred()); -} - -NUMBA_EXPORT_FUNC(PyObject *) -_numba_rnd_get_py_state_ptr(PyObject *self) -{ - return PyLong_FromVoidPtr(numba_get_py_random_state()); -} - -NUMBA_EXPORT_FUNC(PyObject *) -_numba_rnd_get_np_state_ptr(PyObject *self) -{ - return PyLong_FromVoidPtr(numba_get_np_random_state()); -} - -NUMBA_EXPORT_FUNC(PyObject *) -_numba_rnd_shuffle(PyObject *self, PyObject *arg) -{ - rnd_state_t *state; - if (!rnd_state_converter(arg, &state)) - return NULL; - numba_rnd_shuffle(state); - Py_RETURN_NONE; -} - -NUMBA_EXPORT_FUNC(PyObject *) -_numba_rnd_set_state(PyObject *self, PyObject *args) -{ - int i, index; - rnd_state_t *state; - PyObject *tuplearg, *intlist; - - if (!PyArg_ParseTuple(args, "O&O!:rnd_set_state", - rnd_state_converter, &state, - &PyTuple_Type, &tuplearg)) - return NULL; - if (!PyArg_ParseTuple(tuplearg, "iO!", &index, &PyList_Type, &intlist)) - return NULL; - if (PyList_GET_SIZE(intlist) != MT_N) { - PyErr_SetString(PyExc_ValueError, "list object has wrong size"); - return NULL; - } - state->index = index; - for (i = 0; i < MT_N; i++) { - PyObject *v = PyList_GET_ITEM(intlist, i); - unsigned long x = PyLong_AsUnsignedLong(v); - if (x == (unsigned long) -1 && PyErr_Occurred()) - return NULL; - state->mt[i] = (unsigned int) x; - } - state->has_gauss = 0; - state->gauss = 0.0; - state->is_initialized = 1; - Py_RETURN_NONE; -} - 
-NUMBA_EXPORT_FUNC(PyObject *) -_numba_rnd_get_state(PyObject *self, PyObject *arg) -{ - PyObject *intlist; - int i; - rnd_state_t *state; - if (!rnd_state_converter(arg, &state)) - return NULL; - - intlist = PyList_New(MT_N); - if (intlist == NULL) - return NULL; - for (i = 0; i < MT_N; i++) { - PyObject *v = PyLong_FromUnsignedLong(state->mt[i]); - if (v == NULL) { - Py_DECREF(intlist); - return NULL; - } - PyList_SET_ITEM(intlist, i, v); - } - return Py_BuildValue("iN", state->index, intlist); -} - -NUMBA_EXPORT_FUNC(PyObject *) -_numba_rnd_seed(PyObject *self, PyObject *args) -{ - unsigned int seed; - rnd_state_t *state; - - if (!PyArg_ParseTuple(args, "O&I:rnd_seed", - rnd_state_converter, &state, &seed)) { - /* rnd_seed_*(bytes-like object) */ - Py_buffer buf; - - PyErr_Clear(); - if (!PyArg_ParseTuple(args, "O&s*:rnd_seed", - rnd_state_converter, &state, &buf)) - return NULL; - - if (rnd_seed_with_bytes(state, &buf)) - return NULL; - else - Py_RETURN_NONE; - } - else { - /* rnd_seed_*(int32) */ - numba_rnd_init(state, seed); - Py_RETURN_NONE; - } -} - -/* - * Random distribution helpers. - * Most code straight from Numpy's distributions.c. 
- */ - -#ifndef M_PI -#define M_PI 3.14159265358979323846264338328 -#endif - -NUMBA_EXPORT_FUNC(unsigned int) -get_next_int32(rnd_state_t *state) -{ - unsigned int y; - - if (state->index == MT_N) { - numba_rnd_shuffle(state); - state->index = 0; - } - y = state->mt[state->index++]; - /* Tempering */ - y ^= (y >> 11); - y ^= (y << 7) & 0x9d2c5680U; - y ^= (y << 15) & 0xefc60000U; - y ^= (y >> 18); - return y; -} - -NUMBA_EXPORT_FUNC(double) -get_next_double(rnd_state_t *state) -{ - double a = get_next_int32(state) >> 5; - double b = get_next_int32(state) >> 6; - return (a * 67108864.0 + b) / 9007199254740992.0; -} - -NUMBA_EXPORT_FUNC(double) -loggam(double x) -{ - double x0, x2, xp, gl, gl0; - long k, n; - - static double a[10] = {8.333333333333333e-02,-2.777777777777778e-03, - 7.936507936507937e-04,-5.952380952380952e-04, - 8.417508417508418e-04,-1.917526917526918e-03, - 6.410256410256410e-03,-2.955065359477124e-02, - 1.796443723688307e-01,-1.39243221690590e+00}; - x0 = x; - n = 0; - if ((x == 1.0) || (x == 2.0)) - { - return 0.0; - } - else if (x <= 7.0) - { - n = (long)(7 - x); - x0 = x + n; - } - x2 = 1.0/(x0*x0); - xp = 2*M_PI; - gl0 = a[9]; - for (k=8; k>=0; k--) - { - gl0 *= x2; - gl0 += a[k]; - } - gl = gl0/x0 + 0.5*log(xp) + (x0-0.5)*log(x0) - x0; - if (x <= 7.0) - { - for (k=1; k<=n; k++) - { - gl -= log(x0-1.0); - x0 -= 1.0; - } - } - return gl; -} - - -NUMBA_EXPORT_FUNC(int64_t) -numba_poisson_ptrs(rnd_state_t *state, double lam) -{ - /* This method is invoked only if the parameter lambda of this - * distribution is big enough ( >= 10 ). The algorithm used is - * described in "Hörmann, W. 1992. 'The Transformed Rejection - * Method for Generating Poisson Random Variables'. - * The implementation comes straight from Numpy. 
- */ - int64_t k; - double U, V, slam, loglam, a, b, invalpha, vr, us; - - slam = sqrt(lam); - loglam = log(lam); - b = 0.931 + 2.53*slam; - a = -0.059 + 0.02483*b; - invalpha = 1.1239 + 1.1328/(b-3.4); - vr = 0.9277 - 3.6224/(b-2); - - while (1) - { - U = get_next_double(state) - 0.5; - V = get_next_double(state); - us = 0.5 - fabs(U); - k = (int64_t) floor((2*a/us + b)*U + lam + 0.43); - if ((us >= 0.07) && (V <= vr)) - { - return k; - } - if ((k < 0) || - ((us < 0.013) && (V > us))) - { - continue; - } - if ((log(V) + log(invalpha) - log(a/(us*us)+b)) <= - (-lam + (double) k*loglam - loggam((double) k+1))) - { - return k; - } - } -} diff --git a/numba/numba/_typeof.c b/numba/numba/_typeof.c deleted file mode 100644 index cd4819727..000000000 --- a/numba/numba/_typeof.c +++ /dev/null @@ -1,906 +0,0 @@ -#include "_pymodule.h" - -#include -#include -#include - -#include "_typeof.h" -#include "_hashtable.h" - -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#include - - -/* Cached typecodes for basic scalar types */ -static int tc_int8; -static int tc_int16; -static int tc_int32; -static int tc_int64; -static int tc_uint8; -static int tc_uint16; -static int tc_uint32; -static int tc_uint64; -static int tc_float32; -static int tc_float64; -static int tc_complex64; -static int tc_complex128; -static int BASIC_TYPECODES[12]; - -static int tc_intp; - -/* The type object for the numba .dispatcher.OmittedArg class - * that wraps omitted arguments. - */ -static PyObject *omittedarg_type; - -static PyObject *typecache; -static PyObject *ndarray_typecache; -static PyObject *structured_dtypes; - -static PyObject *str_typeof_pyval = NULL; -static PyObject *str_value = NULL; -static PyObject *str_numba_type = NULL; - - -/* - * Type fingerprint computation. 
- */ - -typedef struct { - /* A buffer the fingerprint will be written to */ - char *buf; - size_t n; - size_t allocated; - /* A preallocated buffer, sufficient to fit the fingerprint for most types */ - char static_buf[40]; -} string_writer_t; - -static void -string_writer_init(string_writer_t *w) -{ - w->buf = w->static_buf; - w->n = 0; - w->allocated = sizeof(w->static_buf) / sizeof(unsigned char); -} - -static void -string_writer_clear(string_writer_t *w) -{ - if (w->buf != w->static_buf) - free(w->buf); -} - -static void -string_writer_move(string_writer_t *dest, const string_writer_t *src) -{ - dest->n = src->n; - dest->allocated = src->allocated; - if (src->buf == src->static_buf) { - dest->buf = dest->static_buf; - memcpy(dest->buf, src->buf, src->n); - } - else { - dest->buf = src->buf; - } -} - -/* Ensure at least *bytes* can be appended to the string writer's buffer. */ -static int -string_writer_ensure(string_writer_t *w, size_t bytes) -{ - size_t newsize; - bytes += w->n; - if (bytes <= w->allocated) - return 0; - newsize = (w->allocated << 2) + 1; - if (newsize < bytes) - newsize = bytes; - if (w->buf == w->static_buf) - w->buf = malloc(newsize); - else - w->buf = realloc(w->buf, newsize); - if (w->buf) { - w->allocated = newsize; - return 0; - } - else { - PyErr_NoMemory(); - return -1; - } -} - -static int -string_writer_put_char(string_writer_t *w, unsigned char c) -{ - if (string_writer_ensure(w, 1)) - return -1; - w->buf[w->n++] = c; - return 0; -} - -static int -string_writer_put_int32(string_writer_t *w, unsigned int v) -{ - if (string_writer_ensure(w, 4)) - return -1; - w->buf[w->n] = v & 0xff; - w->buf[w->n + 1] = (v >> 8) & 0xff; - w->buf[w->n + 2] = (v >> 16) & 0xff; - w->buf[w->n + 3] = (v >> 24) & 0xff; - w->n += 4; - return 0; -} - -static int -string_writer_put_intp(string_writer_t *w, npy_intp v) -{ - const int N = sizeof(npy_intp); - if (string_writer_ensure(w, N)) - return -1; - w->buf[w->n] = v & 0xff; - w->buf[w->n + 1] = (v >> 8) 
& 0xff; - w->buf[w->n + 2] = (v >> 16) & 0xff; - w->buf[w->n + 3] = (v >> 24) & 0xff; - if (N > 4) { - w->buf[w->n + 4] = (v >> 32) & 0xff; - w->buf[w->n + 5] = (v >> 40) & 0xff; - w->buf[w->n + 6] = (v >> 48) & 0xff; - w->buf[w->n + 7] = (v >> 56) & 0xff; - } - w->n += N; - return 0; -} - -static int -string_writer_put_string(string_writer_t *w, const char *s) -{ - if (s == NULL) { - return string_writer_put_char(w, 0); - } - else { - size_t N = strlen(s) + 1; - if (string_writer_ensure(w, N)) - return -1; - memcpy(w->buf + w->n, s, N); - w->n += N; - return 0; - } -} - -enum opcode { - OP_START_TUPLE = '(', - OP_END_TUPLE = ')', - OP_INT = 'i', - OP_FLOAT = 'f', - OP_COMPLEX = 'c', - OP_BOOL = '?', - OP_OMITTED = '!', - - OP_BYTEARRAY = 'a', - OP_BYTES = 'b', - OP_NONE = 'n', - OP_LIST = '[', - OP_SET = '{', - - OP_BUFFER = 'B', - OP_NP_SCALAR = 'S', - OP_NP_ARRAY = 'A', - OP_NP_DTYPE = 'D' -}; - -#define TRY(func, w, arg) \ - do { \ - if (func(w, arg)) return -1; \ - } while (0) - - -static int -fingerprint_unrecognized(PyObject *val) -{ - PyErr_SetString(PyExc_NotImplementedError, - "cannot compute type fingerprint for value"); - return -1; -} - -static int -compute_dtype_fingerprint(string_writer_t *w, PyArray_Descr *descr) -{ - int typenum = descr->type_num; - if (typenum < NPY_OBJECT) - return string_writer_put_char(w, (char) typenum); - if (typenum == NPY_VOID) { - /* Structured dtype: serialize the dtype pointer. Unfortunately, - * some structured dtypes can be ephemeral, so we have to - * intern them to avoid pointer reuse and fingerprint collisions. - * (e.g. 
np.recarray(dtype=some_dtype) creates a new dtype - * equal to some_dtype) - */ - PyObject *interned = PyDict_GetItem(structured_dtypes, - (PyObject *) descr); - if (interned == NULL) { - interned = (PyObject *) descr; - if (PyDict_SetItem(structured_dtypes, interned, interned)) - return -1; - } - TRY(string_writer_put_char, w, (char) typenum); - return string_writer_put_intp(w, (npy_intp) interned); - } -#if NPY_API_VERSION >= 0x00000007 - if (PyTypeNum_ISDATETIME(typenum)) { - PyArray_DatetimeMetaData *md; - md = &(((PyArray_DatetimeDTypeMetaData *)descr->c_metadata)->meta); - TRY(string_writer_put_char, w, (char) typenum); - TRY(string_writer_put_char, w, (char) md->base); - return string_writer_put_int32(w, (char) md->num); - } -#endif - - return fingerprint_unrecognized((PyObject *) descr); -} - -static int -compute_fingerprint(string_writer_t *w, PyObject *val) -{ - /* - * Implementation note: for performance, we start with common - * types that can be tested with fast checks. - */ - if (val == Py_None) - return string_writer_put_char(w, OP_NONE); - if (PyBool_Check(val)) - return string_writer_put_char(w, OP_BOOL); - /* Note we avoid matching int subclasses such as IntEnum */ - if (PyInt_CheckExact(val) || PyLong_CheckExact(val)) - return string_writer_put_char(w, OP_INT); - if (PyFloat_Check(val)) - return string_writer_put_char(w, OP_FLOAT); - if (PyComplex_CheckExact(val)) - return string_writer_put_char(w, OP_COMPLEX); - if (PyTuple_Check(val)) { - Py_ssize_t i, n; - n = PyTuple_GET_SIZE(val); - TRY(string_writer_put_char, w, OP_START_TUPLE); - for (i = 0; i < n; i++) - TRY(compute_fingerprint, w, PyTuple_GET_ITEM(val, i)); - TRY(string_writer_put_char, w, OP_END_TUPLE); - return 0; - } - if (PyBytes_Check(val)) - return string_writer_put_char(w, OP_BYTES); - if (PyByteArray_Check(val)) - return string_writer_put_char(w, OP_BYTEARRAY); - if ((PyObject *) Py_TYPE(val) == omittedarg_type) { - PyObject *default_val = PyObject_GetAttr(val, str_value); - if 
(default_val == NULL) - return -1; - TRY(string_writer_put_char, w, OP_OMITTED); - TRY(compute_fingerprint, w, default_val); - Py_DECREF(default_val); - return 0; - } - if (PyArray_IsScalar(val, Generic)) { - /* Note: PyArray_DescrFromScalar() may be a bit slow on - non-trivial types. */ - PyArray_Descr *descr = PyArray_DescrFromScalar(val); - if (descr == NULL) - return -1; - TRY(string_writer_put_char, w, OP_NP_SCALAR); - TRY(compute_dtype_fingerprint, w, descr); - Py_DECREF(descr); - return 0; - } - if (PyArray_Check(val)) { - PyArrayObject *ary = (PyArrayObject *) val; - int ndim = PyArray_NDIM(ary); - - TRY(string_writer_put_char, w, OP_NP_ARRAY); - TRY(string_writer_put_int32, w, ndim); - if (PyArray_IS_C_CONTIGUOUS(ary)) - TRY(string_writer_put_char, w, 'C'); - else if (PyArray_IS_F_CONTIGUOUS(ary)) - TRY(string_writer_put_char, w, 'F'); - else - TRY(string_writer_put_char, w, 'A'); - if (PyArray_ISWRITEABLE(ary)) - TRY(string_writer_put_char, w, 'W'); - else - TRY(string_writer_put_char, w, 'R'); - return compute_dtype_fingerprint(w, PyArray_DESCR(ary)); - } - if (PyList_Check(val)) { - Py_ssize_t n = PyList_GET_SIZE(val); - if (n == 0) { - PyErr_SetString(PyExc_ValueError, - "cannot compute fingerprint of empty list"); - return -1; - } - /* Only the first item is considered, as in typeof.py */ - TRY(string_writer_put_char, w, OP_LIST); - TRY(compute_fingerprint, w, PyList_GET_ITEM(val, 0)); - return 0; - } - /* Note we only accept sets, not frozensets */ - if (Py_TYPE(val) == &PySet_Type) { - Py_hash_t h; - PyObject *item; - Py_ssize_t pos = 0; - /* Only one item is considered, as in typeof.py */ - if (!_PySet_NextEntry(val, &pos, &item, &h)) { - /* Empty set */ - PyErr_SetString(PyExc_ValueError, - "cannot compute fingerprint of empty set"); - return -1; - } - TRY(string_writer_put_char, w, OP_SET); - TRY(compute_fingerprint, w, item); - return 0; - } - if (PyObject_CheckBuffer(val)) { - Py_buffer buf; - int flags = PyBUF_ND | PyBUF_STRIDES | 
PyBUF_FORMAT; - char contig; - int ndim; - char readonly; - - /* Attempt to get a writable buffer, then fallback on read-only */ - if (PyObject_GetBuffer(val, &buf, flags | PyBUF_WRITABLE)) { - PyErr_Clear(); - if (PyObject_GetBuffer(val, &buf, flags)) - goto _unrecognized; - } - if (PyBuffer_IsContiguous(&buf, 'C')) - contig = 'C'; - else if (PyBuffer_IsContiguous(&buf, 'F')) - contig = 'F'; - else - contig = 'A'; - ndim = buf.ndim; - readonly = buf.readonly ? 'R' : 'W'; - if (string_writer_put_char(w, OP_BUFFER) || - string_writer_put_int32(w, ndim) || - string_writer_put_char(w, contig) || - string_writer_put_char(w, readonly) || - string_writer_put_string(w, buf.format) || - /* We serialize the object's Python type as well, to - distinguish between types which have Numba specializations - (e.g. array.array() vs. memoryview) - */ - string_writer_put_intp(w, (npy_intp) Py_TYPE(val))) { - PyBuffer_Release(&buf); - return -1; - } - PyBuffer_Release(&buf); - return 0; - } - if (PyArray_DescrCheck(val)) { - TRY(string_writer_put_char, w, OP_NP_DTYPE); - return compute_dtype_fingerprint(w, (PyArray_Descr *) val); - } - -_unrecognized: - /* Type not recognized */ - return fingerprint_unrecognized(val); -} - -PyObject * -typeof_compute_fingerprint(PyObject *val) -{ - PyObject *res; - string_writer_t w; - - string_writer_init(&w); - - if (compute_fingerprint(&w, val)) - goto error; - res = PyBytes_FromStringAndSize(w.buf, w.n); - - string_writer_clear(&w); - return res; - -error: - string_writer_clear(&w); - return NULL; -} - -/* - * Getting the typecode from a Type object. 
- */ -static int -_typecode_from_type_object(PyObject *tyobj) { - int typecode; - PyObject *tmpcode = PyObject_GetAttrString(tyobj, "_code"); - if (tmpcode == NULL) { - return -1; - } - typecode = PyLong_AsLong(tmpcode); - Py_DECREF(tmpcode); - return typecode; -} - -/* When we want to cache the type's typecode for later lookup, we need to - keep a reference to the returned type object so that it cannot be - deleted. This is because of the following events occurring when first - using a @jit function for a given set of types: - - 1. typecode_fallback requests a new typecode for an arbitrary Python value; - this implies creating a Numba type object (on the first dispatcher call); - the typecode cache is then populated. - 2. matching of the typecode list in _dispatcherimpl.cpp fails, since the - typecode is new. - 3. we have to compile: compile_and_invoke() is called, it will invoke - Dispatcher_Insert to register the new signature. - - The reference to the Numba type object returned in step 1 is deleted as - soon as we call Py_DECREF() on it, since we are holding the only - reference. If this happens and we use the typecode we got to populate the - cache, then the cache won't ever return the correct typecode, and the - dispatcher will never successfully match the typecodes with those of - some already-compiled instance. So we need to make sure that we don't - call Py_DECREF() on objects whose typecode will be used to populate the - cache. This is ensured by calling _typecode_fallback with - retain_reference == 0. - - Note that technically we are leaking the reference, since we do not continue - to hold a pointer to the type object that we get back from typeof_pyval. - However, we don't need to refer to it again, we just need to make sure that - it is never deleted. 
-*/ -static int -_typecode_fallback(PyObject *dispatcher, PyObject *val, - int retain_reference) { - PyObject *numba_type; - int typecode; - - /* - * For values that define "_numba_type_", which holds a numba Type - * instance that should be used as the type of the value. - * Note this is done here, not in typeof_typecode(), so that - * some values can still benefit from fingerprint caching. - */ - if (PyObject_HasAttr(val, str_numba_type)) { - numba_type = PyObject_GetAttrString(val, "_numba_type_"); - if (!numba_type) - return -1; - } - else { - // Go back to the interpreter - numba_type = PyObject_CallMethodObjArgs((PyObject *) dispatcher, - str_typeof_pyval, val, NULL); - } - if (!numba_type) - return -1; - typecode = _typecode_from_type_object(numba_type); - if (!retain_reference) - Py_DECREF(numba_type); - return typecode; -} - -/* Variations on _typecode_fallback for convenience */ - -static -int typecode_fallback(PyObject *dispatcher, PyObject *val) { - return _typecode_fallback(dispatcher, val, 0); -} - -static -int typecode_fallback_keep_ref(PyObject *dispatcher, PyObject *val) { - return _typecode_fallback(dispatcher, val, 1); -} - - -/* A cache mapping fingerprints (string_writer_t *) to typecodes (int). 
*/ -static _Numba_hashtable_t *fingerprint_hashtable = NULL; - -static Py_uhash_t -hash_writer(const void *key) -{ - string_writer_t *writer = (string_writer_t *) key; - Py_uhash_t x = 0; - - /* The old FNV algorithm used by Python 2 */ - if (writer->n > 0) { - unsigned char *p = (unsigned char *) writer->buf; - Py_ssize_t len = writer->n; - x ^= *p << 7; - while (--len >= 0) - x = (1000003*x) ^ *p++; - x ^= writer->n; - if (x == (Py_uhash_t) -1) - x = -2; - } - return x; -} - -static int -compare_writer(const void *key, const _Numba_hashtable_entry_t *entry) -{ - string_writer_t *v = (string_writer_t *) key; - string_writer_t *w = (string_writer_t *) entry->key; - if (v->n != w->n) - return 0; - return memcmp(v->buf, w->buf, v->n) == 0; -} - -/* Try to compute *val*'s typecode using its fingerprint and the - * fingerprint->typecode cache. - */ -static int -typecode_using_fingerprint(PyObject *dispatcher, PyObject *val) -{ - int typecode; - string_writer_t w; - - string_writer_init(&w); - - if (compute_fingerprint(&w, val)) { - string_writer_clear(&w); - if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) { - /* Can't compute a type fingerprint for the given value, - fall back on typeof() without caching. */ - PyErr_Clear(); - return typecode_fallback(dispatcher, val); - } - return -1; - } - if (_Numba_HASHTABLE_GET(fingerprint_hashtable, &w, typecode) > 0) { - /* Cache hit */ - string_writer_clear(&w); - return typecode; - } - - /* Not found in cache: invoke pure Python typeof() and cache result. - * Note we have to keep the type alive forever as explained - * above in _typecode_fallback(). - */ - typecode = typecode_fallback_keep_ref(dispatcher, val); - if (typecode >= 0) { - string_writer_t *key = (string_writer_t *) malloc(sizeof(string_writer_t)); - if (key == NULL) { - string_writer_clear(&w); - PyErr_NoMemory(); - return -1; - } - /* Ownership of the string writer's buffer will be transferred - * to the hash table. 
- */ - string_writer_move(key, &w); - if (_Numba_HASHTABLE_SET(fingerprint_hashtable, key, typecode)) { - string_writer_clear(&w); - PyErr_NoMemory(); - return -1; - } - } - return typecode; -} - - -/* - * Direct lookup table for extra-fast typecode resolution of simple array types. - */ - -#define N_DTYPES 12 -#define N_NDIM 5 /* Fast path for up to 5D array */ -#define N_LAYOUT 3 -static int cached_arycode[N_NDIM][N_LAYOUT][N_DTYPES]; - -/* Convert a Numpy dtype number to an internal index into cached_arycode. - The returned value must also be a valid index into BASIC_TYPECODES. */ -static int dtype_num_to_typecode(int type_num) { - int dtype; - switch(type_num) { - case NPY_INT8: - dtype = 0; - break; - case NPY_INT16: - dtype = 1; - break; - case NPY_INT32: - dtype = 2; - break; - case NPY_INT64: - dtype = 3; - break; - case NPY_UINT8: - dtype = 4; - break; - case NPY_UINT16: - dtype = 5; - break; - case NPY_UINT32: - dtype = 6; - break; - case NPY_UINT64: - dtype = 7; - break; - case NPY_FLOAT32: - dtype = 8; - break; - case NPY_FLOAT64: - dtype = 9; - break; - case NPY_COMPLEX64: - dtype = 10; - break; - case NPY_COMPLEX128: - dtype = 11; - break; - default: - /* Type not included in the global lookup table */ - dtype = -1; - } - return dtype; -} - -static -int get_cached_typecode(PyArray_Descr* descr) { - PyObject* tmpobject = PyDict_GetItem(typecache, (PyObject*)descr); - if (tmpobject == NULL) - return -1; - - return PyLong_AsLong(tmpobject); -} - -static -void cache_typecode(PyArray_Descr* descr, int typecode) { - PyObject* value = PyLong_FromLong(typecode); - PyDict_SetItem(typecache, (PyObject*)descr, value); - Py_DECREF(value); -} - -static -PyObject* ndarray_key(int ndim, int layout, PyArray_Descr* descr) { - PyObject* tmpndim = PyLong_FromLong(ndim); - PyObject* tmplayout = PyLong_FromLong(layout); - PyObject* key = PyTuple_Pack(3, tmpndim, tmplayout, descr); - Py_DECREF(tmpndim); - Py_DECREF(tmplayout); - return key; -} - -static -int 
get_cached_ndarray_typecode(int ndim, int layout, PyArray_Descr* descr) { - PyObject* key = ndarray_key(ndim, layout, descr); - PyObject *tmpobject = PyDict_GetItem(ndarray_typecache, key); - if (tmpobject == NULL) - return -1; - - Py_DECREF(key); - return PyLong_AsLong(tmpobject); -} - -static -void cache_ndarray_typecode(int ndim, int layout, PyArray_Descr* descr, - int typecode) { - PyObject* key = ndarray_key(ndim, layout, descr); - PyObject* value = PyLong_FromLong(typecode); - PyDict_SetItem(ndarray_typecache, key, value); - Py_DECREF(key); - Py_DECREF(value); -} - -static -int typecode_ndarray(PyObject *dispatcher, PyArrayObject *ary) { - int typecode; - int dtype; - int ndim = PyArray_NDIM(ary); - int layout = 0; - - /* The order in which we check for the right contiguous-ness is important. - The order must match the order by numba.numpy_support.map_layout. - */ - if (PyArray_ISCARRAY(ary)){ - layout = 1; - } else if (PyArray_ISFARRAY(ary)) { - layout = 2; - } - - if (ndim <= 0 || ndim > N_NDIM) goto FALLBACK; - - dtype = dtype_num_to_typecode(PyArray_TYPE(ary)); - if (dtype == -1) goto FALLBACK; - - /* Fast path, using direct table lookup */ - assert(layout < N_LAYOUT); - assert(ndim <= N_NDIM); - assert(dtype < N_DTYPES); - - typecode = cached_arycode[ndim - 1][layout][dtype]; - if (typecode == -1) { - /* First use of this table entry, so it requires populating */ - typecode = typecode_fallback_keep_ref(dispatcher, (PyObject*)ary); - cached_arycode[ndim - 1][layout][dtype] = typecode; - } - return typecode; - -FALLBACK: - /* Slower path, for non-trivial array types */ - - /* If this isn't a structured array then we can't use the cache */ - if (PyArray_TYPE(ary) != NPY_VOID) - return typecode_using_fingerprint(dispatcher, (PyObject *) ary); - - /* Check type cache */ - typecode = get_cached_ndarray_typecode(ndim, layout, PyArray_DESCR(ary)); - if (typecode == -1) { - /* First use of this type, use fallback and populate the cache */ - typecode = 
typecode_fallback_keep_ref(dispatcher, (PyObject*)ary); - cache_ndarray_typecode(ndim, layout, PyArray_DESCR(ary), typecode); - } - return typecode; -} - -static -int typecode_arrayscalar(PyObject *dispatcher, PyObject* aryscalar) { - int typecode; - PyArray_Descr *descr; - descr = PyArray_DescrFromScalar(aryscalar); - if (!descr) - return typecode_using_fingerprint(dispatcher, aryscalar); - - /* Is it a structured scalar? */ - if (descr->type_num == NPY_VOID) { - typecode = get_cached_typecode(descr); - if (typecode == -1) { - /* Resolve through fallback then populate cache */ - typecode = typecode_fallback_keep_ref(dispatcher, aryscalar); - cache_typecode(descr, typecode); - } - Py_DECREF(descr); - return typecode; - } - - /* Is it one of the well-known basic types? */ - typecode = dtype_num_to_typecode(descr->type_num); - Py_DECREF(descr); - if (typecode == -1) - return typecode_using_fingerprint(dispatcher, aryscalar); - return BASIC_TYPECODES[typecode]; -} - -int -typeof_typecode(PyObject *dispatcher, PyObject *val) -{ - PyTypeObject *tyobj = Py_TYPE(val); - /* This needs to be kept in sync with Dispatcher.typeof_pyval(), - * otherwise funny things may happen. 
- */ - if (tyobj == &PyInt_Type || tyobj == &PyLong_Type) { -#if SIZEOF_VOID_P < 8 - /* On 32-bit platforms, choose between tc_intp (32-bit) and tc_int64 */ - PY_LONG_LONG ll = PyLong_AsLongLong(val); - if (ll == -1 && PyErr_Occurred()) { - /* The integer is too large, let us truncate it */ - PyErr_Clear(); - return tc_int64; - } - if ((ll & 0xffffffff) != ll) - return tc_int64; -#endif - return tc_intp; - } - else if (tyobj == &PyFloat_Type) - return tc_float64; - else if (tyobj == &PyComplex_Type) - return tc_complex128; - /* Array scalar handling */ - else if (PyArray_CheckScalar(val)) { - return typecode_arrayscalar(dispatcher, val); - } - /* Array handling */ - else if (PyType_IsSubtype(tyobj, &PyArray_Type)) { - return typecode_ndarray(dispatcher, (PyArrayObject*)val); - } - - return typecode_using_fingerprint(dispatcher, val); -} - - -#if PY_MAJOR_VERSION >= 3 - static - void* wrap_import_array(void) { - import_array(); /* import array returns NULL on failure */ - return (void*)1; - } -#else - static - void wrap_import_array(void) { - import_array(); - } -#endif - - -static -int init_numpy(void) { - #if PY_MAJOR_VERSION >= 3 - return wrap_import_array() != NULL; - #else - wrap_import_array(); - return 1; /* always succeed */ - #endif -} - - -/* - * typeof_init(omittedarg_type, typecode_dict) - * (called from dispatcher.py to fill in missing information) - */ -PyObject * -typeof_init(PyObject *self, PyObject *args) -{ - PyObject *tmpobj; - PyObject *dict; - int index = 0; - - if (!PyArg_ParseTuple(args, "O!O!:typeof_init", - &PyType_Type, &omittedarg_type, - &PyDict_Type, &dict)) - return NULL; - - /* Initialize Numpy API */ - if ( ! 
init_numpy() ) { - return NULL; - } - - #define UNWRAP_TYPE(S) \ - if(!(tmpobj = PyDict_GetItemString(dict, #S))) return NULL; \ - else { tc_##S = PyLong_AsLong(tmpobj); \ - BASIC_TYPECODES[index++] = tc_##S; } - - UNWRAP_TYPE(int8) - UNWRAP_TYPE(int16) - UNWRAP_TYPE(int32) - UNWRAP_TYPE(int64) - - UNWRAP_TYPE(uint8) - UNWRAP_TYPE(uint16) - UNWRAP_TYPE(uint32) - UNWRAP_TYPE(uint64) - - UNWRAP_TYPE(float32) - UNWRAP_TYPE(float64) - - UNWRAP_TYPE(complex64) - UNWRAP_TYPE(complex128) - - switch(sizeof(void*)) { - case 4: - tc_intp = tc_int32; - break; - case 8: - tc_intp = tc_int64; - break; - default: - PyErr_SetString(PyExc_AssertionError, "sizeof(void*) != {4, 8}"); - return NULL; - } - - #undef UNWRAP_TYPE - - typecache = PyDict_New(); - ndarray_typecache = PyDict_New(); - structured_dtypes = PyDict_New(); - if (typecache == NULL || ndarray_typecache == NULL || - structured_dtypes == NULL) { - PyErr_SetString(PyExc_RuntimeError, "failed to create type cache"); - return NULL; - } - - fingerprint_hashtable = _Numba_hashtable_new(sizeof(int), - hash_writer, - compare_writer); - if (fingerprint_hashtable == NULL) { - PyErr_NoMemory(); - return NULL; - } - - /* initialize cached_arycode to all ones (in bits) */ - memset(cached_arycode, 0xFF, sizeof(cached_arycode)); - - str_typeof_pyval = PyString_InternFromString("typeof_pyval"); - str_value = PyString_InternFromString("value"); - str_numba_type = PyString_InternFromString("_numba_type_"); - if (!str_value || !str_typeof_pyval || !str_numba_type) - return NULL; - - Py_RETURN_NONE; -} diff --git a/numba/numba/_typeof.h b/numba/numba/_typeof.h deleted file mode 100644 index 66e29c53f..000000000 --- a/numba/numba/_typeof.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef NUMBA_TYPEOF_H_ -#define NUMBA_TYPEOF_H_ - - -extern PyObject *typeof_init(PyObject *self, PyObject *args); -extern int typeof_typecode(PyObject *dispatcher, PyObject *val); -extern PyObject *typeof_compute_fingerprint(PyObject *val); - - -#endif /* 
NUMBA_TYPEOF_H_ */ diff --git a/numba/numba/_version.py b/numba/numba/_version.py deleted file mode 100644 index 0e158819a..000000000 --- a/numba/numba/_version.py +++ /dev/null @@ -1,239 +0,0 @@ - -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.14 (https://github.com/warner/python-versioneer) - -import errno -import os -import re -import subprocess -import sys - -# these strings will be replaced by git during git-archive -git_refnames = "$Format:%d$" -git_full = "$Format:%H$" - -# these strings are filled in when 'setup.py versioneer' creates _version.py -tag_prefix = "" -parentdir_prefix = "numba-" -versionfile_source = "numba/_version.py" - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): - assert isinstance(commands, list) - p = None - for c in commands: - try: - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % args[0]) - print(e) - return None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % args[0]) - return None - return stdout - - -def versions_from_parentdir(parentdir_prefix, root, verbose=False): - # Source tarballs conventionally unpack into a directory that 
includes - # both the project name and a version string. - dirname = os.path.basename(root) - if not dirname.startswith(parentdir_prefix): - if verbose: - print("guessing rootdir is '%s', but '%s' doesn't start with " - "prefix '%s'" % (root, dirname, parentdir_prefix)) - return None - return {"version": dirname[len(parentdir_prefix):], "full": ""} - - -def git_get_keywords(versionfile_abs): - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -def git_versions_from_keywords(keywords, tag_prefix, verbose=False): - if not keywords: - return {} # keyword-finding function failed to find keywords - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - return {} # unexpanded, so not in an unpacked git-archive tarball - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. 
The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs-tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %s" % r) - return {"version": r, - "full": keywords["full"].strip()} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full": keywords["full"].strip()} - - -def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False): - # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens. - - # dirty - dirty = git_describe.endswith("-dirty") - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - dirty_suffix = ".dirty" if dirty else "" - - # now we have TAG-NUM-gHEX or HEX - - if "-" not in git_describe: # just HEX - return "0+untagged.g"+git_describe+dirty_suffix, dirty - - # just TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? 
- return "0+unparseable"+dirty_suffix, dirty - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - return None, dirty - tag = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - distance = int(mo.group(2)) - - # commit: short hex revision ID - commit = mo.group(3) - - # now build up version string, with post-release "local version - # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a - # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you - # can always test version.endswith(".dirty"). - version = tag - if distance or dirty: - version += "+%d.g%s" % (distance, commit) + dirty_suffix - - return version, dirty - - -def git_versions_from_vcs(tag_prefix, root, verbose=False): - # this runs 'git' from the root of the source tree. This only gets called - # if the git-archive 'subst' keywords were *not* expanded, and - # _version.py hasn't already been rewritten with a short version string, - # meaning we're inside a checked out source tree. 
- - if not os.path.exists(os.path.join(root, ".git")): - if verbose: - print("no .git in %s" % root) - return {} # get_versions() will try next method - - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - # if there is a tag, this yields TAG-NUM-gHEX[-dirty] - # if there are no tags, this yields HEX[-dirty] (no NUM) - stdout = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long"], - cwd=root) - # --long was added in git-1.5.5 - if stdout is None: - return {} # try next method - version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose) - - # build "full", which is FULLHEX[.dirty] - stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if stdout is None: - return {} - full = stdout.strip() - if dirty: - full += ".dirty" - - return {"version": version, "full": full} - - -def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False): - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - keywords = {"refnames": git_refnames, "full": git_full} - ver = git_versions_from_keywords(keywords, tag_prefix, verbose) - if ver: - return ver - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. 
- for i in versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return default - - return (git_versions_from_vcs(tag_prefix, root, verbose) - or versions_from_parentdir(parentdir_prefix, root, verbose) - or default) diff --git a/numba/numba/analysis.py b/numba/numba/analysis.py deleted file mode 100644 index 12114cb47..000000000 --- a/numba/numba/analysis.py +++ /dev/null @@ -1,233 +0,0 @@ -""" -Utils for IR analysis -""" -import operator -from functools import reduce -from collections import namedtuple, defaultdict - -from numba import ir -from numba.controlflow import CFGraph - -# -# Analysis related to variable lifetime -# - -_use_defs_result = namedtuple('use_defs_result', 'usemap,defmap') - -# other packages that define new nodes add calls for finding defs -# format: {type:function} -ir_extension_usedefs = {} - -def compute_use_defs(blocks): - """ - Find variable use/def per block. - """ - - var_use_map = {} # { block offset -> set of vars } - var_def_map = {} # { block offset -> set of vars } - for offset, ir_block in blocks.items(): - var_use_map[offset] = use_set = set() - var_def_map[offset] = def_set = set() - for stmt in ir_block.body: - if type(stmt) in ir_extension_usedefs: - func = ir_extension_usedefs[type(stmt)] - func(stmt, use_set, def_set) - continue - if isinstance(stmt, ir.Assign): - if isinstance(stmt.value, ir.Inst): - rhs_set = set(var.name for var in stmt.value.list_vars()) - elif isinstance(stmt.value, ir.Var): - rhs_set = set([stmt.value.name]) - elif isinstance(stmt.value, (ir.Arg, ir.Const, ir.Global, - ir.FreeVar)): - rhs_set = () - else: - raise AssertionError('unreachable', type(stmt.value)) - # If lhs not in rhs of the assignment - if stmt.target.name not in rhs_set: - def_set.add(stmt.target.name) - - for var in stmt.list_vars(): - # do not include locally defined vars to use-map - if var.name not in def_set: - use_set.add(var.name) - - return _use_defs_result(usemap=var_use_map, defmap=var_def_map) - 
- -def compute_live_map(cfg, blocks, var_use_map, var_def_map): - """ - Find variables that must be alive at the ENTRY of each block. - We use a simple fix-point algorithm that iterates until the set of - live variables is unchanged for each block. - """ - live_map = {} - for offset in blocks.keys(): - live_map[offset] = var_use_map[offset] - - def fix_point_progress(): - return tuple(len(v) for v in live_map.values()) - - old_point = None - new_point = fix_point_progress() - while old_point != new_point: - for offset in live_map.keys(): - for inc_blk, _data in cfg.predecessors(offset): - # substract all variables that are defined in - # the incoming block - live_map[inc_blk] |= live_map[offset] - var_def_map[inc_blk] - old_point = new_point - new_point = fix_point_progress() - - return live_map - - -_dead_maps_result = namedtuple('dead_maps_result', 'internal,escaping,combined') - - -def compute_dead_maps(cfg, blocks, live_map, var_def_map): - """ - Compute the end-of-live information for variables. - `live_map` contains a mapping of block offset to all the living - variables at the ENTRY of the block. 
- """ - # The following three dictionaries will be - # { block offset -> set of variables to delete } - # all vars that should be deleted at the start of the successors - escaping_dead_map = defaultdict(set) - # all vars that should be deleted within this block - internal_dead_map = defaultdict(set) - # all vars that should be delted after the function exit - exit_dead_map = defaultdict(set) - - for offset, ir_block in blocks.items(): - # live vars WITHIN the block will include all the locally - # defined variables - cur_live_set = live_map[offset] | var_def_map[offset] - # vars alive alive in the outgoing blocks - outgoing_live_map = dict((out_blk, live_map[out_blk]) - for out_blk, _data in cfg.successors(offset)) - # vars to keep alive for the terminator - terminator_liveset = set(v.name - for v in ir_block.terminator.list_vars()) - # vars to keep alive in the successors - combined_liveset = reduce(operator.or_, outgoing_live_map.values(), - set()) - # include variables used in terminator - combined_liveset |= terminator_liveset - # vars that are dead within the block beacuse they are not - # propagated to any outgoing blocks - internal_set = cur_live_set - combined_liveset - internal_dead_map[offset] = internal_set - # vars that escape this block - escaping_live_set = cur_live_set - internal_set - for out_blk, new_live_set in outgoing_live_map.items(): - # successor should delete the unused escaped vars - new_live_set = new_live_set | var_def_map[out_blk] - escaping_dead_map[out_blk] |= escaping_live_set - new_live_set - - # if no outgoing blocks - if not outgoing_live_map: - # insert var used by terminator - exit_dead_map[offset] = terminator_liveset - - # Verify that the dead maps cover all live variables - all_vars = reduce(operator.or_, live_map.values(), set()) - internal_dead_vars = reduce(operator.or_, internal_dead_map.values(), - set()) - escaping_dead_vars = reduce(operator.or_, escaping_dead_map.values(), - set()) - exit_dead_vars = 
reduce(operator.or_, exit_dead_map.values(), set()) - dead_vars = (internal_dead_vars | escaping_dead_vars | exit_dead_vars) - missing_vars = all_vars - dead_vars - if missing_vars: - # There are no exit points - if not cfg.exit_points(): - # We won't be able to verify this - pass - else: - msg = 'liveness info missing for vars: {0}'.format(missing_vars) - raise RuntimeError(msg) - - combined = dict((k, internal_dead_map[k] | escaping_dead_map[k]) - for k in blocks) - - return _dead_maps_result(internal=internal_dead_map, - escaping=escaping_dead_map, - combined=combined) - - -def compute_live_variables(cfg, blocks, var_def_map, var_dead_map): - """ - Compute the live variables at the beginning of each block - and at each yield point. - The ``var_def_map`` and ``var_dead_map`` indicates the variable defined - and deleted at each block, respectively. - """ - # live var at the entry per block - block_entry_vars = defaultdict(set) - - def fix_point_progress(): - return tuple(map(len, block_entry_vars.values())) - - old_point = None - new_point = fix_point_progress() - - # Propagate defined variables and still live the successors. - # (note the entry block automatically gets an empty set) - - # Note: This is finding the actual available variables at the entry - # of each block. The algorithm in compute_live_map() is finding - # the variable that must be available at the entry of each block. - # This is top-down in the dataflow. The other one is bottom-up. - while old_point != new_point: - # We iterate until the result stabilizes. This is necessary - # because of loops in the graphself. 
- for offset in blocks: - # vars available + variable defined - avail = block_entry_vars[offset] | var_def_map[offset] - # substract variables deleted - avail -= var_dead_map[offset] - # add ``avail`` to each successors - for succ, _data in cfg.successors(offset): - block_entry_vars[succ] |= avail - - old_point = new_point - new_point = fix_point_progress() - - return block_entry_vars - - -# -# Analysis related to controlflow -# - -def compute_cfg_from_blocks(blocks): - cfg = CFGraph() - for k in blocks: - cfg.add_node(k) - - for k, b in blocks.items(): - term = b.terminator - for target in term.get_targets(): - cfg.add_edge(k, target) - - cfg.set_entry_point(min(blocks)) - cfg.process() - return cfg - - -def find_top_level_loops(cfg): - """ - A generator that yields toplevel loops given a control-flow-graph - """ - blocks_in_loop = set() - # get loop bodies - for loop in cfg.loops().values(): - insiders = set(loop.body) | set(loop.entries) | set(loop.exits) - insiders.discard(loop.header) - blocks_in_loop |= insiders - # find loop that is not part of other loops - for loop in cfg.loops().values(): - if loop.header not in blocks_in_loop: - yield loop diff --git a/numba/numba/annotations/__init__.py b/numba/numba/annotations/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/numba/numba/annotations/template.html b/numba/numba/annotations/template.html deleted file mode 100644 index 73e2f6f85..000000000 --- a/numba/numba/annotations/template.html +++ /dev/null @@ -1,144 +0,0 @@ - - - - - - - - - - - {% for func_key in func_data.keys() %} - - {% set loop1 = loop %} - -

- -
- - - {%- for num, line in func_data[func_key]['python_lines'] -%} - {%- if func_data[func_key]['ir_lines'][num] %} - - {% else -%} - - {%- endif -%} - {%- endfor -%} -
-
- - - {{num}}: - {{func_data[func_key]['python_indent'][num]}}{{line|e}} - - - - - {%- for ir_line, ir_line_type in func_data[func_key]['ir_lines'][num] %} - - - - {%- endfor -%} - -
  - {{- func_data[func_key]['python_indent'][num]}} - {{func_data[func_key]['ir_indent'][num][loop.index0]}}{{ir_line|e -}} - {{ir_line_type}} - -
-
-
- - {{num}}: - {{func_data[func_key]['python_indent'][num]}}{{line|e}} - -
-
- -


- - {% endfor %} - - - - diff --git a/numba/numba/annotations/type_annotations.py b/numba/numba/annotations/type_annotations.py deleted file mode 100644 index 77c6f610f..000000000 --- a/numba/numba/annotations/type_annotations.py +++ /dev/null @@ -1,283 +0,0 @@ -from __future__ import print_function, absolute_import - -from collections import Mapping, defaultdict, OrderedDict -from contextlib import closing -import copy -import inspect -import os -import re -import sys -import textwrap - -from numba.io_support import StringIO -from numba import ir -import numba.dispatcher - - -class SourceLines(Mapping): - def __init__(self, func): - - try: - lines, startno = inspect.getsourcelines(func) - except IOError: - self.lines = () - self.startno = 0 - else: - self.lines = textwrap.dedent(''.join(lines)).splitlines() - self.startno = startno - - def __getitem__(self, lineno): - try: - return self.lines[lineno - self.startno].rstrip() - except IndexError: - return '' - - def __iter__(self): - return iter((self.startno + i) for i in range(len(self.lines))) - - def __len__(self): - return len(self.lines) - - @property - def avail(self): - return bool(self.lines) - - -class TypeAnnotation(object): - - # func_data dict stores annotation data for all functions that are - # compiled. We store the data in the TypeAnnotation class since a new - # TypeAnnotation instance is created for each function that is compiled. - # For every function that is compiled, we add the type annotation data to - # this dict and write the html annotation file to disk (rewrite the html - # file for every function since we don't know if this is the last function - # to be compiled). 
- func_data = OrderedDict() - - def __init__(self, func_ir, typemap, calltypes, lifted, lifted_from, - args, return_type, html_output=None): - self.func_id = func_ir.func_id - self.blocks = func_ir.blocks - self.typemap = typemap - self.calltypes = calltypes - self.filename = func_ir.loc.filename - self.linenum = str(func_ir.loc.line) - self.signature = str(args) + ' -> ' + str(return_type) - - # lifted loop information - self.lifted = lifted - self.num_lifted_loops = len(lifted) - - # If this is a lifted loop function that is being compiled, lifted_from - # points to annotation data from function that this loop lifted function - # was lifted from. This is used to stick lifted loop annotations back - # into original function. - self.lifted_from = lifted_from - - def prepare_annotations(self): - # Prepare annotations - groupedinst = defaultdict(list) - found_lifted_loop = False - #for blkid, blk in self.blocks.items(): - for blkid in sorted(self.blocks.keys()): - blk = self.blocks[blkid] - groupedinst[blk.loc.line].append("label %s" % blkid) - for inst in blk.body: - lineno = inst.loc.line - - if isinstance(inst, ir.Assign): - if found_lifted_loop: - atype = 'XXX Lifted Loop XXX' - found_lifted_loop = False - elif (isinstance(inst.value, ir.Expr) and - inst.value.op == 'call'): - atype = self.calltypes[inst.value] - elif (isinstance(inst.value, ir.Const) and - isinstance(inst.value.value, numba.dispatcher.LiftedLoop)): - atype = 'XXX Lifted Loop XXX' - found_lifted_loop = True - else: - atype = self.typemap[inst.target.name] - - aline = "%s = %s :: %s" % (inst.target, inst.value, atype) - elif isinstance(inst, ir.SetItem): - atype = self.calltypes[inst] - aline = "%s :: %s" % (inst, atype) - else: - aline = "%s" % inst - groupedinst[lineno].append(" %s" % aline) - return groupedinst - - def annotate(self): - source = SourceLines(self.func_id.func) - # if not source.avail: - # return "Source code unavailable" - - groupedinst = self.prepare_annotations() - - # Format 
annotations - io = StringIO() - with closing(io): - if source.avail: - print("# File: %s" % self.filename, file=io) - for num in source: - srcline = source[num] - ind = _getindent(srcline) - print("%s# --- LINE %d --- " % (ind, num), file=io) - for inst in groupedinst[num]: - print('%s# %s' % (ind, inst), file=io) - print(file=io) - print(srcline, file=io) - print(file=io) - if self.lifted: - print("# The function contains lifted loops", file=io) - for loop in self.lifted: - print("# Loop at line %d" % loop.get_source_location(), - file=io) - print("# Has %d overloads" % len(loop.overloads), - file=io) - for cres in loop.overloads.values(): - print(cres.type_annotation, file=io) - else: - print("# Source code unavailable", file=io) - for num in groupedinst: - for inst in groupedinst[num]: - print('%s' % (inst,), file=io) - print(file=io) - - return io.getvalue() - - def html_annotate(self, outfile): - # ensure that annotation information is assembled - self.annotate_raw() - # make a deep copy ahead of the pending mutations - func_data = copy.deepcopy(self.func_data) - - key = 'python_indent' - for this_func in func_data.values(): - if key in this_func: - idents = {} - for line, amount in this_func[key].items(): - idents[line] = ' ' * amount - this_func[key] = idents - - key = 'ir_indent' - for this_func in func_data.values(): - if key in this_func: - idents = {} - for line, ir_id in this_func[key].items(): - idents[line] = [' ' * amount for amount in ir_id] - this_func[key] = idents - - - - try: - from jinja2 import Template - except ImportError: - raise ImportError("please install the 'jinja2' package") - - root = os.path.join(os.path.dirname(__file__)) - template_filename = os.path.join(root, 'template.html') - with open(template_filename, 'r') as template: - html = template.read() - - template = Template(html) - rendered = template.render(func_data=func_data) - outfile.write(rendered) - - def annotate_raw(self): - """ - This returns "raw" annotation information 
i.e. it has no output format - specific markup included. - """ - python_source = SourceLines(self.func_id.func) - ir_lines = self.prepare_annotations() - line_nums = [num for num in python_source] - lifted_lines = [l.get_source_location() for l in self.lifted] - - def add_ir_line(func_data, line): - line_str = line.strip() - line_type = '' - if line_str.endswith('pyobject'): - line_str = line_str.replace('pyobject', '') - line_type = 'pyobject' - func_data['ir_lines'][num].append((line_str, line_type)) - indent_len = len(_getindent(line)) - func_data['ir_indent'][num].append(indent_len) - - func_key = (self.func_id.filename + ':' + str(self.func_id.firstlineno + 1), - self.signature) - if self.lifted_from is not None and self.lifted_from[1]['num_lifted_loops'] > 0: - # This is a lifted loop function that is being compiled. Get the - # numba ir for lines in loop function to use for annotating - # original python function that the loop was lifted from. - func_data = self.lifted_from[1] - for num in line_nums: - if num not in ir_lines.keys(): - continue - func_data['ir_lines'][num] = [] - func_data['ir_indent'][num] = [] - for line in ir_lines[num]: - add_ir_line(func_data, line) - if line.strip().endswith('pyobject'): - func_data['python_tags'][num] = 'object_tag' - # If any pyobject line is found, make sure original python - # line that was marked as a lifted loop start line is tagged - # as an object line instead. Lifted loop start lines should - # only be marked as lifted loop lines if the lifted loop - # was successfully compiled in nopython mode. - func_data['python_tags'][self.lifted_from[0]] = 'object_tag' - - # We're done with this lifted loop, so decrement lfited loop counter. - # When lifted loop counter hits zero, that means we're ready to write - # out annotations to html file. 
- self.lifted_from[1]['num_lifted_loops'] -= 1 - - elif func_key not in TypeAnnotation.func_data.keys(): - TypeAnnotation.func_data[func_key] = {} - func_data = TypeAnnotation.func_data[func_key] - - for i, loop in enumerate(self.lifted): - # Make sure that when we process each lifted loop function later, - # we'll know where it originally came from. - loop.lifted_from = (lifted_lines[i], func_data) - func_data['num_lifted_loops'] = self.num_lifted_loops - - func_data['filename'] = self.filename - func_data['funcname'] = self.func_id.func_name - func_data['python_lines'] = [] - func_data['python_indent'] = {} - func_data['python_tags'] = {} - func_data['ir_lines'] = {} - func_data['ir_indent'] = {} - - for num in line_nums: - func_data['python_lines'].append((num, python_source[num].strip())) - indent_len = len(_getindent(python_source[num])) - func_data['python_indent'][num] = indent_len - func_data['python_tags'][num] = '' - func_data['ir_lines'][num] = [] - func_data['ir_indent'][num] = [] - - for line in ir_lines[num]: - add_ir_line(func_data, line) - if num in lifted_lines: - func_data['python_tags'][num] = 'lifted_tag' - elif line.strip().endswith('pyobject'): - func_data['python_tags'][num] = 'object_tag' - return self.func_data - - - def __str__(self): - return self.annotate() - - -re_longest_white_prefix = re.compile('^\s*') - - -def _getindent(text): - m = re_longest_white_prefix.match(text) - if not m: - return '' - else: - return ' ' * len(m.group(0)) diff --git a/numba/numba/appdirs.py b/numba/numba/appdirs.py deleted file mode 100644 index 3be0f7f6f..000000000 --- a/numba/numba/appdirs.py +++ /dev/null @@ -1,557 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2005-2010 ActiveState Software Inc. -# Copyright (c) 2013 Eddy Petrișor - -"""Utilities for determining application-specific dirs. - -See for details and usage. 
-""" -# Dev Notes: -# - MSDN on where to store app data files: -# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120 -# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html -# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html - -__version_info__ = (1, 4, 1) -__version__ = '.'.join(map(str, __version_info__)) - - -import sys -import os - -PY3 = sys.version_info[0] == 3 - -if PY3: - unicode = str - -if sys.platform.startswith('java'): - import platform - os_name = platform.java_ver()[3][0] - if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc. - system = 'win32' - elif os_name.startswith('Mac'): # "Mac OS X", etc. - system = 'darwin' - else: # "Linux", "SunOS", "FreeBSD", etc. - # Setting this to "linux2" is not ideal, but only Windows or Mac - # are actually checked for and the rest of the module expects - # *sys.platform* style strings. - system = 'linux2' -else: - system = sys.platform - - - -def user_data_dir(appname=None, appauthor=None, version=None, roaming=False): - r"""Return full path to the user-specific data dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be ".". - Only applied when appname is present. - "roaming" (boolean, default False) can be set True to use the Windows - roaming appdata directory. That means that for users on a Windows - network setup for roaming profiles, this user data will be - sync'd on login. 
See - - for a discussion of issues. - - Typical user data directories are: - Mac OS X: ~/Library/Application Support/ - Unix: ~/.local/share/ # or in $XDG_DATA_HOME, if defined - Win XP (not roaming): C:\Documents and Settings\\Application Data\\ - Win XP (roaming): C:\Documents and Settings\\Local Settings\Application Data\\ - Win 7 (not roaming): C:\Users\\AppData\Local\\ - Win 7 (roaming): C:\Users\\AppData\Roaming\\ - - For Unix, we follow the XDG spec and support $XDG_DATA_HOME. - That means, by default "~/.local/share/". - """ - if system == "win32": - if appauthor is None: - appauthor = appname - const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA" - path = os.path.normpath(_get_win_folder(const)) - if appname: - if appauthor is not False: - path = os.path.join(path, appauthor, appname) - else: - path = os.path.join(path, appname) - elif system == 'darwin': - path = os.path.expanduser('~/Library/Application Support/') - if appname: - path = os.path.join(path, appname) - else: - path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share")) - if appname: - path = os.path.join(path, appname) - if appname and version: - path = os.path.join(path, version) - return path - - -def site_data_dir(appname=None, appauthor=None, version=None, multipath=False): - """Return full path to the user-shared data dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be ".". - Only applied when appname is present. 
- "multipath" is an optional parameter only applicable to *nix - which indicates that the entire list of data dirs should be - returned. By default, the first item from XDG_DATA_DIRS is - returned, or '/usr/local/share/', - if XDG_DATA_DIRS is not set - - Typical user data directories are: - Mac OS X: /Library/Application Support/ - Unix: /usr/local/share/ or /usr/share/ - Win XP: C:\Documents and Settings\All Users\Application Data\\ - Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.) - Win 7: C:\ProgramData\\ # Hidden, but writeable on Win 7. - - For Unix, this is using the $XDG_DATA_DIRS[0] default. - - WARNING: Do not use this on Windows. See the Vista-Fail note above for why. - """ - if system == "win32": - if appauthor is None: - appauthor = appname - path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA")) - if appname: - if appauthor is not False: - path = os.path.join(path, appauthor, appname) - else: - path = os.path.join(path, appname) - elif system == 'darwin': - path = os.path.expanduser('/Library/Application Support') - if appname: - path = os.path.join(path, appname) - else: - # XDG default for $XDG_DATA_DIRS - # only first, if multipath is False - path = os.getenv('XDG_DATA_DIRS', - os.pathsep.join(['/usr/local/share', '/usr/share'])) - pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)] - if appname: - if version: - appname = os.path.join(appname, version) - pathlist = [os.sep.join([x, appname]) for x in pathlist] - - if multipath: - path = os.pathsep.join(pathlist) - else: - path = pathlist[0] - return path - - if appname and version: - path = os.path.join(path, version) - return path - - -def user_config_dir(appname=None, appauthor=None, version=None, roaming=False): - r"""Return full path to the user-specific config dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. 
- "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be ".". - Only applied when appname is present. - "roaming" (boolean, default False) can be set True to use the Windows - roaming appdata directory. That means that for users on a Windows - network setup for roaming profiles, this user data will be - sync'd on login. See - - for a discussion of issues. - - Typical user data directories are: - Mac OS X: same as user_data_dir - Unix: ~/.config/ # or in $XDG_CONFIG_HOME, if defined - Win *: same as user_data_dir - - For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME. - That means, by deafult "~/.config/". - """ - if system in ["win32", "darwin"]: - path = user_data_dir(appname, appauthor, None, roaming) - else: - path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config")) - if appname: - path = os.path.join(path, appname) - if appname and version: - path = os.path.join(path, version) - return path - - -def site_config_dir(appname=None, appauthor=None, version=None, multipath=False): - """Return full path to the user-shared data dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be ".". 
- Only applied when appname is present. - "multipath" is an optional parameter only applicable to *nix - which indicates that the entire list of config dirs should be - returned. By default, the first item from XDG_CONFIG_DIRS is - returned, or '/etc/xdg/', if XDG_CONFIG_DIRS is not set - - Typical user data directories are: - Mac OS X: same as site_data_dir - Unix: /etc/xdg/ or $XDG_CONFIG_DIRS[i]/ for each value in - $XDG_CONFIG_DIRS - Win *: same as site_data_dir - Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.) - - For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False - - WARNING: Do not use this on Windows. See the Vista-Fail note above for why. - """ - if system in ["win32", "darwin"]: - path = site_data_dir(appname, appauthor) - if appname and version: - path = os.path.join(path, version) - else: - # XDG default for $XDG_CONFIG_DIRS - # only first, if multipath is False - path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg') - pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)] - if appname: - if version: - appname = os.path.join(appname, version) - pathlist = [os.sep.join([x, appname]) for x in pathlist] - - if multipath: - path = os.pathsep.join(pathlist) - else: - path = pathlist[0] - return path - - -def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True): - r"""Return full path to the user-specific cache dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be ".". 
- Only applied when appname is present. - "opinion" (boolean) can be False to disable the appending of - "Cache" to the base app data dir for Windows. See - discussion below. - - Typical user cache directories are: - Mac OS X: ~/Library/Caches/ - Unix: ~/.cache/ (XDG default) - Win XP: C:\Documents and Settings\\Local Settings\Application Data\\\Cache - Vista: C:\Users\\AppData\Local\\\Cache - - On Windows the only suggestion in the MSDN docs is that local settings go in - the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming - app data dir (the default returned by `user_data_dir` above). Apps typically - put cache data somewhere *under* the given dir here. Some examples: - ...\Mozilla\Firefox\Profiles\\Cache - ...\Acme\SuperApp\Cache\1.0 - OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value. - This can be disabled with the `opinion=False` option. - """ - if system == "win32": - if appauthor is None: - appauthor = appname - path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA")) - if appname: - if appauthor is not False: - path = os.path.join(path, appauthor, appname) - else: - path = os.path.join(path, appname) - if opinion: - path = os.path.join(path, "Cache") - elif system == 'darwin': - path = os.path.expanduser('~/Library/Caches') - if appname: - path = os.path.join(path, appname) - else: - path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache')) - if appname: - path = os.path.join(path, appname) - if appname and version: - path = os.path.join(path, version) - return path - - -def user_log_dir(appname=None, appauthor=None, version=None, opinion=True): - r"""Return full path to the user-specific log dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. 
You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be ".". - Only applied when appname is present. - "opinion" (boolean) can be False to disable the appending of - "Logs" to the base app data dir for Windows, and "log" to the - base cache dir for Unix. See discussion below. - - Typical user cache directories are: - Mac OS X: ~/Library/Logs/ - Unix: ~/.cache//log # or under $XDG_CACHE_HOME if defined - Win XP: C:\Documents and Settings\\Local Settings\Application Data\\\Logs - Vista: C:\Users\\AppData\Local\\\Logs - - On Windows the only suggestion in the MSDN docs is that local settings - go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in - examples of what some windows apps use for a logs dir.) - - OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA` - value for Windows and appends "log" to the user cache dir for Unix. - This can be disabled with the `opinion=False` option. 
- """ - if system == "darwin": - path = os.path.join( - os.path.expanduser('~/Library/Logs'), - appname) - elif system == "win32": - path = user_data_dir(appname, appauthor, version) - version = False - if opinion: - path = os.path.join(path, "Logs") - else: - path = user_cache_dir(appname, appauthor, version) - version = False - if opinion: - path = os.path.join(path, "log") - if appname and version: - path = os.path.join(path, version) - return path - - -class AppDirs(object): - """Convenience wrapper for getting application dirs.""" - def __init__(self, appname, appauthor=None, version=None, roaming=False, - multipath=False): - self.appname = appname - self.appauthor = appauthor - self.version = version - self.roaming = roaming - self.multipath = multipath - - @property - def user_data_dir(self): - return user_data_dir(self.appname, self.appauthor, - version=self.version, roaming=self.roaming) - - @property - def site_data_dir(self): - return site_data_dir(self.appname, self.appauthor, - version=self.version, multipath=self.multipath) - - @property - def user_config_dir(self): - return user_config_dir(self.appname, self.appauthor, - version=self.version, roaming=self.roaming) - - @property - def site_config_dir(self): - return site_config_dir(self.appname, self.appauthor, - version=self.version, multipath=self.multipath) - - @property - def user_cache_dir(self): - return user_cache_dir(self.appname, self.appauthor, - version=self.version) - - @property - def user_log_dir(self): - return user_log_dir(self.appname, self.appauthor, - version=self.version) - - -#---- internal support stuff - -def _get_win_folder_from_registry(csidl_name): - """This is a fallback technique at best. I'm not sure if using the - registry for this guarantees us the correct answer for all CSIDL_* - names. 
- """ - if PY3: - import winreg as _winreg - else: - import _winreg - - shell_folder_name = { - "CSIDL_APPDATA": "AppData", - "CSIDL_COMMON_APPDATA": "Common AppData", - "CSIDL_LOCAL_APPDATA": "Local AppData", - }[csidl_name] - - key = _winreg.OpenKey( - _winreg.HKEY_CURRENT_USER, - r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders" - ) - dir, type = _winreg.QueryValueEx(key, shell_folder_name) - return dir - - -def _get_win_folder_with_pywin32(csidl_name): - from win32com.shell import shellcon, shell - dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0) - # Try to make this a unicode path because SHGetFolderPath does - # not return unicode strings when there is unicode data in the - # path. - try: - dir = unicode(dir) - - # Downgrade to short path name if have highbit chars. See - # . - has_high_char = False - for c in dir: - if ord(c) > 255: - has_high_char = True - break - if has_high_char: - try: - import win32api - dir = win32api.GetShortPathName(dir) - except ImportError: - pass - except UnicodeError: - pass - return dir - - -def _get_win_folder_with_ctypes(csidl_name): - import ctypes - - csidl_const = { - "CSIDL_APPDATA": 26, - "CSIDL_COMMON_APPDATA": 35, - "CSIDL_LOCAL_APPDATA": 28, - }[csidl_name] - - buf = ctypes.create_unicode_buffer(1024) - ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf) - - # Downgrade to short path name if have highbit chars. See - # . 
- has_high_char = False - for c in buf: - if ord(c) > 255: - has_high_char = True - break - if has_high_char: - buf2 = ctypes.create_unicode_buffer(1024) - if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024): - buf = buf2 - - return buf.value - -def _get_win_folder_with_jna(csidl_name): - import array - from com.sun import jna - from com.sun.jna.platform import win32 - - buf_size = win32.WinDef.MAX_PATH * 2 - buf = array.zeros('c', buf_size) - shell = win32.Shell32.INSTANCE - shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf) - dir = jna.Native.toString(buf.tostring()).rstrip("\0") - - # Downgrade to short path name if have highbit chars. See - # . - has_high_char = False - for c in dir: - if ord(c) > 255: - has_high_char = True - break - if has_high_char: - buf = array.zeros('c', buf_size) - kernel = win32.Kernel32.INSTANCE - if kernel.GetShortPathName(dir, buf, buf_size): - dir = jna.Native.toString(buf.tostring()).rstrip("\0") - - return dir - -if system == "win32": - try: - import win32com.shell - _get_win_folder = _get_win_folder_with_pywin32 - except ImportError: - try: - from ctypes import windll - _get_win_folder = _get_win_folder_with_ctypes - except ImportError: - try: - import com.sun.jna - _get_win_folder = _get_win_folder_with_jna - except ImportError: - _get_win_folder = _get_win_folder_from_registry - - -#---- self test code - -if __name__ == "__main__": - appname = "MyApp" - appauthor = "MyCompany" - - props = ("user_data_dir", "site_data_dir", - "user_config_dir", "site_config_dir", - "user_cache_dir", "user_log_dir") - - print("-- app dirs %s --" % __version__) - - print("-- app dirs (with optional 'version')") - dirs = AppDirs(appname, appauthor, version="1.0") - for prop in props: - print("%s: %s" % (prop, getattr(dirs, prop))) - - print("\n-- app dirs (without optional 'version')") - dirs = AppDirs(appname, appauthor) - for prop in props: - print("%s: %s" % (prop, 
getattr(dirs, prop))) - - print("\n-- app dirs (without optional 'appauthor')") - dirs = AppDirs(appname) - for prop in props: - print("%s: %s" % (prop, getattr(dirs, prop))) - - print("\n-- app dirs (with disabled 'appauthor')") - dirs = AppDirs(appname, appauthor=False) - for prop in props: - print("%s: %s" % (prop, getattr(dirs, prop))) diff --git a/numba/numba/array_analysis.py b/numba/numba/array_analysis.py deleted file mode 100644 index 21762723c..000000000 --- a/numba/numba/array_analysis.py +++ /dev/null @@ -1,1895 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from __future__ import print_function, division, absolute_import -import types as pytypes # avoid confusion with numba.types -import numpy -from numba import ir, analysis, types, config, cgutils, typing -from numba.ir_utils import ( - mk_unique_var, - replace_vars_inner, - find_topo_order, - dprint_func_ir, - get_global_func_typ, - guard, - require, - get_definition, - find_callname, - find_build_sequence, - find_const, - is_namedtuple_class) -from numba.analysis import (compute_cfg_from_blocks) -from numba.typing import npydecl, signature -import collections -import copy -from numba.extending import intrinsic -import llvmlite.llvmpy.core as lc -import llvmlite - -UNKNOWN_CLASS = -1 -CONST_CLASS = 0 -MAP_TYPES = [numpy.ufunc] - -array_analysis_extensions = {} - -# declaring call classes -array_creation = ['empty', 'zeros', 'ones', 'full'] - -random_int_args = ['rand', 'randn'] - -random_1arg_size = ['ranf', 'random_sample', 'sample', - 'random', 'standard_normal'] - -random_2arg_sizelast = ['chisquare', 'weibull', 'power', - 'geometric', 'exponential', - 'poisson', 'rayleigh'] - -random_3arg_sizelast = ['normal', 'uniform', 'beta', - 'binomial', 'f', 'gamma', - 'lognormal', 'laplace'] - -random_calls = (random_int_args + - random_1arg_size + - random_2arg_sizelast + - random_3arg_sizelast + - ['randint', 'triangular']) - -@intrinsic -def 
wrap_index(typingctx, idx, size): - """ - Calculate index value "idx" relative to a size "size" value as - (idx % size), where "size" is known to be positive. - Note that we use the mod(%) operation here instead of - (idx < 0 ? idx + size : idx) because we may have situations - where idx > size due to the way indices are calculated - during slice/range analysis. - """ - if idx != size: - raise ValueError("Argument types for wrap_index must match") - - def codegen(context, builder, sig, args): - assert(len(args) == 2) - idx = args[0] - size = args[1] - rem = builder.srem(idx, size) - zero = llvmlite.ir.Constant(idx.type, 0) - is_negative = builder.icmp_signed('<', rem, zero) - wrapped_rem = builder.add(rem, size) - is_oversize = builder.icmp_signed('>', wrapped_rem, size) - mod = builder.select(is_negative, wrapped_rem, - builder.select(is_oversize, rem, wrapped_rem)) - return mod - - return signature(idx, idx, size), codegen - -@intrinsic -def assert_equiv(typingctx, *val): - """ - A function that asserts the inputs are of equivalent size, - and throws runtime error when they are not. The input is - a vararg that contains an error message, followed by a set - of objects of either array, tuple or integer. - """ - if len(val) > 1: - # Make sure argument is a single tuple type. Note that this only - # happens when IR containing assert_equiv call is being compiled - # (and going through type inference) again. 
- val = (types.Tuple(val),) - - assert(len(val[0]) > 1) - # Arguments must be either array, tuple, or integer - assert all(map(lambda a: (isinstance(a, types.ArrayCompatible) or - isinstance(a, types.BaseTuple) or - isinstance(a, types.SliceType) or - isinstance(a, types.Integer)), val[0][1:])) - - def codegen(context, builder, sig, args): - assert(len(args) == 1) # it is a vararg tuple - tup = cgutils.unpack_tuple(builder, args[0]) - tup_type = sig.args[0] - msg = sig.args[0][0].value - - def unpack_shapes(a, aty): - if isinstance(aty, types.ArrayCompatible): - ary = context.make_array(aty)(context, builder, a) - return cgutils.unpack_tuple(builder, ary.shape) - elif isinstance(aty, types.BaseTuple): - return cgutils.unpack_tuple(builder, a) - else: # otherwise it is a single integer - return [a] - - def pairwise(a, aty, b, bty): - ashapes = unpack_shapes(a, aty) - bshapes = unpack_shapes(b, bty) - assert len(ashapes) == len(bshapes) - for (m, n) in zip(ashapes, bshapes): - m_eq_n = builder.icmp(lc.ICMP_EQ, m, n) - with builder.if_else(m_eq_n) as (then, orelse): - with then: - pass - with orelse: - context.call_conv.return_user_exc( - builder, AssertionError, (msg,)) - - for i in range(1, len(tup_type) - 1): - pairwise(tup[i], tup_type[i], tup[i + 1], tup_type[i + 1]) - r = context.get_constant_generic(builder, types.NoneType, None) - return r - return signature(types.none, *val), codegen - - -class EquivSet(object): - - """EquivSet keeps track of equivalence relations between - a set of objects. - """ - - def __init__(self, obj_to_ind=None, ind_to_obj=None, next_ind=0): - """Create a new EquivSet object. Optional keyword arguments are for - internal use only. - """ - # obj_to_ind maps object to equivalence index (sometimes also called - # equivalence class) is a non-nagative number that uniquely identifies - # a set of objects that are equivalent. - self.obj_to_ind = obj_to_ind if obj_to_ind else {} - # ind_to_obj maps equivalence index to a list of objects. 
- self.ind_to_obj = ind_to_obj if ind_to_obj else {} - # next index number that is incremented each time a new equivalence - # relation is created. - self.next_ind = next_ind - - def empty(self): - """Return an empty EquivSet object. - """ - return EquivSet() - - def clone(self): - """Return a new copy. - """ - return EquivSet(obj_to_ind=copy.deepcopy(self.obj_to_ind), - ind_to_obj=copy.deepcopy(self.ind_to_obj), - next_id=self.next_ind) - - def __repr__(self): - return "EquivSet({})".format(self.ind_to_obj) - - def is_empty(self): - """Return true if the set is empty, or false otherwise. - """ - return self.obj_to_ind == {} - - def _get_ind(self, x): - """Return the internal index (greater or equal to 0) of the given - object, or -1 if not found. - """ - return self.obj_to_ind.get(x, -1) - - def _get_or_add_ind(self, x): - """Return the internal index (greater or equal to 0) of the given - object, or create a new one if not found. - """ - if x in self.obj_to_ind: - i = self.obj_to_ind[x] - else: - i = self.next_ind - self.next_ind += 1 - return i - - def _insert(self, objs): - """Base method that inserts a set of equivalent objects by modifying - self. - """ - assert len(objs) > 1 - - inds = tuple(self._get_or_add_ind(x) for x in objs) - ind = min(inds) - - if not (ind in self.ind_to_obj): - self.ind_to_obj[ind] = [] - - for i, obj in zip(inds, objs): - if i == ind: - if not (obj in self.ind_to_obj[ind]): - self.ind_to_obj[ind].append(obj) - self.obj_to_ind[obj] = ind - else: - if i in self.ind_to_obj: - # those already existing are reassigned - for x in self.ind_to_obj[i]: - self.obj_to_ind[x] = ind - self.ind_to_obj[ind].append(x) - del self.ind_to_obj[i] - else: - # those that are new are assigned. - self.obj_to_ind[obj] = ind - self.ind_to_obj[ind].append(obj) - - def is_equiv(self, *objs): - """Try to derive if given objects are equivalent, return true - if so, or false otherwise. 
- """ - inds = [self._get_ind(x) for x in objs] - ind = max(inds) - if ind != -1: - return all(i == ind for i in inds) - else: - return all([x == objs[0] for x in objs]) - - def get_equiv_const(self, obj): - """Check if obj is equivalent to some int constant, and return - the constant if found, or None otherwise. - """ - ind = self._get_ind(obj) - if ind >= 0: - objs = self.ind_to_obj[ind] - for x in objs: - if isinstance(x, int): - return x - return None - - def get_equiv_set(self, obj): - """Return the set of equivalent objects. - """ - ind = self._get_ind(obj) - if ind >= 0: - return set(self.ind_to_obj[ind]) - return set() - - def insert_equiv(self, *objs): - """Insert a set of equivalent objects by modifying self. This - method can be overloaded to transform object type before insertion. - """ - self._insert(objs) - - def intersect(self, equiv_set): - """ Return the intersection of self and the given equiv_set, - without modifying either of them. The result will also keep - old equivalence indices unchanged. - """ - new_set = self.empty() - new_set.next_ind = self.next_ind - - for objs in equiv_set.ind_to_obj.values(): - inds = tuple(self._get_ind(x) for x in objs) - ind_to_obj = {} - - for i, x in zip(inds, objs): - if i in ind_to_obj: - ind_to_obj[i].append(x) - elif i >= 0: - ind_to_obj[i] = [x] - - for v in ind_to_obj.values(): - if len(v) > 1: - new_set._insert(v) - - return new_set - - -class ShapeEquivSet(EquivSet): - - """Just like EquivSet, except that it accepts only numba IR variables - and constants as objects, guided by their types. Arrays are considered - equivalent as long as their shapes are equivalent. Scalars are - equivalent only when they are equal in value. Tuples are equivalent - when they are of the same size, and their elements are equivalent. 
- """ - - def __init__(self, typemap, defs=None, ind_to_var=None, - obj_to_ind=None, ind_to_obj=None, next_id=0): - """Create a new ShapeEquivSet object, where typemap is a dictionary - that maps variable names to their types, and it will not be modified. - Optional keyword arguments are for internal use only. - """ - self.typemap = typemap - # defs maps variable name to an int, where - # 1 means the variable is defined only once, and numbers greater - # than 1 means defined more than onces. - self.defs = defs if defs else {} - # ind_to_var maps index number to a list of variables (of ir.Var type). - # It is used to retrieve defined shape variables given an equivalence - # index. - self.ind_to_var = ind_to_var if ind_to_var else {} - - super(ShapeEquivSet, self).__init__(obj_to_ind, ind_to_obj, next_id) - - def empty(self): - """Return an empty ShapeEquivSet. - """ - return ShapeEquivSet(self.typemap, {}) - - def clone(self): - """Return a new copy. - """ - return ShapeEquivSet( - self.typemap, - defs=copy.copy(self.defs), - ind_to_var=copy.copy(self.ind_to_var), - obj_to_ind=copy.deepcopy(self.obj_to_ind), - ind_to_obj=copy.deepcopy(self.ind_to_obj), - next_id=self.next_ind) - - def __repr__(self): - return "ShapeEquivSet({}, ind_to_var={})".format( - self.ind_to_obj, self.ind_to_var) - - def _get_names(self, obj): - """Return a set of names for the given obj, where array and tuples - are broken down to their individual shapes or elements. This is - safe because both Numba array shapes and Python tuples are immutable. 
- """ - if isinstance(obj, ir.Var) or isinstance(obj, str): - name = obj if isinstance(obj, str) else obj.name - typ = self.typemap[name] - if (isinstance(typ, types.BaseTuple) or - isinstance(typ, types.ArrayCompatible)): - ndim = (typ.ndim if isinstance(typ, types.ArrayCompatible) - else len(typ)) - if ndim == 0: - return () - else: - return tuple("{}#{}".format(name, i) for i in range(ndim)) - else: - return (name,) - elif isinstance(obj, ir.Const): - if isinstance(obj.value, tuple): - return obj.value - else: - return (obj.value,) - elif isinstance(obj, tuple): - return tuple(self._get_names(x)[0] for x in obj) - elif isinstance(obj, int): - return (obj,) - else: - raise NotImplementedError( - "ShapeEquivSet does not support {}".format(obj)) - - def is_equiv(self, *objs): - """Overload EquivSet.is_equiv to handle Numba IR variables and - constants. - """ - assert(len(objs) > 1) - obj_names = [self._get_names(x) for x in objs] - obj_names = [x for x in obj_names if x != ()] # rule out 0d shape - if len(obj_names) <= 1: - return False; - ndims = [len(names) for names in obj_names] - ndim = ndims[0] - if not all(ndim == x for x in ndims): - if config.DEBUG_ARRAY_OPT == 1: - print("is_equiv: Dimension mismatch for {}".format(objs)) - return False - for i in range(ndim): - names = [obj_name[i] for obj_name in obj_names] - if not super(ShapeEquivSet, self).is_equiv(*names): - return False - return True - - def get_equiv_const(self, obj): - """If the given object is equivalent to a constant scalar, - return the scalar value, or None otherwise. - """ - names = self._get_names(obj) - if len(names) > 1: - return None - return super(ShapeEquivSet, self).get_equiv_const(names[0]) - - def get_equiv_var(self, obj): - """If the given object is equivalent to some defined variable, - return the variable, or None otherwise. 
- """ - names = self._get_names(obj) - if len(names) != 1: - return None - ind = self._get_ind(names[0]) - vs = self.ind_to_var.get(ind, []) - return vs[0] if vs != [] else None - - def get_equiv_set(self, obj): - """Return the set of equivalent objects. - """ - names = self._get_names(obj) - if len(names) > 1: - return None - return super(ShapeEquivSet, self).get_equiv_set(names[0]) - - def _insert(self, objs): - """Overload EquivSet._insert to manage ind_to_var dictionary. - """ - inds = [] - for obj in objs: - if obj in self.obj_to_ind: - inds.append(self.obj_to_ind[obj]) - varlist = [] - names = set() - for i in sorted(inds): - for x in self.ind_to_var[i]: - if not (x.name in names): - varlist.append(x) - names.add(x.name) - super(ShapeEquivSet, self)._insert(objs) - new_ind = self.obj_to_ind[objs[0]] - for i in set(inds): - del self.ind_to_var[i] - self.ind_to_var[new_ind] = varlist - - def insert_equiv(self, *objs): - """Overload EquivSet.insert_equiv to handle Numba IR variables and - constants. Input objs are either variable or constant, and at least - one of them must be variable. 
- """ - assert(len(objs) > 1) - obj_names = [self._get_names(x) for x in objs] - obj_names = [x for x in obj_names if x != ()] # rule out 0d shape - if len(obj_names) <= 1: - return; - names = sum([list(x) for x in obj_names], []) - ndims = [len(x) for x in obj_names] - ndim = ndims[0] - assert all(ndim == x for x in ndims), ( - "Dimension mismatch for {}".format(objs)) - varlist = [] - for obj in objs: - if not isinstance(obj, tuple): - obj = (obj,) - for var in obj: - if isinstance(var, ir.Var) and not (var.name in varlist): - # favor those already defined, move to front of varlist - if var.name in self.defs: - varlist.insert(0, var) - else: - varlist.append(var) - # try to populate ind_to_var if variables are present - for obj in varlist: - name = obj.name - if name in names and not (name in self.obj_to_ind): - self.ind_to_obj[self.next_ind] = [name] - self.obj_to_ind[name] = self.next_ind - self.ind_to_var[self.next_ind] = [obj] - self.next_ind += 1 - for i in range(ndim): - names = [obj_name[i] for obj_name in obj_names] - super(ShapeEquivSet, self).insert_equiv(*names) - - def has_shape(self, name): - """Return true if the shape of the given variable is available. - """ - return self.get_shape(name) != None - - def get_shape(self, name): - """Return a tuple of variables that corresponds to the shape - of the given array, or None if not found. - """ - return guard(self._get_shape, name) - - def _get_shape(self, name): - """Return a tuple of variables that corresponds to the shape - of the given array, or raise GuardException if not found. - """ - inds = self.get_shape_classes(name) - require (inds != ()) - shape = [] - for i in inds: - require(i in self.ind_to_var) - vs = self.ind_to_var[i] - assert(vs != []) - shape.append(vs[0]) - return tuple(shape) - - def get_shape_classes(self, name): - """Instead of the shape tuple, return tuple of int, where - each int is the corresponding class index of the size object. - Unknown shapes are given class index -1. 
Return empty tuple - if the input name is a scalar variable. - """ - if isinstance(name, ir.Var): - name = name.name - typ = self.typemap[name] if name in self.typemap else None - if not (isinstance(typ, types.BaseTuple) or - isinstance(typ, types.SliceType) or - isinstance(typ, types.ArrayCompatible)): - return [] - names = self._get_names(name) - inds = tuple(self._get_ind(name) for name in names) - return inds - - def intersect(self, equiv_set): - """Overload the intersect method to handle ind_to_var. - """ - newset = super(ShapeEquivSet, self).intersect(equiv_set) - ind_to_var = {} - for i, objs in newset.ind_to_obj.items(): - assert(len(objs) > 0) - obj = objs[0] - assert(obj in self.obj_to_ind) - assert(obj in equiv_set.obj_to_ind) - j = self.obj_to_ind[obj] - k = equiv_set.obj_to_ind[obj] - assert(j in self.ind_to_var) - assert(k in equiv_set.ind_to_var) - varlist = [] - names = [x.name for x in equiv_set.ind_to_var[k]] - for x in self.ind_to_var[j]: - if x.name in names: - varlist.append(x) - assert(len(varlist) > 0) - ind_to_var[i] = varlist - newset.ind_to_var = ind_to_var - return newset - - def define(self, name): - """Increment the internal count of how many times a variable is being - defined. Most variables in Numba IR are SSA, i.e., defined only once, - but not all of them. When a variable is being re-defined, it must - be removed from the equivalence relation. - """ - if isinstance(name, ir.Var): - name = name.name - if name in self.defs: - self.defs[name] += 1 - # NOTE: variable being redefined, must invalidate previous - # equivalences. Believe it is a rare case, and only happens to - # scalar accumuators. 
- if name in self.obj_to_ind: - i = self.obj_to_ind[name] - del self.obj_to_ind[name] - self.ind_to_obj[i].remove(name) - if self.ind_to_obj[i] == []: - del self.ind_to_obj[i] - assert(i in self.ind_to_var) - names = [x.name for x in self.ind_to_var[i]] - if name in names: - j = names.index(name) - del self.ind_to_var[i][j] - if self.ind_to_var[i] == []: - del self.ind_to_var[i] - # no more size variables, remove equivalence too - if i in self.ind_to_obj: - for obj in self.ind_to_obj[i]: - del self.obj_to_ind[obj] - del self.ind_to_obj[i] - else: - self.defs[name] = 1 - - def union_defs(self, defs): - """Union with the given defs dictionary. This is meant to handle - branch join-point, where a variable may have been defined in more - than one branches. - """ - for k, v in defs.items(): - if v > 0: - self.define(k) - -class SymbolicEquivSet(ShapeEquivSet): - - """Just like ShapeEquivSet, except that it also reasons about variable - equivalence symbolically by using their arithmetic definitions. - The goal is to automatically derive the equivalence of array ranges - (slicing). For instance, a[1:m] and a[0:m-1] shall be considered - size-equivalence. - """ - - def __init__(self, typemap, def_by=None, ref_by=None, ext_shapes=None, - defs=None, ind_to_var=None, obj_to_ind=None, - ind_to_obj=None, next_id=0): - """Create a new SymbolicEquivSet object, where typemap is a dictionary - that maps variable names to their types, and it will not be modified. - Optional keyword arguments are for internal use only. - """ - # A "defined-by" table that maps A to a tuple of (B, i), which - # means A is defined as: A = B + i, where A,B are variable names, - # and i is an integer constants. - self.def_by = def_by if def_by else {} - # A "refered-by" table that maps A to a list of [(B, i), (C, j) ...], - # which implies a sequence of definitions: B = A - i, C = A - j, and - # so on, where A,B,C,... are variable names, and i,j,... are - # integer constants. 
- self.ref_by = ref_by if ref_by else {} - # A extended shape table that can map an arbitrary object to a shape, - # currently used to remember shapes for SetItem IR node, and wrapped - # indices for Slice objects. - self.ext_shapes = ext_shapes if ext_shapes else {} - super(SymbolicEquivSet, self).__init__( - typemap, defs, ind_to_var, obj_to_ind, ind_to_obj, next_id) - - def empty(self): - """Return an empty SymbolicEquivSet. - """ - return SymbolicEquivSet(self.typemap) - - def __repr__(self): - return ("SymbolicEquivSet({}, ind_to_var={}, def_by={}, " - "ref_by={}, ext_shapes={})".format(self.ind_to_obj, - self.ind_to_var, self.def_by, self.ref_by, self.ext_shapes)) - - def clone(self): - """Return a new copy. - """ - return SymbolicEquivSet( - self.typemap, - def_by=copy.copy(self.def_by), - ref_by=copy.copy(self.ref_by), - ext_shapes=copy.copy(self.ext_shapes), - defs=copy.copy(self.defs), - ind_to_var=copy.copy(self.ind_to_var), - obj_to_ind=copy.deepcopy(self.obj_to_ind), - ind_to_obj=copy.deepcopy(self.ind_to_obj), - next_id=self.next_ind) - - def get_rel(self, name): - """Retrieve a definition pair for the given variable, - or return None if it is not available. - """ - return guard(self._get_or_set_rel, name) - - def _get_or_set_rel(self, name, func_ir=None): - """Retrieve a definition pair for the given variable, - and if it is not already available, try to look it up - in the given func_ir, and remember it for future use. 
- """ - if isinstance(name, ir.Var): - name = name.name - require(self.defs.get(name, 0) == 1) - if name in self.def_by: - return self.def_by[name] - else: - require(func_ir != None) - def plus(x, y): - x_is_const = isinstance(x, int) - y_is_const = isinstance(y, int) - if x_is_const: - if y_is_const: - return x + y - else: - (var, offset) = y - return (var, x + offset) - else: - (var, offset) = x - if y_is_const: - return (var, y + offset) - else: - return None - def minus(x, y): - if isinstance(y, int): - return plus(x, -y) - elif (isinstance(x, tuple) and isinstance(y, tuple) and - x[0] == y[0]): - return minus(x[1], y[1]) - else: - return None - expr = get_definition(func_ir, name) - value = (name, 0) # default to its own name - if isinstance(expr, ir.Expr): - if expr.op == 'call': - fname, mod_name = find_callname( - func_ir, expr, typemap=self.typemap) - if fname == 'wrap_index' and mod_name == 'numba.array_analysis': - index = tuple(self.obj_to_ind.get(x.name, -1) - for x in expr.args) - if -1 in index: - return None - names = self.ext_shapes.get(index, []) - names.append(name) - if len(names) > 0: - self._insert(names) - self.ext_shapes[index] = names - elif expr.op == 'binop': - lhs = self._get_or_set_rel(expr.lhs, func_ir) - rhs = self._get_or_set_rel(expr.rhs, func_ir) - if expr.fn == '+': - value = plus(lhs, rhs) - elif expr.fn == '-': - value = minus(lhs, rhs) - elif isinstance(expr, ir.Const) and isinstance(expr.value, int): - value = expr.value - require(value != None) - # update def_by table - self.def_by[name] = value - if isinstance(value, int) or (isinstance(value, tuple) and - (value[0] != name or value[1] != 0)): - # update ref_by table too - if isinstance(value, tuple): - (var, offset) = value - if not (var in self.ref_by): - self.ref_by[var] = [] - self.ref_by[var].append((name, -offset)) - # insert new equivalence if found - ind = self._get_ind(var) - if ind >= 0: - objs = self.ind_to_obj[ind] - names = [] - for obj in objs: - if obj in 
self.ref_by: - names += [ x for (x, i) in self.ref_by[obj] - if i == -offset ] - if len(names) > 1: - super(SymbolicEquivSet, self)._insert(names) - return value - - def define(self, var, func_ir=None, typ=None): - """Besides incrementing the definition count of the given variable - name, it will also retrieve and simplify its definition from func_ir, - and remember the result for later equivalence comparison. Supported - operations are: - 1. arithmetic plus and minus with constants - 2. wrap_index (relative to some given size) - """ - if isinstance(var, ir.Var): - name = var.name - else: - name = var - super(SymbolicEquivSet, self).define(name) - if (func_ir and self.defs.get(name, 0) == 1 and - isinstance(typ, types.Number)): - value = guard(self._get_or_set_rel, name, func_ir) - # turn constant definition into equivalence - if isinstance(value, int): - self._insert([name, value]) - if isinstance(var, ir.Var): - ind = self._get_or_add_ind(name) - if not (ind in self.ind_to_obj): - self.ind_to_obj[ind] = [name] - self.obj_to_ind[name] = ind - if ind in self.ind_to_var: - self.ind_to_var[ind].append(var) - else: - self.ind_to_var[ind] = [var] - - def _insert(self, objs): - """Overload _insert method to handle ind changes between relative - objects. 
- """ - indset = set() - uniqs = set() - for obj in objs: - ind = self._get_ind(obj) - if ind == -1: - uniqs.add(obj) - elif not (ind in indset): - uniqs.add(obj) - indset.add(ind) - if len(uniqs) <= 1: - return - uniqs = list(uniqs) - super(SymbolicEquivSet, self)._insert(uniqs) - objs = self.ind_to_obj[self._get_ind(uniqs[0])] - - # New equivalence guided by def_by and ref_by - offset_dict = {} - def get_or_set(d, k): - if k in d: - v = d[k] - else: - v = [] - d[k] = v - return v - for obj in objs: - if obj in self.def_by: - value = self.def_by[obj] - if isinstance(value, tuple): - (name, offset) = value - get_or_set(offset_dict, -offset).append(name) - if name in self.ref_by: # relative to name - for (v, i) in self.ref_by[name]: - get_or_set(offset_dict, -(offset+i)).append(v) - if obj in self.ref_by: - for (name, offset) in self.ref_by[obj]: - get_or_set(offset_dict, offset).append(name) - for names in offset_dict.values(): - self._insert(names) - - def set_shape(self, obj, shape): - """Overload set_shape to remember shapes of SetItem IR nodes. - """ - if isinstance(obj, ir.StaticSetItem) or isinstance(obj, ir.SetItem): - self.ext_shapes[obj] = shape - else: - assert(isinstance(obj, ir.Var)) - typ = self.typemap[obj.name] - super(SymbolicEquivSet, self).set_shape(obj, shape) - - def _get_shape(self, obj): - """Overload _get_shape to retrieve the shape of SetItem IR nodes. - """ - if isinstance(obj, ir.StaticSetItem) or isinstance(obj, ir.SetItem): - require(obj in self.ext_shapes) - return self.ext_shapes[obj] - else: - assert(isinstance(obj, ir.Var)) - typ = self.typemap[obj.name] - # for slice type, return the shape variable itself - if isinstance(typ, types.SliceType): - return (obj,) - else: - return super(SymbolicEquivSet, self)._get_shape(obj) - -class ArrayAnalysis(object): - - """Analyzes Numpy array computations for properties such as - shape/size equivalence, and keeps track of them on a per-block - basis. 
The analysis should only be run once because it modifies - the incoming IR by inserting assertion statements that safeguard - parfor optimizations. - """ - - def __init__(self, context, func_ir, typemap, calltypes): - self.context = context - self.func_ir = func_ir - self.typemap = typemap - self.calltypes = calltypes - - # EquivSet of variables, indexed by block number - self.equiv_sets = {} - # keep attr calls to arrays like t=A.sum() as {t:('sum',A)} - self.array_attr_calls = {} - # keep prepended instructions from conditional branch - self.prepends = {} - # keep track of pruned precessors when branch degenerates to jump - self.pruned_predecessors = {} - - def get_equiv_set(self, block_label): - """Return the equiv_set object of an block given its label. - """ - return self.equiv_sets[block_label] - - def run(self, blocks=None, equiv_set=None): - """run array shape analysis on the given IR blocks, resulting in - modified IR and finalized EquivSet for each block. - """ - if blocks == None: - blocks = self.func_ir.blocks - - if equiv_set == None: - init_equiv_set = SymbolicEquivSet(self.typemap) - else: - init_equiv_set = equiv_set - - dprint_func_ir(self.func_ir, "before array analysis", blocks) - - if config.DEBUG_ARRAY_OPT == 1: - print("variable types: ", sorted(self.typemap.items())) - print("call types: ", self.calltypes) - - cfg = compute_cfg_from_blocks(blocks) - topo_order = find_topo_order(blocks, cfg=cfg) - # Traverse blocks in topological order - for label in topo_order: - block = blocks[label] - scope = block.scope - new_body = [] - equiv_set = None - - # equiv_set is the intersection of predecessors - preds = cfg.predecessors(label) - # some incoming edge may be pruned due to prior analysis - if label in self.pruned_predecessors: - pruned = self.pruned_predecessors[label] - else: - pruned = [] - # Go through each incoming edge, process prepended instructions and - # calculate beginning equiv_set of current block as an intersection - # of incoming 
ones. - for (p, q) in preds: - if p in pruned: - continue - if p in self.equiv_sets: - from_set = self.equiv_sets[p].clone() - if (p, label) in self.prepends: - instrs = self.prepends[(p, label)] - for inst in instrs: - self._analyze_inst(label, scope, from_set, inst) - if equiv_set == None: - equiv_set = from_set - else: - equiv_set = equiv_set.intersect(from_set) - equiv_set.union_defs(from_set.defs) - - # Start with a new equiv_set if none is computed - if equiv_set == None: - equiv_set = init_equiv_set - self.equiv_sets[label] = equiv_set - # Go through instructions in a block, and insert pre/post - # instructions as we analyze them. - for inst in block.body: - pre, post = self._analyze_inst(label, scope, equiv_set, inst) - for instr in pre: - new_body.append(instr) - new_body.append(inst) - for instr in post: - new_body.append(instr) - block.body = new_body - - if config.DEBUG_ARRAY_OPT == 1: - self.dump() - - dprint_func_ir(self.func_ir, "after array analysis", blocks) - - def dump(self): - """dump per-block equivalence sets for debugging purposes. 
- """ - print("Array Analysis: ", self.equiv_sets) - - def _define(self, equiv_set, var, typ, value): - self.typemap[var.name] = typ - self.func_ir._definitions[var.name] = [value] - equiv_set.define(var, self.func_ir, typ) - - def _analyze_inst(self, label, scope, equiv_set, inst): - pre = [] - post = [] - if isinstance(inst, ir.Assign): - lhs = inst.target - typ = self.typemap[lhs.name] - shape = None - if isinstance(typ, types.ArrayCompatible) and typ.ndim == 0: - shape = () - elif isinstance(inst.value, ir.Expr): - result = self._analyze_expr(scope, equiv_set, inst.value) - if result: - shape = result[0] - pre = result[1] - if len(result) > 2: - rhs = result[2] - inst.value = rhs - elif (isinstance(inst.value, ir.Var) or - isinstance(inst.value, ir.Const)): - shape = inst.value - - if isinstance(shape, ir.Const): - if isinstance(shape.value, tuple): - loc = shape.loc - shape = tuple(ir.Const(x, loc) for x in shape.value) - elif isinstance(shape.value, int): - shape = (shape,) - else: - shape = None - elif (isinstance(shape, ir.Var) and - isinstance(self.typemap[shape.name], types.Integer)): - shape = (shape,) - - if isinstance(typ, types.ArrayCompatible): - if (shape == None or isinstance(shape, tuple) or - (isinstance(shape, ir.Var) and - not equiv_set.has_shape(shape))): - (shape, post) = self._gen_shape_call(equiv_set, lhs, - typ.ndim, shape) - elif isinstance(typ, types.UniTuple): - if shape and isinstance(typ.dtype, types.Integer): - (shape, post) = self._gen_shape_call(equiv_set, lhs, - len(typ), shape) - - if shape != None: - if isinstance(typ, types.SliceType): - equiv_set.set_shape(lhs, shape) - else: - equiv_set.insert_equiv(lhs, shape) - equiv_set.define(lhs, self.func_ir, typ) - elif isinstance(inst, ir.StaticSetItem) or isinstance(inst, ir.SetItem): - index = inst.index if isinstance(inst, ir.SetItem) else inst.index_var - result = guard(self._index_to_shape, - scope, equiv_set, inst.target, index) - if not result: - return [], [] - (target_shape, 
pre) = result - value_shape = equiv_set.get_shape(inst.value) - if value_shape is (): # constant - equiv_set.set_shape(inst, target_shape) - return pre, [] - elif value_shape != None: - target_typ = self.typemap[inst.target.name] - require(isinstance(target_typ, types.ArrayCompatible)) - target_ndim = target_typ.ndim - shapes = [target_shape, value_shape] - names = [inst.target.name, inst.value.name] - shape, asserts = self._broadcast_assert_shapes( - scope, equiv_set, inst.loc, shapes, names) - n = len(shape) - # shape dimension must be within target dimension - assert(target_ndim >= n) - equiv_set.set_shape(inst, shape) - return pre + asserts, [] - else: - return pre, [] - elif isinstance(inst, ir.Branch): - cond_var = inst.cond - cond_def = guard(get_definition, self.func_ir, cond_var) - if not cond_def: # phi variable has no single definition - # We'll use equiv_set to try to find a cond_def instead - equivs = equiv_set.get_equiv_set(cond_var) - defs = [] - for name in equivs: - if isinstance(name, str) and name in self.typemap: - var_def = guard(get_definition, self.func_ir, name, - lhs_only=True) - if isinstance(var_def, ir.Var): - var_def = var_def.name - if var_def: - defs.append(var_def) - else: - defs.append(name) - defvars = set(filter(lambda x: isinstance(x, str), defs)) - defconsts = set(defs).difference(defvars) - if len(defconsts) == 1: - cond_def = list(defconsts)[0] - elif len(defvars) == 1: - cond_def = guard(get_definition, self.func_ir, - list(defvars)[0]) - if isinstance(cond_def, ir.Expr) and cond_def.op == 'binop': - br = None - if cond_def.fn == '==': - br = inst.truebr - otherbr = inst.falsebr - cond_val = 1 - elif cond_def.fn == '!=': - br = inst.falsebr - otherbr = inst.truebr - cond_val = 0 - lhs_typ = self.typemap[cond_def.lhs.name] - rhs_typ = self.typemap[cond_def.rhs.name] - if (br != None and - ((isinstance(lhs_typ, types.Integer) and - isinstance(rhs_typ, types.Integer)) or - (isinstance(lhs_typ, types.BaseTuple) and - 
isinstance(rhs_typ, types.BaseTuple)))): - loc = inst.loc - args = (cond_def.lhs, cond_def.rhs) - asserts = self._make_assert_equiv( - scope, loc, equiv_set, args) - asserts.append( - ir.Assign(ir.Const(cond_val, loc), cond_var, loc)) - self.prepends[(label, br)] = asserts - self.prepends[(label, otherbr)] = [ - ir.Assign(ir.Const(1 - cond_val, loc), cond_var, loc)] - else: - if isinstance(cond_def, ir.Const): - cond_def = cond_def.value - if isinstance(cond_def, int) or isinstance(cond_def, bool): - # condition is always true/false, prune the outgoing edge - pruned_br = inst.falsebr if cond_def else inst.truebr - if pruned_br in self.pruned_predecessors: - self.pruned_predecessors[pruned_br].append(label) - else: - self.pruned_predecessors[pruned_br] = [label] - - elif type(inst) in array_analysis_extensions: - # let external calls handle stmt if type matches - f = array_analysis_extensions[type(inst)] - pre, post = f(inst, equiv_set, self.typemap, self) - - return pre, post - - def _analyze_expr(self, scope, equiv_set, expr): - fname = "_analyze_op_{}".format(expr.op) - try: - fn = getattr(self, fname) - except AttributeError: - return None - return guard(fn, scope, equiv_set, expr) - - def _analyze_op_getattr(self, scope, equiv_set, expr): - # TODO: getattr of npytypes.Record - if expr.attr == 'T': - return self._analyze_op_call_numpy_transpose(scope, equiv_set, [expr.value], {}) - elif expr.attr == 'shape': - shape = equiv_set.get_shape(expr.value) - return shape, [] - return None - - def _analyze_op_cast(self, scope, equiv_set, expr): - return expr.value, [] - - def _analyze_op_exhaust_iter(self, scope, equiv_set, expr): - var = expr.value - typ = self.typemap[var.name] - if isinstance(typ, types.BaseTuple): - require(len(typ) == expr.count) - require(equiv_set.has_shape(var)) - return var, [] - return None - - def _index_to_shape(self, scope, equiv_set, var, ind_var): - """For indexing like var[index] (either write or read), see if - the index corresponds to 
a range/slice shape. Return the shape - (and prepending instructions) if so, or raise GuardException - otherwise. - """ - typ = self.typemap[var.name] - require(isinstance(typ, types.ArrayCompatible)) - ind_typ = self.typemap[ind_var.name] - ind_shape = equiv_set._get_shape(ind_var) - var_shape = equiv_set._get_shape(var) - if isinstance(ind_typ, types.SliceType): - seq_typs = (ind_typ,) - else: - require(isinstance(ind_typ, types.BaseTuple)) - seq, op = find_build_sequence(self.func_ir, ind_var) - require(op == 'build_tuple') - seq_typs = tuple(self.typemap[x.name] for x in seq) - require(len(ind_shape)==len(seq_typs)==len(var_shape)) - stmts = [] - - def slice_size(index, dsize): - """Reason about the size of a slice represented by the "index" - variable, and return a variable that has this size data, or - raise GuardException if it cannot reason about it. - - The computation takes care of negative values used in the slice - with respect to the given dimensional size ("dsize"). - - Extra statments required to produce the result are appended - to parent function's stmts list. 
- """ - loc = index.loc - index_def = get_definition(self.func_ir, index) - fname, mod_name = find_callname( - self.func_ir, index_def, typemap=self.typemap) - require(fname == 'slice' and mod_name in ('__builtin__', 'builtins')) - require(len(index_def.args) == 2) - lhs = index_def.args[0] - rhs = index_def.args[1] - size_typ = self.typemap[dsize.name] - lhs_typ = self.typemap[lhs.name] - rhs_typ = self.typemap[rhs.name] - zero_var = ir.Var(scope, mk_unique_var("zero"), loc) - - if isinstance(lhs_typ, types.NoneType): - zero = ir.Const(0, loc) - stmts.append(ir.Assign(value=zero, target=zero_var, loc=loc)) - self._define(equiv_set, zero_var, size_typ, zero) - lhs = zero_var - lhs_typ = size_typ - - if isinstance(rhs_typ, types.NoneType): - rhs = dsize - rhs_typ = size_typ - - lhs_rel = equiv_set.get_rel(lhs) - rhs_rel = equiv_set.get_rel(rhs) - if (lhs_rel == 0 and isinstance(rhs_rel, tuple) and - equiv_set.is_equiv(dsize, rhs_rel[0]) and - rhs_rel[1] == 0): - return dsize - - size_var = ir.Var(scope, mk_unique_var("slice_size"), loc) - size_val = ir.Expr.binop('-', rhs, lhs, loc=loc) - self.calltypes[size_val] = signature(size_typ, lhs_typ, rhs_typ) - self._define(equiv_set, size_var, size_typ, size_val) - - # short cut size_val to a constant if its relation is known to be - # a constant or its basis matches dsize - size_rel = equiv_set.get_rel(size_var) - if (isinstance(size_rel, int) or (isinstance(size_rel, tuple) and - equiv_set.is_equiv(size_rel[0], dsize.name))): - rel = size_rel if isinstance(size_rel, int) else size_rel[1] - size_val = ir.Const(rel, size_typ) - size_var = ir.Var(scope, mk_unique_var("slice_size"), loc) - self._define(equiv_set, size_var, size_typ, size_val) - - wrap_var = ir.Var(scope, mk_unique_var("wrap"), loc) - wrap_def = ir.Global('wrap_index', wrap_index, loc=loc) - fnty = get_global_func_typ(wrap_index) - sig = self.context.resolve_function_type(fnty, (size_typ, size_typ,), {}) - self._define(equiv_set, wrap_var, fnty, wrap_def) - 
- var = ir.Var(scope, mk_unique_var("var"), loc) - value = ir.Expr.call(wrap_var, [size_var, dsize], {}, loc) - self._define(equiv_set, var, size_typ, value) - self.calltypes[value] = sig - - stmts.append(ir.Assign(value=size_val, target=size_var, loc=loc)) - stmts.append(ir.Assign(value=wrap_def, target=wrap_var, loc=loc)) - stmts.append(ir.Assign(value=value, target=var, loc=loc)) - return var - - def to_shape(typ, index, dsize): - if isinstance(typ, types.SliceType): - return slice_size(index, dsize) - elif isinstance(typ, types.Number): - return None - else: - # unknown dimension size for this index, - # so we'll raise GuardException - require(False) - shape = tuple(to_shape(typ, size, dsize) for - (typ, size, dsize) in zip(seq_typs, ind_shape, var_shape)) - require(not all(x == None for x in shape)) - shape = tuple(x for x in shape if x != None) - return shape, stmts - - def _analyze_op_getitem(self, scope, equiv_set, expr): - return self._index_to_shape(scope, equiv_set, expr.value, expr.index) - - def _analyze_op_static_getitem(self, scope, equiv_set, expr): - var = expr.value - typ = self.typemap[var.name] - if not isinstance(typ, types.BaseTuple): - return self._index_to_shape(scope, equiv_set, expr.value, expr.index_var) - shape = equiv_set._get_shape(var) - require(isinstance(expr.index, int) and expr.index < len(shape)) - return shape[expr.index], [] - - def _analyze_op_unary(self, scope, equiv_set, expr): - require(expr.fn in UNARY_MAP_OP) - # for scalars, only + operator results in equivalence - # for example, if "m = -n", m and n are not equivalent - if self._isarray(expr.value.name) or expr.fn == '+': - return expr.value, [] - return None - - def _analyze_op_binop(self, scope, equiv_set, expr): - require(expr.fn in BINARY_MAP_OP) - return self._analyze_broadcast(scope, equiv_set, expr.loc, [expr.lhs, expr.rhs]) - - def _analyze_op_inplace_binop(self, scope, equiv_set, expr): - require(expr.immutable_fn in BINARY_MAP_OP) - return 
self._analyze_broadcast(scope, equiv_set, expr.loc, [expr.lhs, expr.rhs]) - - def _analyze_op_arrayexpr(self, scope, equiv_set, expr): - return self._analyze_broadcast(scope, equiv_set, expr.loc, expr.list_vars()) - - def _analyze_op_build_tuple(self, scope, equiv_set, expr): - return tuple(expr.items), [] - - def _analyze_op_call(self, scope, equiv_set, expr): - from numba.stencil import StencilFunc - - callee = expr.func - callee_def = get_definition(self.func_ir, callee) - if (isinstance(callee_def, (ir.Global, ir.FreeVar)) - and is_namedtuple_class(callee_def.value)): - return tuple(expr.args), [] - if (isinstance(callee_def, (ir.Global, ir.FreeVar)) - and isinstance(callee_def.value, StencilFunc)): - args = expr.args - return self._analyze_stencil(scope, equiv_set, callee_def.value, - expr.loc, args, dict(expr.kws)) - - fname, mod_name = find_callname( - self.func_ir, expr, typemap=self.typemap) - # call via attribute (i.e. array.func) - if (isinstance(mod_name, ir.Var) - and isinstance(self.typemap[mod_name.name], - types.ArrayCompatible)): - args = [mod_name] + expr.args - mod_name = 'numpy' - else: - args = expr.args - fname = "_analyze_op_call_{}_{}".format( - mod_name, fname).replace('.', '_') - if fname in UFUNC_MAP_OP: # known numpy ufuncs - return self._analyze_broadcast(scope, equiv_set, expr.loc, args) - else: - try: - fn = getattr(self, fname) - except AttributeError: - return None - return guard(fn, scope, equiv_set, args, dict(expr.kws)) - - def _analyze_op_call___builtin___len(self, scope, equiv_set, args, kws): - # python 2 version of len() - return self._analyze_op_call_builtins_len(scope, equiv_set, args, kws) - - def _analyze_op_call_builtins_len(self, scope, equiv_set, args, kws): - # python 3 version of len() - require(len(args) == 1) - var = args[0] - typ = self.typemap[var.name] - require(isinstance(typ, types.ArrayCompatible)) - if typ.ndim == 1: - shape = equiv_set._get_shape(var) - return shape[0], [], shape[0] - return None - - def 
_analyze_op_call_numba_array_analysis_assert_equiv(self, scope, - equiv_set, args, kws): - equiv_set.insert_equiv(*args[1:]) - return None - - def _analyze_numpy_create_array(self, scope, equiv_set, args, kws): - shape_var = None - if len(args) > 0: - shape_var = args[0] - elif 'shape' in kws: - shape_var = kws['shape'] - if shape_var: - return shape_var, [] - raise NotImplementedError("Must specify a shape for array creation") - - def _analyze_op_call_numpy_empty(self, scope, equiv_set, args, kws): - return self._analyze_numpy_create_array(scope, equiv_set, args, kws) - - def _analyze_op_call_numba_unsafe_ndarray_empty_inferred(self, scope, - equiv_set, args, kws): - return self._analyze_numpy_create_array(scope, equiv_set, args, kws) - - def _analyze_op_call_numpy_zeros(self, scope, equiv_set, args, kws): - return self._analyze_numpy_create_array(scope, equiv_set, args, kws) - - def _analyze_op_call_numpy_ones(self, scope, equiv_set, args, kws): - return self._analyze_numpy_create_array(scope, equiv_set, args, kws) - - def _analyze_op_call_numpy_eye(self, scope, equiv_set, args, kws): - if len(args) > 0: - N = args[0] - elif 'N' in kws: - N = kws['N'] - else: - raise NotImplementedError( - "Expect one argument (or 'N') to eye function") - if 'M' in kws: - M = kws['M'] - else: - M = N - return (N, M), [] - - def _analyze_op_call_numpy_identity(self, scope, equiv_set, args, kws): - assert len(args) > 0 - N = args[0] - return (N, N), [] - - def _analyze_op_call_numpy_diag(self, scope, equiv_set, args, kws): - # We can only reason about the output shape when the input is 1D or - # square 2D. 
- assert len(args) > 0 - a = args[0] - assert(isinstance(a, ir.Var)) - atyp = self.typemap[a.name] - if isinstance(atyp, types.ArrayCompatible): - if atyp.ndim == 2: - if 'k' in kws: # will proceed only when k = 0 or absent - k = kws['k'] - if not equiv_set.is_equiv(k, 0): - return None - (m, n) = equiv_set._get_shape(a) - if equiv_set.is_equiv(m, n): - return (m,), [] - elif atyp.ndim == 1: - (m,) = equiv_set._get_shape(a) - return (m, m), [] - return None - - def _analyze_numpy_array_like(self, scope, equiv_set, args, kws): - assert(len(args) > 0) - var = args[0] - typ = self.typemap[var.name] - if isinstance(typ, types.Integer): - return (1,), [] - elif (isinstance(typ, types.ArrayCompatible) and - equiv_set.has_shape(var)): - return var, [] - return None - - def _analyze_op_call_numpy_ravel(self, scope, equiv_set, args, kws): - assert(len(args) == 1) - var = args[0] - typ = self.typemap[var.name] - assert isinstance(typ, types.ArrayCompatible) - # output array is same shape as input if input is 1D - if typ.ndim == 1 and equiv_set.has_shape(var): - if typ.layout == 'C': - # output is the same as input (no copy) for 'C' layout - # optimize out the call - return var, [], var - else: - return var, [] - # TODO: handle multi-D input arrays (calc array size) - return None - - def _analyze_op_call_numpy_copy(self, *args): - return self._analyze_numpy_array_like(*args) - - def _analyze_op_call_numpy_empty_like(self, *args): - return self._analyze_numpy_array_like(*args) - - def _analyze_op_call_numpy_zeros_like(self, *args): - return self._analyze_numpy_array_like(*args) - - def _analyze_op_call_numpy_ones_like(self, *args): - return self._analyze_numpy_array_like(*args) - - def _analyze_op_call_numpy_full_like(self, *args): - return self._analyze_numpy_array_like(*args) - - def _analyze_op_call_numpy_asfortranarray(self, *args): - return self._analyze_numpy_array_like(*args) - - def _analyze_op_call_numpy_reshape(self, scope, equiv_set, args, kws): - n = len(args) - 
assert(n > 1) - if n == 2: - typ = self.typemap[args[1].name] - if isinstance(typ, types.BaseTuple): - return args[1], [] - return tuple(args[1:]), [] - - def _analyze_op_call_numpy_transpose(self, scope, equiv_set, args, kws): - in_arr = args[0] - typ = self.typemap[in_arr.name] - assert isinstance(typ, types.ArrayCompatible), \ - "Invalid np.transpose argument" - shape = equiv_set._get_shape(in_arr) - if len(args) == 1: - return tuple(reversed(shape)), [] - axes = [guard(find_const, self.func_ir, a) for a in args[1:]] - if isinstance(axes[0], tuple): - axes = list(axes[0]) - if None in axes: - return None - ret = [shape[i] for i in axes] - return tuple(ret), [] - - def _analyze_op_call_numpy_random_rand(self, scope, equiv_set, args, kws): - if len(args) > 0: - return tuple(args), [] - return None - - def _analyze_op_call_numpy_random_randn(self, *args): - return self._analyze_op_call_numpy_random_rand(*args) - - def _analyze_op_numpy_random_with_size(self, pos, scope, equiv_set, args, kws): - if 'size' in kws: - return kws['size'], [] - if len(args) > pos: - return args[pos], [] - return None - - def _analyze_op_call_numpy_random_ranf(self, *args): - return self._analyze_op_numpy_random_with_size(0, *args) - - def _analyze_op_call_numpy_random_random_sample(self, *args): - return self._analyze_op_numpy_random_with_size(0, *args) - - def _analyze_op_call_numpy_random_sample(self, *args): - return self._analyze_op_numpy_random_with_size(0, *args) - - def _analyze_op_call_numpy_random_random(self, *args): - return self._analyze_op_numpy_random_with_size(0, *args) - - def _analyze_op_call_numpy_random_standard_normal(self, *args): - return self._analyze_op_numpy_random_with_size(0, *args) - - def _analyze_op_call_numpy_random_chisquare(self, *args): - return self._analyze_op_numpy_random_with_size(1, *args) - - def _analyze_op_call_numpy_random_weibull(self, *args): - return self._analyze_op_numpy_random_with_size(1, *args) - - def 
_analyze_op_call_numpy_random_power(self, *args): - return self._analyze_op_numpy_random_with_size(1, *args) - - def _analyze_op_call_numpy_random_geometric(self, *args): - return self._analyze_op_numpy_random_with_size(1, *args) - - def _analyze_op_call_numpy_random_exponential(self, *args): - return self._analyze_op_numpy_random_with_size(1, *args) - - def _analyze_op_call_numpy_random_poisson(self, *args): - return self._analyze_op_numpy_random_with_size(1, *args) - - def _analyze_op_call_numpy_random_rayleigh(self, *args): - return self._analyze_op_numpy_random_with_size(1, *args) - - def _analyze_op_call_numpy_random_normal(self, *args): - return self._analyze_op_numpy_random_with_size(2, *args) - - def _analyze_op_call_numpy_random_uniform(self, *args): - return self._analyze_op_numpy_random_with_size(2, *args) - - def _analyze_op_call_numpy_random_beta(self, *args): - return self._analyze_op_numpy_random_with_size(2, *args) - - def _analyze_op_call_numpy_random_binomial(self, *args): - return self._analyze_op_numpy_random_with_size(2, *args) - - def _analyze_op_call_numpy_random_f(self, *args): - return self._analyze_op_numpy_random_with_size(2, *args) - - def _analyze_op_call_numpy_random_gamma(self, *args): - return self._analyze_op_numpy_random_with_size(2, *args) - - def _analyze_op_call_numpy_random_lognormal(self, *args): - return self._analyze_op_numpy_random_with_size(2, *args) - - def _analyze_op_call_numpy_random_laplace(self, *args): - return self._analyze_op_numpy_random_with_size(2, *args) - - def _analyze_op_call_numpy_random_randint(self, *args): - return self._analyze_op_numpy_random_with_size(2, *args) - - def _analyze_op_call_numpy_random_triangular(self, *args): - return self._analyze_op_numpy_random_with_size(3, *args) - - def _analyze_op_call_numpy_concatenate(self, scope, equiv_set, args, kws): - assert(len(args) > 0) - loc = args[0].loc - seq, op = find_build_sequence(self.func_ir, args[0]) - n = len(seq) - require(n > 0) - axis = 0 - 
if 'axis' in kws: - if isinstance(kws['axis'], int): # internal use only - axis = kws['axis'] - else: - axis = find_const(self.func_ir, kws['axis']) - elif len(args) > 1: - axis = find_const(self.func_ir, args[1]) - require(isinstance(axis, int)) - require(op == 'build_tuple') - shapes = [equiv_set._get_shape(x) for x in seq] - if axis < 0: - axis = len(shapes[0]) + axis - require(0 <= axis < len(shapes[0])) - asserts = [] - new_shape = [] - if n == 1: # from one array N-dimension to (N-1)-dimension - shape = shapes[0] - # first size is the count, pop it out of shapes - n = equiv_set.get_equiv_const(shapes[0]) - shape.pop(0) - for i in range(len(shape)): - if i == axis: - m = equiv_set.get_equiv_const(shape[i]) - size = m * n if (m and n) else None - else: - size = self._sum_size(equiv_set, shapes[0]) - new_shape.append(size) - else: # from n arrays N-dimension to N-dimension - for i in range(len(shapes[0])): - if i == axis: - size = self._sum_size( - equiv_set, [shape[i] for shape in shapes]) - else: - sizes = [shape[i] for shape in shapes] - asserts.append( - self._call_assert_equiv(scope, loc, equiv_set, sizes)) - size = sizes[0] - new_shape.append(size) - return tuple(new_shape), sum(asserts, []) - - def _analyze_op_call_numpy_stack(self, scope, equiv_set, args, kws): - assert(len(args) > 0) - loc = args[0].loc - seq, op = find_build_sequence(self.func_ir, args[0]) - n = len(seq) - require(n > 0) - axis = 0 - if 'axis' in kws: - if isinstance(kws['axis'], int): # internal use only - axis = kws['axis'] - else: - axis = find_const(self.func_ir, kws['axis']) - elif len(args) > 1: - axis = find_const(self.func_ir, args[1]) - require(isinstance(axis, int)) - # only build_tuple can give reliable count - require(op == 'build_tuple') - shapes = [equiv_set._get_shape(x) for x in seq] - asserts = self._call_assert_equiv(scope, loc, equiv_set, seq) - shape = shapes[0] - if axis < 0: - axis = len(shape) + axis + 1 - require(0 <= axis <= len(shape)) - new_shape = 
list(shape[0:axis]) + [n] + list(shape[axis:]) - return tuple(new_shape), asserts - - def _analyze_op_call_numpy_vstack(self, scope, equiv_set, args, kws): - assert(len(args) == 1) - seq, op = find_build_sequence(self.func_ir, args[0]) - n = len(seq) - require(n > 0) - typ = self.typemap[seq[0].name] - require(isinstance(typ, types.ArrayCompatible)) - if typ.ndim < 2: - return self._analyze_op_call_numpy_stack(scope, equiv_set, args, kws) - else: - kws['axis'] = 0 - return self._analyze_op_call_numpy_concatenate(scope, equiv_set, args, kws) - - def _analyze_op_call_numpy_hstack(self, scope, equiv_set, args, kws): - assert(len(args) == 1) - seq, op = find_build_sequence(self.func_ir, args[0]) - n = len(seq) - require(n > 0) - typ = self.typemap[seq[0].name] - require(isinstance(typ, types.ArrayCompatible)) - if typ.ndim < 2: - kws['axis'] = 0 - else: - kws['axis'] = 1 - return self._analyze_op_call_numpy_concatenate(scope, equiv_set, args, kws) - - def _analyze_op_call_numpy_dstack(self, scope, equiv_set, args, kws): - assert(len(args) == 1) - seq, op = find_build_sequence(self.func_ir, args[0]) - n = len(seq) - require(n > 0) - typ = self.typemap[seq[0].name] - require(isinstance(typ, types.ArrayCompatible)) - if typ.ndim == 1: - kws['axis'] = 1 - result = self._analyze_op_call_numpy_stack( - scope, equiv_set, args, kws) - require(result) - (shape, pre) = result - shape = tuple([1] + list(shape)) - return shape, pre - elif typ.ndim == 2: - kws['axis'] = 2 - return self._analyze_op_call_numpy_stack(scope, equiv_set, args, kws) - else: - kws['axis'] = 2 - return self._analyze_op_call_numpy_concatenate(scope, equiv_set, args, kws) - - def _analyze_op_call_numpy_cumsum(self, scope, equiv_set, args, kws): - # TODO - return None - - def _analyze_op_call_numpy_cumprod(self, scope, equiv_set, args, kws): - # TODO - return None - - def _analyze_op_call_numpy_linspace(self, scope, equiv_set, args, kws): - n = len(args) - num = 50 - if n > 2: - num = args[2] - elif 'num' in 
kws: - num = kws['num'] - return (num,), [] - - def _analyze_op_call_numpy_dot(self, scope, equiv_set, args, kws): - n = len(args) - assert(n >= 2) - loc = args[0].loc - require(all([self._isarray(x.name) for x in args])) - typs = [self.typemap[x.name] for x in args] - dims = [ty.ndim for ty in typs] - require(all(x > 0 for x in dims)) - if dims[0] == 1 and dims[1] == 1: - return None - shapes = [equiv_set._get_shape(x) for x in args] - if dims[0] == 1: - asserts = self._call_assert_equiv( - scope, loc, equiv_set, [shapes[0][0], shapes[1][-2]]) - return tuple(shapes[1][0:-2] + shapes[1][-1:]), asserts - if dims[1] == 1: - asserts = self._call_assert_equiv( - scope, loc, equiv_set, [shapes[0][-1], shapes[1][0]]) - return tuple(shapes[0][0:-1]), asserts - if dims[0] == 2 and dims[1] == 2: - asserts = self._call_assert_equiv( - scope, loc, equiv_set, [shapes[0][1], shapes[1][0]]) - return (shapes[0][0], shapes[1][1]), asserts - if dims[0] > 2: # TODO: handle higher dimension cases - pass - return None - - def _analyze_stencil(self, scope, equiv_set, stencil_func, loc, args, kws): - # stencil requires that all relatively indexed array arguments are - # of same size - std_idx_arrs = stencil_func.options.get('standard_indexing', ()) - kernel_arg_names = stencil_func.kernel_ir.arg_names - if isinstance(std_idx_arrs, str): - std_idx_arrs = (std_idx_arrs,) - rel_idx_arrs = [] - assert(len(args) > 0 and len(args) == len(kernel_arg_names)) - for arg, var in zip(kernel_arg_names, args): - typ = self.typemap[var.name] - if (isinstance(typ, types.ArrayCompatible) and - not(arg in std_idx_arrs)): - rel_idx_arrs.append(var) - n = len(rel_idx_arrs) - require(n > 0) - asserts = self._call_assert_equiv(scope, loc, equiv_set, rel_idx_arrs) - shape = equiv_set.get_shape(rel_idx_arrs[0]) - return shape, asserts - - def _analyze_op_call_numpy_linalg_inv(self, scope, equiv_set, args, kws): - require(len(args) >= 1) - return equiv_set._get_shape(args[0]), [] - - def 
_analyze_broadcast(self, scope, equiv_set, loc, args): - """Infer shape equivalence of arguments based on Numpy broadcast rules - and return shape of output - https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html - """ - arrs = list(filter(lambda a: self._isarray(a.name), args)) - require(len(arrs) > 0) - names = [x.name for x in arrs] - dims = [self.typemap[x.name].ndim for x in arrs] - max_dim = max(dims) - require(max_dim > 0) - try: - shapes = [equiv_set.get_shape(x) for x in arrs] - except GuardException: - return arrs[0], self._call_assert_equiv(scope, loc, equiv_set, arrs) - return self._broadcast_assert_shapes(scope, equiv_set, loc, shapes, names) - - def _broadcast_assert_shapes(self, scope, equiv_set, loc, shapes, names): - """Produce assert_equiv for sizes in each dimension, taking into account - of dimension coercion and constant size of 1. - """ - asserts = [] - new_shape = [] - max_dim = max([len(shape) for shape in shapes]) - const_size_one = None - for i in range(max_dim): - sizes = [] - size_names = [] - for name, shape in zip(names, shapes): - if i < len(shape): - size = shape[len(shape) - 1 - i] - const_size = equiv_set.get_equiv_const(size) - if const_size == 1: - const_size_one = size - else: - sizes.append(size) # non-1 size to front - size_names.append(name) - if sizes == []: - assert(const_size_one != None) - sizes.append(const_size_one) - size_names.append("1") - asserts.append(self._call_assert_equiv(scope, loc, equiv_set, - sizes, names=size_names)) - new_shape.append(sizes[0]) - return tuple(reversed(new_shape)), sum(asserts, []) - - def _call_assert_equiv(self, scope, loc, equiv_set, args, names=None): - insts = self._make_assert_equiv( - scope, loc, equiv_set, args, names=names) - if len(args) > 1: - equiv_set.insert_equiv(*args) - return insts - - def _make_assert_equiv(self, scope, loc, equiv_set, _args, names=None): - # filter out those that are already equivalent - if names == None: - names = [x.name for x in _args] - args 
= [] - arg_names = [] - for name, x in zip(names, _args): - seen = False - for y in args: - if equiv_set.is_equiv(x, y): - seen = True - break - if not seen: - args.append(x) - arg_names.append(name) - - # no assertion necessary if there are less than two - if len(args) < 2: - return [] - - msg = "Sizes of {} do not match on {}".format(', '.join(arg_names), loc) - msg_val = ir.Const(msg, loc) - msg_typ = types.Const(msg) - msg_var = ir.Var(scope, mk_unique_var("msg"), loc) - self.typemap[msg_var.name] = msg_typ - argtyps = tuple([msg_typ] + [self.typemap[x.name] for x in args]) - - # assert_equiv takes vararg, which requires a tuple as argument type - tup_typ = types.BaseTuple.from_types(argtyps) - - # prepare function variable whose type may vary since it takes vararg - assert_var = ir.Var(scope, mk_unique_var("assert"), loc) - assert_def = ir.Global('assert_equiv', assert_equiv, loc=loc) - fnty = get_global_func_typ(assert_equiv) - sig = self.context.resolve_function_type(fnty, (tup_typ,), {}) - self._define(equiv_set, assert_var, fnty, assert_def) - - # The return value from assert_equiv is always of none type. 
- var = ir.Var(scope, mk_unique_var("ret"), loc) - value = ir.Expr.call(assert_var, [msg_var] + args, {}, loc=loc) - self._define(equiv_set, var, types.none, value) - self.calltypes[value] = sig - - return [ir.Assign(value=msg_val, target=msg_var, loc=loc), - ir.Assign(value=assert_def, target=assert_var, loc=loc), - ir.Assign(value=value, target=var, loc=loc), - ] - - def _gen_shape_call(self, equiv_set, var, ndims, shape): - out = [] - # attr call: A_sh_attr = getattr(A, shape) - if isinstance(shape, ir.Var): - shape = equiv_set.get_shape(shape) - # already a tuple variable that contains size - if isinstance(shape, ir.Var): - attr_var = shape - shape_attr_call = None - shape = None - else: - shape_attr_call = ir.Expr.getattr(var, "shape", var.loc) - attr_var = ir.Var(var.scope, mk_unique_var( - "{}_shape".format(var.name)), var.loc) - shape_attr_typ = types.containers.UniTuple(types.intp, ndims) - size_vars = [] - use_attr_var = False - # trim shape tuple if it is more than ndim - if shape: - nshapes = len(shape) - if ndims < nshapes: - shape = shape[(nshapes-ndims):] - for i in range(ndims): - skip = False - if shape and shape[i]: - if isinstance(shape[i], ir.Var): - typ = self.typemap[shape[i].name] - if (isinstance(typ, types.Number) or - isinstance(typ, types.SliceType)): - size_var = shape[i] - skip = True - else: - if isinstance(shape[i], int): - size_val = ir.Const(shape[i], var.loc) - else: - size_val = shape[i] - assert(isinstance(size_val, ir.Const)) - size_var = ir.Var(var.scope, mk_unique_var( - "{}_size{}".format(var.name, i)), var.loc) - out.append(ir.Assign(size_val, size_var, var.loc)) - self._define(equiv_set, size_var, types.intp, size_val) - skip = True - if not skip: - # get size: Asize0 = A_sh_attr[0] - size_var = ir.Var(var.scope, mk_unique_var( - "{}_size{}".format(var.name, i)), var.loc) - getitem = ir.Expr.static_getitem(attr_var, i, None, var.loc) - use_attr_var = True - self.calltypes[getitem] = None - out.append(ir.Assign(getitem, 
size_var, var.loc)) - self._define(equiv_set, size_var, types.intp, getitem) - size_vars.append(size_var) - if use_attr_var and shape_attr_call: - # only insert shape call if there is any getitem call - out.insert(0, ir.Assign(shape_attr_call, attr_var, var.loc)) - self._define(equiv_set, attr_var, shape_attr_typ, shape_attr_call) - return tuple(size_vars), out - - def _isarray(self, varname): - # no SmartArrayType support yet (can't generate parfor, allocate, etc) - typ = self.typemap[varname] - return (isinstance(typ, types.npytypes.Array) and - not isinstance(typ, types.npytypes.SmartArrayType) and - typ.ndim > 0) - - def _sum_size(self, equiv_set, sizes): - """Return the sum of the given list of sizes if they are all equivalent - to some constant, or None otherwise. - """ - s = 0 - for size in sizes: - n = equiv_set.get_equiv_const(size) - if n == None: - return None - else: - s += n - return s - -UNARY_MAP_OP = list( - npydecl.NumpyRulesUnaryArrayOperator._op_map.keys()) + ['+'] -BINARY_MAP_OP = npydecl.NumpyRulesArrayOperator._op_map.keys() -UFUNC_MAP_OP = [f.__name__ for f in npydecl.supported_ufuncs] diff --git a/numba/numba/bytecode.py b/numba/numba/bytecode.py deleted file mode 100644 index 32adc093b..000000000 --- a/numba/numba/bytecode.py +++ /dev/null @@ -1,330 +0,0 @@ -""" -From NumbaPro - -""" -from __future__ import print_function, division, absolute_import - -from collections import namedtuple, OrderedDict -import dis -import inspect -import sys -import itertools -from types import CodeType, ModuleType - -from numba import errors, utils - - -opcode_info = namedtuple('opcode_info', ['argsize']) - - -def get_function_object(obj): - """ - Objects that wraps function should provide a "__numba__" magic attribute - that contains a name of an attribute that contains the actual python - function object. 
- """ - attr = getattr(obj, "__numba__", None) - if attr: - return getattr(obj, attr) - return obj - - -def get_code_object(obj): - "Shamelessly borrowed from llpython" - return getattr(obj, '__code__', getattr(obj, 'func_code', None)) - - -def _as_opcodes(seq): - lst = [] - for s in seq: - c = dis.opmap.get(s) - if c is not None: - lst.append(c) - return lst - - -JREL_OPS = frozenset(dis.hasjrel) -JABS_OPS = frozenset(dis.hasjabs) -JUMP_OPS = JREL_OPS | JABS_OPS -TERM_OPS = frozenset(_as_opcodes(['RETURN_VALUE', 'RAISE_VARARGS'])) -EXTENDED_ARG = dis.EXTENDED_ARG -HAVE_ARGUMENT = dis.HAVE_ARGUMENT - - -class ByteCodeInst(object): - ''' - Attributes - ---------- - - offset: - byte offset of opcode - - opcode: - opcode integer value - - arg: - instruction arg - - lineno: - -1 means unknown - ''' - __slots__ = 'offset', 'next', 'opcode', 'opname', 'arg', 'lineno' - - def __init__(self, offset, opcode, arg, nextoffset): - self.offset = offset - self.next = nextoffset - self.opcode = opcode - self.opname = dis.opname[opcode] - self.arg = arg - self.lineno = -1 # unknown line number - - @property - def is_jump(self): - return self.opcode in JUMP_OPS - - @property - def is_terminator(self): - return self.opcode in TERM_OPS - - def get_jump_target(self): - assert self.is_jump - if self.opcode in JREL_OPS: - return self.next + self.arg - else: - assert self.opcode in JABS_OPS - return self.arg - - def __repr__(self): - return '%s(arg=%s, lineno=%d)' % (self.opname, self.arg, self.lineno) - - @property - def block_effect(self): - """Effect of the block stack - Returns +1 (push), 0 (none) or -1 (pop) - """ - if self.opname.startswith('SETUP_'): - return 1 - elif self.opname == 'POP_BLOCK': - return -1 - else: - return 0 - - -if sys.version_info[:2] >= (3, 6): - CODE_LEN = 1 - ARG_LEN = 1 - NO_ARG_LEN = 1 -else: - CODE_LEN = 1 - ARG_LEN = 2 - NO_ARG_LEN = 0 - - -# Adapted from Lib/dis.py -def _unpack_opargs(code): - """ - Returns a 4-int-tuple of - (bytecode offset, opcode, 
argument, offset of next bytecode). - """ - if sys.version_info[0] < 3: - code = list(map(ord, code)) - - extended_arg = 0 - n = len(code) - offset = i = 0 - while i < n: - op = code[i] - i += CODE_LEN - if op >= HAVE_ARGUMENT: - arg = code[i] | extended_arg - for j in range(ARG_LEN): - arg |= code[i + j] << (8 * j) - i += ARG_LEN - if op == EXTENDED_ARG: - extended_arg = arg << 8 * ARG_LEN - continue - else: - arg = None - i += NO_ARG_LEN - - extended_arg = 0 - yield (offset, op, arg, i) - offset = i # Mark inst offset at first extended - - -class ByteCodeIter(object): - def __init__(self, code): - self.code = code - self.iter = iter(_unpack_opargs(self.code.co_code)) - - def __iter__(self): - return self - - def _fetch_opcode(self): - return next(self.iter) - - def next(self): - offset, opcode, arg, nextoffset = self._fetch_opcode() - return offset, ByteCodeInst(offset=offset, opcode=opcode, arg=arg, - nextoffset=nextoffset) - - __next__ = next - - def read_arg(self, size): - buf = 0 - for i in range(size): - _offset, byte = next(self.iter) - buf |= byte << (8 * i) - return buf - - -class ByteCode(object): - """ - The decoded bytecode of a function, and related information. - """ - __slots__ = ('func_id', 'co_names', 'co_varnames', 'co_consts', - 'co_cellvars', 'co_freevars', 'table', 'labels') - - def __init__(self, func_id): - code = func_id.code - - labels = set(dis.findlabels(code.co_code)) - labels.add(0) - - # A map of {offset: ByteCodeInst} - table = OrderedDict(ByteCodeIter(code)) - self._compute_lineno(table, code) - - self.func_id = func_id - self.co_names = code.co_names - self.co_varnames = code.co_varnames - self.co_consts = code.co_consts - self.co_cellvars = code.co_cellvars - self.co_freevars = code.co_freevars - self.table = table - self.labels = sorted(labels) - - @classmethod - def _compute_lineno(cls, table, code): - """ - Compute the line numbers for all bytecode instructions. 
- """ - for offset, lineno in dis.findlinestarts(code): - if offset in table: - table[offset].lineno = lineno - known = -1 - for inst in table.values(): - if inst.lineno >= 0: - known = inst.lineno - else: - inst.lineno = known - return table - - def __iter__(self): - return utils.itervalues(self.table) - - def __getitem__(self, offset): - return self.table[offset] - - def __contains__(self, offset): - return offset in self.table - - def dump(self): - def label_marker(i): - if i[1].offset in self.labels: - return '>' - else: - return ' ' - - return '\n'.join('%s %10s\t%s' % ((label_marker(i),) + i) - for i in utils.iteritems(self.table)) - - @classmethod - def _compute_used_globals(cls, func, table, co_consts, co_names): - """ - Compute the globals used by the function with the given - bytecode table. - """ - d = {} - globs = func.__globals__ - builtins = globs.get('__builtins__', utils.builtins) - if isinstance(builtins, ModuleType): - builtins = builtins.__dict__ - # Look for LOAD_GLOBALs in the bytecode - for inst in table.values(): - if inst.opname == 'LOAD_GLOBAL': - name = co_names[inst.arg] - if name not in d: - try: - value = globs[name] - except KeyError: - value = builtins[name] - d[name] = value - # Add globals used by any nested code object - for co in co_consts: - if isinstance(co, CodeType): - subtable = OrderedDict(ByteCodeIter(co)) - d.update(cls._compute_used_globals(func, subtable, - co.co_consts, co.co_names)) - return d - - def get_used_globals(self): - """ - Get a {name: value} map of the globals used by this code - object and any nested code objects. - """ - return self._compute_used_globals(self.func_id.func, self.table, - self.co_consts, self.co_names) - - -class FunctionIdentity(object): - """ - A function's identity and metadata. - - Note this typically represents a function whose bytecode is - being compiled, not necessarily the top-level user function - (the two might be distinct, e.g. in the `@generated_jit` case). 
- """ - _unique_ids = itertools.count(1) - - @classmethod - def from_function(cls, pyfunc): - """ - Create the FunctionIdentity of the given function. - """ - func = get_function_object(pyfunc) - code = get_code_object(func) - pysig = utils.pysignature(func) - if not code: - raise errors.ByteCodeSupportError( - "%s does not provide its bytecode" % func) - - try: - func_qualname = func.__qualname__ - except AttributeError: - func_qualname = func.__name__ - - self = cls() - self.func = func - self.func_qualname = func_qualname - self.func_name = func_qualname.split('.')[-1] - self.code = code - self.module = inspect.getmodule(func) - self.modname = (utils._dynamic_modname - if self.module is None - else self.module.__name__) - self.is_generator = inspect.isgeneratorfunction(func) - self.pysig = pysig - self.filename = code.co_filename - self.firstlineno = code.co_firstlineno - self.arg_count = len(pysig.parameters) - self.arg_names = list(pysig.parameters) - - # Even the same function definition can be compiled into - # several different function objects with distinct closure - # variables, so we make sure to disambiguate using an unique id. - uid = next(cls._unique_ids) - self.unique_name = '{}${}'.format(self.func_qualname, uid) - - return self - - def derive(self): - """Copy the object and increment the unique counter. - """ - return self.from_function(self.func) diff --git a/numba/numba/caching.py b/numba/numba/caching.py deleted file mode 100644 index 61ad1143a..000000000 --- a/numba/numba/caching.py +++ /dev/null @@ -1,732 +0,0 @@ -""" -Caching mechanism for compiled functions. 
-""" - -from __future__ import print_function, division, absolute_import - -from abc import ABCMeta, abstractmethod, abstractproperty -import contextlib -import errno -import hashlib -import inspect -import itertools -import os -from .six.moves import cPickle as pickle -import sys -import tempfile -import warnings - -from .appdirs import AppDirs -from .six import add_metaclass - -import numba -from . import compiler, config, utils -from .errors import NumbaWarning -from numba.targets.base import BaseContext -from numba.targets.codegen import CodeLibrary -from numba.compiler import CompileResult - - -def _get_codegen(obj): - """ - Returns the Codegen associated with the given object. - """ - if isinstance(obj, BaseContext): - return obj.codegen() - elif isinstance(obj, CodeLibrary): - return obj.codegen - elif isinstance(obj, CompileResult): - return obj.target_context.codegen() - else: - raise TypeError(type(obj)) - - -def _cache_log(msg, *args): - if config.DEBUG_CACHE: - msg = msg % args - print(msg) - - -@add_metaclass(ABCMeta) -class _Cache(object): - - @abstractproperty - def cache_path(self): - """ - The base filesystem path of this cache (for example its root folder). - """ - - @abstractmethod - def load_overload(self, sig, target_context): - """ - Load an overload for the given signature using the target context. - The saved object must be returned if successful, None if not found - in the cache. - """ - - @abstractmethod - def save_overload(self, sig, data): - """ - Save the overload for the given signature. - """ - - @abstractmethod - def enable(self): - """ - Enable the cache. - """ - - @abstractmethod - def disable(self): - """ - Disable the cache. - """ - - @abstractmethod - def flush(self): - """ - Flush the cache. 
- """ - - -class NullCache(_Cache): - @property - def cache_path(self): - return None - - def load_overload(self, sig, target_context): - pass - - def save_overload(self, sig, cres): - pass - - def enable(self): - pass - - def disable(self): - pass - - def flush(self): - pass - - -@add_metaclass(ABCMeta) -class _CacheLocator(object): - """ - A filesystem locator for caching a given function. - """ - - def ensure_cache_path(self): - path = self.get_cache_path() - try: - os.makedirs(path) - except OSError as e: - if e.errno != errno.EEXIST: - raise - # Ensure the directory is writable by trying to write a temporary file - tempfile.TemporaryFile(dir=path).close() - - @abstractmethod - def get_cache_path(self): - """ - Return the directory the function is cached in. - """ - - @abstractmethod - def get_source_stamp(self): - """ - Get a timestamp representing the source code's freshness. - Can return any picklable Python object. - """ - - @abstractmethod - def get_disambiguator(self): - """ - Get a string disambiguator for this locator's function. - It should allow disambiguating different but similarly-named functions. - """ - - @classmethod - def from_function(cls, py_func, py_file): - """ - Create a locator instance for the given function located in the - given file. - """ - raise NotImplementedError - - -class _SourceFileBackedLocatorMixin(object): - """ - A cache locator mixin for functions which are backed by a well-known - Python source file. - """ - - def get_source_stamp(self): - if getattr(sys, 'frozen', False): - st = os.stat(sys.executable) - else: - st = os.stat(self._py_file) - # We use both timestamp and size as some filesystems only have second - # granularity. - return st.st_mtime, st.st_size - - def get_disambiguator(self): - return str(self._lineno) - - @classmethod - def from_function(cls, py_func, py_file): - if not os.path.exists(py_file): - # Perhaps a placeholder (e.g. 
"") - return - self = cls(py_func, py_file) - try: - self.ensure_cache_path() - except OSError: - # Cannot ensure the cache directory exists or is writable - return - return self - - -class _UserProvidedCacheLocator(_SourceFileBackedLocatorMixin, _CacheLocator): - """ - A locator that always point to the user provided directory in - `numba.config.CACHE_DIR` - """ - def __init__(self, py_func, py_file): - self._py_file = py_file - self._lineno = py_func.__code__.co_firstlineno - drive, path = os.path.splitdrive(os.path.abspath(self._py_file)) - subpath = os.path.dirname(path).lstrip(os.path.sep) - self._cache_path = os.path.join(config.CACHE_DIR, subpath) - - def get_cache_path(self): - return self._cache_path - - @classmethod - def from_function(cls, py_func, py_file): - if not config.CACHE_DIR: - return - parent = super(_UserProvidedCacheLocator, cls) - return parent.from_function(py_func, py_file) - - -class _InTreeCacheLocator(_SourceFileBackedLocatorMixin, _CacheLocator): - """ - A locator for functions backed by a regular Python module with a - writable __pycache__ directory. - """ - - def __init__(self, py_func, py_file): - self._py_file = py_file - self._lineno = py_func.__code__.co_firstlineno - self._cache_path = os.path.join(os.path.dirname(self._py_file), '__pycache__') - - def get_cache_path(self): - return self._cache_path - - -class _UserWideCacheLocator(_SourceFileBackedLocatorMixin, _CacheLocator): - """ - A locator for functions backed by a regular Python module or a - frozen executable, cached into a user-wide cache directory. 
- """ - - def __init__(self, py_func, py_file): - self._py_file = py_file - self._lineno = py_func.__code__.co_firstlineno - appdirs = AppDirs(appname="numba", appauthor=False) - cache_dir = appdirs.user_cache_dir - cache_subpath = os.path.dirname(py_file) - if not (os.name == "nt" or getattr(sys, 'frozen', False)): - # On non-Windows, further disambiguate by appending the entire - # absolute source path to the cache dir, e.g. - # "$HOME/.cache/numba/usr/lib/.../mypkg/mysubpkg" - # On Windows, this is undesirable because of path length limitations - # For frozen applications, there is no existing "full path" - # directory, and depends on a relocatable executable. - cache_subpath = os.path.abspath(cache_subpath).lstrip(os.path.sep) - self._cache_path = os.path.join(cache_dir, cache_subpath) - - def get_cache_path(self): - return self._cache_path - - @classmethod - def from_function(cls, py_func, py_file): - if not (os.path.exists(py_file) or getattr(sys, 'frozen', False)): - # Perhaps a placeholder (e.g. "") - # stop function exit if frozen, since it uses a temp placeholder - return - self = cls(py_func, py_file) - try: - self.ensure_cache_path() - except OSError: - # Cannot ensure the cache directory exists or is writable - return - return self - - -class _IPythonCacheLocator(_CacheLocator): - """ - A locator for functions entered at the IPython prompt (notebook or other). - """ - - def __init__(self, py_func, py_file): - self._py_file = py_file - # Note IPython enhances the linecache module to be able to - # inspect source code of functions defined on the interactive prompt. 
- source = inspect.getsource(py_func) - if isinstance(source, bytes): - self._bytes_source = source - else: - self._bytes_source = source.encode('utf-8') - - def get_cache_path(self): - # We could also use jupyter_core.paths.jupyter_runtime_dir() - # In both cases this is a user-wide directory, so we need to - # be careful when disambiguating if we don't want too many - # conflicts (see below). - try: - from IPython.paths import get_ipython_cache_dir - except ImportError: - # older IPython version - from IPython.utils.path import get_ipython_cache_dir - return os.path.join(get_ipython_cache_dir(), 'numba') - - def get_source_stamp(self): - return hashlib.sha256(self._bytes_source).hexdigest() - - def get_disambiguator(self): - # Heuristic: we don't want too many variants being saved, but - # we don't want similar named functions (e.g. "f") to compete - # for the cache, so we hash the first two lines of the function - # source (usually this will be the @jit decorator + the function - # signature). - firstlines = b''.join(self._bytes_source.splitlines(True)[:2]) - return hashlib.sha256(firstlines).hexdigest()[:10] - - @classmethod - def from_function(cls, py_func, py_file): - if not py_file.startswith("' can appear in the qualname (e.g. '') but - # are forbidden in Windows filenames - fixed_fullname = fullname.replace('<', '').replace('>', '') - fmt = '%s-%s.py%d%d%s' - return fmt % (fixed_fullname, self.locator.get_disambiguator(), - sys.version_info[0], sys.version_info[1], abiflags) - - @property - def filename_base(self): - return self._filename_base - - @property - def locator(self): - return self._locator - - @abstractmethod - def reduce(self, data): - "Returns the serialized form the data" - pass - - @abstractmethod - def rebuild(self, target_context, reduced_data): - "Returns the de-serialized form of the *reduced_data*" - pass - - @abstractmethod - def check_cachable(self, data): - "Returns True if the given data is cachable; otherwise, returns False." 
- pass - - -class CompileResultCacheImpl(_CacheImpl): - """ - Implements the logic to cache CompileResult objects. - """ - - def reduce(self, cres): - """ - Returns a serialized CompileResult - """ - return cres._reduce() - - def rebuild(self, target_context, payload): - """ - Returns the unserialized CompileResult - """ - return compiler.CompileResult._rebuild(target_context, *payload) - - def check_cachable(self, cres): - """ - Check cachability of the given compile result. - """ - cannot_cache = None - if self._is_closure: - cannot_cache = "as it uses outer variables in a closure" - elif cres.lifted: - cannot_cache = "as it uses lifted loops" - elif cres.has_dynamic_globals: - cannot_cache = ("as it uses dynamic globals " - "(such as ctypes pointers and large global arrays)") - if cannot_cache: - msg = ('Cannot cache compiled function "%s" %s' - % (cres.fndesc.qualname.split('.')[-1], cannot_cache)) - warnings.warn_explicit(msg, NumbaWarning, - self._locator._py_file, self._lineno) - return False - return True - - -class CodeLibraryCacheImpl(_CacheImpl): - """ - Implements the logic to cache CodeLibrary objects. - """ - - _filename_prefix = None # must be overriden - - def reduce(self, codelib): - """ - Returns a serialized CodeLibrary - """ - return codelib.serialize_using_object_code() - - def rebuild(self, target_context, payload): - """ - Returns the unserialized CodeLibrary - """ - return target_context.codegen().unserialize_library(payload) - - def check_cachable(self, codelib): - """ - Check cachability of the given CodeLibrary. - """ - return not self._is_closure - - def get_filename_base(self, fullname, abiflags): - parent = super(CodeLibraryCacheImpl, self) - res = parent.get_filename_base(fullname, abiflags) - return '-'.join([self._filename_prefix, res]) - - -class IndexDataCacheFile(object): - """ - Implements the logic for the index file and data file used by a cache. 
- """ - def __init__(self, cache_path, filename_base, source_stamp): - self._cache_path = cache_path - self._index_name = '%s.nbi' % (filename_base,) - self._index_path = os.path.join(self._cache_path, self._index_name) - self._data_name_pattern = '%s.{number:d}.nbc' % (filename_base,) - self._source_stamp = source_stamp - self._version = numba.__version__ - - def flush(self): - self._save_index({}) - - def save(self, key, data): - """ - Save a new cache entry with *key* and *data*. - """ - overloads = self._load_index() - try: - # If key already exists, we will overwrite the file - data_name = overloads[key] - except KeyError: - # Find an available name for the data file - existing = set(overloads.values()) - for i in itertools.count(1): - data_name = self._data_name(i) - if data_name not in existing: - break - overloads[key] = data_name - self._save_index(overloads) - self._save_data(data_name, data) - - def load(self, key): - """ - Load a cache entry with *key*. - """ - overloads = self._load_index() - data_name = overloads.get(key) - if data_name is None: - return - try: - return self._load_data(data_name) - except EnvironmentError: - # File could have been removed while the index still refers it. - return - - def _load_index(self): - """ - Load the cache index and return it as a dictionary (possibly - empty if cache is empty or obsolete). - """ - try: - with open(self._index_path, "rb") as f: - version = pickle.load(f) - data = f.read() - except EnvironmentError as e: - # Index doesn't exist yet? - if e.errno in (errno.ENOENT,): - return {} - raise - if version != self._version: - # This is another version. Avoid trying to unpickling the - # rest of the stream, as that may fail. - return {} - stamp, overloads = pickle.loads(data) - _cache_log("[cache] index loaded from %r", self._index_path) - if stamp != self._source_stamp: - # Cache is not fresh. Stale data files will be eventually - # overwritten, since they are numbered in incrementing order. 
- return {} - else: - return overloads - - def _save_index(self, overloads): - data = self._source_stamp, overloads - data = self._dump(data) - with self._open_for_write(self._index_path) as f: - pickle.dump(self._version, f, protocol=-1) - f.write(data) - _cache_log("[cache] index saved to %r", self._index_path) - - def _load_data(self, name): - path = self._data_path(name) - with open(path, "rb") as f: - data = f.read() - tup = pickle.loads(data) - _cache_log("[cache] data loaded from %r", path) - return tup - - def _save_data(self, name, data): - data = self._dump(data) - path = self._data_path(name) - with self._open_for_write(path) as f: - f.write(data) - _cache_log("[cache] data saved to %r", path) - - def _data_name(self, number): - return self._data_name_pattern.format(number=number) - - def _data_path(self, name): - return os.path.join(self._cache_path, name) - - def _dump(self, obj): - return pickle.dumps(obj, protocol=-1) - - @contextlib.contextmanager - def _open_for_write(self, filepath): - """ - Open *filepath* for writing in a race condition-free way - (hopefully). - """ - tmpname = '%s.tmp.%d' % (filepath, os.getpid()) - try: - with open(tmpname, "wb") as f: - yield f - utils.file_replace(tmpname, filepath) - except Exception: - # In case of error, remove dangling tmp file - try: - os.unlink(tmpname) - except OSError: - pass - raise - - -class Cache(_Cache): - """ - A per-function compilation cache. The cache saves data in separate - data files and maintains information in an index file. - - There is one index file per function and Python version - ("function_name-.pyXY.nbi") which contains a mapping of - signatures and architectures to data files. - It is prefixed by a versioning key and a timestamp of the Python source - file containing the function. - - There is one data file ("function_name-.pyXY..nbc") - per function, function signature, target architecture and Python version. 
- - Separate index and data files per Python version avoid pickle - compatibility problems. - - Note: - This contains the driver logic only. The core logic is provided - by a subclass of ``_CacheImpl`` specified as *_impl_class* in the subclass. - """ - - # The following class variables must be overriden by subclass. - _impl_class = None - - def __init__(self, py_func): - self._name = repr(py_func) - self._impl = self._impl_class(py_func) - self._cache_path = self._impl.locator.get_cache_path() - # This may be a bit strict but avoids us maintaining a magic number - source_stamp = self._impl.locator.get_source_stamp() - filename_base = self._impl.filename_base - self._cache_file = IndexDataCacheFile(cache_path=self._cache_path, - filename_base=filename_base, - source_stamp=source_stamp) - self.enable() - - def __repr__(self): - return "<%s py_func=%r>" % (self.__class__.__name__, self._name) - - @property - def cache_path(self): - return self._cache_path - - def enable(self): - self._enabled = True - - def disable(self): - self._enabled = False - - def flush(self): - self._cache_file.flush() - - def load_overload(self, sig, target_context): - """ - Load and recreate the cached object for the given signature, - using the *target_context*. - """ - # Refresh the context to ensure it is initialized - target_context.refresh() - with self._guard_against_spurious_io_errors(): - return self._load_overload(sig, target_context) - # None returned if the `with` block swallows an exception - - def _load_overload(self, sig, target_context): - if not self._enabled: - return - key = self._index_key(sig, _get_codegen(target_context)) - data = self._cache_file.load(key) - if data is not None: - data = self._impl.rebuild(target_context, data) - return data - - def save_overload(self, sig, data): - """ - Save the data for the given signature in the cache. 
- """ - with self._guard_against_spurious_io_errors(): - self._save_overload(sig, data) - - def _save_overload(self, sig, data): - if not self._enabled: - return - if not self._impl.check_cachable(data): - return - self._impl.locator.ensure_cache_path() - key = self._index_key(sig, _get_codegen(data)) - data = self._impl.reduce(data) - self._cache_file.save(key, data) - - @contextlib.contextmanager - def _guard_against_spurious_io_errors(self): - if os.name == 'nt': - # Guard against permission errors due to accessing the file - # from several processes (see #2028) - try: - yield - except EnvironmentError as e: - if e.errno != errno.EACCES: - raise - else: - # No such conditions under non-Windows OSes - yield - - def _index_key(self, sig, codegen): - """ - Compute index key for the given signature and codegen. - It includes a description of the OS and target architecture. - """ - return (sig, codegen.magic_tuple()) - - -class FunctionCache(Cache): - """ - Implements Cache that saves and loads CompileResult objects. - """ - _impl_class = CompileResultCacheImpl - - -# Remember used cache filename prefixes. -_lib_cache_prefixes = set(['']) - - -def make_library_cache(prefix): - """ - Create a Cache class for additional compilation features to cache their - result for reuse. The cache is saved in filename pattern like - in ``FunctionCache`` but with additional *prefix* as specified. - """ - # avoid cache prefix reuse - assert prefix not in _lib_cache_prefixes - _lib_cache_prefixes.add(prefix) - - class CustomCodeLibraryCacheImpl(CodeLibraryCacheImpl): - _filename_prefix = prefix - - class LibraryCache(Cache): - """ - Implements Cache that saves and loads CodeLibrary objects for additional - feature for the specified python function. 
- """ - _impl_class = CustomCodeLibraryCacheImpl - - return LibraryCache - - diff --git a/numba/numba/callwrapper.py b/numba/numba/callwrapper.py deleted file mode 100644 index 3037cbf25..000000000 --- a/numba/numba/callwrapper.py +++ /dev/null @@ -1,211 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from llvmlite.llvmpy.core import Type, Builder, Constant -import llvmlite.llvmpy.core as lc - -from numba import types, cgutils, config - - -class _ArgManager(object): - """ - A utility class to handle argument unboxing and cleanup - """ - def __init__(self, context, builder, api, env_manager, endblk, nargs): - self.context = context - self.builder = builder - self.api = api - self.env_manager = env_manager - self.arg_count = 0 # how many function arguments have been processed - self.cleanups = [] - self.nextblk = endblk - - def add_arg(self, obj, ty): - """ - Unbox argument and emit code that handles any error during unboxing. - Args are cleaned up in reverse order of the parameter list, and - cleanup begins as soon as unboxing of any argument fails. E.g. failure - on arg2 will result in control flow going through: - - arg2.err -> arg1.err -> arg0.err -> arg.end (returns) - """ - # Unbox argument - native = self.api.to_native_value(ty, obj) - - # If an error occurred, go to the cleanup block for the previous argument. 
- with cgutils.if_unlikely(self.builder, native.is_error): - self.builder.branch(self.nextblk) - - # Define the cleanup function for the argument - def cleanup_arg(): - # Native value reflection - self.api.reflect_native_value(ty, native.value, self.env_manager) - - # Native value cleanup - if native.cleanup is not None: - native.cleanup() - - # NRT cleanup - # (happens after the native value cleanup as the latter - # may need the native value) - if self.context.enable_nrt: - self.context.nrt.decref(self.builder, ty, native.value) - - self.cleanups.append(cleanup_arg) - - # Write the on-error cleanup block for this argument - cleanupblk = self.builder.append_basic_block("arg%d.err" % self.arg_count) - with self.builder.goto_block(cleanupblk): - cleanup_arg() - # Go to next cleanup block - self.builder.branch(self.nextblk) - - self.nextblk = cleanupblk - self.arg_count += 1 - return native.value - - def emit_cleanup(self): - """ - Emit the cleanup code after returning from the wrapped function. - """ - for dtor in self.cleanups: - dtor() - - -class _GilManager(object): - """ - A utility class to handle releasing the GIL and then re-acquiring it - again. 
- """ - - def __init__(self, builder, api, argman): - self.builder = builder - self.api = api - self.argman = argman - self.thread_state = api.save_thread() - - def emit_cleanup(self): - self.api.restore_thread(self.thread_state) - self.argman.emit_cleanup() - - -class PyCallWrapper(object): - def __init__(self, context, module, func, fndesc, env, call_helper, - release_gil): - self.context = context - self.module = module - self.func = func - self.fndesc = fndesc - self.env = env - self.release_gil = release_gil - - def build(self): - wrapname = self.fndesc.llvm_cpython_wrapper_name - - # This is the signature of PyCFunctionWithKeywords - # (see CPython's methodobject.h) - pyobj = self.context.get_argument_type(types.pyobject) - wrapty = Type.function(pyobj, [pyobj, pyobj, pyobj]) - wrapper = self.module.add_function(wrapty, name=wrapname) - - builder = Builder(wrapper.append_basic_block('entry')) - - # - `closure` will receive the `self` pointer stored in the - # PyCFunction object (see _dynfunc.c) - # - `args` and `kws` will receive the tuple and dict objects - # of positional and keyword arguments, respectively. 
- closure, args, kws = wrapper.args - closure.name = 'py_closure' - args.name = 'py_args' - kws.name = 'py_kws' - - api = self.context.get_python_api(builder) - self.build_wrapper(api, builder, closure, args, kws) - - return wrapper, api - - def build_wrapper(self, api, builder, closure, args, kws): - nargs = len(self.fndesc.argtypes) - - objs = [api.alloca_obj() for _ in range(nargs)] - parseok = api.unpack_tuple(args, self.fndesc.qualname, - nargs, nargs, *objs) - - pred = builder.icmp(lc.ICMP_EQ, parseok, Constant.null(parseok.type)) - with cgutils.if_unlikely(builder, pred): - builder.ret(api.get_null_object()) - - # Block that returns after erroneous argument unboxing/cleanup - endblk = builder.append_basic_block("arg.end") - with builder.goto_block(endblk): - builder.ret(api.get_null_object()) - - # Get the Environment object - env_manager = self.get_env(api, builder) - - cleanup_manager = _ArgManager(self.context, builder, api, - env_manager, endblk, nargs) - - # Compute the arguments to the compiled Numba function. 
- innerargs = [] - for obj, ty in zip(objs, self.fndesc.argtypes): - if isinstance(ty, types.Omitted): - # It's an omitted value => ignore dummy Python object - innerargs.append(None) - else: - val = cleanup_manager.add_arg(builder.load(obj), ty) - innerargs.append(val) - - if self.release_gil: - cleanup_manager = _GilManager(builder, api, cleanup_manager) - - status, retval = self.context.call_conv.call_function( - builder, self.func, self.fndesc.restype, self.fndesc.argtypes, - innerargs) - # Do clean up - self.debug_print(builder, "# callwrapper: emit_cleanup") - cleanup_manager.emit_cleanup() - self.debug_print(builder, "# callwrapper: emit_cleanup end") - - # Determine return status - with builder.if_then(status.is_ok, likely=True): - # Ok => return boxed Python value - with builder.if_then(status.is_none): - api.return_none() - - retty = self._simplified_return_type() - obj = api.from_native_return(retty, retval, env_manager) - builder.ret(obj) - - # Error out - self.context.call_conv.raise_error(builder, api, status) - builder.ret(api.get_null_object()) - - def get_env(self, api, builder): - """Get the Environment object which is declared as a global - in the module of the wrapped function. - """ - envname = self.context.get_env_name(self.fndesc) - gvptr = self.context.declare_env_global(builder.module, envname) - envptr = builder.load(gvptr) - - env_body = self.context.get_env_body(builder, envptr) - - api.emit_environment_sentry(envptr, return_pyobject=True) - env_manager = api.get_env_manager(self.env, env_body, envptr) - return env_manager - - def _simplified_return_type(self): - """ - The NPM callconv has already converted simplified optional types. - We can simply use the value type from it. 
- """ - restype = self.fndesc.restype - # Optional type - if isinstance(restype, types.Optional): - return restype.type - else: - return restype - - def debug_print(self, builder, msg): - if config.DEBUG_JIT: - self.context.debug_print(builder, "DEBUGJIT: {0}".format(msg)) diff --git a/numba/numba/capsulethunk.h b/numba/numba/capsulethunk.h deleted file mode 100644 index 4bdf5b41f..000000000 --- a/numba/numba/capsulethunk.h +++ /dev/null @@ -1,108 +0,0 @@ -/** - - This is a modified version of capsulethunk.h for use in llvmpy - -**/ - -#ifndef __CAPSULETHUNK_H -#define __CAPSULETHUNK_H - -#if ( (PY_VERSION_HEX < 0x02070000) \ - || ((PY_VERSION_HEX >= 0x03000000) \ - && (PY_VERSION_HEX < 0x03010000)) ) - -//#define Assert(X) do_assert(!!(X), #X, __FILE__, __LINE__) -#define Assert(X) - -static -void do_assert(int cond, const char * msg, const char *file, unsigned line){ - if (!cond) { - fprintf(stderr, "Assertion failed %s:%d\n%s\n", file, line, msg); - exit(1); - } -} - -typedef void (*PyCapsule_Destructor)(PyObject *); - -struct FakePyCapsule_Desc { - const char *name; - void *context; - PyCapsule_Destructor dtor; - PyObject *parent; - - FakePyCapsule_Desc() : name(0), context(0), dtor(0) {} -}; - -static -FakePyCapsule_Desc* get_pycobj_desc(PyObject *p){ - void *desc = ((PyCObject*)p)->desc; - Assert(desc && "No desc in PyCObject"); - return static_cast(desc); -} - -static -void pycobject_pycapsule_dtor(void *p, void *desc){ - Assert(desc); - Assert(p); - FakePyCapsule_Desc *fpc_desc = static_cast(desc); - Assert(fpc_desc->parent); - Assert(PyCObject_Check(fpc_desc->parent)); - fpc_desc->dtor(static_cast(fpc_desc->parent)); - delete fpc_desc; -} - -static -PyObject* PyCapsule_New(void* ptr, const char *name, PyCapsule_Destructor dtor) -{ - FakePyCapsule_Desc *desc = new FakePyCapsule_Desc; - desc->name = name; - desc->context = NULL; - desc->dtor = dtor; - PyObject *p = PyCObject_FromVoidPtrAndDesc(ptr, desc, - pycobject_pycapsule_dtor); - desc->parent = p; - 
return p; -} - -static -int PyCapsule_CheckExact(PyObject *p) -{ - return PyCObject_Check(p); -} - -static -void* PyCapsule_GetPointer(PyObject *p, const char *name) -{ - Assert(PyCapsule_CheckExact(p)); - if (strcmp(get_pycobj_desc(p)->name, name) != 0) { - PyErr_SetString(PyExc_ValueError, "Invalid PyCapsule object"); - } - return PyCObject_AsVoidPtr(p); -} - -static -void* PyCapsule_GetContext(PyObject *p) -{ - Assert(p); - Assert(PyCapsule_CheckExact(p)); - return get_pycobj_desc(p)->context; -} - -static -int PyCapsule_SetContext(PyObject *p, void *context) -{ - Assert(PyCapsule_CheckExact(p)); - get_pycobj_desc(p)->context = context; - return 0; -} - -static -const char * PyCapsule_GetName(PyObject *p) -{ -// Assert(PyCapsule_CheckExact(p)); - return get_pycobj_desc(p)->name; -} - -#endif /* #if PY_VERSION_HEX < 0x02070000 */ - -#endif /* __CAPSULETHUNK_H */ diff --git a/numba/numba/ccallback.py b/numba/numba/ccallback.py deleted file mode 100644 index 5cab99719..000000000 --- a/numba/numba/ccallback.py +++ /dev/null @@ -1,178 +0,0 @@ -""" -Implementation of compiled C callbacks (@cfunc). -""" - -from __future__ import print_function, division, absolute_import - -import ctypes - -from llvmlite import ir - -from . import utils, compiler -from .caching import NullCache, FunctionCache -from .dispatcher import _FunctionCompiler -from .targets import registry -from .typing import signature -from .typing.ctypes_utils import to_ctypes - - -class _CFuncCompiler(_FunctionCompiler): - - def _customize_flags(self, flags): - flags.set('no_cpython_wrapper', True) - # Disable compilation of the IR module, because we first want to - # add the cfunc wrapper. 
- flags.set('no_compile', True) - # Object mode is not currently supported in C callbacks - # (no reliable way to get the environment) - flags.set('enable_pyobject', False) - if flags.force_pyobject: - raise NotImplementedError("object mode not allowed in C callbacks") - return flags - - -class CFunc(object): - """ - A compiled C callback, as created by the @cfunc decorator. - """ - _targetdescr = registry.cpu_target - - def __init__(self, pyfunc, sig, locals, options, - pipeline_class=compiler.Pipeline): - args, return_type = sig - if return_type is None: - raise TypeError("C callback needs an explicit return type") - self.__name__ = pyfunc.__name__ - self.__qualname__ = getattr(pyfunc, '__qualname__', self.__name__) - self.__wrapped__ = pyfunc - - self._pyfunc = pyfunc - self._sig = signature(return_type, *args) - self._compiler = _CFuncCompiler(pyfunc, self._targetdescr, - options, locals, - pipeline_class=pipeline_class) - - self._wrapper_name = None - self._wrapper_address = None - self._cache = NullCache() - self._cache_hits = 0 - - def enable_caching(self): - self._cache = FunctionCache(self._pyfunc) - - def compile(self): - # Use cache and compiler in a critical section - with compiler.lock_compiler: - # Try to load from cache - cres = self._cache.load_overload(self._sig, self._targetdescr.target_context) - if cres is None: - cres = self._compile_uncached() - self._cache.save_overload(self._sig, cres) - else: - self._cache_hits += 1 - - self._library = cres.library - self._wrapper_name = cres.fndesc.llvm_cfunc_wrapper_name - self._wrapper_address = self._library.get_pointer_to_function(self._wrapper_name) - - def _compile_uncached(self): - sig = self._sig - - # Compile native function - cres = self._compiler.compile(sig.args, sig.return_type) - assert not cres.objectmode # disabled by compiler above - fndesc = cres.fndesc - - # Compile C wrapper - # Note we reuse the same library to allow inlining the Numba - # function inside the wrapper. 
- library = cres.library - module = library.create_ir_module(fndesc.unique_name) - context = cres.target_context - ll_argtypes = [context.get_value_type(ty) for ty in sig.args] - ll_return_type = context.get_value_type(sig.return_type) - - wrapty = ir.FunctionType(ll_return_type, ll_argtypes) - wrapfn = module.add_function(wrapty, fndesc.llvm_cfunc_wrapper_name) - builder = ir.IRBuilder(wrapfn.append_basic_block('entry')) - - self._build_c_wrapper(context, builder, cres, wrapfn.args) - - library.add_ir_module(module) - library.finalize() - - return cres - - def _build_c_wrapper(self, context, builder, cres, c_args): - sig = self._sig - pyapi = context.get_python_api(builder) - - fnty = context.call_conv.get_function_type(sig.return_type, sig.args) - fn = builder.module.add_function(fnty, cres.fndesc.llvm_func_name) - - # XXX no obvious way to freeze an environment - status, out = context.call_conv.call_function( - builder, fn, sig.return_type, sig.args, c_args) - - with builder.if_then(status.is_error, likely=False): - # If (and only if) an error occurred, acquire the GIL - # and use the interpreter to write out the exception. - gil_state = pyapi.gil_ensure() - context.call_conv.raise_error(builder, pyapi, status) - cstr = context.insert_const_string(builder.module, repr(self)) - strobj = pyapi.string_from_string(cstr) - pyapi.err_write_unraisable(strobj) - pyapi.decref(strobj) - pyapi.gil_release(gil_state) - - builder.ret(out) - - @property - def native_name(self): - """ - The process-wide symbol the C callback is exposed as. - """ - # Note from our point of view, the C callback is the wrapper around - # the native function. - return self._wrapper_name - - @property - def address(self): - """ - The address of the C callback. - """ - return self._wrapper_address - - @utils.cached_property - def cffi(self): - """ - A cffi function pointer representing the C callback. 
- """ - import cffi - ffi = cffi.FFI() - # cffi compares types by name, so using precise types would risk - # spurious mismatches (such as "int32_t" vs. "int"). - return ffi.cast("void *", self.address) - - @utils.cached_property - def ctypes(self): - """ - A ctypes function object representing the C callback. - """ - ctypes_args = [to_ctypes(ty) for ty in self._sig.args] - ctypes_restype = to_ctypes(self._sig.return_type) - functype = ctypes.CFUNCTYPE(ctypes_restype, *ctypes_args) - return functype(self.address) - - def inspect_llvm(self): - """ - Return the LLVM IR of the C callback definition. - """ - return self._library.get_llvm_str() - - @property - def cache_hits(self): - return self._cache_hits - - def __repr__(self): - return "" % (self.__qualname__,) diff --git a/numba/numba/cffi_support.py b/numba/numba/cffi_support.py deleted file mode 100644 index 732e15bcb..000000000 --- a/numba/numba/cffi_support.py +++ /dev/null @@ -1,6 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Alias to numba.typing.cffi_utils for backward compatibility -""" -from __future__ import print_function, division, absolute_import -from numba.typing.cffi_utils import * diff --git a/numba/numba/cgutils.py b/numba/numba/cgutils.py deleted file mode 100644 index 9bc90b4f8..000000000 --- a/numba/numba/cgutils.py +++ /dev/null @@ -1,1037 +0,0 @@ -""" -Generic helpers for LLVM code generation. -""" - -from __future__ import print_function, division, absolute_import - -import collections -from contextlib import contextmanager -import functools - -from llvmlite import ir - -from . 
import utils - - -bool_t = ir.IntType(1) -int8_t = ir.IntType(8) -int32_t = ir.IntType(32) -intp_t = ir.IntType(utils.MACHINE_BITS) -voidptr_t = int8_t.as_pointer() - -true_bit = bool_t(1) -false_bit = bool_t(0) -true_byte = int8_t(1) -false_byte = int8_t(0) - - -def as_bool_bit(builder, value): - return builder.icmp_unsigned('!=', value, value.type(0)) - - -def make_anonymous_struct(builder, values, struct_type=None): - """ - Create an anonymous struct containing the given LLVM *values*. - """ - if struct_type is None: - struct_type = ir.LiteralStructType([v.type for v in values]) - struct_val = struct_type(ir.Undefined) - for i, v in enumerate(values): - struct_val = builder.insert_value(struct_val, v, i) - return struct_val - - -def make_bytearray(buf): - """ - Make a byte array constant from *buf*. - """ - b = bytearray(buf) - n = len(b) - return ir.Constant(ir.ArrayType(ir.IntType(8), n), b) - - -_struct_proxy_cache = {} - -def create_struct_proxy(fe_type, kind='value'): - """ - Returns a specialized StructProxy subclass for the given fe_type. - """ - cache_key = (fe_type, kind) - res = _struct_proxy_cache.get(cache_key) - if res is None: - base = {'value': ValueStructProxy, - 'data': DataStructProxy, - }[kind] - clsname = base.__name__ + '_' + str(fe_type) - bases = (base,) - clsmembers = dict(_fe_type=fe_type) - res = type(clsname, bases, clsmembers) - - _struct_proxy_cache[cache_key] = res - return res - - -def copy_struct(dst, src, repl={}): - """ - Copy structure from *src* to *dst* with replacement from *repl*. - """ - repl = repl.copy() - # copy data from src or use those in repl - for k in src._datamodel._fields: - v = repl.pop(k, getattr(src, k)) - setattr(dst, k, v) - # use remaining key-values in repl - for k, v in repl.items(): - setattr(dst, k, v) - return dst - - -class _StructProxy(object): - """ - Creates a `Structure` like interface that is constructed with information - from DataModel instance. 
FE type must have a data model that is a - subclass of StructModel. - """ - # The following class members must be overridden by subclass - _fe_type = None - - def __init__(self, context, builder, value=None, ref=None): - from numba import datamodel # Avoid circular import - self._context = context - self._datamodel = self._context.data_model_manager[self._fe_type] - if not isinstance(self._datamodel, datamodel.StructModel): - raise TypeError("Not a structure model: {0}".format(self._datamodel)) - self._builder = builder - - self._be_type = self._get_be_type(self._datamodel) - assert not is_pointer(self._be_type) - - outer_ref, ref = self._make_refs(ref) - if ref.type.pointee != self._be_type: - raise AssertionError("bad ref type: expected %s, got %s" - % (self._be_type.as_pointer(), ref.type)) - - if value is not None: - if value.type != outer_ref.type.pointee: - raise AssertionError("bad value type: expected %s, got %s" - % (outer_ref.type.pointee, value.type)) - self._builder.store(value, outer_ref) - - self._value = ref - self._outer_ref = outer_ref - - def _make_refs(self, ref): - """ - Return an (outer ref, value ref) pair. By default, these are - the same pointers, but a derived class may override this. - """ - if ref is None: - ref = alloca_once(self._builder, self._be_type, zfill=True) - return ref, ref - - def _get_be_type(self, datamodel): - raise NotImplementedError - - def _cast_member_to_value(self, index, val): - raise NotImplementedError - - def _cast_member_from_value(self, index, val): - raise NotImplementedError - - def _get_ptr_by_index(self, index): - return gep_inbounds(self._builder, self._value, 0, index) - - def _get_ptr_by_name(self, attrname): - index = self._datamodel.get_field_position(attrname) - return self._get_ptr_by_index(index) - - def __getattr__(self, field): - """ - Load the LLVM value of the named *field*. 
- """ - if not field.startswith('_'): - return self[self._datamodel.get_field_position(field)] - else: - raise AttributeError(field) - - def __setattr__(self, field, value): - """ - Store the LLVM *value* into the named *field*. - """ - if field.startswith('_'): - return super(_StructProxy, self).__setattr__(field, value) - self[self._datamodel.get_field_position(field)] = value - - def __getitem__(self, index): - """ - Load the LLVM value of the field at *index*. - """ - member_val = self._builder.load(self._get_ptr_by_index(index)) - return self._cast_member_to_value(index, member_val) - - def __setitem__(self, index, value): - """ - Store the LLVM *value* into the field at *index*. - """ - ptr = self._get_ptr_by_index(index) - value = self._cast_member_from_value(index, value) - if value.type != ptr.type.pointee: - if (is_pointer(value.type) and is_pointer(ptr.type.pointee) - and value.type.pointee == ptr.type.pointee.pointee): - # Differ by address-space only - # Auto coerce it - value = self._context.addrspacecast(self._builder, - value, - ptr.type.pointee.addrspace) - else: - raise TypeError("Invalid store of {value.type} to " - "{ptr.type.pointee} in " - "{self._datamodel} " - "(trying to write member #{index})" - .format(value=value, ptr=ptr, self=self, - index=index)) - self._builder.store(value, ptr) - - def __len__(self): - """ - Return the number of fields. - """ - return self._datamodel.field_count - - def _getpointer(self): - """ - Return the LLVM pointer to the underlying structure. - """ - return self._outer_ref - - def _getvalue(self): - """ - Load and return the value of the underlying LLVM structure. - """ - return self._builder.load(self._outer_ref) - - def _setvalue(self, value): - """ - Store the value in this structure. 
- """ - assert not is_pointer(value.type) - assert value.type == self._be_type, (value.type, self._be_type) - self._builder.store(value, self._value) - - -class ValueStructProxy(_StructProxy): - """ - Create a StructProxy suitable for accessing regular values - (e.g. LLVM values or alloca slots). - """ - def _get_be_type(self, datamodel): - return datamodel.get_value_type() - - def _cast_member_to_value(self, index, val): - return val - - def _cast_member_from_value(self, index, val): - return val - - -class DataStructProxy(_StructProxy): - """ - Create a StructProxy suitable for accessing data persisted in memory. - """ - def _get_be_type(self, datamodel): - return datamodel.get_data_type() - - def _cast_member_to_value(self, index, val): - model = self._datamodel.get_model(index) - return model.from_data(self._builder, val) - - def _cast_member_from_value(self, index, val): - model = self._datamodel.get_model(index) - return model.as_data(self._builder, val) - - -class Structure(object): - """ - A high-level object wrapping a alloca'ed LLVM structure, including - named fields and attribute access. - """ - - # XXX Should this warrant several separate constructors? 
- def __init__(self, context, builder, value=None, ref=None, cast_ref=False): - self._type = context.get_struct_type(self) - self._context = context - self._builder = builder - if ref is None: - self._value = alloca_once(builder, self._type) - if value is not None: - assert not is_pointer(value.type) - assert value.type == self._type, (value.type, self._type) - builder.store(value, self._value) - else: - assert value is None - assert is_pointer(ref.type) - if self._type != ref.type.pointee: - if cast_ref: - ref = builder.bitcast(ref, self._type.as_pointer()) - else: - raise TypeError( - "mismatching pointer type: got %s, expected %s" - % (ref.type.pointee, self._type)) - self._value = ref - - self._namemap = {} - self._fdmap = [] - self._typemap = [] - base = int32_t(0) - for i, (k, tp) in enumerate(self._fields): - self._namemap[k] = i - self._fdmap.append((base, int32_t(i))) - self._typemap.append(tp) - - def _get_ptr_by_index(self, index): - ptr = self._builder.gep(self._value, self._fdmap[index], inbounds=True) - return ptr - - def _get_ptr_by_name(self, attrname): - return self._get_ptr_by_index(self._namemap[attrname]) - - def __getattr__(self, field): - """ - Load the LLVM value of the named *field*. - """ - if not field.startswith('_'): - return self[self._namemap[field]] - else: - raise AttributeError(field) - - def __setattr__(self, field, value): - """ - Store the LLVM *value* into the named *field*. - """ - if field.startswith('_'): - return super(Structure, self).__setattr__(field, value) - self[self._namemap[field]] = value - - def __getitem__(self, index): - """ - Load the LLVM value of the field at *index*. - """ - - return self._builder.load(self._get_ptr_by_index(index)) - - def __setitem__(self, index, value): - """ - Store the LLVM *value* into the field at *index*. - """ - ptr = self._get_ptr_by_index(index) - if ptr.type.pointee != value.type: - fmt = "Type mismatch: __setitem__(%d, ...) 
expected %r but got %r" - raise AssertionError(fmt % (index, - str(ptr.type.pointee), - str(value.type))) - self._builder.store(value, ptr) - - def __len__(self): - """ - Return the number of fields. - """ - return len(self._namemap) - - def _getpointer(self): - """ - Return the LLVM pointer to the underlying structure. - """ - return self._value - - def _getvalue(self): - """ - Load and return the value of the underlying LLVM structure. - """ - return self._builder.load(self._value) - - def _setvalue(self, value): - """Store the value in this structure""" - assert not is_pointer(value.type) - assert value.type == self._type, (value.type, self._type) - self._builder.store(value, self._value) - - # __iter__ is derived by Python from __len__ and __getitem__ - - -def alloca_once(builder, ty, size=None, name='', zfill=False): - """Allocate stack memory at the entry block of the current function - pointed by ``builder`` withe llvm type ``ty``. The optional ``size`` arg - set the number of element to allocate. The default is 1. The optional - ``name`` arg set the symbol name inside the llvm IR for debugging. - If ``zfill`` is set, also filling zeros to the memory. - """ - if isinstance(size, utils.INT_TYPES): - size = ir.Constant(intp_t, size) - with builder.goto_entry_block(): - ptr = builder.alloca(ty, size=size, name=name) - if zfill: - builder.store(ty(None), ptr) - return ptr - - -def alloca_once_value(builder, value, name=''): - """ - Like alloca_once(), but passing a *value* instead of a type. The - type is inferred and the allocated slot is also initialized with the - given value. - """ - storage = alloca_once(builder, value.type) - builder.store(value, storage) - return storage - - -def insert_pure_function(module, fnty, name): - """ - Insert a pure function (in the functional programming sense) in the - given module. 
- """ - fn = module.get_or_insert_function(fnty, name=name) - fn.attributes.add("readonly") - fn.attributes.add("nounwind") - return fn - - -def terminate(builder, bbend): - bb = builder.basic_block - if bb.terminator is None: - builder.branch(bbend) - - -def get_null_value(ltype): - return ltype(None) - - -def is_null(builder, val): - null = get_null_value(val.type) - return builder.icmp_unsigned('==', null, val) - - -def is_not_null(builder, val): - null = get_null_value(val.type) - return builder.icmp_unsigned('!=', null, val) - - -def if_unlikely(builder, pred): - return builder.if_then(pred, likely=False) - - -def if_likely(builder, pred): - return builder.if_then(pred, likely=True) - - -def ifnot(builder, pred): - return builder.if_then(builder.not_(pred)) - - -def increment_index(builder, val): - """ - Increment an index *val*. - """ - one = val.type(1) - # We pass the "nsw" flag in the hope that LLVM understands the index - # never changes sign. Unfortunately this doesn't always work - # (e.g. ndindex()). - return builder.add(val, one, flags=['nsw']) - - -Loop = collections.namedtuple('Loop', ('index', 'do_break')) - -@contextmanager -def for_range(builder, count, start=None, intp=None): - """ - Generate LLVM IR for a for-loop in [start, count). - *start* is equal to 0 by default. 
- - Yields a Loop namedtuple with the following members: - - `index` is the loop index's value - - `do_break` is a no-argument callable to break out of the loop - """ - if intp is None: - intp = count.type - if start is None: - start = intp(0) - stop = count - - bbcond = builder.append_basic_block("for.cond") - bbbody = builder.append_basic_block("for.body") - bbend = builder.append_basic_block("for.end") - - def do_break(): - builder.branch(bbend) - - bbstart = builder.basic_block - builder.branch(bbcond) - - with builder.goto_block(bbcond): - index = builder.phi(intp, name="loop.index") - pred = builder.icmp_signed('<', index, stop) - builder.cbranch(pred, bbbody, bbend) - - with builder.goto_block(bbbody): - yield Loop(index, do_break) - # Update bbbody as a new basic block may have been activated - bbbody = builder.basic_block - incr = increment_index(builder, index) - terminate(builder, bbcond) - - index.add_incoming(start, bbstart) - index.add_incoming(incr, bbbody) - - builder.position_at_end(bbend) - - -@contextmanager -def for_range_slice(builder, start, stop, step, intp=None, inc=True): - """ - Generate LLVM IR for a for-loop based on a slice. Yields a - (index, count) tuple where `index` is the slice index's value - inside the loop, and `count` the iteration count. - - Parameters - ------------- - builder : object - Builder object - start : int - The beginning value of the slice - stop : int - The end value of the slice - step : int - The step value of the slice - intp : - The data type - inc : boolean, optional - Signals whether the step is positive (True) or negative (False). 
- - Returns - ----------- - None - """ - if intp is None: - intp = start.type - - bbcond = builder.append_basic_block("for.cond") - bbbody = builder.append_basic_block("for.body") - bbend = builder.append_basic_block("for.end") - bbstart = builder.basic_block - builder.branch(bbcond) - - with builder.goto_block(bbcond): - index = builder.phi(intp, name="loop.index") - count = builder.phi(intp, name="loop.count") - if (inc): - pred = builder.icmp_signed('<', index, stop) - else: - pred = builder.icmp_signed('>', index, stop) - builder.cbranch(pred, bbbody, bbend) - - with builder.goto_block(bbbody): - yield index, count - bbbody = builder.basic_block - incr = builder.add(index, step) - next_count = increment_index(builder, count) - terminate(builder, bbcond) - - index.add_incoming(start, bbstart) - index.add_incoming(incr, bbbody) - count.add_incoming(ir.Constant(intp, 0), bbstart) - count.add_incoming(next_count, bbbody) - builder.position_at_end(bbend) - - -@contextmanager -def for_range_slice_generic(builder, start, stop, step): - """ - A helper wrapper for for_range_slice(). This is a context manager which - yields two for_range_slice()-alike context managers, the first for - the positive step case, the second for the negative step case. - - Use: - with for_range_slice_generic(...) as (pos_range, neg_range): - with pos_range as (idx, count): - ... - with neg_range as (idx, count): - ... 
- """ - intp = start.type - is_pos_step = builder.icmp_signed('>=', step, ir.Constant(intp, 0)) - - pos_for_range = for_range_slice(builder, start, stop, step, intp, inc=True) - neg_for_range = for_range_slice(builder, start, stop, step, intp, inc=False) - - @contextmanager - def cm_cond(cond, inner_cm): - with cond: - with inner_cm as value: - yield value - - with builder.if_else(is_pos_step, likely=True) as (then, otherwise): - yield cm_cond(then, pos_for_range), cm_cond(otherwise, neg_for_range) - - -@contextmanager -def loop_nest(builder, shape, intp, order='C'): - """ - Generate a loop nest walking a N-dimensional array. - Yields a tuple of N indices for use in the inner loop body, - iterating over the *shape* space. - - If *order* is 'C' (the default), indices are incremented inside-out - (i.e. (0,0), (0,1), (0,2), (1,0) etc.). - If *order* is 'F', they are incremented outside-in - (i.e. (0,0), (1,0), (2,0), (0,1) etc.). - This has performance implications when walking an array as it impacts - the spatial locality of memory accesses. - """ - assert order in 'CF' - if not shape: - # 0-d array - yield () - else: - if order == 'F': - _swap = lambda x: x[::-1] - else: - _swap = lambda x: x - with _loop_nest(builder, _swap(shape), intp) as indices: - assert len(indices) == len(shape) - yield _swap(indices) - - -@contextmanager -def _loop_nest(builder, shape, intp): - with for_range(builder, shape[0], intp=intp) as loop: - if len(shape) > 1: - with _loop_nest(builder, shape[1:], intp) as indices: - yield (loop.index,) + indices - else: - yield (loop.index,) - - -def pack_array(builder, values, ty=None): - """ - Pack a sequence of values in a LLVM array. *ty* should be given - if the array may be empty, in which case the type can't be inferred - from the values. 
- """ - n = len(values) - if ty is None: - ty = values[0].type - ary = ir.ArrayType(ty, n)(ir.Undefined) - for i, v in enumerate(values): - ary = builder.insert_value(ary, v, i) - return ary - - -def unpack_tuple(builder, tup, count=None): - """ - Unpack an array or structure of values, return a Python tuple. - """ - if count is None: - # Assuming *tup* is an aggregate - count = len(tup.type.elements) - vals = [builder.extract_value(tup, i) - for i in range(count)] - return vals - - -def get_item_pointer(builder, aryty, ary, inds, wraparound=False): - shapes = unpack_tuple(builder, ary.shape, count=aryty.ndim) - strides = unpack_tuple(builder, ary.strides, count=aryty.ndim) - return get_item_pointer2(builder, data=ary.data, shape=shapes, - strides=strides, layout=aryty.layout, inds=inds, - wraparound=wraparound) - - -def get_item_pointer2(builder, data, shape, strides, layout, inds, - wraparound=False): - if wraparound: - # Wraparound - indices = [] - for ind, dimlen in zip(inds, shape): - negative = builder.icmp_signed('<', ind, ind.type(0)) - wrapped = builder.add(dimlen, ind) - selected = builder.select(negative, wrapped, ind) - indices.append(selected) - else: - indices = inds - if not indices: - # Indexing with empty tuple - return builder.gep(data, [int32_t(0)]) - intp = indices[0].type - # Indexing code - if layout in 'CF': - steps = [] - # Compute steps for each dimension - if layout == 'C': - # C contiguous - for i in range(len(shape)): - last = intp(1) - for j in shape[i + 1:]: - last = builder.mul(last, j) - steps.append(last) - elif layout == 'F': - # F contiguous - for i in range(len(shape)): - last = intp(1) - for j in shape[:i]: - last = builder.mul(last, j) - steps.append(last) - else: - raise Exception("unreachable") - - # Compute index - loc = intp(0) - for i, s in zip(indices, steps): - tmp = builder.mul(i, s) - loc = builder.add(loc, tmp) - ptr = builder.gep(data, [loc]) - return ptr - else: - # Any layout - dimoffs = [builder.mul(s, i) for s, i 
in zip(strides, indices)] - offset = functools.reduce(builder.add, dimoffs) - return pointer_add(builder, data, offset) - - -def _scalar_pred_against_zero(builder, value, fpred, icond): - nullval = value.type(0) - if isinstance(value.type, (ir.FloatType, ir.DoubleType)): - isnull = fpred(value, nullval) - elif isinstance(value.type, ir.IntType): - isnull = builder.icmp_signed(icond, value, nullval) - else: - raise TypeError("unexpected value type %s" % (value.type,)) - return isnull - - -def is_scalar_zero(builder, value): - """ - Return a predicate representing whether *value* is equal to zero. - """ - return _scalar_pred_against_zero( - builder, value, functools.partial(builder.fcmp_ordered, '=='), '==') - - -def is_not_scalar_zero(builder, value): - """ - Return a predicate representin whether a *value* is not equal to zero. - (not exactly "not is_scalar_zero" because of nans) - """ - return _scalar_pred_against_zero( - builder, value, functools.partial(builder.fcmp_unordered, '!='), '!=') - - -def is_scalar_zero_or_nan(builder, value): - """ - Return a predicate representing whether *value* is equal to either zero - or NaN. - """ - return _scalar_pred_against_zero( - builder, value, functools.partial(builder.fcmp_unordered, '=='), '==') - -is_true = is_not_scalar_zero -is_false = is_scalar_zero - - -def is_scalar_neg(builder, value): - """ - Is *value* negative? Assumes *value* is signed. - """ - return _scalar_pred_against_zero( - builder, value, functools.partial(builder.fcmp_ordered, '<'), '<') - - -def guard_null(context, builder, value, exc_tuple): - """ - Guard against *value* being null or zero. - *exc_tuple* should be a (exception type, arguments...) tuple. 
- """ - with builder.if_then(is_scalar_zero(builder, value), likely=False): - exc = exc_tuple[0] - exc_args = exc_tuple[1:] or None - context.call_conv.return_user_exc(builder, exc, exc_args) - -def guard_memory_error(context, builder, pointer, msg=None): - """ - Guard against *pointer* being NULL (and raise a MemoryError). - """ - assert isinstance(pointer.type, ir.PointerType), pointer.type - exc_args = (msg,) if msg else () - with builder.if_then(is_null(builder, pointer), likely=False): - context.call_conv.return_user_exc(builder, MemoryError, exc_args) - -@contextmanager -def if_zero(builder, value, likely=False): - """ - Execute the given block if the scalar value is zero. - """ - with builder.if_then(is_scalar_zero(builder, value), likely=likely): - yield - - -guard_zero = guard_null - - -def is_pointer(ltyp): - """ - Whether the LLVM type *typ* is a struct type. - """ - return isinstance(ltyp, ir.PointerType) - - -def get_record_member(builder, record, offset, typ): - pval = gep_inbounds(builder, record, 0, offset) - assert not is_pointer(pval.type.pointee) - return builder.bitcast(pval, typ.as_pointer()) - - -def is_neg_int(builder, val): - return builder.icmp_signed('<', val, val.type(0)) - - -def gep_inbounds(builder, ptr, *inds, **kws): - """ - Same as *gep*, but add the `inbounds` keyword. - """ - return gep(builder, ptr, *inds, inbounds=True, **kws) - - -def gep(builder, ptr, *inds, **kws): - """ - Emit a getelementptr instruction for the given pointer and indices. - The indices can be LLVM values or Python int constants. 
- """ - name = kws.pop('name', '') - inbounds = kws.pop('inbounds', False) - assert not kws - idx = [] - for i in inds: - if isinstance(i, utils.INT_TYPES): - # NOTE: llvm only accepts int32 inside structs, not int64 - ind = int32_t(i) - else: - ind = i - idx.append(ind) - return builder.gep(ptr, idx, name=name, inbounds=inbounds) - - -def pointer_add(builder, ptr, offset, return_type=None): - """ - Add an integral *offset* to pointer *ptr*, and return a pointer - of *return_type* (or, if omitted, the same type as *ptr*). - - Note the computation is done in bytes, and ignores the width of - the pointed item type. - """ - intptr = builder.ptrtoint(ptr, intp_t) - if isinstance(offset, utils.INT_TYPES): - offset = intp_t(offset) - intptr = builder.add(intptr, offset) - return builder.inttoptr(intptr, return_type or ptr.type) - - -def memset(builder, ptr, size, value): - """ - Fill *size* bytes starting from *ptr* with *value*. - """ - sizety = size.type - memset = "llvm.memset.p0i8.i%d" % (sizety.width) - fn = builder.module.declare_intrinsic('llvm.memset', (voidptr_t, size.type)) - ptr = builder.bitcast(ptr, voidptr_t) - if isinstance(value, int): - value = int8_t(value) - builder.call(fn, [ptr, value, size, int32_t(0), bool_t(0)]) - - -def global_constant(builder_or_module, name, value, linkage='internal'): - """ - Get or create a (LLVM module-)global constant with *name* or *value*. - """ - if isinstance(builder_or_module, ir.Module): - module = builder_or_module - else: - module = builder_or_module.module - data = module.add_global_variable(value.type, name=name) - data.linkage = linkage - data.global_constant = True - data.initializer = value - return data - - -def divmod_by_constant(builder, val, divisor): - """ - Compute the (quotient, remainder) of *val* divided by the constant - positive *divisor*. The semantics reflects those of Python integer - floor division, rather than C's / LLVM's signed division and modulo. - The difference lies with a negative *val*. 
- """ - assert divisor > 0 - divisor = val.type(divisor) - one = val.type(1) - - quot = alloca_once(builder, val.type) - - with builder.if_else(is_neg_int(builder, val)) as (if_neg, if_pos): - with if_pos: - # quot = val / divisor - quot_val = builder.sdiv(val, divisor) - builder.store(quot_val, quot) - with if_neg: - # quot = -1 + (val + 1) / divisor - val_plus_one = builder.add(val, one) - quot_val = builder.sdiv(val_plus_one, divisor) - builder.store(builder.sub(quot_val, one), quot) - - # rem = val - quot * divisor - # (should be slightly faster than a separate modulo operation) - quot_val = builder.load(quot) - rem_val = builder.sub(val, builder.mul(quot_val, divisor)) - return quot_val, rem_val - - -def cbranch_or_continue(builder, cond, bbtrue): - """ - Branch conditionally or continue. - - Note: a new block is created and builder is moved to the end of the new - block. - """ - bbcont = builder.append_basic_block('.continue') - builder.cbranch(cond, bbtrue, bbcont) - builder.position_at_end(bbcont) - return bbcont - - -def memcpy(builder, dst, src, count): - """ - Emit a memcpy to the builder. - - Copies each element of dst to src. Unlike the C equivalent, each element - can be any LLVM type. - - Assumes - ------- - * dst.type == src.type - * count is positive - """ - # Note this does seem to be optimized as a raw memcpy() by LLVM - # whenever possible... 
- assert dst.type == src.type - with for_range(builder, count, intp=count.type) as loop: - out_ptr = builder.gep(dst, [loop.index]) - in_ptr = builder.gep(src, [loop.index]) - builder.store(builder.load(in_ptr), out_ptr) - - -def _raw_memcpy(builder, func_name, dst, src, count, itemsize, align): - size_t = count.type - if isinstance(itemsize, utils.INT_TYPES): - itemsize = ir.Constant(size_t, itemsize) - - memcpy = builder.module.declare_intrinsic(func_name, - [voidptr_t, voidptr_t, size_t]) - align = ir.Constant(ir.IntType(32), align) - is_volatile = false_bit - builder.call(memcpy, [builder.bitcast(dst, voidptr_t), - builder.bitcast(src, voidptr_t), - builder.mul(count, itemsize), - align, - is_volatile]) - - -def raw_memcpy(builder, dst, src, count, itemsize, align=1): - """ - Emit a raw memcpy() call for `count` items of size `itemsize` - from `src` to `dest`. - """ - return _raw_memcpy(builder, 'llvm.memcpy', dst, src, count, itemsize, align) - -def raw_memmove(builder, dst, src, count, itemsize, align=1): - """ - Emit a raw memmove() call for `count` items of size `itemsize` - from `src` to `dest`. - """ - return _raw_memcpy(builder, 'llvm.memmove', dst, src, count, itemsize, align) - - -def muladd_with_overflow(builder, a, b, c): - """ - Compute (a * b + c) and return a (result, overflow bit) pair. - The operands must be signed integers. - """ - p = builder.smul_with_overflow(a, b) - prod = builder.extract_value(p, 0) - prod_ovf = builder.extract_value(p, 1) - s = builder.sadd_with_overflow(prod, c) - res = builder.extract_value(s, 0) - ovf = builder.or_(prod_ovf, builder.extract_value(s, 1)) - return res, ovf - - -def printf(builder, format, *args): - """ - Calls printf(). - Argument `format` is expected to be a Python string. - Values to be printed are listed in `args`. - - Note: There is no checking to ensure there is correct number of values - in `args` and there type matches the declaration in the format string. 
- """ - assert isinstance(format, str) - mod = builder.module - # Make global constant for format string - cstring = voidptr_t - fmt_bytes = make_bytearray((format + '\00').encode('ascii')) - global_fmt = global_constant(mod, "printf_format", fmt_bytes) - fnty = ir.FunctionType(int32_t, [cstring], var_arg=True) - # Insert printf() - try: - fn = mod.get_global('printf') - except KeyError: - fn = ir.Function(mod, fnty, name="printf") - # Call - ptr_fmt = builder.bitcast(global_fmt, cstring) - return builder.call(fn, [ptr_fmt] + list(args)) - - -if utils.PY3: - def normalize_ir_text(text): - """ - Normalize the given string to latin1 compatible encoding that is suitable - for use in LLVM IR. - """ - # Just re-encoding to latin1 is enough - return text.encode('utf8').decode('latin1') -else: - def normalize_ir_text(text): - """ - No-op for python2. Assume there won't be unicode names. - """ - return text - - -def hexdump(builder, ptr, nbytes): - """Debug print the memory region in *ptr* to *ptr + nbytes* - as hex. 
- """ - bytes_per_line = 16 - nbytes = builder.zext(nbytes, intp_t) - printf(builder, "hexdump p=%p n=%zu", - ptr, nbytes) - byte_t = ir.IntType(8) - ptr = builder.bitcast(ptr, byte_t.as_pointer()) - # Loop to print the bytes in *ptr* as hex - with for_range(builder, nbytes) as idx: - div_by = builder.urem(idx.index, intp_t(bytes_per_line)) - do_new_line = builder.icmp_unsigned("==", div_by, intp_t(0)) - with builder.if_then(do_new_line): - printf(builder, "\n") - - offset = builder.gep(ptr, [idx.index]) - val = builder.load(offset) - printf(builder, " %02x", val) - printf(builder, "\n") diff --git a/numba/numba/compiler.py b/numba/numba/compiler.py deleted file mode 100644 index a8a7f79ed..000000000 --- a/numba/numba/compiler.py +++ /dev/null @@ -1,1041 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from contextlib import contextmanager -from collections import namedtuple, defaultdict -import sys -import warnings -import traceback -import threading -from .tracing import event - -from numba import (bytecode, interpreter, funcdesc, postproc, - typing, typeinfer, lowering, objmode, utils, config, - errors, types, ir, rewrites, transforms) -from numba.targets import cpu, callconv -from numba.annotations import type_annotations -from numba.parfor import PreParforPass, ParforPass, Parfor -from numba.inline_closurecall import InlineClosureCallPass -from numba.errors import CompilerError -from numba.ir_utils import raise_on_unsupported_feature - -# terminal color markup -_termcolor = errors.termcolor() - -# Lock for the preventing multiple compiler execution -lock_compiler = threading.RLock() - - -class Flags(utils.ConfigOptions): - # These options are all false by default, but the defaults are - # different with the @jit decorator (see targets.options.TargetOptions). 
- - OPTIONS = { - # Enable loop-lifting - 'enable_looplift': False, - # Enable pyobject mode (in general) - 'enable_pyobject': False, - # Enable pyobject mode inside lifted loops - 'enable_pyobject_looplift': False, - # Force pyobject mode inside the whole function - 'force_pyobject': False, - # Release GIL inside the native function - 'release_gil': False, - 'no_compile': False, - 'debuginfo': False, - 'boundcheck': False, - 'forceinline': False, - 'no_cpython_wrapper': False, - # Enable automatic parallel optimization, can be fine-tuned by taking - # a dictionary of sub-options instead of a boolean, see parfor.py for - # detail. - 'auto_parallel': cpu.ParallelOptions(False), - 'nrt': False, - 'no_rewrites': False, - 'error_model': 'python', - 'fastmath': False, - 'noalias': False, - } - - -DEFAULT_FLAGS = Flags() -DEFAULT_FLAGS.set('nrt') - - -CR_FIELDS = ["typing_context", - "target_context", - "entry_point", - "typing_error", - "type_annotation", - "signature", - "objectmode", - "lifted", - "fndesc", - "interpmode", - "library", - "call_helper", - "environment", - "has_dynamic_globals"] - - -class CompileResult(namedtuple("_CompileResult", CR_FIELDS)): - __slots__ = () - - def _reduce(self): - """ - Reduce a CompileResult to picklable components. 
- """ - libdata = self.library.serialize_using_object_code() - # Make it (un)picklable efficiently - typeann = str(self.type_annotation) - fndesc = self.fndesc - # Those don't need to be pickled and may fail - fndesc.typemap = fndesc.calltypes = None - - return (libdata, self.fndesc, self.environment, self.signature, - self.objectmode, self.interpmode, self.lifted, typeann) - - @classmethod - def _rebuild(cls, target_context, libdata, fndesc, env, - signature, objectmode, interpmode, lifted, typeann): - library = target_context.codegen().unserialize_library(libdata) - cfunc = target_context.get_executable(library, fndesc, env) - cr = cls(target_context=target_context, - typing_context=target_context.typing_context, - library=library, - environment=env, - entry_point=cfunc, - fndesc=fndesc, - type_annotation=typeann, - signature=signature, - objectmode=objectmode, - interpmode=interpmode, - lifted=lifted, - typing_error=None, - call_helper=None, - has_dynamic_globals=False, # by definition - ) - return cr - - -_LowerResult = namedtuple("_LowerResult", [ - "fndesc", - "call_helper", - "cfunc", - "env", - "has_dynamic_globals", -]) - - -def compile_result(**kws): - keys = set(kws.keys()) - fieldset = set(CR_FIELDS) - badnames = keys - fieldset - if badnames: - raise NameError(*badnames) - missing = fieldset - keys - for k in missing: - kws[k] = None - # Avoid keeping alive traceback variables - if sys.version_info >= (3,): - err = kws['typing_error'] - if err is not None: - kws['typing_error'] = err.with_traceback(None) - return CompileResult(**kws) - - -def compile_isolated(func, args, return_type=None, flags=DEFAULT_FLAGS, - locals={}): - """ - Compile the function in an isolated environment (typing and target - context). - Good for testing. 
- """ - from .targets.registry import cpu_target - typingctx = typing.Context() - targetctx = cpu.CPUContext(typingctx) - # Register the contexts in case for nested @jit or @overload calls - with cpu_target.nested_context(typingctx, targetctx): - return compile_extra(typingctx, targetctx, func, args, return_type, - flags, locals) - - -def run_frontend(func): - """ - Run the compiler frontend over the given Python function, and return - the function's canonical Numba IR. - """ - # XXX make this a dedicated Pipeline? - func_id = bytecode.FunctionIdentity.from_function(func) - interp = interpreter.Interpreter(func_id) - bc = bytecode.ByteCode(func_id=func_id) - func_ir = interp.interpret(bc) - post_proc = postproc.PostProcessor(func_ir) - post_proc.run() - return func_ir - - -class _CompileStatus(object): - """ - Used like a C record - """ - __slots__ = ['fail_reason', 'can_fallback', 'can_giveup'] - - def __init__(self, can_fallback, can_giveup): - self.fail_reason = None - self.can_fallback = can_fallback - self.can_giveup = can_giveup - - def __repr__(self): - vals = [] - for k in self.__slots__: - vals.append("{k}={v}".format(k=k, v=getattr(self, k))) - return ', '.join(vals) - - -class _EarlyPipelineCompletion(Exception): - def __init__(self, result): - self.result = result - - -class _PipelineManager(object): - def __init__(self): - self.pipeline_order = [] - self.pipeline_stages = {} - self._finalized = False - - def create_pipeline(self, pipeline_name): - assert not self._finalized, "Pipelines can no longer be added" - self.pipeline_order.append(pipeline_name) - self.pipeline_stages[pipeline_name] = [] - self.current = pipeline_name - - def add_stage(self, stage_function, stage_description): - assert not self._finalized, "Stages can no longer be added." 
- current_pipeline_name = self.pipeline_order[-1] - func_desc_tuple = (stage_function, stage_description) - self.pipeline_stages[current_pipeline_name].append(func_desc_tuple) - - def finalize(self): - self._finalized = True - - def _patch_error(self, desc, exc): - """ - Patches the error to show the stage that it arose in. - """ - newmsg = "{desc}\n{exc}".format(desc=desc, exc=exc) - - # For python2, attach the traceback of the previous exception. - if not utils.IS_PY3 and config.FULL_TRACEBACKS: - # strip the new message to just print the error string and not - # the marked up source etc (this is handled already). - stripped = _termcolor.errmsg(newmsg.split('\n')[1]) - fmt = "Caused By:\n{tb}\n{newmsg}" - newmsg = fmt.format(tb=traceback.format_exc(), newmsg=stripped) - - exc.args = (newmsg,) - return exc - - def run(self, status): - assert self._finalized, "PM must be finalized before run()" - for pipeline_name in self.pipeline_order: - event(pipeline_name) - is_final_pipeline = pipeline_name == self.pipeline_order[-1] - for stage, stage_name in self.pipeline_stages[pipeline_name]: - try: - event(stage_name) - stage() - except _EarlyPipelineCompletion as e: - return e.result - except BaseException as e: - msg = "Failed at %s (%s)" % (pipeline_name, stage_name) - patched_exception = self._patch_error(msg, e) - # No more fallback pipelines? 
- if is_final_pipeline: - raise patched_exception - # Go to next fallback pipeline - else: - status.fail_reason = patched_exception - break - else: - return None - - # TODO save all error information - raise CompilerError("All pipelines have failed") - - -class BasePipeline(object): - """ - Stores and manages states for the compiler pipeline - """ - def __init__(self, typingctx, targetctx, library, args, return_type, flags, - locals): - # Make sure the environment is reloaded - config.reload_config() - typingctx.refresh() - targetctx.refresh() - - self.typingctx = typingctx - self.targetctx = _make_subtarget(targetctx, flags) - self.library = library - self.args = args - self.return_type = return_type - self.flags = flags - self.locals = locals - - # Results of various steps of the compilation pipeline - self.bc = None - self.func_id = None - self.func_ir = None - self.func_ir_original = None # used for fallback - self.lifted = None - self.lifted_from = None - self.typemap = None - self.calltypes = None - self.type_annotation = None - - self.status = _CompileStatus( - can_fallback=self.flags.enable_pyobject, - can_giveup=config.COMPATIBILITY_MODE - ) - - @contextmanager - def fallback_context(self, msg): - """ - Wraps code that would signal a fallback to object mode - """ - try: - yield - except BaseException as e: - if not self.status.can_fallback: - raise - else: - if utils.PYVERSION >= (3,): - # Clear all references attached to the traceback - e = e.with_traceback(None) - warnings.warn_explicit('%s: %s' % (msg, e), - errors.NumbaWarning, - self.func_id.filename, - self.func_id.firstlineno) - - raise - - @contextmanager - def giveup_context(self, msg): - """ - Wraps code that would signal a fallback to interpreter mode - """ - try: - yield - except BaseException as e: - if not self.status.can_giveup: - raise - else: - if utils.PYVERSION >= (3,): - # Clear all references attached to the traceback - e = e.with_traceback(None) - warnings.warn_explicit('%s: %s' % 
(msg, e), - errors.NumbaWarning, - self.func_id.filename, - self.func_id.firstlineno) - - raise - - def extract_bytecode(self, func_id): - """ - Extract bytecode from function - """ - bc = bytecode.ByteCode(func_id) - if config.DUMP_BYTECODE: - print(bc.dump()) - - return bc - - def compile_extra(self, func): - self.func_id = bytecode.FunctionIdentity.from_function(func) - - try: - bc = self.extract_bytecode(self.func_id) - except BaseException as e: - if self.status.can_giveup: - self.stage_compile_interp_mode() - return self.cr - else: - raise e - - self.bc = bc - self.lifted = () - self.lifted_from = None - return self._compile_bytecode() - - def compile_ir(self, func_ir, lifted=(), lifted_from=None): - self.func_id = func_ir.func_id - self.lifted = lifted - self.lifted_from = lifted_from - - self._set_and_check_ir(func_ir) - return self._compile_ir() - - def stage_analyze_bytecode(self): - """ - Analyze bytecode and translating to Numba IR - """ - func_ir = translate_stage(self.func_id, self.bc) - self._set_and_check_ir(func_ir) - - def _set_and_check_ir(self, func_ir): - self.func_ir = func_ir - self.nargs = self.func_ir.arg_count - if not self.args and self.flags.force_pyobject: - # Allow an empty argument types specification when object mode - # is explicitly requested. 
- self.args = (types.pyobject,) * self.nargs - elif len(self.args) != self.nargs: - raise TypeError("Signature mismatch: %d argument types given, " - "but function takes %d arguments" - % (len(self.args), self.nargs)) - - def stage_process_ir(self): - ir_processing_stage(self.func_ir) - - def stage_preserve_ir(self): - self.func_ir_original = self.func_ir.copy() - - def frontend_looplift(self): - """ - Loop lifting analysis and transformation - """ - loop_flags = self.flags.copy() - outer_flags = self.flags.copy() - # Do not recursively loop lift - outer_flags.unset('enable_looplift') - loop_flags.unset('enable_looplift') - if not self.flags.enable_pyobject_looplift: - loop_flags.unset('enable_pyobject') - - main, loops = transforms.loop_lifting(self.func_ir, - typingctx=self.typingctx, - targetctx=self.targetctx, - locals=self.locals, - flags=loop_flags) - if loops: - # Some loops were extracted - if config.DEBUG_FRONTEND or config.DEBUG: - for loop in loops: - print("Lifting loop", loop.get_source_location()) - - cres = compile_ir(self.typingctx, self.targetctx, main, - self.args, self.return_type, - outer_flags, self.locals, - lifted=tuple(loops), lifted_from=None) - return cres - - def stage_objectmode_frontend(self): - """ - Front-end: Analyze bytecode, generate Numba IR, infer types - """ - self.func_ir = self.func_ir_original or self.func_ir - if self.flags.enable_looplift: - assert not self.lifted - cres = self.frontend_looplift() - if cres is not None: - raise _EarlyPipelineCompletion(cres) - - # Fallback typing: everything is a python object - self.typemap = defaultdict(lambda: types.pyobject) - self.calltypes = defaultdict(lambda: types.pyobject) - self.return_type = types.pyobject - - def stage_nopython_frontend(self): - """ - Type inference and legalization - """ - with self.fallback_context('Function "%s" failed type inference' - % (self.func_id.func_name,)): - # Type inference - typemap, return_type, calltypes = type_inference_stage( - 
self.typingctx, - self.func_ir, - self.args, - self.return_type, - self.locals) - self.typemap = typemap - self.return_type = return_type - self.calltypes = calltypes - - with self.fallback_context('Function "%s" has invalid return type' - % (self.func_id.func_name,)): - legalize_return_type(self.return_type, self.func_ir, - self.targetctx) - - def stage_generic_rewrites(self): - """ - Perform any intermediate representation rewrites before type - inference. - """ - assert self.func_ir - msg = ('Internal error in pre-inference rewriting ' - 'pass encountered during compilation of ' - 'function "%s"' % (self.func_id.func_name,)) - with self.fallback_context(msg): - rewrites.rewrite_registry.apply('before-inference', - self, self.func_ir) - - def stage_nopython_rewrites(self): - """ - Perform any intermediate representation rewrites after type - inference. - """ - # Ensure we have an IR and type information. - assert self.func_ir - assert isinstance(getattr(self, 'typemap', None), dict) - assert isinstance(getattr(self, 'calltypes', None), dict) - msg = ('Internal error in post-inference rewriting ' - 'pass encountered during compilation of ' - 'function "%s"' % (self.func_id.func_name,)) - with self.fallback_context(msg): - rewrites.rewrite_registry.apply('after-inference', - self, self.func_ir) - - def stage_pre_parfor_pass(self): - """ - Preprocessing for data-parallel computations. - """ - # Ensure we have an IR and type information. - assert self.func_ir - preparfor_pass = PreParforPass( - self.func_ir, - self.type_annotation.typemap, - self.type_annotation.calltypes, self.typingctx, - self.flags.auto_parallel - ) - preparfor_pass.run() - - def stage_parfor_pass(self): - """ - Convert data-parallel computations into Parfor nodes - """ - # Ensure we have an IR and type information. 
- assert self.func_ir - parfor_pass = ParforPass(self.func_ir, self.type_annotation.typemap, - self.type_annotation.calltypes, self.return_type, self.typingctx, - self.flags.auto_parallel, self.flags) - parfor_pass.run() - - if config.WARNINGS: - # check the parfor pass worked and warn if it didn't - has_parfor = False - for blk in self.func_ir.blocks.values(): - for stmnt in blk.body: - if isinstance(stmnt, Parfor): - has_parfor = True - break - else: - continue - break - - if not has_parfor: - # parfor calls the compiler chain again with a string - if not self.func_ir.loc.filename == '': - msg = ("parallel=True was specified but no transformation" - " for parallel execution was possible.") - warnings.warn_explicit( - msg, - errors.NumbaWarning, - self.func_id.filename, - self.func_id.firstlineno - ) - - def stage_inline_pass(self): - """ - Inline calls to locally defined closures. - """ - # Ensure we have an IR and type information. - assert self.func_ir - inline_pass = InlineClosureCallPass(self.func_ir, - self.flags.auto_parallel) - inline_pass.run() - # Remove all Dels, and re-run postproc - post_proc = postproc.PostProcessor(self.func_ir) - post_proc.run() - - if config.DEBUG or config.DUMP_IR: - name = self.func_ir.func_id.func_qualname - print(("IR DUMP: %s" % name).center(80, "-")) - self.func_ir.dump() - - def stage_annotate_type(self): - """ - Create type annotation after type inference - """ - self.type_annotation = type_annotations.TypeAnnotation( - func_ir=self.func_ir, - typemap=self.typemap, - calltypes=self.calltypes, - lifted=self.lifted, - lifted_from=self.lifted_from, - args=self.args, - return_type=self.return_type, - html_output=config.HTML) - - if config.ANNOTATE: - print("ANNOTATION".center(80, '-')) - print(self.type_annotation) - print('=' * 80) - if config.HTML: - with open(config.HTML, 'w') as fout: - self.type_annotation.html_annotate(fout) - - def backend_object_mode(self): - """ - Object mode compilation - """ - with 
self.giveup_context("Function %s failed at object mode lowering" - % (self.func_id.func_name,)): - if len(self.args) != self.nargs: - # append missing - self.args = (tuple(self.args) + (types.pyobject,) * - (self.nargs - len(self.args))) - - return py_lowering_stage(self.targetctx, - self.library, - self.func_ir, - self.flags) - - def backend_nopython_mode(self): - """Native mode compilation""" - msg = ("Function %s failed at nopython " - "mode lowering" % (self.func_id.func_name,)) - with self.fallback_context(msg): - return native_lowering_stage( - self.targetctx, - self.library, - self.func_ir, - self.typemap, - self.return_type, - self.calltypes, - self.flags) - - def _backend(self, lowerfn, objectmode): - """ - Back-end: Generate LLVM IR from Numba IR, compile to machine code - """ - if self.library is None: - codegen = self.targetctx.codegen() - self.library = codegen.create_library(self.func_id.func_qualname) - # Enable object caching upfront, so that the library can - # be later serialized. - self.library.enable_object_caching() - - lowered = lowerfn() - signature = typing.signature(self.return_type, *self.args) - self.cr = compile_result( - typing_context=self.typingctx, - target_context=self.targetctx, - entry_point=lowered.cfunc, - typing_error=self.status.fail_reason, - type_annotation=self.type_annotation, - library=self.library, - call_helper=lowered.call_helper, - signature=signature, - objectmode=objectmode, - interpmode=False, - lifted=self.lifted, - fndesc=lowered.fndesc, - environment=lowered.env, - has_dynamic_globals=lowered.has_dynamic_globals, - ) - - def stage_objectmode_backend(self): - """ - Lowering for object mode - """ - lowerfn = self.backend_object_mode - self._backend(lowerfn, objectmode=True) - - # Warn if compiled function in object mode and force_pyobject not set - if not self.flags.force_pyobject: - if len(self.lifted) > 0: - warn_msg = ('Function "%s" was compiled in object mode without' - ' forceobj=True, but has lifted loops.' 
% - (self.func_id.func_name,)) - else: - warn_msg = ('Function "%s" was compiled in object mode without' - ' forceobj=True.' % (self.func_id.func_name,)) - warnings.warn_explicit(warn_msg, errors.NumbaWarning, - self.func_id.filename, - self.func_id.firstlineno) - if self.flags.release_gil: - warn_msg = ("Code running in object mode won't allow parallel" - " execution despite nogil=True.") - warnings.warn_explicit(warn_msg, errors.NumbaWarning, - self.func_id.filename, - self.func_id.firstlineno) - - def stage_nopython_backend(self): - """ - Do lowering for nopython - """ - lowerfn = self.backend_nopython_mode - self._backend(lowerfn, objectmode=False) - - def stage_compile_interp_mode(self): - """ - Just create a compile result for interpreter mode - """ - args = [types.pyobject] * len(self.args) - signature = typing.signature(types.pyobject, *args) - self.cr = compile_result(typing_context=self.typingctx, - target_context=self.targetctx, - entry_point=self.func_id.func, - typing_error=self.status.fail_reason, - type_annotation="", - signature=signature, - objectmode=False, - interpmode=True, - lifted=(), - fndesc=None,) - - def stage_ir_legalization(self): - raise_on_unsupported_feature(self.func_ir) - - def stage_cleanup(self): - """ - Cleanup intermediate results to release resources. - """ - - def define_pipelines(self, pm): - """Child classes override this to customize the pipeline. - """ - raise NotImplementedError() - - def add_preprocessing_stage(self, pm): - """Add the preprocessing stage that analyzes the bytecode to prepare - the Numba IR. - """ - if self.func_ir is None: - pm.add_stage(self.stage_analyze_bytecode, "analyzing bytecode") - pm.add_stage(self.stage_process_ir, "processing IR") - - def add_pre_typing_stage(self, pm): - """Add any stages that go before type-inference. - The current stages contain type-agnostic rewrite passes. 
- """ - if not self.flags.no_rewrites: - if self.status.can_fallback: - pm.add_stage(self.stage_preserve_ir, - "preserve IR for fallback") - pm.add_stage(self.stage_generic_rewrites, "nopython rewrites") - pm.add_stage(self.stage_inline_pass, - "inline calls to locally defined closures") - - def add_typing_stage(self, pm): - """Add the type-inference stage necessary for nopython mode. - """ - pm.add_stage(self.stage_nopython_frontend, "nopython frontend") - pm.add_stage(self.stage_annotate_type, "annotate type") - - def add_optimization_stage(self, pm): - """Add optimization stages. - """ - if self.flags.auto_parallel.enabled: - pm.add_stage(self.stage_pre_parfor_pass, - "Preprocessing for parfors") - if not self.flags.no_rewrites: - pm.add_stage(self.stage_nopython_rewrites, "nopython rewrites") - if self.flags.auto_parallel.enabled: - pm.add_stage(self.stage_parfor_pass, "convert to parfors") - - def add_lowering_stage(self, pm): - """Add the lowering (code-generation) stage for nopython-mode - """ - pm.add_stage(self.stage_nopython_backend, "nopython mode backend") - - def add_cleanup_stage(self, pm): - """Add the clean-up stage to remove intermediate results. 
- """ - pm.add_stage(self.stage_cleanup, "cleanup intermediate results") - - def define_nopython_pipeline(self, pm, name='nopython'): - """Add the nopython-mode pipeline to the pipeline manager - """ - pm.create_pipeline(name) - self.add_preprocessing_stage(pm) - self.add_pre_typing_stage(pm) - self.add_typing_stage(pm) - self.add_optimization_stage(pm) - pm.add_stage(self.stage_ir_legalization, - "ensure IR is legal prior to lowering") - self.add_lowering_stage(pm) - self.add_cleanup_stage(pm) - - def define_objectmode_pipeline(self, pm, name='object'): - """Add the object-mode pipeline to the pipeline manager - """ - pm.create_pipeline(name) - self.add_preprocessing_stage(pm) - pm.add_stage(self.stage_objectmode_frontend, - "object mode frontend") - pm.add_stage(self.stage_annotate_type, "annotate type") - pm.add_stage(self.stage_ir_legalization, - "ensure IR is legal prior to lowering") - pm.add_stage(self.stage_objectmode_backend, "object mode backend") - self.add_cleanup_stage(pm) - - def define_interpreted_pipeline(self, pm, name="interp"): - """Add the interpreted-mode (fallback) pipeline to the pipeline manager - """ - pm.create_pipeline(name) - pm.add_stage(self.stage_compile_interp_mode, - "compiling with interpreter mode") - self.add_cleanup_stage(pm) - - def _compile_core(self): - """ - Populate and run compiler pipeline - """ - pm = _PipelineManager() - self.define_pipelines(pm) - pm.finalize() - res = pm.run(self.status) - if res is not None: - # Early pipeline completion - return res - else: - assert self.cr is not None - return self.cr - - def _compile_bytecode(self): - """ - Populate and run pipeline for bytecode input - """ - assert self.func_ir is None - return self._compile_core() - - def _compile_ir(self): - """ - Populate and run pipeline for IR input - """ - assert self.func_ir is not None - return self._compile_core() - - -class Pipeline(BasePipeline): - """The default compiler pipeline - """ - def define_pipelines(self, pm): - if not 
self.flags.force_pyobject: - self.define_nopython_pipeline(pm) - if self.status.can_fallback or self.flags.force_pyobject: - self.define_objectmode_pipeline(pm) - if self.status.can_giveup: - self.define_interpreted_pipeline(pm) - - -def _make_subtarget(targetctx, flags): - """ - Make a new target context from the given target context and flags. - """ - subtargetoptions = {} - if flags.debuginfo: - subtargetoptions['enable_debuginfo'] = True - if flags.boundcheck: - subtargetoptions['enable_boundcheck'] = True - if flags.nrt: - subtargetoptions['enable_nrt'] = True - if flags.auto_parallel: - subtargetoptions['auto_parallel'] = flags.auto_parallel - if flags.fastmath: - subtargetoptions['enable_fastmath'] = True - error_model = callconv.create_error_model(flags.error_model, targetctx) - subtargetoptions['error_model'] = error_model - - return targetctx.subtarget(**subtargetoptions) - - -def compile_extra(typingctx, targetctx, func, args, return_type, flags, - locals, library=None, pipeline_class=Pipeline): - """Compiler entry point - - Parameter - --------- - typingctx : - typing context - targetctx : - target context - func : function - the python function to be compiled - args : tuple, list - argument types - return_type : - Use ``None`` to indicate void return - flags : numba.compiler.Flags - compiler flags - library : numba.codegen.CodeLibrary - Used to store the compiled code. - If it is ``None``, a new CodeLibrary is used. - pipeline_class : type like numba.compiler.BasePipeline - compiler pipeline - """ - pipeline = pipeline_class(typingctx, targetctx, library, - args, return_type, flags, locals) - return pipeline.compile_extra(func) - - -def compile_ir(typingctx, targetctx, func_ir, args, return_type, flags, - locals, lifted=(), lifted_from=None, library=None): - """ - Compile a function with the given IR. - - For internal use only. 
- """ - - pipeline = Pipeline(typingctx, targetctx, library, - args, return_type, flags, locals) - return pipeline.compile_ir(func_ir=func_ir, lifted=lifted, - lifted_from=lifted_from) - - -def compile_internal(typingctx, targetctx, library, - func, args, return_type, flags, locals): - """ - For internal use only. - """ - pipeline = Pipeline(typingctx, targetctx, library, - args, return_type, flags, locals) - return pipeline.compile_extra(func) - - -def legalize_return_type(return_type, interp, targetctx): - """ - Only accept array return type iff it is passed into the function. - Reject function object return types if in nopython mode. - """ - if not targetctx.enable_nrt and isinstance(return_type, types.Array): - # Walk IR to discover all arguments and all return statements - retstmts = [] - caststmts = {} - argvars = set() - for bid, blk in interp.blocks.items(): - for inst in blk.body: - if isinstance(inst, ir.Return): - retstmts.append(inst.value.name) - elif isinstance(inst, ir.Assign): - if (isinstance(inst.value, ir.Expr) - and inst.value.op == 'cast'): - caststmts[inst.target.name] = inst.value - elif isinstance(inst.value, ir.Arg): - argvars.add(inst.target.name) - - assert retstmts, "No return statements?" 
- - for var in retstmts: - cast = caststmts.get(var) - if cast is None or cast.value.name not in argvars: - raise TypeError("Only accept returning of array passed into " - "the function as argument") - - elif (isinstance(return_type, types.Function) or - isinstance(return_type, types.Phantom)): - msg = "Can't return function object ({}) in nopython mode" - raise TypeError(msg.format(return_type)) - - -def translate_stage(func_id, bytecode): - interp = interpreter.Interpreter(func_id) - return interp.interpret(bytecode) - - -def ir_processing_stage(func_ir): - post_proc = postproc.PostProcessor(func_ir) - post_proc.run() - - if config.DEBUG or config.DUMP_IR: - name = func_ir.func_id.func_qualname - print(("IR DUMP: %s" % name).center(80, "-")) - func_ir.dump() - if func_ir.is_generator: - print(("GENERATOR INFO: %s" % name).center(80, "-")) - func_ir.dump_generator_info() - - return func_ir - - -def type_inference_stage(typingctx, interp, args, return_type, locals={}): - if len(args) != interp.arg_count: - raise TypeError("Mismatch number of argument types") - - warnings = errors.WarningsFixer(errors.NumbaWarning) - infer = typeinfer.TypeInferer(typingctx, interp, warnings) - with typingctx.callstack.register(infer, interp.func_id, args): - # Seed argument types - for index, (name, ty) in enumerate(zip(interp.arg_names, args)): - infer.seed_argument(name, index, ty) - - # Seed return type - if return_type is not None: - infer.seed_return(return_type) - - # Seed local types - for k, v in locals.items(): - infer.seed_type(k, v) - - infer.build_constraint() - infer.propagate() - typemap, restype, calltypes = infer.unify() - - # Output all Numba warnings - warnings.flush() - - return typemap, restype, calltypes - - -def native_lowering_stage(targetctx, library, interp, typemap, restype, - calltypes, flags): - # Lowering - fndesc = funcdesc.PythonFunctionDescriptor.from_specialized_function( - interp, typemap, restype, calltypes, mangler=targetctx.mangler, - 
inline=flags.forceinline, noalias=flags.noalias) - - lower = lowering.Lower(targetctx, library, fndesc, interp) - lower.lower() - if not flags.no_cpython_wrapper: - lower.create_cpython_wrapper(flags.release_gil) - env = lower.env - call_helper = lower.call_helper - has_dynamic_globals = lower.has_dynamic_globals - del lower - - if flags.no_compile: - return _LowerResult(fndesc, call_helper, cfunc=None, env=env, - has_dynamic_globals=has_dynamic_globals) - else: - # Prepare for execution - cfunc = targetctx.get_executable(library, fndesc, env) - # Insert native function for use by other jitted-functions. - # We also register its library to allow for inlining. - targetctx.insert_user_function(cfunc, fndesc, [library]) - return _LowerResult(fndesc, call_helper, cfunc=cfunc, env=env, - has_dynamic_globals=has_dynamic_globals) - - -def py_lowering_stage(targetctx, library, interp, flags): - fndesc = funcdesc.PythonFunctionDescriptor.from_object_mode_function( - interp - ) - lower = objmode.PyLower(targetctx, library, fndesc, interp) - lower.lower() - if not flags.no_cpython_wrapper: - lower.create_cpython_wrapper() - env = lower.env - call_helper = lower.call_helper - has_dynamic_globals = lower.has_dynamic_globals - del lower - - if flags.no_compile: - return _LowerResult(fndesc, call_helper, cfunc=None, env=env, - has_dynamic_globals=has_dynamic_globals) - else: - # Prepare for execution - cfunc = targetctx.get_executable(library, fndesc, env) - return _LowerResult(fndesc, call_helper, cfunc=cfunc, env=env, - has_dynamic_globals=has_dynamic_globals) diff --git a/numba/numba/config.py b/numba/numba/config.py deleted file mode 100644 index defaaba98..000000000 --- a/numba/numba/config.py +++ /dev/null @@ -1,327 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import platform -import struct -import sys -import os -import re -import warnings -import multiprocessing - -# YAML needed to use file based Numba config -try: - import yaml - 
_HAVE_YAML = True -except ImportError: - _HAVE_YAML = False - - -import llvmlite.binding as ll - -IS_WIN32 = sys.platform.startswith('win32') -IS_OSX = sys.platform.startswith('darwin') -MACHINE_BITS = tuple.__itemsize__ * 8 -IS_32BITS = MACHINE_BITS == 32 -# Python version in (major, minor) tuple -PYVERSION = sys.version_info[:2] - -# this is the name of the user supplied configuration file -_config_fname = '.numba_config.yaml' - -def _parse_cc(text): - """ - Parse CUDA compute capability version string. - """ - if not text: - return None - else: - m = re.match(r'(\d+)\.(\d+)', text) - if not m: - raise ValueError("NUMBA_FORCE_CUDA_CC must be specified as a " - "string of \"major.minor\" where major " - "and minor are decimals") - grp = m.groups() - return int(grp[0]), int(grp[1]) - - -def _os_supports_avx(): - """ - Whether the current OS supports AVX, regardless of the CPU. - - This is necessary because the user may be running a very old Linux - kernel (e.g. CentOS 5) on a recent CPU. - """ - if (not sys.platform.startswith('linux') - or platform.machine() not in ('i386', 'i586', 'i686', 'x86_64')): - return True - # Executing the CPUID instruction may report AVX available even though - # the kernel doesn't support it, so parse /proc/cpuinfo instead. 
- try: - f = open('/proc/cpuinfo', 'r') - except OSError: - # If /proc isn't available, assume yes - return True - with f: - for line in f: - head, _, body = line.partition(':') - if head.strip() == 'flags' and 'avx' in body.split(): - return True - else: - return False - - -class _EnvReloader(object): - - def __init__(self): - self.reset() - - def reset(self): - self.old_environ = {} - self.update(force=True) - - def update(self, force=False): - new_environ = {} - - # first check if there's a .numba_config.yaml and use values from that - if os.path.exists(_config_fname) and os.path.isfile(_config_fname): - if not _HAVE_YAML: - msg = ("A Numba config file is found but YAML parsing " - "capabilities appear to be missing. " - "To use this feature please install `pyyaml`. e.g. " - "`conda install pyyaml`.") - warnings.warn(msg) - else: - with open(_config_fname, 'rt') as f: - y_conf = yaml.load(f) - if y_conf is not None: - for k, v in y_conf.items(): - new_environ['NUMBA_' + k.upper()] = v - - # clobber file based config with any locally defined env vars - for name, value in os.environ.items(): - if name.startswith('NUMBA_'): - new_environ[name] = value - # We update the config variables if at least one NUMBA environment - # variable was modified. This lets the user modify values - # directly in the config module without having them when - # reload_config() is called by the compiler. 
- if force or self.old_environ != new_environ: - self.process_environ(new_environ) - # Store a copy - self.old_environ = dict(new_environ) - - def process_environ(self, environ): - def _readenv(name, ctor, default): - value = environ.get(name) - if value is None: - return default() if callable(default) else default - try: - return ctor(value) - except Exception: - warnings.warn("environ %s defined but failed to parse '%s'" % - (name, value), RuntimeWarning) - return default - - def optional_str(x): - return str(x) if x is not None else None - - # Print warnings to screen about function compilation - # 0 = Numba warnings suppressed (default) - # 1 = All Numba warnings shown - WARNINGS = _readenv("NUMBA_WARNINGS", int, 0) - - # developer mode produces full tracebacks, disables help instructions - DEVELOPER_MODE = _readenv("NUMBA_DEVELOPER_MODE", int, 0) - - # Flag to enable full exception reporting - FULL_TRACEBACKS = _readenv("NUMBA_FULL_TRACEBACKS", int, DEVELOPER_MODE) - - # Show help text when an error occurs - SHOW_HELP = _readenv("NUMBA_SHOW_HELP", int, not DEVELOPER_MODE) - - # The color scheme to use for error messages, default is no color - # just bold fonts in use. 
- COLOR_SCHEME = _readenv("NUMBA_COLOR_SCHEME", str, "no_color") - - # Debug flag to control compiler debug print - DEBUG = _readenv("NUMBA_DEBUG", int, 0) - - # JIT Debug flag to trigger IR instruction print - DEBUG_JIT = _readenv("NUMBA_DEBUG_JIT", int, 0) - - # Enable debugging of front-end operation (up to and including IR generation) - DEBUG_FRONTEND = _readenv("NUMBA_DEBUG_FRONTEND", int, 0) - - # Enable logging of cache operation - DEBUG_CACHE = _readenv("NUMBA_DEBUG_CACHE", int, DEBUG) - - # Redirect cache directory - # Contains path to the directory - CACHE_DIR = _readenv("NUMBA_CACHE_DIR", str, "") - - # Enable tracing support - TRACE = _readenv("NUMBA_TRACE", int, 0) - - # Enable debugging of type inference - DEBUG_TYPEINFER = _readenv("NUMBA_DEBUG_TYPEINFER", int, 0) - - # Configure compilation target to use the specified CPU name - # and CPU feature as the host information. - # Note: this overrides "host" option for AOT compilation. - CPU_NAME = _readenv("NUMBA_CPU_NAME", optional_str, None) - CPU_FEATURES = _readenv("NUMBA_CPU_FEATURES", optional_str, - ("" if str(CPU_NAME).lower() == 'generic' - else None)) - # Optimization level - OPT = _readenv("NUMBA_OPT", int, 3) - - # Force dump of Python bytecode - DUMP_BYTECODE = _readenv("NUMBA_DUMP_BYTECODE", int, DEBUG_FRONTEND) - - # Force dump of control flow graph - DUMP_CFG = _readenv("NUMBA_DUMP_CFG", int, DEBUG_FRONTEND) - - # Force dump of Numba IR - DUMP_IR = _readenv("NUMBA_DUMP_IR", int, - DEBUG_FRONTEND or DEBUG_TYPEINFER) - - # print debug info of analysis and optimization on array operations - DEBUG_ARRAY_OPT = _readenv("NUMBA_DEBUG_ARRAY_OPT", int, 0) - - # insert debug stmts to print information at runtime - DEBUG_ARRAY_OPT_RUNTIME = _readenv("NUMBA_DEBUG_ARRAY_OPT_RUNTIME", int, 0) - - # print stats about parallel for-loops - DEBUG_ARRAY_OPT_STATS = _readenv("NUMBA_DEBUG_ARRAY_OPT_STATS", int, 0) - - # print debug info of inline closure pass - DEBUG_INLINE_CLOSURE = 
_readenv("NUMBA_DEBUG_INLINE_CLOSURE", int, 0) - - # Force dump of LLVM IR - DUMP_LLVM = _readenv("NUMBA_DUMP_LLVM", int, DEBUG) - - # Force dump of Function optimized LLVM IR - DUMP_FUNC_OPT = _readenv("NUMBA_DUMP_FUNC_OPT", int, DEBUG) - - # Force dump of Optimized LLVM IR - DUMP_OPTIMIZED = _readenv("NUMBA_DUMP_OPTIMIZED", int, DEBUG) - - # Force disable loop vectorize - # Loop vectorizer is disabled on 32-bit win32 due to a bug (#649) - LOOP_VECTORIZE = _readenv("NUMBA_LOOP_VECTORIZE", int, - not (IS_WIN32 and IS_32BITS)) - - # Force dump of generated assembly - DUMP_ASSEMBLY = _readenv("NUMBA_DUMP_ASSEMBLY", int, DEBUG) - - # Force dump of type annotation - ANNOTATE = _readenv("NUMBA_DUMP_ANNOTATION", int, 0) - - # Dump IR in such as way as to aid in "diff"ing. - DIFF_IR = _readenv("NUMBA_DIFF_IR", int, 0) - - # Dump type annotation in html format - def fmt_html_path(path): - if path is None: - return path - else: - return os.path.abspath(path) - - HTML = _readenv("NUMBA_DUMP_HTML", fmt_html_path, None) - - # Allow interpreter fallback so that Numba @jit decorator will never fail - # Use for migrating from old numba (<0.12) which supported closure, and other - # yet-to-be-supported features. - COMPATIBILITY_MODE = _readenv("NUMBA_COMPATIBILITY_MODE", int, 0) - - # x86-64 specific - # Enable AVX on supported platforms where it won't degrade performance. - def avx_default(): - if not _os_supports_avx(): - return False - else: - # There are various performance issues with AVX and LLVM - # on some CPUs (list at - # http://llvm.org/bugs/buglist.cgi?quicksearch=avx). - # For now we'd rather disable it, since it can pessimize the code. - cpu_name = ll.get_host_cpu_name() - return cpu_name not in ('corei7-avx', 'core-avx-i', - 'sandybridge', 'ivybridge') - - ENABLE_AVX = _readenv("NUMBA_ENABLE_AVX", int, avx_default) - - # if set and SVML is available, it will be disabled - # By default, it's disabled on 32-bit platforms. 
- DISABLE_INTEL_SVML = _readenv("NUMBA_DISABLE_INTEL_SVML", int, IS_32BITS) - - # Disable jit for debugging - DISABLE_JIT = _readenv("NUMBA_DISABLE_JIT", int, 0) - - # CUDA Configs - - # Force CUDA compute capability to a specific version - FORCE_CUDA_CC = _readenv("NUMBA_FORCE_CUDA_CC", _parse_cc, None) - - # Disable CUDA support - DISABLE_CUDA = _readenv("NUMBA_DISABLE_CUDA", int, int(MACHINE_BITS==32)) - - # Enable CUDA simulator - ENABLE_CUDASIM = _readenv("NUMBA_ENABLE_CUDASIM", int, 0) - - # CUDA logging level - # Any level name from the *logging* module. Case insensitive. - # Defaults to CRITICAL if not set or invalid. - # Note: This setting only applies when logging is not configured. - # Any existing logging configuration is preserved. - CUDA_LOG_LEVEL = _readenv("NUMBA_CUDA_LOG_LEVEL", str, '') - - # Maximum number of pending CUDA deallocations (default: 10) - CUDA_DEALLOCS_COUNT = _readenv("NUMBA_CUDA_MAX_PENDING_DEALLOCS_COUNT", - int, 10) - - # Maximum ratio of pending CUDA deallocations to capacity (default: 0.2) - CUDA_DEALLOCS_RATIO = _readenv("NUMBA_CUDA_MAX_PENDING_DEALLOCS_RATIO", - float, 0.2) - - # HSA Configs - - # Disable HSA support - DISABLE_HSA = _readenv("NUMBA_DISABLE_HSA", int, 0) - - # The default number of threads to use. - NUMBA_DEFAULT_NUM_THREADS = max(1, multiprocessing.cpu_count()) - - # Numba thread pool size (defaults to number of CPUs on the system). 
- NUMBA_NUM_THREADS = _readenv("NUMBA_NUM_THREADS", int, - NUMBA_DEFAULT_NUM_THREADS) - - # Debug Info - - # The default value for the `debug` flag - DEBUGINFO_DEFAULT = _readenv("NUMBA_DEBUGINFO", int, 0) - CUDA_DEBUGINFO_DEFAULT = _readenv("NUMBA_CUDA_DEBUGINFO", int, 0) - - # Inject the configuration values into the module globals - for name, value in locals().copy().items(): - if name.isupper(): - globals()[name] = value - - # delay this until now, let the globals for the module be updated - # prior to loading numba.errors as it needs to use the config - if WARNINGS == 0: - from numba.errors import NumbaWarning - warnings.simplefilter('ignore', NumbaWarning) - if not _os_supports_avx(): - from numba.errors import PerformanceWarning - warnings.warn("your operating system doesn't support " - "AVX, this may degrade performance on " - "some numerical code", PerformanceWarning) - -_env_reloader = _EnvReloader() - - -def reload_config(): - """ - Reload the configuration from environment variables, if necessary. - """ - _env_reloader.update() diff --git a/numba/numba/consts.py b/numba/numba/consts.py deleted file mode 100644 index 1e43d6679..000000000 --- a/numba/numba/consts.py +++ /dev/null @@ -1,95 +0,0 @@ -from __future__ import print_function, absolute_import - -from types import ModuleType - -import weakref - -from . import ir -from .errors import ConstantInferenceError - - -class ConstantInference(object): - """ - A constant inference engine for a given interpreter. - Inference inspects the IR to try and compute a compile-time constant for - a variable. - - This shouldn't be used directly, instead call Interpreter.infer_constant(). - """ - - def __init__(self, func_ir): - # Avoid cyclic references as some user-visible objects may be - # held alive in the cache - self._func_ir = weakref.proxy(func_ir) - self._cache = {} - - def infer_constant(self, name): - """ - Infer a constant value for the given variable *name*. 
- If no value can be inferred, numba.errors.ConstantInferenceError - is raised. - """ - if name not in self._cache: - try: - self._cache[name] = (True, self._do_infer(name)) - except ConstantInferenceError as exc: - # Store the exception args only, to avoid keeping - # a whole traceback alive. - self._cache[name] = (False, (exc.__class__, exc.args)) - success, val = self._cache[name] - if success: - return val - else: - exc, args = val - raise exc(*args) - - def _fail(self, val): - raise ConstantInferenceError( - "constant inference not possible for %s" % (val,)) - - def _do_infer(self, name): - if not isinstance(name, str): - raise TypeError("infer_constant() called with non-str %r" - % (name,)) - try: - defn = self._func_ir.get_definition(name) - except KeyError: - raise ConstantInferenceError( - "no single definition for %r" % (name,)) - try: - const = defn.infer_constant() - except ConstantInferenceError: - if isinstance(defn, ir.Expr): - return self._infer_expr(defn) - self._fail(defn) - return const - - def _infer_expr(self, expr): - # Infer an expression: handle supported cases - if expr.op == 'call': - func = self.infer_constant(expr.func.name) - return self._infer_call(func, expr) - elif expr.op == 'getattr': - value = self.infer_constant(expr.value.name) - return self._infer_getattr(value, expr) - elif expr.op == 'build_list': - return [self.infer_constant(i.name) for i in expr.items] - elif expr.op == 'build_tuple': - return tuple(self.infer_constant(i.name) for i in expr.items) - self._fail(expr) - - def _infer_call(self, func, expr): - if expr.kws or expr.vararg: - self._fail(expr) - # Check supported callables - if (func in (slice,) or - (isinstance(func, type) and issubclass(func, BaseException))): - args = [self.infer_constant(a.name) for a in expr.args] - return func(*args) - self._fail(expr) - - def _infer_getattr(self, value, expr): - if isinstance(value, (ModuleType, type)): - # Allow looking up a constant on a class or module - return 
getattr(value, expr.attr) - self._fail(expr) diff --git a/numba/numba/controlflow.py b/numba/numba/controlflow.py deleted file mode 100644 index 92f969349..000000000 --- a/numba/numba/controlflow.py +++ /dev/null @@ -1,646 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import collections -import functools -import sys - -from numba import utils - - -# List of bytecodes creating a new block in the control flow graph -# (in addition to explicit jump labels). -NEW_BLOCKERS = frozenset(['SETUP_LOOP', 'FOR_ITER']) - - -class CFBlock(object): - - def __init__(self, offset): - self.offset = offset - self.body = [] - # A map of jumps to outgoing blocks (successors): - # { offset of outgoing block -> number of stack pops } - self.outgoing_jumps = {} - # A map of jumps to incoming blocks (predecessors): - # { offset of incoming block -> number of stack pops } - self.incoming_jumps = {} - self.terminating = False - - def __repr__(self): - args = self.offset, sorted(self.outgoing_jumps), sorted(self.incoming_jumps) - return "block(offset:%d, outgoing: %s, incoming: %s)" % args - - def __iter__(self): - return iter(self.body) - - -class Loop( - collections.namedtuple("Loop", ("entries", "exits", "header", "body"))): - """ - A control flow loop, as detected by a CFGraph object. - """ - - __slots__ = () - - # The loop header is enough to detect that two loops are really - # the same, assuming they belong to the same graph. - # (note: in practice, only one loop instance is created per graph - # loop, so identity would be fine) - - def __eq__(self, other): - return isinstance(other, Loop) and other.header == self.header - - def __hash__(self): - return hash(self.header) - - -class CFGraph(object): - """ - Generic (almost) implementation of a Control Flow Graph. 
- """ - - def __init__(self): - self._nodes = set() - self._preds = collections.defaultdict(set) - self._succs = collections.defaultdict(set) - self._edge_data = {} - self._entry_point = None - - def add_node(self, node): - """ - Add *node* to the graph. This is necessary before adding any - edges from/to the node. *node* can be any hashable object. - """ - self._nodes.add(node) - - def add_edge(self, src, dest, data=None): - """ - Add an edge from node *src* to node *dest*, with optional - per-edge *data*. - If such an edge already exists, it is replaced (duplicate edges - are not possible). - """ - assert src in self._nodes - assert dest in self._nodes - self._add_edge(src, dest, data) - - def successors(self, src): - """ - Yield (node, data) pairs representing the successors of node *src*. - (*data* will be None if no data was specified when adding the edge) - """ - for dest in self._succs[src]: - yield dest, self._edge_data[src, dest] - - def predecessors(self, dest): - """ - Yield (node, data) pairs representing the predecessors of node *dest*. - (*data* will be None if no data was specified when adding the edge) - """ - for src in self._preds[dest]: - yield src, self._edge_data[src, dest] - - def set_entry_point(self, node): - """ - Set the entry point of the graph to *node*. - """ - assert node in self._nodes - self._entry_point = node - - def process(self): - """ - Compute various properties of the control flow graph. The graph - must have been fully populated, and its entry point specified. - """ - if self._entry_point is None: - raise RuntimeError("no entry point defined!") - self._eliminate_dead_blocks() - self._find_exit_points() - self._find_dominators() - self._find_back_edges() - self._find_topo_order() - self._find_descendents() - self._find_loops() - self._find_post_dominators() - - def dominators(self): - """ - Return a dictionary of {node -> set(nodes)} mapping each node to - the nodes dominating it. 
- - A node D dominates a node N when any path leading to N must go through D. - """ - return self._doms - - def post_dominators(self): - """ - Return a dictionary of {node -> set(nodes)} mapping each node to - the nodes post-dominating it. - - A node P post-dominates a node N when any path starting from N must go - through P. - """ - return self._post_doms - - def descendents(self, node): - """ - Return the set of descendents of the given *node*, in topological - order (ignoring back edges). - """ - return self._descs[node] - - def entry_point(self): - """ - Return the entry point node. - """ - assert self._entry_point is not None - return self._entry_point - - def exit_points(self): - """ - Return the computed set of exit nodes (may be empty). - """ - return self._exit_points - - def backbone(self): - """ - Return the set of nodes constituting the graph's backbone. - (i.e. the nodes that every path starting from the entry point - must go through). By construction, it is non-empty: it contains - at least the entry point. - """ - return self._post_doms[self._entry_point] - - def loops(self): - """ - Return a dictionary of {node -> loop} mapping each loop header - to the loop (a Loop instance) starting with it. - """ - return self._loops - - def in_loops(self, node): - """ - Return the list of Loop objects the *node* belongs to, - from innermost to outermost. - """ - return [self._loops[x] for x in self._in_loops[node]] - - def dead_nodes(self): - """ - Return the set of dead nodes (eliminated from the graph). - """ - return self._dead_nodes - - def nodes(self): - """ - Return the set of live nodes. - """ - return self._nodes - - def topo_order(self): - """ - Return the sequence of nodes in topological order (ignoring back - edges). - """ - return self._topo_order - - def topo_sort(self, nodes, reverse=False): - """ - Iterate over the *nodes* in topological order (ignoring back edges). - The sort isn't guaranteed to be stable. 
- """ - nodes = set(nodes) - it = self._topo_order - if reverse: - it = reversed(it) - for n in it: - if n in nodes: - yield n - - def dump(self, file=None): - """ - Dump extensive debug information. - """ - import pprint - file = file or sys.stdout - if 1: - print("CFG adjacency lists:", file=file) - self._dump_adj_lists(file) - print("CFG dominators:", file=file) - pprint.pprint(self._doms, stream=file) - print("CFG post-dominators:", file=file) - pprint.pprint(self._post_doms, stream=file) - print("CFG back edges:", sorted(self._back_edges), file=file) - print("CFG loops:", file=file) - pprint.pprint(self._loops, stream=file) - print("CFG node-to-loops:", file=file) - pprint.pprint(self._in_loops, stream=file) - - # Internal APIs - - def _add_edge(self, from_, to, data=None): - # This internal version allows adding edges to/from unregistered - # (ghost) nodes. - self._preds[to].add(from_) - self._succs[from_].add(to) - self._edge_data[from_, to] = data - - def _remove_node_edges(self, node): - for succ in self._succs.pop(node, ()): - self._preds[succ].remove(node) - del self._edge_data[node, succ] - for pred in self._preds.pop(node, ()): - self._succs[pred].remove(node) - del self._edge_data[pred, node] - - def _dfs(self, entries=None): - if entries is None: - entries = (self._entry_point,) - seen = set() - stack = list(entries) - while stack: - node = stack.pop() - if node not in seen: - yield node - seen.add(node) - for succ in self._succs[node]: - stack.append(succ) - - def _eliminate_dead_blocks(self): - """ - Eliminate all blocks not reachable from the entry point, and - stash them into self._dead_nodes. - """ - live = set() - for node in self._dfs(): - live.add(node) - self._dead_nodes = self._nodes - live - self._nodes = live - # Remove all edges leading from dead nodes - for dead in self._dead_nodes: - self._remove_node_edges(dead) - - def _find_exit_points(self): - """ - Compute the graph's exit points. 
- """ - exit_points = set() - for n in self._nodes: - if not self._succs.get(n): - exit_points.add(n) - self._exit_points = exit_points - - def _find_dominators_internal(self, post=False): - # See theoretical description in - # http://en.wikipedia.org/wiki/Dominator_%28graph_theory%29 - # The algorithm implemented here uses a todo-list as described - # in http://pages.cs.wisc.edu/~fischer/cs701.f08/finding.loops.html - if post: - entries = set(self._exit_points) - preds_table = self._succs - succs_table = self._preds - else: - entries = set([self._entry_point]) - preds_table = self._preds - succs_table = self._succs - - if not entries: - raise RuntimeError("no entry points: dominator algorithm " - "cannot be seeded") - - doms = {} - for e in entries: - doms[e] = set([e]) - - todo = [] - for n in self._nodes: - if n not in entries: - doms[n] = set(self._nodes) - todo.append(n) - - while todo: - n = todo.pop() - if n in entries: - continue - new_doms = set([n]) - preds = preds_table[n] - if preds: - new_doms |= functools.reduce(set.intersection, - [doms[p] for p in preds]) - if new_doms != doms[n]: - assert len(new_doms) < len(doms[n]) - doms[n] = new_doms - todo.extend(succs_table[n]) - return doms - - def _find_dominators(self): - self._doms = self._find_dominators_internal(post=False) - - def _find_post_dominators(self): - # To handle infinite loops correctly, we need to add a dummy - # exit point, and link members of infinite loops to it. 
- dummy_exit = object() - self._exit_points.add(dummy_exit) - for loop in self._loops.values(): - if not loop.exits: - for b in loop.body: - self._add_edge(b, dummy_exit) - self._post_doms = self._find_dominators_internal(post=True) - # Fix the _post_doms table to make no reference to the dummy exit - del self._post_doms[dummy_exit] - for doms in self._post_doms.values(): - doms.discard(dummy_exit) - self._remove_node_edges(dummy_exit) - self._exit_points.remove(dummy_exit) - - # Finding loops and back edges: see - # http://pages.cs.wisc.edu/~fischer/cs701.f08/finding.loops.html - - def _find_back_edges(self): - """ - Find back edges. An edge (src, dest) is a back edge if and - only if *dest* dominates *src*. - """ - back_edges = set() - for src, succs in self._succs.items(): - back = self._doms[src] & succs - # In CPython bytecode, at most one back edge can flow from a - # given block. - assert len(back) <= 1 - back_edges.update((src, dest) for dest in back) - self._back_edges = back_edges - - def _find_topo_order(self): - succs = self._succs - back_edges = self._back_edges - post_order = [] - seen = set() - - def _dfs_rec(node): - if node not in seen: - seen.add(node) - for dest in succs[node]: - if (node, dest) not in back_edges: - _dfs_rec(dest) - post_order.append(node) - - _dfs_rec(self._entry_point) - post_order.reverse() - self._topo_order = post_order - - def _find_descendents(self): - descs = {} - for node in reversed(self._topo_order): - descs[node] = node_descs = set() - for succ in self._succs[node]: - if (node, succ) not in self._back_edges: - node_descs.add(succ) - node_descs.update(descs[succ]) - self._descs = descs - - def _find_loops(self): - """ - Find the loops defined by the graph's back edges. - """ - bodies = {} - for src, dest in self._back_edges: - # The destination of the back edge is the loop header - header = dest - # Build up the loop body from the back edge's source node, - # up to the source header. 
- body = set([header]) - queue = [src] - while queue: - n = queue.pop() - if n not in body: - body.add(n) - queue.extend(self._preds[n]) - # There can be several back edges to a given loop header; - # if so, merge the resulting body fragments. - if header in bodies: - bodies[header].update(body) - else: - bodies[header] = body - - # Create a Loop object for each header. - loops = {} - for header, body in bodies.items(): - entries = set() - exits = set() - for n in body: - entries.update(self._preds[n] - body) - exits.update(self._succs[n] - body) - loop = Loop(header=header, body=body, entries=entries, exits=exits) - loops[header] = loop - self._loops = loops - - # Compute the loops to which each node belongs. - in_loops = dict((n, []) for n in self._nodes) - # Sort loops from longest to shortest - # This ensures that outer loops will come before inner loops - for loop in sorted(loops.values(), key=lambda loop: len(loop.body)): - for n in loop.body: - in_loops[n].append(loop.header) - self._in_loops = in_loops - - def _dump_adj_lists(self, file): - adj_lists = dict((src, list(dests)) - for src, dests in self._succs.items()) - import pprint - pprint.pprint(adj_lists, stream=file) - - -class ControlFlowAnalysis(object): - """ - Attributes - ---------- - - bytecode - - - blocks - - - blockseq - - - doms: dict of set - Dominators - - - backbone: set of block offsets - The set of block that is common to all possible code path. 
- - """ - def __init__(self, bytecode): - self.bytecode = bytecode - self.blocks = {} - self.liveblocks = {} - self.blockseq = [] - self.doms = None - self.backbone = None - # Internal temp states - self._force_new_block = True - self._curblock = None - self._blockstack = [] - self._loops = [] - - def iterblocks(self): - """ - Return all blocks in sequence of occurrence - """ - for i in self.blockseq: - yield self.blocks[i] - - def iterliveblocks(self): - """ - Return all live blocks in sequence of occurrence - """ - for i in self.blockseq: - if i in self.liveblocks: - yield self.blocks[i] - - def incoming_blocks(self, block): - """ - Yield (incoming block, number of stack pops) pairs for *block*. - """ - for i, pops in block.incoming_jumps.items(): - if i in self.liveblocks: - yield self.blocks[i], pops - - def dump(self, file=None): - self.graph.dump(file=None) - - def run(self): - for inst in self._iter_inst(): - fname = "op_%s" % inst.opname - fn = getattr(self, fname, None) - if fn is not None: - fn(inst) - else: - assert not inst.is_jump, inst - - # Close all blocks - for cur, nxt in zip(self.blockseq, self.blockseq[1:]): - blk = self.blocks[cur] - if not blk.outgoing_jumps and not blk.terminating: - blk.outgoing_jumps[nxt] = 0 - - graph = CFGraph() - for b in self.blocks: - graph.add_node(b) - for b in self.blocks.values(): - for out, pops in b.outgoing_jumps.items(): - graph.add_edge(b.offset, out, pops) - graph.set_entry_point(min(self.blocks)) - graph.process() - self.graph = graph - - # Fill incoming - for b in utils.itervalues(self.blocks): - for out, pops in b.outgoing_jumps.items(): - self.blocks[out].incoming_jumps[b.offset] = pops - - # Find liveblocks - self.liveblocks = dict((i, self.blocks[i]) - for i in self.graph.nodes()) - - for lastblk in reversed(self.blockseq): - if lastblk in self.liveblocks: - break - else: - raise AssertionError("No live block that exits!?") - - # Find backbone - backbone = self.graph.backbone() - # Filter out in loop 
blocks (Assuming no other cyclic control blocks) - # This is to unavoid variable defined in loops to be considered as - # function scope. - inloopblocks = set() - - for b in self.blocks.keys(): - for s, e in self._loops: - if s <= b < e: - inloopblocks.add(b) - - self.backbone = backbone - inloopblocks - - def jump(self, target, pops=0): - """ - Register a jump (conditional or not) to *target* offset. - *pops* is the number of stack pops implied by the jump (default 0). - """ - self._curblock.outgoing_jumps[target] = pops - - def _iter_inst(self): - for inst in self.bytecode: - if self._use_new_block(inst): - self._start_new_block(inst) - self._curblock.body.append(inst.offset) - yield inst - - def _use_new_block(self, inst): - if inst.offset in self.bytecode.labels: - res = True - elif inst.opname in NEW_BLOCKERS: - res = True - else: - res = self._force_new_block - - self._force_new_block = False - return res - - def _start_new_block(self, inst): - self._curblock = CFBlock(inst.offset) - self.blocks[inst.offset] = self._curblock - self.blockseq.append(inst.offset) - - def op_SETUP_LOOP(self, inst): - end = inst.get_jump_target() - self._blockstack.append(end) - self._loops.append((inst.offset, end)) - # TODO: Looplifting requires the loop entry be its own block. - # Forcing a new block here is the simplest solution for now. - # But, we should consider other less ad-hoc ways. 
- self.jump(inst.next) - self._force_new_block = True - - def op_POP_BLOCK(self, inst): - self._blockstack.pop() - - def op_FOR_ITER(self, inst): - self.jump(inst.get_jump_target()) - self.jump(inst.next) - self._force_new_block = True - - def _op_ABSOLUTE_JUMP_IF(self, inst): - self.jump(inst.get_jump_target()) - self.jump(inst.next) - self._force_new_block = True - - op_POP_JUMP_IF_FALSE = _op_ABSOLUTE_JUMP_IF - op_POP_JUMP_IF_TRUE = _op_ABSOLUTE_JUMP_IF - op_JUMP_IF_FALSE = _op_ABSOLUTE_JUMP_IF - op_JUMP_IF_TRUE = _op_ABSOLUTE_JUMP_IF - - def _op_ABSOLUTE_JUMP_OR_POP(self, inst): - self.jump(inst.get_jump_target()) - self.jump(inst.next, pops=1) - self._force_new_block = True - - op_JUMP_IF_FALSE_OR_POP = _op_ABSOLUTE_JUMP_OR_POP - op_JUMP_IF_TRUE_OR_POP = _op_ABSOLUTE_JUMP_OR_POP - - def op_JUMP_ABSOLUTE(self, inst): - self.jump(inst.get_jump_target()) - self._force_new_block = True - - def op_JUMP_FORWARD(self, inst): - self.jump(inst.get_jump_target()) - self._force_new_block = True - - def op_RETURN_VALUE(self, inst): - self._curblock.terminating = True - self._force_new_block = True - - def op_RAISE_VARARGS(self, inst): - self._curblock.terminating = True - self._force_new_block = True - - def op_BREAK_LOOP(self, inst): - self.jump(self._blockstack[-1]) - self._force_new_block = True diff --git a/numba/numba/ctypes_support.py b/numba/numba/ctypes_support.py deleted file mode 100644 index 34fe9f524..000000000 --- a/numba/numba/ctypes_support.py +++ /dev/null @@ -1,14 +0,0 @@ -""" -This file fixes portability issues for ctypes. 
-""" - -from __future__ import absolute_import -from numba.config import PYVERSION -from ctypes import * - -if PYVERSION <= (2, 7): - c_ssize_t = { - 4: c_int32, - 8: c_int64, - }[sizeof(c_size_t)] - diff --git a/numba/numba/cuda/__init__.py b/numba/numba/cuda/__init__.py deleted file mode 100644 index eea07957c..000000000 --- a/numba/numba/cuda/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import print_function, absolute_import, division - -from numba import config -import numba.testing - -if config.ENABLE_CUDASIM: - from .simulator_init import * -else: - from .device_init import * - from .device_init import _auto_device - - -def test(*args, **kwargs): - if not is_available(): - raise cuda_error() - - return numba.testing.test("numba.cuda.tests", *args, **kwargs) diff --git a/numba/numba/cuda/api.py b/numba/numba/cuda/api.py deleted file mode 100644 index 14d06e20e..000000000 --- a/numba/numba/cuda/api.py +++ /dev/null @@ -1,376 +0,0 @@ -""" -API that are reported to numba.cuda -""" - -from __future__ import print_function, absolute_import - -import contextlib - -import numpy as np - -from .cudadrv import devicearray, devices, driver -from .args import In, Out, InOut - - -try: - long -except NameError: - long = int - -# NDarray device helper - -require_context = devices.require_context -current_context = devices.get_context -gpus = devices.gpus - - -@require_context -def from_cuda_array_interface(desc, owner=None): - """Create a DeviceNDArray from a cuda-array-interface description. - The *owner* is the owner of the underlying memory. - The resulting DeviceNDArray will acquire a reference from it. 
- """ - shape = desc['shape'] - strides = desc.get('strides') - dtype = np.dtype(desc['typestr']) - - shape, strides, dtype = _prepare_shape_strides_dtype( - shape, strides, dtype, order='C') - - devptr = driver.get_devptr_for_active_ctx(desc['data'][0]) - data = driver.MemoryPointer( - current_context(), devptr, size=np.prod(shape) * dtype.itemsize, - owner=owner) - da = devicearray.DeviceNDArray(shape=shape, strides=strides, - dtype=dtype, gpu_data=data) - return da - - -def as_cuda_array(obj): - """Create a DeviceNDArray from any object that implements - the cuda-array-interface. - - A view of the underlying GPU buffer is created. No copying of the data - is done. The resulting DeviceNDArray will acquire a reference from `obj`. - """ - if not is_cuda_array(obj): - raise TypeError("*obj* doesn't implement the cuda array interface.") - else: - return from_cuda_array_interface(obj.__cuda_array_interface__, - owner=obj) - - -def is_cuda_array(obj): - """Test if the object has defined the `__cuda_array_interface__`. - - Does not verify the validity of the interface. - """ - return hasattr(obj, '__cuda_array_interface__') - - -@require_context -def to_device(obj, stream=0, copy=True, to=None): - """to_device(obj, stream=0, copy=True, to=None) - - Allocate and transfer a numpy ndarray or structured scalar to the device. - - To copy host->device a numpy array:: - - ary = np.arange(10) - d_ary = cuda.to_device(ary) - - To enqueue the transfer to a stream:: - - stream = cuda.stream() - d_ary = cuda.to_device(ary, stream=stream) - - The resulting ``d_ary`` is a ``DeviceNDArray``. 
- - To copy device->host:: - - hary = d_ary.copy_to_host() - - To copy device->host to an existing array:: - - ary = np.empty(shape=d_ary.shape, dtype=d_ary.dtype) - d_ary.copy_to_host(ary) - - To enqueue the transfer to a stream:: - - hary = d_ary.copy_to_host(stream=stream) - """ - if to is None: - to, new = devicearray.auto_device(obj, stream=stream, copy=copy) - return to - if copy: - to.copy_to_device(obj, stream=stream) - return to - - -@require_context -def device_array(shape, dtype=np.float, strides=None, order='C', stream=0): - """device_array(shape, dtype=np.float, strides=None, order='C', stream=0) - - Allocate an empty device ndarray. Similar to :meth:`numpy.empty`. - """ - shape, strides, dtype = _prepare_shape_strides_dtype(shape, strides, dtype, - order) - return devicearray.DeviceNDArray(shape=shape, strides=strides, dtype=dtype, - stream=stream) - - -@require_context -def pinned_array(shape, dtype=np.float, strides=None, order='C'): - """pinned_array(shape, dtype=np.float, strides=None, order='C') - - Allocate a np.ndarray with a buffer that is pinned (pagelocked). - Similar to np.empty(). - """ - shape, strides, dtype = _prepare_shape_strides_dtype(shape, strides, dtype, - order) - bytesize = driver.memory_size_from_info(shape, strides, - dtype.itemsize) - buffer = current_context().memhostalloc(bytesize) - return np.ndarray(shape=shape, strides=strides, dtype=dtype, order=order, - buffer=buffer) - - -@require_context -def mapped_array(shape, dtype=np.float, strides=None, order='C', stream=0, - portable=False, wc=False): - """mapped_array(shape, dtype=np.float, strides=None, order='C', stream=0, portable=False, wc=False) - - Allocate a mapped ndarray with a buffer that is pinned and mapped on - to the device. Similar to np.empty() - - :param portable: a boolean flag to allow the allocated device memory to be - usable in multiple devices. 
- :param wc: a boolean flag to enable writecombined allocation which is faster - to write by the host and to read by the device, but slower to - write by the host and slower to write by the device. - """ - shape, strides, dtype = _prepare_shape_strides_dtype(shape, strides, dtype, - order) - bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize) - buffer = current_context().memhostalloc(bytesize, mapped=True) - npary = np.ndarray(shape=shape, strides=strides, dtype=dtype, order=order, - buffer=buffer) - mappedview = np.ndarray.view(npary, type=devicearray.MappedNDArray) - mappedview.device_setup(buffer, stream=stream) - return mappedview - - -@contextlib.contextmanager -@require_context -def open_ipc_array(handle, shape, dtype, strides=None): - """ - A context manager that opens a IPC *handle* (*CUipcMemHandle*) that is - represented as a sequence of bytes (e.g. *bytes*, tuple of int) - and represent it as an array of the given *shape*, *strides* and *dtype*. - The *strides* can be omitted. In that case, it is assumed to be a 1D - C contiguous array. - - Yields a device array. - - The IPC handle is closed automatically when context manager exits. - """ - dtype = np.dtype(dtype) - # compute size - size = np.prod(shape) * dtype.itemsize - # manually recreate the IPC mem handle - handle = driver.drvapi.cu_ipc_mem_handle(*handle) - # use *IpcHandle* to open the IPC memory - ipchandle = driver.IpcHandle(None, handle, size) - yield ipchandle.open_array(current_context(), shape=shape, - strides=strides, dtype=dtype) - ipchandle.close() - - -def synchronize(): - "Synchronize the current context." 
- return current_context().synchronize() - - -def _prepare_shape_strides_dtype(shape, strides, dtype, order): - dtype = np.dtype(dtype) - if isinstance(shape, (int, long)): - shape = (shape,) - if isinstance(strides, (int, long)): - strides = (strides,) - else: - if shape == (): - shape = (1,) - strides = strides or _fill_stride_by_order(shape, dtype, order) - return shape, strides, dtype - - -def _fill_stride_by_order(shape, dtype, order): - nd = len(shape) - strides = [0] * nd - if order == 'C': - strides[-1] = dtype.itemsize - for d in reversed(range(nd - 1)): - strides[d] = strides[d + 1] * shape[d + 1] - elif order == 'F': - strides[0] = dtype.itemsize - for d in range(1, nd): - strides[d] = strides[d - 1] * shape[d - 1] - else: - raise ValueError('must be either C/F order') - return tuple(strides) - - -def device_array_like(ary, stream=0): - """Call cuda.devicearray() with information from the array. - """ - return device_array(shape=ary.shape, dtype=ary.dtype, - strides=ary.strides, stream=stream) - -# Stream helper -@require_context -def stream(): - """stream() - - Create a CUDA stream that represents a command queue for the device. - """ - return current_context().create_stream() - -# Page lock -@require_context -@contextlib.contextmanager -def pinned(*arylist): - """A context manager for temporary pinning a sequence of host ndarrays. - """ - pmlist = [] - for ary in arylist: - pm = current_context().mempin(ary, driver.host_pointer(ary), - driver.host_memory_size(ary), - mapped=False) - pmlist.append(pm) - yield - del pmlist - - -@require_context -@contextlib.contextmanager -def mapped(*arylist, **kws): - """A context manager for temporarily mapping a sequence of host ndarrays. - """ - assert not kws or 'stream' in kws, "Only accept 'stream' as keyword." 
- pmlist = [] - stream = kws.get('stream', 0) - for ary in arylist: - pm = current_context().mempin(ary, driver.host_pointer(ary), - driver.host_memory_size(ary), - mapped=True) - pmlist.append(pm) - - devarylist = [] - for ary, pm in zip(arylist, pmlist): - devary = devicearray.from_array_like(ary, gpu_data=pm, stream=stream) - devarylist.append(devary) - if len(devarylist) == 1: - yield devarylist[0] - else: - yield devarylist - - -def event(timing=True): - """ - Create a CUDA event. Timing data is only recorded by the event if it is - created with ``timing=True``. - """ - evt = current_context().create_event(timing=timing) - return evt - -event_elapsed_time = driver.event_elapsed_time - -# Device selection - -def select_device(device_id): - """ - Make the context associated with device *device_id* the current context. - - Returns a Device instance. - - Raises exception on error. - """ - context = devices.get_context(device_id) - return context.device - - -def get_current_device(): - "Get current device associated with the current thread" - return current_context().device - - -def list_devices(): - "Return a list of all detected devices" - return devices.gpus - - -def close(): - """ - Explicitly clears all contexts in the current thread, and destroys all - contexts if the current thread is the main thread. - """ - devices.reset() - - -def _auto_device(ary, stream=0, copy=True): - return devicearray.auto_device(ary, stream=stream, copy=copy) - - -def detect(): - """ - Detect supported CUDA hardware and print a summary of the detected hardware. - - Returns a boolean indicating whether any supported devices were detected. 
- """ - devlist = list_devices() - print('Found %d CUDA devices' % len(devlist)) - supported_count = 0 - for dev in devlist: - attrs = [] - cc = dev.compute_capability - attrs += [('compute capability', '%d.%d' % cc)] - attrs += [('pci device id', dev.PCI_DEVICE_ID)] - attrs += [('pci bus id', dev.PCI_BUS_ID)] - if cc < (2, 0): - support = '[NOT SUPPORTED: CC < 2.0]' - else: - support = '[SUPPORTED]' - supported_count += 1 - - print('id %d %20s %40s' % (dev.id, dev.name, support)) - for key, val in attrs: - print('%40s: %s' % (key, val)) - - print('Summary:') - print('\t%d/%d devices are supported' % (supported_count, len(devlist))) - return supported_count > 0 - - -@contextlib.contextmanager -def defer_cleanup(): - """ - Temporarily disable memory deallocation. - Use this to prevent resource deallocation breaking asynchronous execution. - - For example:: - - with defer_cleanup(): - # all cleanup is deferred in here - do_speed_critical_code() - # cleanup can occur here - - Note: this context manager can be nested. - """ - deallocs = current_context().deallocations - with deallocs.disable(): - yield - - -profiling = require_context(driver.profiling) -profile_start = require_context(driver.profile_start) -profile_stop = require_context(driver.profile_stop) diff --git a/numba/numba/cuda/args.py b/numba/numba/cuda/args.py deleted file mode 100644 index 2a2c47641..000000000 --- a/numba/numba/cuda/args.py +++ /dev/null @@ -1,80 +0,0 @@ -""" -Hints to wrap Kernel arguments to indicate how to manage host-device -memory transfers before & after the kernel call. -""" -import abc - -from numba.six import add_metaclass - -from numba.typing.typeof import typeof, Purpose - - -@add_metaclass(abc.ABCMeta) -class ArgHint: - def __init__(self, value): - self.value = value - - @abc.abstractmethod - def to_device(self, retr, stream=0): - """ - :param stream: a stream to use when copying data - :param retr: - a list of clean-up work to do after the kernel's been run. 
- Append 0-arg lambdas to it! - :return: a value (usually an `DeviceNDArray`) to be passed to - the kernel - """ - pass - - @property - def _numba_type_(self): - return typeof(self.value, Purpose.argument) - - -class In(ArgHint): - def to_device(self, retr, stream=0): - from .cudadrv.devicearray import auto_device - devary, _ = auto_device( - self.value, - stream=stream) - # A dummy writeback functor to keep devary alive until the kernel - # is called. - retr.append(lambda: devary) - return devary - - -class Out(ArgHint): - def to_device(self, retr, stream=0): - from .cudadrv.devicearray import auto_device - devary, conv = auto_device( - self.value, - copy=False, - stream=stream) - if conv: - retr.append(lambda: devary.copy_to_host(self.value, stream=stream)) - return devary - - -class InOut(ArgHint): - def to_device(self, retr, stream=0): - from .cudadrv.devicearray import auto_device - devary, conv = auto_device( - self.value, - stream=stream) - if conv: - retr.append(lambda: devary.copy_to_host(self.value, stream=stream)) - return devary - - -def wrap_arg(value, default=InOut): - return value if isinstance(value, ArgHint) else default(value) - - -__all__ = [ - 'In', - 'Out', - 'InOut', - - 'ArgHint', - 'wrap_arg', -] diff --git a/numba/numba/cuda/codegen.py b/numba/numba/cuda/codegen.py deleted file mode 100644 index 0c97f1ce4..000000000 --- a/numba/numba/cuda/codegen.py +++ /dev/null @@ -1,71 +0,0 @@ -from llvmlite import binding as ll -from llvmlite.llvmpy import core as lc - -from numba.targets.codegen import BaseCPUCodegen, CodeLibrary -from numba import utils -from .cudadrv import nvvm - - - -CUDA_TRIPLE = {32: 'nvptx-nvidia-cuda', - 64: 'nvptx64-nvidia-cuda'} - - -class CUDACodeLibrary(CodeLibrary): - def _optimize_functions(self, ll_module): - pass - - def _optimize_final_module(self): - # Run some lightweight optimization to simplify the module. 
- # This seems to workaround a libnvvm compilation bug (see #1341) - pmb = ll.PassManagerBuilder() - pmb.opt_level = 1 - pmb.disable_unit_at_a_time = False - pmb.disable_unroll_loops = True - pmb.loop_vectorize = False - pmb.slp_vectorize = False - - pm = ll.ModulePassManager() - pmb.populate(pm) - pm.run(self._final_module) - - def _finalize_specific(self): - # Fix global naming - for gv in self._final_module.global_variables: - if '.' in gv.name: - gv.name = gv.name.replace('.', '_') - - def get_asm_str(self): - # Return nothing: we can only dump assembler code when it is later - # generated (in numba.cuda.compiler). - return None - - -class JITCUDACodegen(BaseCPUCodegen): - """ - This codegen implementation for CUDA actually only generates optimized - LLVM IR. Generation of PTX code is done separately (see numba.cuda.compiler). - """ - - _library_class = CUDACodeLibrary - - def _init(self, llvm_module): - assert list(llvm_module.global_variables) == [], "Module isn't empty" - self._data_layout = nvvm.default_data_layout - self._target_data = ll.create_target_data(self._data_layout) - - def _create_empty_module(self, name): - ir_module = lc.Module(name) - ir_module.triple = CUDA_TRIPLE[utils.MACHINE_BITS] - if self._data_layout: - ir_module.data_layout = self._data_layout - return ir_module - - def _module_pass_manager(self): - raise NotImplementedError - - def _function_pass_manager(self, llvm_module): - raise NotImplementedError - - def _add_module(self, module): - pass diff --git a/numba/numba/cuda/compiler.py b/numba/numba/cuda/compiler.py deleted file mode 100644 index 33f32fd21..000000000 --- a/numba/numba/cuda/compiler.py +++ /dev/null @@ -1,822 +0,0 @@ -from __future__ import absolute_import, print_function - - -from functools import reduce, wraps -import operator -import sys -import threading -import warnings - -from numba import ctypes_support as ctypes -from numba import config, compiler, types, sigutils -from numba.typing.templates import 
AbstractTemplate, ConcreteTemplate -from numba import funcdesc, typing, utils, serialize - -from .cudadrv.autotune import AutoTuner -from .cudadrv.devices import get_context -from .cudadrv import nvvm, devicearray, driver -from .errors import normalize_kernel_dimensions -from .api import get_current_device -from .args import wrap_arg - - -_cuda_compiler_lock = threading.RLock() - - -def nonthreadsafe(fn): - """ - Wraps a function to prevent multiple threads from executing it in parallel - due to LLVM is not threadsafe. - This is preferred over contextmanager due to llvm.Module.__del__ being - non-threadsafe and it is cumbersome to manually keep track of when it is - triggered. - """ - @wraps(fn) - def core(*args, **kwargs): - with _cuda_compiler_lock: - return fn(*args, **kwargs) - return core - - -@nonthreadsafe -def compile_cuda(pyfunc, return_type, args, debug, inline): - # First compilation will trigger the initialization of the CUDA backend. - from .descriptor import CUDATargetDesc - - typingctx = CUDATargetDesc.typingctx - targetctx = CUDATargetDesc.targetctx - # TODO handle debug flag - flags = compiler.Flags() - # Do not compile (generate native code), just lower (to LLVM) - flags.set('no_compile') - flags.set('no_cpython_wrapper') - if debug: - flags.set('boundcheck') - flags.set('debuginfo') - if inline: - flags.set('forceinline') - # Run compilation pipeline - cres = compiler.compile_extra(typingctx=typingctx, - targetctx=targetctx, - func=pyfunc, - args=args, - return_type=return_type, - flags=flags, - locals={}) - - library = cres.library - library.finalize() - - return cres - - -@nonthreadsafe -def compile_kernel(pyfunc, args, link, debug=False, inline=False, - fastmath=False, extensions=[]): - cres = compile_cuda(pyfunc, types.void, args, debug=debug, inline=inline) - fname = cres.fndesc.llvm_func_name - lib, kernel = cres.target_context.prepare_cuda_kernel(cres.library, fname, - cres.signature.args, - debug=debug) - - cukern = 
CUDAKernel(llvm_module=lib._final_module, - name=kernel.name, - pretty_name=cres.fndesc.qualname, - argtypes=cres.signature.args, - type_annotation=cres.type_annotation, - link=link, - debug=debug, - call_helper=cres.call_helper, - fastmath=fastmath, - extensions=extensions) - return cukern - - -class DeviceFunctionTemplate(object): - """Unmaterialized device function - """ - def __init__(self, pyfunc, debug, inline): - self.py_func = pyfunc - self.debug = debug - self.inline = inline - self._compileinfos = {} - - def __reduce__(self): - glbls = serialize._get_function_globals_for_reduction(self.py_func) - func_reduced = serialize._reduce_function(self.py_func, glbls) - args = (self.__class__, func_reduced, self.debug, self.inline) - return (serialize._rebuild_reduction, args) - - @classmethod - def _rebuild(cls, func_reduced, debug, inline): - func = serialize._rebuild_function(*func_reduced) - return compile_device_template(func, debug=debug, inline=inline) - - def compile(self, args): - """Compile the function for the given argument types. - - Each signature is compiled once by caching the compiled function inside - this object. - """ - if args not in self._compileinfos: - cres = compile_cuda(self.py_func, None, args, debug=self.debug, - inline=self.inline) - first_definition = not self._compileinfos - self._compileinfos[args] = cres - libs = [cres.library] - - if first_definition: - # First definition - cres.target_context.insert_user_function(self, cres.fndesc, - libs) - else: - cres.target_context.add_user_function(self, cres.fndesc, libs) - - else: - cres = self._compileinfos[args] - - return cres.signature - - -def compile_device_template(pyfunc, debug=False, inline=False): - """Create a DeviceFunctionTemplate object and register the object to - the CUDA typing context. 
- """ - from .descriptor import CUDATargetDesc - - dft = DeviceFunctionTemplate(pyfunc, debug=debug, inline=inline) - - class device_function_template(AbstractTemplate): - key = dft - - def generic(self, args, kws): - assert not kws - return dft.compile(args) - - typingctx = CUDATargetDesc.typingctx - typingctx.insert_user_function(dft, device_function_template) - return dft - - -def compile_device(pyfunc, return_type, args, inline=True, debug=False): - return DeviceFunction(pyfunc, return_type, args, inline=True, debug=False) - - -def declare_device_function(name, restype, argtypes): - from .descriptor import CUDATargetDesc - - typingctx = CUDATargetDesc.typingctx - targetctx = CUDATargetDesc.targetctx - sig = typing.signature(restype, *argtypes) - extfn = ExternFunction(name, sig) - - class device_function_template(ConcreteTemplate): - key = extfn - cases = [sig] - - fndesc = funcdesc.ExternalFunctionDescriptor( - name=name, restype=restype, argtypes=argtypes) - typingctx.insert_user_function(extfn, device_function_template) - targetctx.insert_user_function(extfn, fndesc) - return extfn - - -class DeviceFunction(object): - - def __init__(self, pyfunc, return_type, args, inline, debug): - self.py_func = pyfunc - self.return_type = return_type - self.args = args - self.inline = True - self.debug = False - cres = compile_cuda(self.py_func, self.return_type, self.args, - debug=self.debug, inline=self.inline) - self.cres = cres - # Register - class device_function_template(ConcreteTemplate): - key = self - cases = [cres.signature] - - cres.typing_context.insert_user_function( - self, device_function_template) - cres.target_context.insert_user_function(self, cres.fndesc, - [cres.library]) - - def __reduce__(self): - globs = serialize._get_function_globals_for_reduction(self.py_func) - func_reduced = serialize._reduce_function(self.py_func, globs) - args = (self.__class__, func_reduced, self.return_type, self.args, - self.inline, self.debug) - return 
(serialize._rebuild_reduction, args) - - @classmethod - def _rebuild(cls, func_reduced, return_type, args, inline, debug): - return cls(serialize._rebuild_function(*func_reduced), return_type, - args, inline, debug) - - def __repr__(self): - fmt = "" - return fmt.format(self.py_func, self.cres.signature) - - -class ExternFunction(object): - def __init__(self, name, sig): - self.name = name - self.sig = sig - - -def _compute_thread_per_block(kernel, tpb): - if tpb != 0: - return tpb - - else: - try: - tpb = kernel.autotune.best() - except ValueError: - warnings.warn('Could not autotune, using default tpb of 128') - tpb = 128 - - return tpb - -class ForAll(object): - def __init__(self, kernel, ntasks, tpb, stream, sharedmem): - self.kernel = kernel - self.ntasks = ntasks - self.thread_per_block = tpb - self.stream = stream - self.sharedmem = sharedmem - - def __call__(self, *args): - if isinstance(self.kernel, AutoJitCUDAKernel): - kernel = self.kernel.specialize(*args) - else: - kernel = self.kernel - - tpb = _compute_thread_per_block(kernel, self.thread_per_block) - tpbm1 = tpb - 1 - blkct = (self.ntasks + tpbm1) // tpb - - return kernel.configure(blkct, tpb, stream=self.stream, - sharedmem=self.sharedmem)(*args) - - -class CUDAKernelBase(object): - """Define interface for configurable kernels - """ - - def __init__(self): - self.griddim = (1, 1) - self.blockdim = (1, 1, 1) - self.sharedmem = 0 - self.stream = 0 - - def copy(self): - """ - Shallow copy the instance - """ - # Note: avoid using ``copy`` which calls __reduce__ - cls = self.__class__ - # new bare instance - new = cls.__new__(cls) - # update the internal states - new.__dict__.update(self.__dict__) - return new - - def configure(self, griddim, blockdim, stream=0, sharedmem=0): - griddim, blockdim = normalize_kernel_dimensions(griddim, blockdim) - - clone = self.copy() - clone.griddim = tuple(griddim) - clone.blockdim = tuple(blockdim) - clone.stream = stream - clone.sharedmem = sharedmem - return clone - 
- def __getitem__(self, args): - if len(args) not in [2, 3, 4]: - raise ValueError('must specify at least the griddim and blockdim') - return self.configure(*args) - - def forall(self, ntasks, tpb=0, stream=0, sharedmem=0): - """Returns a configured kernel for 1D kernel of given number of tasks - ``ntasks``. - - This assumes that: - - the kernel 1-to-1 maps global thread id ``cuda.grid(1)`` to tasks. - - the kernel must check if the thread id is valid.""" - - return ForAll(self, ntasks, tpb=tpb, stream=stream, sharedmem=sharedmem) - - def _serialize_config(self): - """ - Helper for serializing the grid, block and shared memory configuration. - CUDA stream config is not serialized. - """ - return self.griddim, self.blockdim, self.sharedmem - - def _deserialize_config(self, config): - """ - Helper for deserializing the grid, block and shared memory - configuration. - """ - self.griddim, self.blockdim, self.sharedmem = config - - -class CachedPTX(object): - """A PTX cache that uses compute capability as a cache key - """ - def __init__(self, name, llvmir, options): - self.name = name - self.llvmir = llvmir - self.cache = {} - self._extra_options = options.copy() - - def get(self): - """ - Get PTX for the current active context. - """ - cuctx = get_context() - device = cuctx.device - cc = device.compute_capability - ptx = self.cache.get(cc) - if ptx is None: - arch = nvvm.get_arch_option(*cc) - ptx = nvvm.llvm_to_ptx(self.llvmir, opt=3, arch=arch, - **self._extra_options) - self.cache[cc] = ptx - if config.DUMP_ASSEMBLY: - print(("ASSEMBLY %s" % self.name).center(80, '-')) - print(ptx.decode('utf-8')) - print('=' * 80) - return ptx - - -class CachedCUFunction(object): - """ - Get or compile CUDA function for the current active context - - Uses device ID as key for cache. 
- """ - - def __init__(self, entry_name, ptx, linking): - self.entry_name = entry_name - self.ptx = ptx - self.linking = linking - self.cache = {} - self.ccinfos = {} - - def get(self): - cuctx = get_context() - device = cuctx.device - cufunc = self.cache.get(device.id) - if cufunc is None: - ptx = self.ptx.get() - - # Link - linker = driver.Linker() - linker.add_ptx(ptx) - for path in self.linking: - linker.add_file_guess_ext(path) - cubin, _size = linker.complete() - compile_info = linker.info_log - module = cuctx.create_module_image(cubin) - - # Load - cufunc = module.get_function(self.entry_name) - self.cache[device.id] = cufunc - self.ccinfos[device.id] = compile_info - return cufunc - - def get_info(self): - self.get() # trigger compilation - cuctx = get_context() - device = cuctx.device - ci = self.ccinfos[device.id] - return ci - - def __reduce__(self): - """ - Reduce the instance for serialization. - Pre-compiled PTX code string is serialized inside the `ptx` (CachedPTX). - Loaded CUfunctions are discarded. They are recreated when unserialized. - """ - if self.linking: - msg = ('cannot pickle CUDA kernel function with additional ' - 'libraries to link against') - raise RuntimeError(msg) - args = (self.__class__, self.entry_name, self.ptx, self.linking) - return (serialize._rebuild_reduction, args) - - @classmethod - def _rebuild(cls, entry_name, ptx, linking): - """ - Rebuild an instance. - """ - return cls(entry_name, ptx, linking) - - -class CUDAKernel(CUDAKernelBase): - ''' - CUDA Kernel specialized for a given set of argument types. When called, this - object will validate that the argument types match those for which it is - specialized, and then launch the kernel on the device. 
- ''' - def __init__(self, llvm_module, name, pretty_name, argtypes, call_helper, - link=(), debug=False, fastmath=False, type_annotation=None, - extensions=[]): - super(CUDAKernel, self).__init__() - # initialize CUfunction - options = {'debug': debug} - if fastmath: - options.update(dict(ftz=True, - prec_sqrt=False, - prec_div=False, - fma=True)) - - ptx = CachedPTX(pretty_name, str(llvm_module), options=options) - cufunc = CachedCUFunction(name, ptx, link) - # populate members - self.entry_name = name - self.argument_types = tuple(argtypes) - self.linking = tuple(link) - self._type_annotation = type_annotation - self._func = cufunc - self.debug = debug - self.call_helper = call_helper - self.extensions = list(extensions) - - @classmethod - def _rebuild(cls, name, argtypes, cufunc, link, debug, call_helper, extensions, config): - """ - Rebuild an instance. - """ - instance = cls.__new__(cls) - # invoke parent constructor - super(cls, instance).__init__() - # populate members - instance.entry_name = name - instance.argument_types = tuple(argtypes) - instance.linking = tuple(link) - instance._type_annotation = None - instance._func = cufunc - instance.debug = debug - instance.call_helper = call_helper - instance.extensions = extensions - # update config - instance._deserialize_config(config) - return instance - - def __reduce__(self): - """ - Reduce the instance for serialization. - Compiled definitions are serialized in PTX form. - Type annotation are discarded. - Thread, block and shared memory configuration are serialized. - Stream information is discarded. 
- """ - config = self._serialize_config() - args = (self.__class__, self.entry_name, self.argument_types, - self._func, self.linking, self.debug, self.call_helper, - self.extensions, config) - return (serialize._rebuild_reduction, args) - - def __call__(self, *args, **kwargs): - assert not kwargs - self._kernel_call(args=args, - griddim=self.griddim, - blockdim=self.blockdim, - stream=self.stream, - sharedmem=self.sharedmem) - - def bind(self): - """ - Force binding to current CUDA context - """ - self._func.get() - - @property - def ptx(self): - ''' - PTX code for this kernel. - ''' - return self._func.ptx.get().decode('utf8') - - @property - def device(self): - """ - Get current active context - """ - return get_current_device() - - def inspect_llvm(self): - ''' - Returns the LLVM IR for this kernel. - ''' - return str(self._func.ptx.llvmir) - - def inspect_asm(self): - ''' - Returns the PTX code for this kernel. - ''' - return self._func.ptx.get().decode('ascii') - - def inspect_types(self, file=None): - ''' - Produce a dump of the Python source of this function annotated with the - corresponding Numba IR and type information. The dump is written to - *file*, or *sys.stdout* if *file* is *None*. 
- ''' - if self._type_annotation is None: - raise ValueError("Type annotation is not available") - - if file is None: - file = sys.stdout - - print("%s %s" % (self.entry_name, self.argument_types), file=file) - print('-' * 80, file=file) - print(self._type_annotation, file=file) - print('=' * 80, file=file) - - def _kernel_call(self, args, griddim, blockdim, stream=0, sharedmem=0): - # Prepare kernel - cufunc = self._func.get() - - if self.debug: - excname = cufunc.name + "__errcode__" - excmem, excsz = cufunc.module.get_global_symbol(excname) - assert excsz == ctypes.sizeof(ctypes.c_int) - excval = ctypes.c_int() - excmem.memset(0, stream=stream) - - # Prepare arguments - retr = [] # hold functors for writeback - - kernelargs = [] - for t, v in zip(self.argument_types, args): - self._prepare_args(t, v, stream, retr, kernelargs) - - # Configure kernel - cu_func = cufunc.configure(griddim, blockdim, - stream=stream, - sharedmem=sharedmem) - # Invoke kernel - cu_func(*kernelargs) - - if self.debug: - driver.device_to_host(ctypes.addressof(excval), excmem, excsz) - if excval.value != 0: - # An error occurred - def load_symbol(name): - mem, sz = cufunc.module.get_global_symbol("%s__%s__" % - (cufunc.name, - name)) - val = ctypes.c_int() - driver.device_to_host(ctypes.addressof(val), mem, sz) - return val.value - - tid = [load_symbol("tid" + i) for i in 'zyx'] - ctaid = [load_symbol("ctaid" + i) for i in 'zyx'] - code = excval.value - exccls, exc_args = self.call_helper.get_exception(code) - # Prefix the exception message with the thread position - prefix = "tid=%s ctaid=%s" % (tid, ctaid) - if exc_args: - exc_args = ("%s: %s" % (prefix, exc_args[0]),) + exc_args[1:] - else: - exc_args = prefix, - raise exccls(*exc_args) - - # retrieve auto converted arrays - for wb in retr: - wb() - - def _prepare_args(self, ty, val, stream, retr, kernelargs): - """ - Convert arguments to ctypes and append to kernelargs - """ - - # map the arguments using any extension you've 
registered - for extension in reversed(self.extensions): - ty, val = extension.prepare_args( - ty, - val, - stream=stream, - retr=retr) - - if isinstance(ty, types.Array): - if isinstance(ty, types.SmartArrayType): - devary = val.get('gpu') - retr.append(lambda: val.mark_changed('gpu')) - outer_parent = ctypes.c_void_p(0) - kernelargs.append(outer_parent) - else: - devary = wrap_arg(val).to_device(retr, stream) - - c_intp = ctypes.c_ssize_t - - meminfo = ctypes.c_void_p(0) - parent = ctypes.c_void_p(0) - nitems = c_intp(devary.size) - itemsize = c_intp(devary.dtype.itemsize) - data = ctypes.c_void_p(driver.device_pointer(devary)) - kernelargs.append(meminfo) - kernelargs.append(parent) - kernelargs.append(nitems) - kernelargs.append(itemsize) - kernelargs.append(data) - for ax in range(devary.ndim): - kernelargs.append(c_intp(devary.shape[ax])) - for ax in range(devary.ndim): - kernelargs.append(c_intp(devary.strides[ax])) - - elif isinstance(ty, types.Integer): - cval = getattr(ctypes, "c_%s" % ty)(val) - kernelargs.append(cval) - - elif ty == types.float64: - cval = ctypes.c_double(val) - kernelargs.append(cval) - - elif ty == types.float32: - cval = ctypes.c_float(val) - kernelargs.append(cval) - - elif ty == types.boolean: - cval = ctypes.c_uint8(int(val)) - kernelargs.append(cval) - - elif ty == types.complex64: - kernelargs.append(ctypes.c_float(val.real)) - kernelargs.append(ctypes.c_float(val.imag)) - - elif ty == types.complex128: - kernelargs.append(ctypes.c_double(val.real)) - kernelargs.append(ctypes.c_double(val.imag)) - - elif isinstance(ty, types.Record): - devrec = wrap_arg(val).to_device(retr, stream) - kernelargs.append(devrec) - - else: - raise NotImplementedError(ty, val) - - - @property - def autotune(self): - """Return the autotuner object associated with this kernel.""" - has_autotune = hasattr(self, '_autotune') - if has_autotune and self._autotune.dynsmem == self.sharedmem: - return self._autotune - else: - # Get CUDA Function - cufunc = 
self._func.get() - at = AutoTuner(info=cufunc.attrs, cc=cufunc.device.compute_capability) - self._autotune = at - return self._autotune - - @property - def occupancy(self): - """Occupancy is the ratio of the number of active warps per multiprocessor to the maximum - number of warps that can be active on the multiprocessor at once. - Calculate the theoretical occupancy of the kernel given the - current configuration.""" - thread_per_block = reduce(operator.mul, self.blockdim, 1) - return self.autotune.closest(thread_per_block) - - -class AutoJitCUDAKernel(CUDAKernelBase): - ''' - CUDA Kernel object. When called, the kernel object will specialize itself - for the given arguments (if no suitable specialized version already exists) - and launch on the device associated with the current context. - - Kernel objects are not to be constructed by the user, but instead are - created using the :func:`numba.cuda.jit` decorator. - ''' - def __init__(self, func, bind, targetoptions): - super(AutoJitCUDAKernel, self).__init__() - self.py_func = func - self.bind = bind - self.definitions = {} - self.targetoptions = targetoptions - - # defensive copy - self.targetoptions['extensions'] = \ - list(self.targetoptions.get('extensions', [])) - - from .descriptor import CUDATargetDesc - - self.typingctx = CUDATargetDesc.typingctx - - @property - def extensions(self): - ''' - A list of objects that must have a `prepare_args` function. When a - specialized kernel is called, each argument will be passed through - to the `prepare_args` (from the last object in this list to the - first). The arguments to `prepare_args` are: - - - `ty` the numba type of the argument - - `val` the argument value itself - - `stream` the CUDA stream used for the current call to the kernel - - `retr` a list of zero-arg functions that you may want to append - post-call cleanup work to. - - The `prepare_args` function must return a tuple `(ty, val)`, which - will be passed in turn to the next right-most `extension`. 
After all - the extensions have been called, the resulting `(ty, val)` will be - passed into Numba's default argument marshalling logic. - ''' - return self.targetoptions['extensions'] - - def __call__(self, *args): - ''' - Specialize and invoke this kernel with *args*. - ''' - kernel = self.specialize(*args) - cfg = kernel[self.griddim, self.blockdim, self.stream, self.sharedmem] - cfg(*args) - - def specialize(self, *args): - ''' - Compile and bind to the current context a version of this kernel - specialized for the given *args*. - ''' - argtypes = tuple( - [self.typingctx.resolve_argument_type(a) for a in args]) - kernel = self.compile(argtypes) - return kernel - - def compile(self, sig): - ''' - Compile and bind to the current context a version of this kernel - specialized for the given signature. - ''' - argtypes, return_type = sigutils.normalize_signature(sig) - assert return_type is None - kernel = self.definitions.get(argtypes) - if kernel is None: - if 'link' not in self.targetoptions: - self.targetoptions['link'] = () - kernel = compile_kernel(self.py_func, argtypes, - **self.targetoptions) - self.definitions[argtypes] = kernel - if self.bind: - kernel.bind() - return kernel - - def inspect_llvm(self, signature=None): - ''' - Return the LLVM IR for all signatures encountered thus far, or the LLVM - IR for a specific signature if given. - ''' - if signature is not None: - return self.definitions[signature].inspect_llvm() - else: - return dict((sig, defn.inspect_llvm()) - for sig, defn in self.definitions.items()) - - def inspect_asm(self, signature=None): - ''' - Return the generated assembly code for all signatures encountered thus - far, or the LLVM IR for a specific signature if given. 
- ''' - if signature is not None: - return self.definitions[signature].inspect_asm() - else: - return dict((sig, defn.inspect_asm()) - for sig, defn in self.definitions.items()) - - def inspect_types(self, file=None): - ''' - Produce a dump of the Python source of this function annotated with the - corresponding Numba IR and type information. The dump is written to - *file*, or *sys.stdout* if *file* is *None*. - ''' - if file is None: - file = sys.stdout - - for ver, defn in utils.iteritems(self.definitions): - defn.inspect_types(file=file) - - @classmethod - def _rebuild(cls, func_reduced, bind, targetoptions, config): - """ - Rebuild an instance. - """ - func = serialize._rebuild_function(*func_reduced) - instance = cls(func, bind, targetoptions) - instance._deserialize_config(config) - return instance - - def __reduce__(self): - """ - Reduce the instance for serialization. - Compiled definitions are serialized in PTX form. - """ - glbls = serialize._get_function_globals_for_reduction(self.py_func) - func_reduced = serialize._reduce_function(self.py_func, glbls) - config = self._serialize_config() - args = (self.__class__, func_reduced, self.bind, self.targetoptions, - config) - return (serialize._rebuild_reduction, args) diff --git a/numba/numba/cuda/cudadecl.py b/numba/numba/cuda/cudadecl.py deleted file mode 100644 index 95c90421c..000000000 --- a/numba/numba/cuda/cudadecl.py +++ /dev/null @@ -1,524 +0,0 @@ -from __future__ import print_function, division, absolute_import -from numba import types -from numba.typing.npydecl import register_number_classes -from numba.typing.templates import (AttributeTemplate, ConcreteTemplate, - AbstractTemplate, MacroTemplate, - signature, Registry) -from numba import cuda - - -registry = Registry() -intrinsic = registry.register -intrinsic_attr = registry.register_attr -intrinsic_global = registry.register_global - -register_number_classes(intrinsic_global) - - -class Cuda_grid(MacroTemplate): - key = cuda.grid - - -class 
Cuda_gridsize(MacroTemplate): - key = cuda.gridsize - - -class Cuda_threadIdx_x(MacroTemplate): - key = cuda.threadIdx.x - - -class Cuda_threadIdx_y(MacroTemplate): - key = cuda.threadIdx.y - - -class Cuda_threadIdx_z(MacroTemplate): - key = cuda.threadIdx.z - - -class Cuda_blockIdx_x(MacroTemplate): - key = cuda.blockIdx.x - - -class Cuda_blockIdx_y(MacroTemplate): - key = cuda.blockIdx.y - - -class Cuda_blockIdx_z(MacroTemplate): - key = cuda.blockIdx.z - - -class Cuda_blockDim_x(MacroTemplate): - key = cuda.blockDim.x - - -class Cuda_blockDim_y(MacroTemplate): - key = cuda.blockDim.y - - -class Cuda_blockDim_z(MacroTemplate): - key = cuda.blockDim.z - - -class Cuda_gridDim_x(MacroTemplate): - key = cuda.gridDim.x - - -class Cuda_gridDim_y(MacroTemplate): - key = cuda.gridDim.y - - -class Cuda_gridDim_z(MacroTemplate): - key = cuda.gridDim.z - - -class Cuda_warpsize(MacroTemplate): - key = cuda.warpsize - - -class Cuda_laneid(MacroTemplate): - key = cuda.laneid - - -class Cuda_shared_array(MacroTemplate): - key = cuda.shared.array - - -class Cuda_local_array(MacroTemplate): - key = cuda.local.array - - -class Cuda_const_arraylike(MacroTemplate): - key = cuda.const.array_like - - -@intrinsic -class Cuda_syncthreads(ConcreteTemplate): - key = cuda.syncthreads - cases = [signature(types.none)] - - -@intrinsic -class Cuda_syncthreads_count(ConcreteTemplate): - key = cuda.syncthreads_count - cases = [signature(types.i4, types.i4)] - - -@intrinsic -class Cuda_syncthreads_and(ConcreteTemplate): - key = cuda.syncthreads_and - cases = [signature(types.i4, types.i4)] - - -@intrinsic -class Cuda_syncthreads_or(ConcreteTemplate): - key = cuda.syncthreads_or - cases = [signature(types.i4, types.i4)] - - -@intrinsic -class Cuda_threadfence_device(ConcreteTemplate): - key = cuda.threadfence - cases = [signature(types.none)] - - -@intrinsic -class Cuda_threadfence_block(ConcreteTemplate): - key = cuda.threadfence_block - cases = [signature(types.none)] - - -@intrinsic -class 
Cuda_threadfence_system(ConcreteTemplate): - key = cuda.threadfence_system - cases = [signature(types.none)] - - -@intrinsic -class Cuda_syncwarp(ConcreteTemplate): - key = cuda.syncwarp - cases = [signature(types.none, types.i4)] - - -@intrinsic -class Cuda_shfl_sync_intrinsic(ConcreteTemplate): - key = cuda.shfl_sync_intrinsic - cases = [ - signature(types.Tuple((types.i4, types.b1)), types.i4, types.i4, types.i4, types.i4, types.i4), - signature(types.Tuple((types.i8, types.b1)), types.i4, types.i4, types.i8, types.i4, types.i4), - signature(types.Tuple((types.f4, types.b1)), types.i4, types.i4, types.f4, types.i4, types.i4), - signature(types.Tuple((types.f8, types.b1)), types.i4, types.i4, types.f8, types.i4, types.i4), - ] - - -@intrinsic -class Cuda_vote_sync_intrinsic(ConcreteTemplate): - key = cuda.vote_sync_intrinsic - cases = [signature(types.Tuple((types.i4, types.b1)), types.i4, types.i4, types.b1)] - - -@intrinsic -class Cuda_match_any_sync(ConcreteTemplate): - key = cuda.match_any_sync - cases = [ - signature(types.i4, types.i4, types.i4), - signature(types.i4, types.i4, types.i8), - signature(types.i4, types.i4, types.f4), - signature(types.i4, types.i4, types.f8), - ] - - -@intrinsic -class Cuda_match_all_sync(ConcreteTemplate): - key = cuda.match_all_sync - cases = [ - signature(types.Tuple((types.i4, types.b1)), types.i4, types.i4), - signature(types.Tuple((types.i4, types.b1)), types.i4, types.i8), - signature(types.Tuple((types.i4, types.b1)), types.i4, types.f4), - signature(types.Tuple((types.i4, types.b1)), types.i4, types.f8), - ] - - -@intrinsic -class Cuda_popc(ConcreteTemplate): - """ - Supported types from `llvm.popc` - [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics) - """ - key = cuda.popc - cases = [ - signature(types.int8, types.int8), - signature(types.int16, types.int16), - signature(types.int32, types.int32), - signature(types.int64, types.int64), - signature(types.uint8, types.uint8), - 
signature(types.uint16, types.uint16), - signature(types.uint32, types.uint32), - signature(types.uint64, types.uint64), - ] - - -@intrinsic -class Cuda_brev(ConcreteTemplate): - key = cuda.brev - cases = [ - signature(types.uint32, types.uint32), - signature(types.uint64, types.uint64), - ] - - -@intrinsic -class Cuda_clz(ConcreteTemplate): - """ - Supported types from `llvm.ctlz` - [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics) - """ - key = cuda.clz - cases = [ - signature(types.int8, types.int8), - signature(types.int16, types.int16), - signature(types.int32, types.int32), - signature(types.int64, types.int64), - signature(types.uint8, types.uint8), - signature(types.uint16, types.uint16), - signature(types.uint32, types.uint32), - signature(types.uint64, types.uint64), - ] - - -@intrinsic -class Cuda_ffs(ConcreteTemplate): - """ - Supported types from `llvm.cttz` - [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics) - """ - key = cuda.ffs - cases = [ - signature(types.int8, types.int8), - signature(types.int16, types.int16), - signature(types.int32, types.int32), - signature(types.int64, types.int64), - signature(types.uint8, types.uint8), - signature(types.uint16, types.uint16), - signature(types.uint32, types.uint32), - signature(types.uint64, types.uint64), - ] - - -@intrinsic -class Cuda_selp(AbstractTemplate): - key = cuda.selp - - def generic(self, args, kws): - assert not kws - test, a, b = args - - - # per docs - # http://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-selp - supported_types = (types.float64, types.float32, - types.int16, types.uint16, - types.int32, types.uint32, - types.int64, types.uint64) - - if a != b or a not in supported_types: - return - - return signature(a, test, a, a) - - -@intrinsic -class Cuda_atomic_add(AbstractTemplate): - key = cuda.atomic.add - - def generic(self, args, kws): - assert not kws - 
ary, idx, val = args - - if ary.ndim == 1: - return signature(ary.dtype, ary, types.intp, ary.dtype) - elif ary.ndim > 1: - return signature(ary.dtype, ary, idx, ary.dtype) - - -class Cuda_atomic_maxmin(AbstractTemplate): - def generic(self, args, kws): - assert not kws - ary, idx, val = args - # Implementation presently supports: - # float64, float32, int32, int64, uint32, uint64 only, - # so fail typing otherwise - supported_types = (types.float64, types.float32, - types.int32, types.uint32, - types.int64, types.uint64) - if ary.dtype not in supported_types: - return - - if ary.ndim == 1: - return signature(ary.dtype, ary, types.intp, ary.dtype) - elif ary.ndim > 1: - return signature(ary.dtype, ary, idx, ary.dtype) - - -@intrinsic -class Cuda_atomic_max(Cuda_atomic_maxmin): - key = cuda.atomic.max - - -@intrinsic -class Cuda_atomic_min(Cuda_atomic_maxmin): - key = cuda.atomic.min - - -@intrinsic -class Cuda_atomic_compare_and_swap(AbstractTemplate): - key = cuda.atomic.compare_and_swap - - def generic(self, args, kws): - assert not kws - ary, old, val = args - dty = ary.dtype - # only support int32 - if dty == types.int32 and ary.ndim == 1: - return signature(dty, ary, dty, dty) - - -@intrinsic_attr -class Cuda_threadIdx(AttributeTemplate): - key = types.Module(cuda.threadIdx) - - def resolve_x(self, mod): - return types.Macro(Cuda_threadIdx_x) - - def resolve_y(self, mod): - return types.Macro(Cuda_threadIdx_y) - - def resolve_z(self, mod): - return types.Macro(Cuda_threadIdx_z) - - -@intrinsic_attr -class Cuda_blockIdx(AttributeTemplate): - key = types.Module(cuda.blockIdx) - - def resolve_x(self, mod): - return types.Macro(Cuda_blockIdx_x) - - def resolve_y(self, mod): - return types.Macro(Cuda_blockIdx_y) - - def resolve_z(self, mod): - return types.Macro(Cuda_blockIdx_z) - - -@intrinsic_attr -class Cuda_blockDim(AttributeTemplate): - key = types.Module(cuda.blockDim) - - def resolve_x(self, mod): - return types.Macro(Cuda_blockDim_x) - - def resolve_y(self, 
mod): - return types.Macro(Cuda_blockDim_y) - - def resolve_z(self, mod): - return types.Macro(Cuda_blockDim_z) - - -@intrinsic_attr -class Cuda_gridDim(AttributeTemplate): - key = types.Module(cuda.gridDim) - - def resolve_x(self, mod): - return types.Macro(Cuda_gridDim_x) - - def resolve_y(self, mod): - return types.Macro(Cuda_gridDim_y) - - def resolve_z(self, mod): - return types.Macro(Cuda_gridDim_z) - - -@intrinsic_attr -class CudaSharedModuleTemplate(AttributeTemplate): - key = types.Module(cuda.shared) - - def resolve_array(self, mod): - return types.Macro(Cuda_shared_array) - - -@intrinsic_attr -class CudaConstModuleTemplate(AttributeTemplate): - key = types.Module(cuda.const) - - def resolve_array_like(self, mod): - return types.Macro(Cuda_const_arraylike) - - -@intrinsic_attr -class CudaLocalModuleTemplate(AttributeTemplate): - key = types.Module(cuda.local) - - def resolve_array(self, mod): - return types.Macro(Cuda_local_array) - - -@intrinsic_attr -class CudaAtomicTemplate(AttributeTemplate): - key = types.Module(cuda.atomic) - - def resolve_add(self, mod): - return types.Function(Cuda_atomic_add) - - def resolve_max(self, mod): - return types.Function(Cuda_atomic_max) - - def resolve_min(self, mod): - return types.Function(Cuda_atomic_min) - - def resolve_compare_and_swap(self, mod): - return types.Function(Cuda_atomic_compare_and_swap) - - -@intrinsic_attr -class CudaModuleTemplate(AttributeTemplate): - key = types.Module(cuda) - - def resolve_grid(self, mod): - return types.Macro(Cuda_grid) - - def resolve_gridsize(self, mod): - return types.Macro(Cuda_gridsize) - - def resolve_threadIdx(self, mod): - return types.Module(cuda.threadIdx) - - def resolve_blockIdx(self, mod): - return types.Module(cuda.blockIdx) - - def resolve_blockDim(self, mod): - return types.Module(cuda.blockDim) - - def resolve_gridDim(self, mod): - return types.Module(cuda.gridDim) - - def resolve_warpsize(self, mod): - return types.Macro(Cuda_warpsize) - - def 
resolve_laneid(self, mod): - return types.Macro(Cuda_laneid) - - def resolve_shared(self, mod): - return types.Module(cuda.shared) - - def resolve_popc(self, mod): - return types.Function(Cuda_popc) - - def resolve_brev(self, mod): - return types.Function(Cuda_brev) - - def resolve_clz(self, mod): - return types.Function(Cuda_clz) - - def resolve_ffs(self, mod): - return types.Function(Cuda_ffs) - - def resolve_syncthreads(self, mod): - return types.Function(Cuda_syncthreads) - - def resolve_syncthreads_count(self, mod): - return types.Function(Cuda_syncthreads_count) - - def resolve_syncthreads_and(self, mod): - return types.Function(Cuda_syncthreads_and) - - def resolve_syncthreads_or(self, mod): - return types.Function(Cuda_syncthreads_or) - - def resolve_threadfence(self, mod): - return types.Function(Cuda_threadfence_device) - - def resolve_threadfence_block(self, mod): - return types.Function(Cuda_threadfence_block) - - def resolve_threadfence_system(self, mod): - return types.Function(Cuda_threadfence_system) - - def resolve_syncwarp(self, mod): - return types.Function(Cuda_syncwarp) - - def resolve_shfl_sync_intrinsic(self, mod): - return types.Function(Cuda_shfl_sync_intrinsic) - - def resolve_vote_sync_intrinsic(self, mod): - return types.Function(Cuda_vote_sync_intrinsic) - - def resolve_match_any_sync(self, mod): - return types.Function(Cuda_match_any_sync) - - def resolve_match_all_sync(self, mod): - return types.Function(Cuda_match_all_sync) - - def resolve_selp(self, mod): - return types.Function(Cuda_selp) - - def resolve_atomic(self, mod): - return types.Module(cuda.atomic) - - def resolve_const(self, mod): - return types.Module(cuda.const) - - def resolve_local(self, mod): - return types.Module(cuda.local) - - -intrinsic_global(cuda, types.Module(cuda)) -## Forces the use of the cuda namespace by not recognizing individual the -## following as globals. 
-# intrinsic_global(cuda.grid, types.Function(Cuda_grid)) -# intrinsic_global(cuda.gridsize, types.Function(Cuda_gridsize)) -# intrinsic_global(cuda.threadIdx, types.Module(cuda.threadIdx)) -# intrinsic_global(cuda.shared, types.Module(cuda.shared)) -# intrinsic_global(cuda.shared.array, types.Function(Cuda_shared_array)) -# intrinsic_global(cuda.syncthreads, types.Function(Cuda_syncthreads)) -# intrinsic_global(cuda.atomic, types.Module(cuda.atomic)) - diff --git a/numba/numba/cuda/cudadrv/__init__.py b/numba/numba/cuda/cudadrv/__init__.py deleted file mode 100644 index 87af73c93..000000000 --- a/numba/numba/cuda/cudadrv/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -"""CUDA Driver - -- Driver API binding -- NVVM API binding -- Device array implementation - -""" -from numba import config -assert not config.ENABLE_CUDASIM, 'Cannot use real driver API with simulator' diff --git a/numba/numba/cuda/cudadrv/_extras.c b/numba/numba/cuda/cudadrv/_extras.c deleted file mode 100644 index c28936065..000000000 --- a/numba/numba/cuda/cudadrv/_extras.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Helper binding to call some CUDA Runtime API that cannot be directly - * encoded using ctypes. 
- */ - -#include "_pymodule.h" - - -#define CUDA_IPC_HANDLE_SIZE 64 - -typedef int CUresult; -typedef void* CUdeviceptr; - -typedef struct CUipcMemHandle_st{ - char reserved[CUDA_IPC_HANDLE_SIZE]; -} CUipcMemHandle; - -typedef CUresult (*cuIpcOpenMemHandle_t)(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned int flags ); - -static -cuIpcOpenMemHandle_t cuIpcOpenMemHandle = 0; - -static -void set_cuIpcOpenMemHandle(void* fnptr) -{ - cuIpcOpenMemHandle = (cuIpcOpenMemHandle_t)fnptr; -} - -static -CUresult call_cuIpcOpenMemHandle(CUdeviceptr* pdptr, CUipcMemHandle* handle, unsigned int flags) -{ - return cuIpcOpenMemHandle(pdptr, *handle, flags); -} - - -MOD_INIT(_extras) { - PyObject *m; - MOD_DEF(m, "_extras", "No docs", NULL) - if (m == NULL) - return MOD_ERROR_VAL; - PyModule_AddObject(m, "set_cuIpcOpenMemHandle", PyLong_FromVoidPtr(&set_cuIpcOpenMemHandle)); - PyModule_AddObject(m, "call_cuIpcOpenMemHandle", PyLong_FromVoidPtr(&call_cuIpcOpenMemHandle)); - PyModule_AddIntConstant(m, "CUDA_IPC_HANDLE_SIZE", CUDA_IPC_HANDLE_SIZE); - return MOD_SUCCESS_VAL(m); -} diff --git a/numba/numba/cuda/cudadrv/autotune.py b/numba/numba/cuda/cudadrv/autotune.py deleted file mode 100644 index ce3bf97b1..000000000 --- a/numba/numba/cuda/cudadrv/autotune.py +++ /dev/null @@ -1,321 +0,0 @@ -""" -- Parse jit compile info -- Compute warp occupany histogram -""" -from __future__ import division, absolute_import, print_function -import math -import re - -SMEM16K = 16 * 2 ** 10 -SMEM48K = 48 * 2 ** 10 -SMEM64K = 64 * 2 ** 10 -SMEM96K = 96 * 2 ** 10 -SMEM112K = 112 * 2 ** 10 - -#------------------------------------------------------------------------------ -# autotuning - - -class OccupancyThreadKey(object): - def __init__(self, item): - self.occupancy, self.threads = item - self.comparison = self.occupancy, 1 / self.threads - - def __lt__(self, other): - return self.comparison < other.comparison - - def __eq__(self, other): - return self.comparison == other.comparison - - def 
__ne__(self, other): - return self.comparison != other.comparison - - def __gt__(self, other): - return self.comparison > other.comparison - - def __le__(self, other): - return self.comparison <= other.comparison - - def __ge__(self, other): - return self.comparison >= other.comparison - - -class AutoTuner(object): - """Autotune a kernel based upon the theoretical occupancy. - """ - def __init__(self, cc, info, smem_config=None, dynsmem=0): - self.cc = cc - self.dynsmem = dynsmem - self._table = warp_occupancy(info=info, cc=cc) - self._by_occupancy = list(reversed(sorted(((occup, tpb) - for tpb, (occup, factor) - in self.table.items()), - key=OccupancyThreadKey))) - - @property - def table(self): - """A dict with thread-per-block as keys and tuple-2 of - (occupency, limiting factor) as values. - """ - return self._table - - @property - def by_occupancy(self): - """A list of tuple-2 of (occupancy, thread-per-block) sorted in - descending. - - The first item has the highest occupancy and the lowest number of - thread-per-block. - """ - return self._by_occupancy - - def best(self): - return self.max_occupancy_min_blocks() - - def max_occupancy_min_blocks(self): - """Returns the thread-per-block that optimizes for - maximum occupancy and minimum blocks. - - Maximum blocks allows for the best utilization of parallel execution - because each block can be executed concurrently on different SM. - """ - return self.by_occupancy[0][1] - - def closest(self, tpb): - """Find the occupancy of the closest tpb - """ - # round to the nearest multiple of warpsize - warpsize = PHYSICAL_LIMITS[self.cc]['thread_per_warp'] - tpb = ceil(tpb, warpsize) - # search - return self.table.get(tpb, [0])[0] - - - def best_within(self, mintpb, maxtpb): - """Returns the best tpb in the given range inclusively. 
- """ - warpsize = PHYSICAL_LIMITS[self.cc]['thread_per_warp'] - mintpb = int(ceil(mintpb, warpsize)) - maxtpb = int(floor(maxtpb, warpsize)) - return self.prefer(*range(mintpb, maxtpb + 1, warpsize)) - - def prefer(self, *tpblist): - """Prefer the thread-per-block with the highest warp occupancy - and the lowest thread-per-block. - - May return None if all threads-per-blocks are invalid - """ - bin = [] - for tpb in tpblist: - occ = self.closest(tpb) - if occ > 0: - bin.append((occ, tpb)) - if bin: - return sorted(bin, key=OccupancyThreadKey)[-1][1] - - -#------------------------------------------------------------------------------ -# warp occupancy calculator - -# Reference: NVIDIA CUDA Toolkit v6.5 Programming Guide, Appendix G. -# URL: http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities - -LIMITS_CC_20 = { - 'thread_per_warp': 32, - 'warp_per_sm': 48, - 'thread_per_sm': 1536, - 'block_per_sm': 8, - 'registers': 32768, - 'reg_alloc_unit': 64, - 'reg_alloc_gran': 'warp', - 'reg_per_thread': 63, - 'smem_per_sm': SMEM48K, - 'smem_alloc_unit': 128, - 'warp_alloc_gran': 2, - 'max_block_size': 1024, - 'default_smem_config': SMEM16K, -} - -LIMITS_CC_21 = LIMITS_CC_20 - -LIMITS_CC_30 = { - 'thread_per_warp': 32, - 'warp_per_sm': 64, - 'thread_per_sm': 2048, - 'block_per_sm': 16, - 'registers': 65536, - 'reg_alloc_unit': 256, - 'reg_alloc_gran': 'warp', - 'reg_per_thread': 63, - 'smem_per_sm': SMEM48K, - 'smem_alloc_unit': 256, - 'warp_alloc_gran': 4, - 'max_block_size': 1024, - 'default_smem_config': SMEM48K, -} - -LIMITS_CC_35 = LIMITS_CC_30.copy() -LIMITS_CC_35.update({ - 'reg_per_thread': 255, -}) - -LIMITS_CC_37 = LIMITS_CC_35.copy() - -LIMITS_CC_37.update({ - 'registers': 131072, - 'default_smem_config': SMEM112K, -}) - - -LIMITS_CC_50 = { - 'thread_per_warp': 32, - 'warp_per_sm': 64, - 'thread_per_sm': 2048, - 'block_per_sm': 32, - 'registers': 65536, - 'reg_alloc_unit': 256, - 'reg_alloc_gran': 'warp', - 'reg_per_thread': 255, - 
'smem_per_sm': SMEM64K, - 'smem_per_block': SMEM48K, - 'smem_alloc_unit': 256, - 'warp_alloc_gran': 4, - 'max_block_size': 1024, - 'default_smem_config': SMEM64K, -} - -LIMITS_CC_52 = LIMITS_CC_50.copy() -LIMITS_CC_52.update({ - 'smem_per_sm': SMEM96K, - 'default_smem_config': SMEM96K, -}) -LIMITS_CC_53 = LIMITS_CC_50.copy() -LIMITS_CC_53.update({ - 'registers': 32768, -}) - -LIMITS_CC_60 = LIMITS_CC_50.copy() -LIMITS_CC_60.update({ - 'warp_alloc_gran': 2, -}) -LIMITS_CC_61 = LIMITS_CC_60.copy() -LIMITS_CC_61.update({ - 'smem_per_sm': SMEM96K, - 'default_smem_config': SMEM96K, - 'warp_alloc_gran': 4, -}) -LIMITS_CC_62 = LIMITS_CC_60.copy() -LIMITS_CC_62.update({ - 'thread_per_sm': 4096, - 'warp_per_sm': 128, - 'warp_alloc_gran': 4, -}) - -PHYSICAL_LIMITS = { - (2, 0): LIMITS_CC_20, - (2, 1): LIMITS_CC_21, - (3, 0): LIMITS_CC_30, - (3, 5): LIMITS_CC_35, - (3, 7): LIMITS_CC_35, - (5, 0): LIMITS_CC_50, - (5, 2): LIMITS_CC_52, - (5, 3): LIMITS_CC_53, - (6, 0): LIMITS_CC_50, - (6, 1): LIMITS_CC_61, - (6, 2): LIMITS_CC_62, -} - - -def ceil(x, s=1): - return s * math.ceil(x / s) - - -def floor(x, s=1): - return s * math.floor(x / s) - - -def warp_occupancy(info, cc, smem_config=None): - """Returns a dictionary of {threadperblock: occupancy, factor} - - Only threadperblock of multiple of warpsize is used. - Only threadperblock of non-zero occupancy is returned. 
- """ - ret = {} - try: - limits = PHYSICAL_LIMITS[cc] - except KeyError: - raise ValueError("%s is not a supported compute capability" - % ".".join(str(c) for c in cc)) - if smem_config is None: - smem_config = limits['default_smem_config'] - warpsize = limits['thread_per_warp'] - max_thread = info.maxthreads - - for tpb in range(warpsize, max_thread + 1, warpsize): - result = compute_warp_occupancy(tpb=tpb, - reg=info.regs, - smem=info.shared, - smem_config=smem_config, - limits=limits) - if result[0]: - ret[tpb] = result - return ret - - -def compute_warp_occupancy(tpb, reg, smem, smem_config, limits): - assert limits['reg_alloc_gran'] == 'warp', \ - "assume warp register allocation granularity" - limit_block_per_sm = limits['block_per_sm'] - limit_warp_per_sm = limits['warp_per_sm'] - limit_thread_per_warp = limits['thread_per_warp'] - limit_reg_per_thread = limits['reg_per_thread'] - limit_total_regs = limits['registers'] - limit_total_smem = min(limits['smem_per_sm'], smem_config) - my_smem_alloc_unit = limits['smem_alloc_unit'] - reg_alloc_unit = limits['reg_alloc_unit'] - warp_alloc_gran = limits['warp_alloc_gran'] - - my_warp_per_block = ceil(tpb / limit_thread_per_warp) - my_reg_count = reg - my_reg_per_block = my_warp_per_block - my_smem = smem - my_smem_per_block = ceil(my_smem, my_smem_alloc_unit) - - # allocated resource - limit_blocks_due_to_warps = min(limit_block_per_sm, - floor( - limit_warp_per_sm / my_warp_per_block)) - - c39 = floor(limit_total_regs / ceil(my_reg_count * limit_thread_per_warp, - reg_alloc_unit), - warp_alloc_gran) - - limit_blocks_due_to_regs = (0 - if my_reg_count > limit_reg_per_thread - else (floor(c39 / my_reg_per_block) - if my_reg_count > 0 - else limit_block_per_sm)) - - limit_blocks_due_to_smem = (floor(limit_total_smem / - my_smem_per_block) - if my_smem_per_block > 0 - else limit_block_per_sm) - - # occupancy - active_block_per_sm = min(limit_blocks_due_to_smem, - limit_blocks_due_to_warps, - limit_blocks_due_to_regs) 
- - if active_block_per_sm == limit_blocks_due_to_warps: - factor = 'warps' - elif active_block_per_sm == limit_blocks_due_to_regs: - factor = 'regs' - else: - factor = 'smem' - - active_warps_per_sm = active_block_per_sm * my_warp_per_block - #active_threads_per_sm = active_warps_per_sm * limit_thread_per_warp - - occupancy = active_warps_per_sm / limit_warp_per_sm - return occupancy, factor - diff --git a/numba/numba/cuda/cudadrv/devicearray.py b/numba/numba/cuda/cudadrv/devicearray.py deleted file mode 100644 index d7baa825e..000000000 --- a/numba/numba/cuda/cudadrv/devicearray.py +++ /dev/null @@ -1,651 +0,0 @@ -""" -A CUDA ND Array is recognized by checking the __cuda_memory__ attribute -on the object. If it exists and evaluate to True, it must define shape, -strides, dtype and size attributes similar to a NumPy ndarray. -""" -from __future__ import print_function, absolute_import, division - -import warnings -import math -import functools -import copy -from numba import six -from ctypes import c_void_p - -import numpy as np - -import numba -from . import driver as _driver -from . 
import devices -from numba import dummyarray, types, numpy_support -from numba.unsafe.ndarray import to_fixed_tuple - -try: - lru_cache = getattr(functools, 'lru_cache')(None) -except AttributeError: - # Python 3.1 or lower - def lru_cache(func): - return func - - -def is_cuda_ndarray(obj): - "Check if an object is a CUDA ndarray" - return getattr(obj, '__cuda_ndarray__', False) - - -def verify_cuda_ndarray_interface(obj): - "Verify the CUDA ndarray interface for an obj" - require_cuda_ndarray(obj) - - def requires_attr(attr, typ): - if not hasattr(obj, attr): - raise AttributeError(attr) - if not isinstance(getattr(obj, attr), typ): - raise AttributeError('%s must be of type %s' % (attr, typ)) - - requires_attr('shape', tuple) - requires_attr('strides', tuple) - requires_attr('dtype', np.dtype) - requires_attr('size', six.integer_types) - - -def require_cuda_ndarray(obj): - "Raises ValueError is is_cuda_ndarray(obj) evaluates False" - if not is_cuda_ndarray(obj): - raise ValueError('require an cuda ndarray object') - - -class DeviceNDArrayBase(object): - """A on GPU NDArray representation - """ - __cuda_memory__ = True - __cuda_ndarray__ = True # There must be gpu_data attribute - - def __init__(self, shape, strides, dtype, stream=0, writeback=None, - gpu_data=None): - """ - Args - ---- - - shape - array shape. - strides - array strides. - dtype - data type as np.dtype. - stream - cuda stream. - writeback - Deprecated. 
- gpu_data - user provided device memory for the ndarray data buffer - """ - if isinstance(shape, six.integer_types): - shape = (shape,) - if isinstance(strides, six.integer_types): - strides = (strides,) - self.ndim = len(shape) - if len(strides) != self.ndim: - raise ValueError('strides not match ndim') - self._dummy = dummyarray.Array.from_desc(0, shape, strides, - dtype.itemsize) - self.shape = tuple(shape) - self.strides = tuple(strides) - self.dtype = np.dtype(dtype) - self.size = int(np.prod(self.shape)) - # prepare gpu memory - if self.size > 0: - if gpu_data is None: - self.alloc_size = _driver.memory_size_from_info(self.shape, - self.strides, - self.dtype.itemsize) - gpu_data = devices.get_context().memalloc(self.alloc_size) - else: - self.alloc_size = _driver.device_memory_size(gpu_data) - else: - # Make NULL pointer for empty allocation - gpu_data = _driver.MemoryPointer(context=devices.get_context(), - pointer=c_void_p(0), size=0) - self.alloc_size = 0 - - self.gpu_data = gpu_data - - self.__writeback = writeback # should deprecate the use of this - self.stream = stream - - @property - def __cuda_array_interface__(self): - return { - 'shape': tuple(self.shape), - 'strides': tuple(self.strides), - 'data': (self.device_ctypes_pointer.value, False), - 'typestr': self.dtype.str, - 'version': 0, - } - - def bind(self, stream=0): - """Bind a CUDA stream to this object so that all subsequent operation - on this array defaults to the given stream. 
- """ - clone = copy.copy(self) - clone.stream = stream - return clone - - @property - def T(self): - return self.transpose() - - def transpose(self, axes=None): - if axes and tuple(axes) == tuple(range(self.ndim)): - return self - elif self.ndim != 2: - raise NotImplementedError("transposing a non-2D DeviceNDArray isn't supported") - elif axes is not None and set(axes) != set(range(self.ndim)): - raise ValueError("invalid axes list %r" % (axes,)) - else: - from numba.cuda.kernels.transpose import transpose - return transpose(self) - - def _default_stream(self, stream): - return self.stream if not stream else stream - - @property - def _numba_type_(self): - """ - Magic attribute expected by Numba to get the numba type that - represents this object. - """ - dtype = numpy_support.from_dtype(self.dtype) - return types.Array(dtype, self.ndim, 'A') - - @property - def device_ctypes_pointer(self): - """Returns the ctypes pointer to the GPU data buffer - """ - if self.gpu_data is None: - return c_void_p(0) - else: - return self.gpu_data.device_ctypes_pointer - - @devices.require_context - def copy_to_device(self, ary, stream=0): - """Copy `ary` to `self`. - - If `ary` is a CUDA memory, perform a device-to-device transfer. - Otherwise, perform a a host-to-device transfer. - """ - if ary.size == 0: - # Nothing to do - return - - sentry_contiguous(self) - stream = self._default_stream(stream) - - if _driver.is_device_memory(ary): - sentry_contiguous(ary) - - if self.flags['C_CONTIGUOUS'] != ary.flags['C_CONTIGUOUS']: - raise ValueError("Can't copy %s-contiguous array to a %s-contiguous array" % ( - 'C' if ary.flags['C_CONTIGUOUS'] else 'F', - 'C' if self.flags['C_CONTIGUOUS'] else 'F', - )) - - sz = min(self.alloc_size, ary.alloc_size) - _driver.device_to_device(self, ary, sz, stream=stream) - else: - # Ensure same contiguous-nous. Only copies (host-side) - # if necessary (e.g. 
it needs to materialize a strided view) - ary = np.array( - ary, - order='C' if self.flags['C_CONTIGUOUS'] else 'F', - subok=True, - copy=False) - - sz = min(_driver.host_memory_size(ary), self.alloc_size) - _driver.host_to_device(self, ary, sz, stream=stream) - - @devices.require_context - def copy_to_host(self, ary=None, stream=0): - """Copy ``self`` to ``ary`` or create a new Numpy ndarray - if ``ary`` is ``None``. - - If a CUDA ``stream`` is given, then the transfer will be made - asynchronously as part as the given stream. Otherwise, the transfer is - synchronous: the function returns after the copy is finished. - - Always returns the host array. - - Example:: - - import numpy as np - from numba import cuda - - arr = np.arange(1000) - d_arr = cuda.to_device(arr) - - my_kernel[100, 100](d_arr) - - result_array = d_arr.copy_to_host() - """ - stream = self._default_stream(stream) - if ary is None: - hostary = np.empty(shape=self.alloc_size, dtype=np.byte) - else: - if ary.dtype != self.dtype: - raise TypeError('incompatible dtype') - - if ary.shape != self.shape: - scalshapes = (), (1,) - if not (ary.shape in scalshapes and self.shape in scalshapes): - raise TypeError('incompatible shape; device %s; host %s' % - (self.shape, ary.shape)) - if ary.strides != self.strides: - scalstrides = (), (self.dtype.itemsize,) - if not (ary.strides in scalstrides and - self.strides in scalstrides): - raise TypeError('incompatible strides; device %s; host %s' % - (self.strides, ary.strides)) - hostary = ary - - assert self.alloc_size >= 0, "Negative memory size" - if self.alloc_size != 0: - _driver.device_to_host(hostary, self, self.alloc_size, stream=stream) - - if ary is None: - if self.size == 0: - hostary = np.ndarray(shape=self.shape, dtype=self.dtype, - buffer=hostary) - else: - hostary = np.ndarray(shape=self.shape, dtype=self.dtype, - strides=self.strides, buffer=hostary) - return hostary - - def to_host(self, stream=0): - stream = self._default_stream(stream) - 
warnings.warn("to_host() is deprecated and will be removed", - DeprecationWarning) - if self.__writeback is None: - raise ValueError("no associated writeback array") - self.copy_to_host(self.__writeback, stream=stream) - - def split(self, section, stream=0): - """Split the array into equal partition of the `section` size. - If the array cannot be equally divided, the last section will be - smaller. - """ - stream = self._default_stream(stream) - if self.ndim != 1: - raise ValueError("only support 1d array") - if self.strides[0] != self.dtype.itemsize: - raise ValueError("only support unit stride") - nsect = int(math.ceil(float(self.size) / section)) - strides = self.strides - itemsize = self.dtype.itemsize - for i in range(nsect): - begin = i * section - end = min(begin + section, self.size) - shape = (end - begin,) - gpu_data = self.gpu_data.view(begin * itemsize, end * itemsize) - yield DeviceNDArray(shape, strides, dtype=self.dtype, stream=stream, - gpu_data=gpu_data) - - def as_cuda_arg(self): - """Returns a device memory object that is used as the argument. - """ - return self.gpu_data - - def get_ipc_handle(self): - """ - Returns a *IpcArrayHandle* object that is safe to serialize and transfer - to another process to share the local allocation. - - Note: this feature is only available on Linux. - """ - ipch = devices.get_context().get_ipc_handle(self.gpu_data) - desc = dict(shape=self.shape, strides=self.strides, dtype=self.dtype) - return IpcArrayHandle(ipc_handle=ipch, array_desc=desc) - - -class DeviceRecord(DeviceNDArrayBase): - ''' - An on-GPU record type - ''' - def __init__(self, dtype, stream=0, gpu_data=None): - shape = () - strides = () - super(DeviceRecord, self).__init__(shape, strides, dtype, stream, - gpu_data) - - @property - def flags(self): - """ - For `numpy.ndarray` compatibility. 
Ideally this would return a - `np.core.multiarray.flagsobj`, but that needs to be constructed - with an existing `numpy.ndarray` (as the C- and F- contiguous flags - aren't writeable). - """ - return dict(self._dummy.flags) # defensive copy - - @property - def _numba_type_(self): - """ - Magic attribute expected by Numba to get the numba type that - represents this object. - """ - return numpy_support.from_dtype(self.dtype) - - -@lru_cache -def _assign_kernel(ndim): - """ - A separate method so we don't need to compile code every assignment (!). - - :param ndim: We need to have static array sizes for cuda.local.array, so - bake in the number of dimensions into the kernel - """ - from numba import cuda # circular! - - @cuda.jit - def kernel(lhs, rhs): - location = cuda.grid(1) - - n_elements = 1 - for i in range(lhs.ndim): - n_elements *= lhs.shape[i] - if location >= n_elements: - # bake n_elements into the kernel, better than passing it in - # as another argument. - return - - # [0, :] is the to-index (into `lhs`) - # [1, :] is the from-index (into `rhs`) - idx = cuda.local.array( - shape=(2, ndim), - dtype=types.int64) - - for i in range(ndim - 1, -1, -1): - idx[0, i] = location % lhs.shape[i] - idx[1, i] = (location % lhs.shape[i]) * (rhs.shape[i] > 1) - location //= lhs.shape[i] - - lhs[to_fixed_tuple(idx[0], ndim)] = rhs[to_fixed_tuple(idx[1], ndim)] - return kernel - - -class DeviceNDArray(DeviceNDArrayBase): - ''' - An on-GPU array type - ''' - def is_f_contiguous(self): - ''' - Return true if the array is Fortran-contiguous. - ''' - return self._dummy.is_f_contig - - @property - def flags(self): - """ - For `numpy.ndarray` compatibility. Ideally this would return a - `np.core.multiarray.flagsobj`, but that needs to be constructed - with an existing `numpy.ndarray` (as the C- and F- contiguous flags - aren't writeable). - """ - return dict(self._dummy.flags) # defensive copy - - def is_c_contiguous(self): - ''' - Return true if the array is C-contiguous. 
- ''' - return self._dummy.is_c_contig - - def __array__(self, dtype=None): - """ - :return: an `numpy.ndarray`, so copies to the host. - """ - return self.copy_to_host().__array__(dtype) - - def __len__(self): - return self.shape[0] - - def reshape(self, *newshape, **kws): - """ - Reshape the array without changing its contents, similarly to - :meth:`numpy.ndarray.reshape`. Example:: - - d_arr = d_arr.reshape(20, 50, order='F') - """ - if len(newshape) == 1 and isinstance(newshape[0], (tuple, list)): - newshape = newshape[0] - - cls = type(self) - if newshape == self.shape: - # nothing to do - return cls(shape=self.shape, strides=self.strides, - dtype=self.dtype, gpu_data=self.gpu_data) - - newarr, extents = self._dummy.reshape(*newshape, **kws) - - if extents == [self._dummy.extent]: - return cls(shape=newarr.shape, strides=newarr.strides, - dtype=self.dtype, gpu_data=self.gpu_data) - else: - raise NotImplementedError("operation requires copying") - - def ravel(self, order='C', stream=0): - ''' - Flatten the array without changing its contents, similar to - :meth:`numpy.ndarray.ravel`. 
- ''' - stream = self._default_stream(stream) - cls = type(self) - newarr, extents = self._dummy.ravel(order=order) - - if extents == [self._dummy.extent]: - return cls(shape=newarr.shape, strides=newarr.strides, - dtype=self.dtype, gpu_data=self.gpu_data, - stream=stream) - - else: - raise NotImplementedError("operation requires copying") - - @devices.require_context - def __getitem__(self, item): - return self._do_getitem(item) - - def getitem(self, item, stream=0): - """Do `__getitem__(item)` with CUDA stream - """ - return self._do_getitem(item, stream) - - def _do_getitem(self, item, stream=0): - stream = self._default_stream(stream) - - arr = self._dummy.__getitem__(item) - extents = list(arr.iter_contiguous_extent()) - cls = type(self) - if len(extents) == 1: - newdata = self.gpu_data.view(*extents[0]) - - if not arr.is_array: - # Element indexing - hostary = np.empty(1, dtype=self.dtype) - _driver.device_to_host(dst=hostary, src=newdata, - size=self._dummy.itemsize, - stream=stream) - return hostary[0] - else: - return cls(shape=arr.shape, strides=arr.strides, - dtype=self.dtype, gpu_data=newdata, stream=stream) - else: - newdata = self.gpu_data.view(*arr.extent) - return cls(shape=arr.shape, strides=arr.strides, - dtype=self.dtype, gpu_data=newdata, stream=stream) - - @devices.require_context - def __setitem__(self, key, value): - return self._do_setitem(key, value) - - def setitem(self, key, value, stream=0): - """Do `__setitem__(key, value)` with CUDA stream - """ - return self._so_getitem(key, value, stream) - - def _do_setitem(self, key, value, stream=0): - - stream = self._default_stream(stream) - - # (1) prepare LHS - - arr = self._dummy.__getitem__(key) - newdata = self.gpu_data.view(*arr.extent) - - if isinstance(arr, dummyarray.Element): - # convert to a 1d array - shape = (1,) - strides = (self.dtype.itemsize,) - else: - shape = arr.shape - strides = arr.strides - - lhs = type(self)( - shape=shape, - strides=strides, - dtype=self.dtype, - 
gpu_data=newdata, - stream=stream) - - # (2) prepare RHS - - rhs, _ = auto_device(value, stream=stream) - if rhs.ndim > lhs.ndim: - raise ValueError("Can't assign %s-D array to %s-D self" % ( - rhs.ndim, - lhs.ndim)) - rhs_shape = np.ones(lhs.ndim, dtype=np.int64) - rhs_shape[-rhs.ndim:] = rhs.shape - rhs = rhs.reshape(*rhs_shape) - for i, (l, r) in enumerate(zip(lhs.shape, rhs.shape)): - if r != 1 and l != r: - raise ValueError("Can't copy sequence with size %d to array axis %d with dimension %d" % ( - r, - i, - l)) - - # (3) do the copy - - n_elements = np.prod(lhs.shape) - _assign_kernel(lhs.ndim).forall(n_elements, stream=stream)(lhs, rhs) - - - -class IpcArrayHandle(object): - """ - An IPC array handle that can be serialized and transfer to another process - in the same machine for share a GPU allocation. - - On the destination process, use the *.open()* method to creates a new - *DeviceNDArray* object that shares the allocation from the original process. - To release the resources, call the *.close()* method. After that, the - destination can no longer use the shared array object. (Note: the - underlying weakref to the resource is now dead.) - - This object implements the context-manager interface that calls the - *.open()* and *.close()* method automatically:: - - with the_ipc_array_handle as ipc_array: - # use ipc_array here as a normal gpu array object - some_code(ipc_array) - # ipc_array is dead at this point - """ - def __init__(self, ipc_handle, array_desc): - self._array_desc = array_desc - self._ipc_handle = ipc_handle - - def open(self): - """ - Returns a new *DeviceNDArray* that shares the allocation from the - original process. Must not be used on the original process. - """ - dptr = self._ipc_handle.open(devices.get_context()) - return DeviceNDArray(gpu_data=dptr, **self._array_desc) - - def close(self): - """ - Closes the IPC handle to the array. 
- """ - self._ipc_handle.close() - - def __enter__(self): - return self.open() - - def __exit__(self, type, value, traceback): - self.close() - - -class MappedNDArray(DeviceNDArrayBase, np.ndarray): - """ - A host array that uses CUDA mapped memory. - """ - - def device_setup(self, gpu_data, stream=0): - self.gpu_data = gpu_data - - -def from_array_like(ary, stream=0, gpu_data=None): - "Create a DeviceNDArray object that is like ary." - if ary.ndim == 0: - ary = ary.reshape(1) - return DeviceNDArray(ary.shape, ary.strides, ary.dtype, - writeback=ary, stream=stream, gpu_data=gpu_data) - - -def from_record_like(rec, stream=0, gpu_data=None): - "Create a DeviceRecord object that is like rec." - return DeviceRecord(rec.dtype, stream=stream, gpu_data=gpu_data) - - -errmsg_contiguous_buffer = ("Array contains non-contiguous buffer and cannot " - "be transferred as a single memory region. Please " - "ensure contiguous buffer with numpy " - ".ascontiguousarray()") - - -def sentry_contiguous(ary): - if not ary.flags['C_CONTIGUOUS'] and not ary.flags['F_CONTIGUOUS']: - if ary.strides[0] == 0: - # Broadcasted, ensure inner contiguous - return sentry_contiguous(ary[0]) - - else: - raise ValueError(errmsg_contiguous_buffer) - - -def auto_device(obj, stream=0, copy=True): - """ - Create a DeviceRecord or DeviceArray like obj and optionally copy data from - host to device. If obj already represents device memory, it is returned and - no copy is made. - """ - if _driver.is_device_memory(obj): - return obj, False - elif hasattr(obj, '__cuda_array_interface__'): - return numba.cuda.as_cuda_array(obj), False - else: - if isinstance(obj, np.void): - devobj = from_record_like(obj, stream=stream) - else: - # This allows you to pass non-array objects like constants - # and objects implementing the - # [array interface](https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.interface.html) - # into this function (with no overhead -- copies -- for `obj`s - # that are already `ndarray`s. 
- obj = np.array( - obj, - copy=False, - subok=True) - sentry_contiguous(obj) - devobj = from_array_like(obj, stream=stream) - if copy: - devobj.copy_to_device(obj, stream=stream) - return devobj, True - diff --git a/numba/numba/cuda/cudadrv/devices.py b/numba/numba/cuda/cudadrv/devices.py deleted file mode 100644 index 20c343da7..000000000 --- a/numba/numba/cuda/cudadrv/devices.py +++ /dev/null @@ -1,228 +0,0 @@ -""" -Expose each GPU devices directly. - -This module implements a API that is like the "CUDA runtime" context manager -for managing CUDA context stack and clean up. It relies on thread-local globals -to separate the context stack management of each thread. Contexts are also -sharable among threads. Only the main thread can destroy Contexts. - -Note: -- This module must be imported by the main-thread. - -""" -from __future__ import print_function, absolute_import, division -import functools -import threading -from numba import servicelib -from .driver import driver - - -class _DeviceList(object): - def __getattr__(self, attr): - # First time looking at "lst" attribute. - if attr == "lst": - # Device list is not initialized. - # Query all CUDA devices. - numdev = driver.get_device_count() - gpus = [_DeviceContextManager(driver.get_device(devid)) - for devid in range(numdev)] - # Define "lst" to avoid re-initialization - self.lst = gpus - return gpus - - # Other attributes - return super(_DeviceList, self).__getattr__(attr) - - def __getitem__(self, devnum): - ''' - Returns the context manager for device *devnum*. 
- ''' - return self.lst[devnum] - - def __str__(self): - return ', '.join([str(d) for d in self.lst]) - - def __iter__(self): - return iter(self.lst) - - def __len__(self): - return len(self.lst) - - @property - def current(self): - """Returns the active device or None if there's no active device - """ - if _runtime.context_stack: - return self.lst[_runtime.current_context.device.id] - - -class _DeviceContextManager(object): - """ - Provides a context manager for executing in the context of the chosen - device. The normal use of instances of this type is from - ``numba.cuda.gpus``. For example, to execute on device 2:: - - with numba.cuda.gpus[2]: - d_a = numba.cuda.to_device(a) - - to copy the array *a* onto device 2, referred to by *d_a*. - """ - - def __init__(self, device): - self._device = device - - def __getattr__(self, item): - return getattr(self._device, item) - - def __enter__(self): - _runtime.push_context(self) - - def __exit__(self, exc_type, exc_val, exc_tb): - _runtime.pop_context() - - def __str__(self): - return "".format(self=self) - - -class _Runtime(object): - """Emulate the CUDA runtime context management. - - It owns all Devices and Contexts. - Keeps at most one Context per Device - """ - - def __init__(self): - self.gpus = _DeviceList() - - # A thread local stack - self.context_stack = servicelib.TLStack() - - # Remember the main thread - # Only the main thread can *actually* destroy - self._mainthread = threading.current_thread() - - # Avoid mutation of runtime state in multithreaded programs - self._lock = threading.RLock() - - @property - def current_context(self): - """Return the active gpu context - """ - return self.context_stack.top - - def _get_or_create_context(self, gpu): - """Try to use a already created context for the given gpu. If none - existed, create a new context. 
- - Returns the context - """ - with self._lock: - ctx = gpu.get_primary_context() - ctx.push() - return ctx - - def push_context(self, gpu): - """Push a context for the given GPU or create a new one if no context - exist for the given GPU. - """ - # Context stack is empty or the active device is not the given gpu - if self.context_stack.is_empty or self.current_context.device != gpu: - ctx = self._get_or_create_context(gpu) - - # Active context is from the gpu - else: - ctx = self.current_context - - # Always put the new context on the stack - self.context_stack.push(ctx) - return ctx - - def pop_context(self): - """Pop a context from the context stack if there is more than - one context in the stack. - - Will not remove the last context in the stack. - """ - ctx = self.current_context - # If there is more than one context - # Do not pop the last context so there is always a active context - if len(self.context_stack) > 1: - ctx.pop() - self.context_stack.pop() - assert self.context_stack - - def get_or_create_context(self, devnum): - """Returns the current context or push/create a context for the GPU - with the given device number. - """ - if self.context_stack: - return self.current_context - else: - with self._lock: - return self.push_context(self.gpus[devnum]) - - def reset(self): - """Clear all contexts in the thread. Destroy the context if and only - if we are in the main thread. - """ - # Clear the context stack - while self.context_stack: - ctx = self.context_stack.pop() - ctx.pop() - - # If it is the main thread - if threading.current_thread() == self._mainthread: - self._destroy_all_contexts() - - def _destroy_all_contexts(self): - # Reset all devices - for gpu in self.gpus: - gpu.reset() - - -_runtime = _Runtime() - -# ================================ PUBLIC API ================================ - -gpus = _runtime.gpus - - -def get_context(devnum=0): - """Get the current device or use a device by device number, and - return the CUDA context. 
- """ - return _runtime.get_or_create_context(devnum) - - -def require_context(fn): - """ - A decorator that ensures a CUDA context is available when *fn* is executed. - - Decorating *fn* is equivalent to writing:: - - get_context() - fn() - - at each call site. - """ - - @functools.wraps(fn) - def _require_cuda_context(*args, **kws): - get_context() - return fn(*args, **kws) - - return _require_cuda_context - - -def reset(): - """Reset the CUDA subsystem for the current thread. - - In the main thread: - This removes all CUDA contexts. Only use this at shutdown or for - cleaning up between tests. - - In non-main threads: - This clear the CUDA context stack only. - - """ - _runtime.reset() diff --git a/numba/numba/cuda/cudadrv/driver.py b/numba/numba/cuda/cudadrv/driver.py deleted file mode 100644 index 7519a489f..000000000 --- a/numba/numba/cuda/cudadrv/driver.py +++ /dev/null @@ -1,1840 +0,0 @@ -""" -CUDA driver bridge implementation - -NOTE: -The new driver implementation uses a *_PendingDeallocs* that help prevents a -crashing the system (particularly OSX) when the CUDA context is corrupted at -resource deallocation. The old approach ties resource management directly -into the object destructor; thus, at corruption of the CUDA context, -subsequent deallocation could further corrupt the CUDA context and causes the -system to freeze in some cases. - -""" - -from __future__ import absolute_import, print_function, division -import sys -import os -import ctypes -import weakref -import functools -import copy -import warnings -import logging -from ctypes import (c_int, byref, c_size_t, c_char, c_char_p, addressof, - c_void_p, c_float) -import contextlib -import numpy as np -from collections import namedtuple, deque - -from numba import utils, mviewbuf -from .error import CudaSupportError, CudaDriverError -from .drvapi import API_PROTOTYPES -from .drvapi import cu_occupancy_b2d_size -from . 
import enums, drvapi, _extras -from numba import config, serialize -from numba.utils import longint as long - - -VERBOSE_JIT_LOG = int(os.environ.get('NUMBAPRO_VERBOSE_CU_JIT_LOG', 1)) -MIN_REQUIRED_CC = (2, 0) -SUPPORTS_IPC = sys.platform.startswith('linux') - - -def _make_logger(): - logger = logging.getLogger(__name__) - # is logging configured? - if not utils.logger_hasHandlers(logger): - # read user config - lvl = str(config.CUDA_LOG_LEVEL).upper() - lvl = getattr(logging, lvl, None) - if not isinstance(lvl, int): - # default to critical level - lvl = logging.CRITICAL - logger.setLevel(lvl) - # did user specify a level? - if config.CUDA_LOG_LEVEL: - # create a simple handler that prints to stderr - handler = logging.StreamHandler(sys.stderr) - fmt = '== CUDA [%(relativeCreated)d] %(levelname)5s -- %(message)s' - handler.setFormatter(logging.Formatter(fmt=fmt)) - logger.addHandler(handler) - else: - # otherwise, put a null handler - logger.addHandler(logging.NullHandler()) - return logger - - -class DeadMemoryError(RuntimeError): - pass - - -class LinkerError(RuntimeError): - pass - - -class CudaAPIError(CudaDriverError): - def __init__(self, code, msg): - self.code = code - self.msg = msg - super(CudaAPIError, self).__init__(code, msg) - - def __str__(self): - return "[%s] %s" % (self.code, self.msg) - - -def find_driver(): - envpath = os.environ.get('NUMBAPRO_CUDA_DRIVER', None) - if envpath == '0': - # Force fail - _raise_driver_not_found() - - # Determine DLL type - if sys.platform == 'win32': - dlloader = ctypes.WinDLL - dldir = ['\\windows\\system32'] - dlname = 'nvcuda.dll' - elif sys.platform == 'darwin': - dlloader = ctypes.CDLL - dldir = ['/usr/local/cuda/lib'] - dlname = 'libcuda.dylib' - else: - # Assume to be *nix like - dlloader = ctypes.CDLL - dldir = ['/usr/lib', '/usr/lib64'] - dlname = 'libcuda.so' - - if envpath is not None: - try: - envpath = os.path.abspath(envpath) - except ValueError: - raise ValueError("NUMBAPRO_CUDA_DRIVER %s is not a 
valid path" % - envpath) - if not os.path.isfile(envpath): - raise ValueError("NUMBAPRO_CUDA_DRIVER %s is not a valid file " - "path. Note it must be a filepath of the .so/" - ".dll/.dylib or the driver" % envpath) - candidates = [envpath] - else: - # First search for the name in the default library path. - # If that is not found, try the specific path. - candidates = [dlname] + [os.path.join(x, dlname) for x in dldir] - - # Load the driver; Collect driver error information - path_not_exist = [] - driver_load_error = [] - - for path in candidates: - try: - dll = dlloader(path) - except OSError as e: - # Problem opening the DLL - path_not_exist.append(not os.path.isfile(path)) - driver_load_error.append(e) - else: - return dll - - # Problem loading driver - if all(path_not_exist): - _raise_driver_not_found() - else: - errmsg = '\n'.join(str(e) for e in driver_load_error) - _raise_driver_error(errmsg) - - -DRIVER_NOT_FOUND_MSG = """ -CUDA driver library cannot be found. -If you are sure that a CUDA driver is installed, -try setting environment variable NUMBAPRO_CUDA_DRIVER -with the file path of the CUDA driver shared library. -""" - -DRIVER_LOAD_ERROR_MSG = """ -Possible CUDA driver libraries are found but error occurred during load: -%s -""" - - -def _raise_driver_not_found(): - raise CudaSupportError(DRIVER_NOT_FOUND_MSG) - - -def _raise_driver_error(e): - raise CudaSupportError(DRIVER_LOAD_ERROR_MSG % e) - - -def _build_reverse_error_map(): - prefix = 'CUDA_ERROR' - map = utils.UniqueDict() - for name in dir(enums): - if name.startswith(prefix): - code = getattr(enums, name) - map[code] = name - return map - - -def _getpid(): - return os.getpid() - - -ERROR_MAP = _build_reverse_error_map() - -MISSING_FUNCTION_ERRMSG = """driver missing function: %s. -Requires CUDA 8.0 or above. -""" - - -class Driver(object): - """ - Driver API functions are lazily bound. 
- """ - _singleton = None - - def __new__(cls): - obj = cls._singleton - if obj is not None: - return obj - else: - obj = object.__new__(cls) - cls._singleton = obj - return obj - - def __init__(self): - self.devices = utils.UniqueDict() - self.is_initialized = False - self.initialization_error = None - self.pid = None - try: - if config.DISABLE_CUDA: - msg = ("CUDA is disabled due to setting NUMBA_DISABLE_CUDA=1 " - "in the environment, or because CUDA is unsupported on " - "32-bit systems.") - raise CudaSupportError(msg) - self.lib = find_driver() - except CudaSupportError as e: - self.is_initialized = True - self.initialization_error = e - - def initialize(self): - # lazily initialize logger - global _logger - _logger = _make_logger() - - self.is_initialized = True - try: - _logger.info('init') - self.cuInit(0) - except CudaAPIError as e: - self.initialization_error = e - raise CudaSupportError("Error at driver init: \n%s:" % e) - else: - self.pid = _getpid() - - self._initialize_extras() - - def _initialize_extras(self): - # set pointer to original cuIpcOpenMemHandle - set_proto = ctypes.CFUNCTYPE(None, c_void_p) - set_cuIpcOpenMemHandle = set_proto(_extras.set_cuIpcOpenMemHandle) - set_cuIpcOpenMemHandle(self._find_api('cuIpcOpenMemHandle')) - # bind caller to cuIpcOpenMemHandle that fixes the ABI - call_proto = ctypes.CFUNCTYPE(c_int, - ctypes.POINTER(drvapi.cu_device_ptr), - ctypes.POINTER(drvapi.cu_ipc_mem_handle), - ctypes.c_uint) - call_cuIpcOpenMemHandle = call_proto(_extras.call_cuIpcOpenMemHandle) - call_cuIpcOpenMemHandle.__name__ = 'call_cuIpcOpenMemHandle' - safe_call = self._wrap_api_call('call_cuIpcOpenMemHandle', - call_cuIpcOpenMemHandle) - # override cuIpcOpenMemHandle - self.cuIpcOpenMemHandle = safe_call - - @property - def is_available(self): - if not self.is_initialized: - self.initialize() - return self.initialization_error is None - - def __getattr__(self, fname): - # First request of a driver API function - try: - proto = 
API_PROTOTYPES[fname] - except KeyError: - raise AttributeError(fname) - restype = proto[0] - argtypes = proto[1:] - - # Initialize driver - if not self.is_initialized: - self.initialize() - - if self.initialization_error is not None: - raise CudaSupportError("Error at driver init: \n%s:" % - self.initialization_error) - - # Find function in driver library - libfn = self._find_api(fname) - libfn.restype = restype - libfn.argtypes = argtypes - - safe_call = self._wrap_api_call(fname, libfn) - setattr(self, fname, safe_call) - return safe_call - - def _wrap_api_call(self, fname, libfn): - @functools.wraps(libfn) - def safe_cuda_api_call(*args): - _logger.debug('call driver api: %s', libfn.__name__) - retcode = libfn(*args) - self._check_error(fname, retcode) - return safe_cuda_api_call - - def _find_api(self, fname): - # Try version 2 - try: - return getattr(self.lib, fname + "_v2") - except AttributeError: - pass - - # Try regular - try: - return getattr(self.lib, fname) - except AttributeError: - pass - - # Not found. 
- # Delay missing function error to use - def absent_function(*args, **kws): - raise CudaDriverError(MISSING_FUNCTION_ERRMSG % fname) - - setattr(self, fname, absent_function) - return absent_function - - def _check_error(self, fname, retcode): - if retcode != enums.CUDA_SUCCESS: - errname = ERROR_MAP.get(retcode, "UNKNOWN_CUDA_ERROR") - msg = "Call to %s results in %s" % (fname, errname) - _logger.error(msg) - if retcode == enums.CUDA_ERROR_NOT_INITIALIZED: - # Detect forking - if self.pid is not None and _getpid() != self.pid: - msg = 'pid %s forked from pid %s after CUDA driver init' - _logger.critical(msg, _getpid(), self.pid) - raise CudaDriverError("CUDA initialized before forking") - raise CudaAPIError(retcode, msg) - - def get_device(self, devnum=0): - dev = self.devices.get(devnum) - if dev is None: - dev = Device(devnum) - self.devices[devnum] = dev - return weakref.proxy(dev) - - def get_device_count(self): - count = c_int() - self.cuDeviceGetCount(byref(count)) - return count.value - - def list_devices(self): - """Returns a list of active devices - """ - return list(self.devices.values()) - - def reset(self): - """Reset all devices - """ - for dev in self.devices.values(): - dev.reset() - - def get_context(self): - """Get current active context in CUDA driver runtime. - Note: Lowlevel calls that returns the handle. - """ - handle = drvapi.cu_context(0) - driver.cuCtxGetCurrent(byref(handle)) - if not handle.value: - return None - return handle - - -driver = Driver() - - -def _build_reverse_device_attrs(): - prefix = "CU_DEVICE_ATTRIBUTE_" - map = utils.UniqueDict() - for name in dir(enums): - if name.startswith(prefix): - map[name[len(prefix):]] = getattr(enums, name) - return map - - -DEVICE_ATTRIBUTES = _build_reverse_device_attrs() - - -class Device(object): - """ - The device object owns the CUDA contexts. This is owned by the driver - object. User should not construct devices directly. 
- """ - @classmethod - def from_identity(self, identity): - """Create Device object from device identity created by - ``Device.get_device_identity()``. - """ - for devid in range(driver.get_device_count()): - d = driver.get_device(devid) - if d.get_device_identity() == identity: - return d - else: - errmsg = ( - "No device of {} is found. " - "Target device may not be visible in this process." - ).format(identity) - raise RuntimeError(errmsg) - - def __init__(self, devnum): - got_devnum = c_int() - driver.cuDeviceGet(byref(got_devnum), devnum) - assert devnum == got_devnum.value, "Driver returned another device" - self.id = got_devnum.value - self.attributes = {} - # Read compute capability - cc_major = c_int() - cc_minor = c_int() - driver.cuDeviceComputeCapability(byref(cc_major), byref(cc_minor), - self.id) - self.compute_capability = (cc_major.value, cc_minor.value) - # Read name - bufsz = 128 - buf = (c_char * bufsz)() - driver.cuDeviceGetName(buf, bufsz, self.id) - self.name = buf.value - self.primary_context = None - - def get_device_identity(self): - return { - 'pci_domain_id': self.PCI_DOMAIN_ID, - 'pci_bus_id': self.PCI_BUS_ID, - 'pci_device_id': self.PCI_DEVICE_ID, - } - - @property - def COMPUTE_CAPABILITY(self): - """ - For backward compatibility - """ - warnings.warn("Deprecated attribute 'COMPUTE_CAPABILITY'; use lower " - "case version", DeprecationWarning) - return self.compute_capability - - def __repr__(self): - return "" % (self.id, self.name) - - def __getattr__(self, attr): - """Read attributes lazily - """ - try: - code = DEVICE_ATTRIBUTES[attr] - except KeyError: - raise AttributeError(attr) - - value = c_int() - driver.cuDeviceGetAttribute(byref(value), code, self.id) - setattr(self, attr, value.value) - - return value.value - - def __hash__(self): - return hash(self.id) - - def __eq__(self, other): - if isinstance(other, Device): - return self.id == other.id - return False - - def __ne__(self, other): - return not (self == other) - - def 
get_primary_context(self): - """ - Returns the primary context for the device. - Note: it is not pushed to the CPU thread. - """ - if self.primary_context is not None: - return self.primary_context - - met_requirement_for_device(self) - - # create primary context - hctx = drvapi.cu_context() - driver.cuDevicePrimaryCtxRetain(byref(hctx), self.id) - - ctx = Context(weakref.proxy(self), hctx) - self.primary_context = ctx - return ctx - - def release_primary_context(self): - """ - Release reference to primary context - """ - driver.cuDevicePrimaryCtxRelease(self.id) - self.primary_context = None - - def reset(self): - try: - if self.primary_context is not None: - self.primary_context.reset() - self.release_primary_context() - finally: - # reset at the driver level - driver.cuDevicePrimaryCtxReset(self.id) - - -def met_requirement_for_device(device): - if device.compute_capability < MIN_REQUIRED_CC: - raise CudaSupportError("%s has compute capability < %s" % - (device, MIN_REQUIRED_CC)) - - -class _SizeNotSet(object): - """ - Dummy object for _PendingDeallocs when *size* is not set. - """ - def __str__(self): - return '?' - - def __int__(self): - return 0 - -_SizeNotSet = _SizeNotSet() - - -class _PendingDeallocs(object): - """ - Pending deallocations of a context (or device since we are using the primary - context). - """ - def __init__(self, capacity): - self._cons = deque() - self._disable_count = 0 - self._size = 0 - self._memory_capacity = capacity - - @property - def _max_pending_bytes(self): - return int(self._memory_capacity * config.CUDA_DEALLOCS_RATIO) - - def add_item(self, dtor, handle, size=_SizeNotSet): - """ - Add a pending deallocation. - - The *dtor* arg is the destructor function that takes an argument, - *handle*. It is used as ``dtor(handle)``. The *size* arg is the - byte size of the resource added. It is an optional argument. Some - resources (e.g. CUModule) has an unknown memory footprint on the device. 
- """ - _logger.info('add pending dealloc: %s %s bytes', dtor.__name__, size) - self._cons.append((dtor, handle, size)) - self._size += int(size) - if (len(self._cons) > config.CUDA_DEALLOCS_COUNT or - self._size > self._max_pending_bytes): - self.clear() - - def clear(self): - """ - Flush any pending deallocations unless it is disabled. - Do nothing if disabled. - """ - if not self.is_disabled: - while self._cons: - [dtor, handle, size] = self._cons.popleft() - _logger.info('dealloc: %s %s bytes', dtor.__name__, size) - dtor(handle) - self._size = 0 - - @contextlib.contextmanager - def disable(self): - """ - Context manager to temporarily disable flushing pending deallocation. - This can be nested. - """ - self._disable_count += 1 - try: - yield - finally: - self._disable_count -= 1 - assert self._disable_count >= 0 - - @property - def is_disabled(self): - return self._disable_count > 0 - - def __len__(self): - """ - Returns number of pending deallocations. - """ - return len(self._cons) - - -_MemoryInfo = namedtuple("_MemoryInfo", "free,total") - - -class Context(object): - """ - This object wraps a CUDA Context resource. - - Contexts should not be constructed directly by user code. - """ - - def __init__(self, device, handle): - self.device = device - self.handle = handle - self.allocations = utils.UniqueDict() - # *deallocations* is lazily initialized on context push - self.deallocations = None - self.modules = utils.UniqueDict() - # For storing context specific data - self.extras = {} - - def reset(self): - """ - Clean up all owned resources in this context. - """ - _logger.info('reset context of device %s', self.device.id) - # Free owned resources - _logger.info('reset context of device %s', self.device.id) - self.allocations.clear() - self.modules.clear() - # Clear trash - self.deallocations.clear() - - def get_memory_info(self): - """Returns (free, total) memory in bytes in the context. 
- """ - free = c_size_t() - total = c_size_t() - driver.cuMemGetInfo(byref(free), byref(total)) - return _MemoryInfo(free=free.value, total=total.value) - - def get_active_blocks_per_multiprocessor(self, func, blocksize, memsize, flags=None): - """Return occupancy of a function. - :param func: kernel for which occupancy is calculated - :param blocksize: block size the kernel is intended to be launched with - :param memsize: per-block dynamic shared memory usage intended, in bytes""" - - retval = c_int() - if not flags: - driver.cuOccupancyMaxActiveBlocksPerMultiprocessor(byref(retval), func.handle, blocksize, memsize) - else: - driver.cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(byref(retval), func.handle, blocksize, memsize, flags) - return retval.value - - def get_max_potential_block_size(self, func, b2d_func, memsize, blocksizelimit, flags=None): - """Suggest a launch configuration with reasonable occupancy. - :param func: kernel for which occupancy is calculated - :param b2d_func: function that calculates how much per-block dynamic shared memory 'func' uses based on the block size. - :param memsize: per-block dynamic shared memory usage intended, in bytes - :param blocksizelimit: maximum block size the kernel is designed to handle""" - - gridsize = c_int() - blocksize = c_int() - b2d_cb = cu_occupancy_b2d_size(b2d_func) - if not flags: - driver.cuOccupancyMaxPotentialBlockSize(byref(gridsize), byref(blocksize), - func.handle, - b2d_cb, - memsize, blocksizelimit) - else: - driver.cuOccupancyMaxPotentialBlockSizeWithFlags(byref(gridsize), byref(blocksize), - func.handle, b2d_cb, - memsize, blocksizelimit, flags) - return (gridsize.value, blocksize.value) - - def push(self): - """ - Pushes this context on the current CPU Thread. 
- """ - driver.cuCtxPushCurrent(self.handle) - # setup *deallocations* as the context becomes active for the first time - if self.deallocations is None: - self.deallocations = _PendingDeallocs(self.get_memory_info().total) - - def pop(self): - """ - Pops this context off the current CPU thread. Note that this context must - be at the top of the context stack, otherwise an error will occur. - """ - popped = drvapi.cu_context() - driver.cuCtxPopCurrent(byref(popped)) - assert popped.value == self.handle.value - - def _attempt_allocation(self, allocator): - """ - Attempt allocation by calling *allocator*. If a out-of-memory error - is raised, the pending deallocations are flushed and the allocation - is retried. If it fails in the second attempt, the error is reraised. - """ - try: - allocator() - except CudaAPIError as e: - # is out-of-memory? - if e.code == enums.CUDA_ERROR_OUT_OF_MEMORY: - # clear pending deallocations - self.deallocations.clear() - # try again - allocator() - else: - raise - - def memalloc(self, bytesize): - ptr = drvapi.cu_device_ptr() - - def allocator(): - driver.cuMemAlloc(byref(ptr), bytesize) - - self._attempt_allocation(allocator) - - _memory_finalizer = _make_mem_finalizer(driver.cuMemFree, bytesize) - mem = MemoryPointer(weakref.proxy(self), ptr, bytesize, - _memory_finalizer(self, ptr)) - self.allocations[ptr.value] = mem - return mem.own() - - def memhostalloc(self, bytesize, mapped=False, portable=False, wc=False): - pointer = c_void_p() - flags = 0 - if mapped: - flags |= enums.CU_MEMHOSTALLOC_DEVICEMAP - if portable: - flags |= enums.CU_MEMHOSTALLOC_PORTABLE - if wc: - flags |= enums.CU_MEMHOSTALLOC_WRITECOMBINED - - def allocator(): - driver.cuMemHostAlloc(byref(pointer), bytesize, flags) - - if mapped: - self._attempt_allocation(allocator) - else: - allocator() - - owner = None - - if mapped: - _hostalloc_finalizer = _make_mem_finalizer(driver.cuMemFreeHost, - bytesize) - finalizer = _hostalloc_finalizer(self, pointer) - mem = 
MappedMemory(weakref.proxy(self), owner, pointer, - bytesize, finalizer=finalizer) - - self.allocations[mem.handle.value] = mem - return mem.own() - else: - finalizer = _pinnedalloc_finalizer(self.deallocations, pointer) - mem = PinnedMemory(weakref.proxy(self), owner, pointer, bytesize, - finalizer=finalizer) - return mem - - def mempin(self, owner, pointer, size, mapped=False): - if isinstance(pointer, (int, long)): - pointer = c_void_p(pointer) - - if mapped and not self.device.CAN_MAP_HOST_MEMORY: - raise CudaDriverError("%s cannot map host memory" % self.device) - - # possible flags are "portable" (between context) - # and "device-map" (map host memory to device thus no need - # for memory transfer). - flags = 0 - - if mapped: - flags |= enums.CU_MEMHOSTREGISTER_DEVICEMAP - - def allocator(): - driver.cuMemHostRegister(pointer, size, flags) - - if mapped: - self._attempt_allocation(allocator) - else: - allocator() - - if mapped: - _mapped_finalizer = _make_mem_finalizer(driver.cuMemHostUnregister, - size) - finalizer = _mapped_finalizer(self, pointer) - mem = MappedMemory(weakref.proxy(self), owner, pointer, size, - finalizer=finalizer) - self.allocations[mem.handle.value] = mem - return mem.own() - else: - mem = PinnedMemory(weakref.proxy(self), owner, pointer, size, - finalizer=_pinned_finalizer(self.deallocations, - pointer)) - return mem - - def memunpin(self, pointer): - raise NotImplementedError - - def get_ipc_handle(self, memory): - """ - Returns a *IpcHandle* from a GPU allocation. 
- """ - if not SUPPORTS_IPC: - raise OSError('OS does not support CUDA IPC') - ipchandle = drvapi.cu_ipc_mem_handle() - driver.cuIpcGetMemHandle( - ctypes.cast( - ipchandle, - ctypes.POINTER(drvapi.cu_ipc_mem_handle), - ), - memory.handle, - ) - - source_info = self.device.get_device_identity() - return IpcHandle(memory, ipchandle, memory.size, source_info) - - def open_ipc_handle(self, handle, size): - # open the IPC handle to get the device pointer - dptr = drvapi.cu_device_ptr() - flags = 1 # CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS - driver.cuIpcOpenMemHandle(byref(dptr), handle, flags) - # wrap it - return MemoryPointer(context=weakref.proxy(self), pointer=dptr, - size=size) - - def enable_peer_access(self, peer_context, flags=0): - """Enable peer access between the current context and the peer context - """ - assert flags == 0, '*flags* is reserved and MUST be zero' - driver.cuCtxEnablePeerAccess(peer_context, flags) - - def can_access_peer(self, peer_device): - """Returns a bool indicating whether the peer access between the - current and peer device is possible. 
- """ - can_access_peer = c_int() - driver.cuDeviceCanAccessPeer( - byref(can_access_peer), - self.device.id, - peer_device, - ) - return bool(can_access_peer) - - def create_module_ptx(self, ptx): - if isinstance(ptx, str): - ptx = ptx.encode('utf8') - image = c_char_p(ptx) - return self.create_module_image(image) - - def create_module_image(self, image): - module = load_module_image(self, image) - self.modules[module.handle.value] = module - return weakref.proxy(module) - - def unload_module(self, module): - del self.modules[module.handle.value] - - def create_stream(self): - handle = drvapi.cu_stream() - driver.cuStreamCreate(byref(handle), 0) - return Stream(weakref.proxy(self), handle, - _stream_finalizer(self.deallocations, handle)) - - def create_event(self, timing=True): - handle = drvapi.cu_event() - flags = 0 - if not timing: - flags |= enums.CU_EVENT_DISABLE_TIMING - driver.cuEventCreate(byref(handle), flags) - return Event(weakref.proxy(self), handle, - finalizer=_event_finalizer(self.deallocations, handle)) - - def synchronize(self): - driver.cuCtxSynchronize() - - def __repr__(self): - return "" % (self.handle, self.device.id) - - def __eq__(self, other): - if isinstance(other, Context): - return self.handle == other.handle - else: - return NotImplemented - - def __ne__(self, other): - return not self.__eq__(other) - - -def load_module_image(context, image): - """ - image must be a pointer - """ - logsz = os.environ.get('NUMBAPRO_CUDA_LOG_SIZE', 1024) - - jitinfo = (c_char * logsz)() - jiterrors = (c_char * logsz)() - - options = { - enums.CU_JIT_INFO_LOG_BUFFER: addressof(jitinfo), - enums.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: c_void_p(logsz), - enums.CU_JIT_ERROR_LOG_BUFFER: addressof(jiterrors), - enums.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: c_void_p(logsz), - enums.CU_JIT_LOG_VERBOSE: c_void_p(VERBOSE_JIT_LOG), - } - - option_keys = (drvapi.cu_jit_option * len(options))(*options.keys()) - option_vals = (c_void_p * len(options))(*options.values()) - - 
handle = drvapi.cu_module() - try: - driver.cuModuleLoadDataEx(byref(handle), image, len(options), - option_keys, option_vals) - except CudaAPIError as e: - msg = "cuModuleLoadDataEx error:\n%s" % jiterrors.value.decode("utf8") - raise CudaAPIError(e.code, msg) - - info_log = jitinfo.value - - return Module(weakref.proxy(context), handle, info_log, - _module_finalizer(context, handle)) - - -def _make_mem_finalizer(dtor, bytesize): - def mem_finalize(context, handle): - allocations = context.allocations - deallocations = context.deallocations - - def core(): - if allocations: - del allocations[handle.value] - - deallocations.add_item(dtor, handle, size=bytesize) - - return core - - return mem_finalize - - -def _pinnedalloc_finalizer(deallocs, handle): - def core(): - deallocs.add_item(driver.cuMemFreeHost, handle) - - return core - - -def _pinned_finalizer(deallocs, handle): - def core(): - deallocs.add_item(driver.cuMemHostUnregister, handle) - - return core - - -def _event_finalizer(deallocs, handle): - def core(): - deallocs.add_item(driver.cuEventDestroy, handle) - - return core - - -def _stream_finalizer(deallocs, handle): - def core(): - deallocs.add_item(driver.cuStreamDestroy, handle) - - return core - - -def _module_finalizer(context, handle): - dealloc = context.deallocations - modules = context.modules - - def core(): - shutting_down = utils.shutting_down # early bind - - def module_unload(handle): - # If we are not shutting down, we must be called due to - # Context.reset() of Context.unload_module(). Both must have - # cleared the module reference from the context. - assert shutting_down() or handle.value not in modules - driver.cuModuleUnload(handle) - - dealloc.add_item(module_unload, handle) - - return core - - -class _CudaIpcImpl(object): - """Implementation of GPU IPC using CUDA driver API. - This requires the devices to be peer accessible. 
- """ - def __init__(self, parent): - self.base = parent.base - self.handle = parent.handle - self.size = parent.size - # remember if the handle is already opened - self._opened_mem = None - - def open(self, context): - """ - Import the IPC memory and returns a raw CUDA memory pointer object - """ - if self.base is not None: - raise ValueError('opening IpcHandle from original process') - - if self._opened_mem is not None: - raise ValueError('IpcHandle is already opened') - - mem = context.open_ipc_handle(self.handle, self.size) - # this object owns the opened allocation - # note: it is required the memory be freed after the ipc handle is - # closed by the importing context. - self._opened_mem = mem - return mem.own() - - def close(self): - if self._opened_mem is None: - raise ValueError('IpcHandle not opened') - driver.cuIpcCloseMemHandle(self._opened_mem.handle) - self._opened_mem = None - - -class _StagedIpcImpl(object): - """Implementation of GPU IPC using custom staging logic to workaround - CUDA IPC limitation on peer accessibility between devices. - """ - def __init__(self, parent, source_info): - self.parent = parent - self.base = parent.base - self.handle = parent.handle - self.size = parent.size - self.source_info = source_info - - def open(self, context): - from numba import cuda - - srcdev = Device.from_identity(self.source_info) - - impl = _CudaIpcImpl(parent=self.parent) - # Open context on the source device. - with cuda.gpus[srcdev.id]: - source_ptr = impl.open(cuda.devices.get_context()) - - # Allocate GPU buffer. - newmem = context.memalloc(self.size) - # Do D->D from the source peer-context - # This performs automatic host staging - device_to_device(newmem, source_ptr, self.size) - - # Cleanup source context - with cuda.gpus[srcdev.id]: - impl.close() - - return newmem.own() - - def close(self): - # Nothing has to be done here - pass - - -class IpcHandle(object): - """ - Internal IPC handle. 
- - Serialization of the CUDA IPC handle object is implemented here. - - The *base* attribute is a reference to the original allocation to keep it - alive. The *handle* is a ctypes object of the CUDA IPC handle. The *size* - is the allocation size. - """ - def __init__(self, base, handle, size, source_info=None): - self.base = base - self.handle = handle - self.size = size - self.source_info = source_info - self._impl = None - - def _sentry_source_info(self): - if self.source_info is None: - raise RuntimeError("IPC handle doesn't have source info") - - def can_access_peer(self, context): - """Returns a bool indicating whether the active context can peer - access the IPC handle - """ - self._sentry_source_info() - if self.source_info == context.device.get_device_identity(): - return True - source_device = Device.from_identity(self.source_info) - return context.can_access_peer(source_device.id) - - def open_staged(self, context): - """Open the IPC by allowing staging on the host memory first. - """ - self._sentry_source_info() - - if self._impl is not None: - raise ValueError('IpcHandle is already opened') - - self._impl = _StagedIpcImpl(self, self.source_info) - return self._impl.open(context) - - def open_direct(self, context): - """ - Import the IPC memory and returns a raw CUDA memory pointer object - """ - if self._impl is not None: - raise ValueError('IpcHandle is already opened') - - self._impl = _CudaIpcImpl(self) - return self._impl.open(context) - - def open(self, context): - """Open the IPC handle and import the memory for usage in the given - context. Returns a raw CUDA memory pointer object. - - This is enhanced over CUDA IPC that it will work regardless of whether - the source device is peer-accessible by the destination device. - If the devices are peer-accessible, it uses .open_direct(). - If the devices are not peer-accessible, it uses .open_staged(). 
- """ - if self.source_info is None or self.can_access_peer(context): - fn = self.open_direct - else: - fn = self.open_staged - return fn(context) - - def open_array(self, context, shape, dtype, strides=None): - """ - Simliar to `.open()` but returns an device array. - """ - from . import devicearray - - # by default, set strides to itemsize - if strides is None: - strides = dtype.itemsize - dptr = self.open(context) - # read the device pointer as an array - return devicearray.DeviceNDArray(shape=shape, strides=strides, - dtype=dtype, gpu_data=dptr) - - def close(self): - if self._impl is None: - raise ValueError('IpcHandle not opened') - self._impl.close() - self._impl = None - - def __reduce__(self): - # Preprocess the IPC handle, which is defined as a byte array. - preprocessed_handle = tuple(self.handle) - args = ( - self.__class__, - preprocessed_handle, - self.size, - self.source_info, - ) - return (serialize._rebuild_reduction, args) - - @classmethod - def _rebuild(cls, handle_ary, size, source_info): - handle = drvapi.cu_ipc_mem_handle(*handle_ary) - return cls(base=None, handle=handle, size=size, - source_info=source_info) - - -class MemoryPointer(object): - __cuda_memory__ = True - - def __init__(self, context, pointer, size, finalizer=None, owner=None): - self.context = context - self.device_pointer = pointer - self.size = size - self._cuda_memsize_ = size - self.is_managed = finalizer is not None - self.refct = 0 - self.handle = self.device_pointer - self._owner = owner - - if finalizer is not None: - self._finalizer = utils.finalize(self, finalizer) - - @property - def owner(self): - return self if self._owner is None else self._owner - - def own(self): - return OwnedPointer(weakref.proxy(self)) - - def free(self): - """ - Forces the device memory to the trash. 
- """ - if self.is_managed: - if not self._finalizer.alive: - raise RuntimeError("Freeing dead memory") - self._finalizer() - assert not self._finalizer.alive - - def memset(self, byte, count=None, stream=0): - count = self.size if count is None else count - if stream: - driver.cuMemsetD8Async(self.device_pointer, byte, count, - stream.handle) - else: - driver.cuMemsetD8(self.device_pointer, byte, count) - - def view(self, start, stop=None): - if stop is None: - size = self.size - start - else: - size = stop - start - - # Handle NULL/empty memory buffer - if self.device_pointer.value is None: - if size != 0: - raise RuntimeError("non-empty slice into empty slice") - view = self # new view is just a reference to self - # Handle normal case - else: - base = self.device_pointer.value + start - if size < 0: - raise RuntimeError('size cannot be negative') - pointer = drvapi.cu_device_ptr(base) - view = MemoryPointer(self.context, pointer, size, owner=self.owner) - return OwnedPointer(weakref.proxy(self.owner), view) - - @property - def device_ctypes_pointer(self): - return self.device_pointer - - -class MappedMemory(MemoryPointer): - __cuda_memory__ = True - - def __init__(self, context, owner, hostpointer, size, - finalizer=None): - self.owned = owner - self.host_pointer = hostpointer - devptr = drvapi.cu_device_ptr() - driver.cuMemHostGetDevicePointer(byref(devptr), hostpointer, 0) - self.device_pointer = devptr - super(MappedMemory, self).__init__(context, devptr, size, - finalizer=finalizer) - self.handle = self.host_pointer - - # For buffer interface - self._buflen_ = self.size - self._bufptr_ = self.host_pointer.value - - def own(self): - return MappedOwnedPointer(weakref.proxy(self)) - - -class PinnedMemory(mviewbuf.MemAlloc): - def __init__(self, context, owner, pointer, size, finalizer=None): - self.context = context - self.owned = owner - self.size = size - self.host_pointer = pointer - self.is_managed = finalizer is not None - self.handle = self.host_pointer 
- - # For buffer interface - self._buflen_ = self.size - self._bufptr_ = self.host_pointer.value - - if finalizer is not None: - utils.finalize(self, finalizer) - - def own(self): - return self - - -class OwnedPointer(object): - def __init__(self, memptr, view=None): - self._mem = memptr - - if view is None: - self._view = self._mem - else: - assert not view.is_managed - self._view = view - - mem = self._mem - - def deref(): - try: - mem.refct -= 1 - assert mem.refct >= 0 - if mem.refct == 0: - mem.free() - except ReferenceError: - # ignore reference error here - pass - - self._mem.refct += 1 - utils.finalize(self, deref) - - def __getattr__(self, fname): - """Proxy MemoryPointer methods - """ - return getattr(self._view, fname) - - -class MappedOwnedPointer(OwnedPointer, mviewbuf.MemAlloc): - pass - - -class Stream(object): - def __init__(self, context, handle, finalizer): - self.context = context - self.handle = handle - if finalizer is not None: - utils.finalize(self, finalizer) - - def __int__(self): - return self.handle.value - - def __repr__(self): - return "" % (self.handle.value, self.context) - - def synchronize(self): - ''' - Wait for all commands in this stream to execute. This will commit any - pending memory transfers. - ''' - driver.cuStreamSynchronize(self.handle) - - @contextlib.contextmanager - def auto_synchronize(self): - ''' - A context manager that waits for all commands in this stream to execute - and commits any pending memory transfers upon exiting the context. - ''' - yield self - self.synchronize() - - -class Event(object): - def __init__(self, context, handle, finalizer=None): - self.context = context - self.handle = handle - if finalizer is not None: - utils.finalize(self, finalizer) - - def query(self): - """ - Returns True if all work before the most recent record has completed; - otherwise, returns False. 
- """ - try: - driver.cuEventQuery(self.handle) - except CudaAPIError as e: - if e.code == enums.CUDA_ERROR_NOT_READY: - return False - else: - raise - else: - return True - - def record(self, stream=0): - """ - Set the record point of the event to the current point in the given - stream. - - The event will be considered to have occurred when all work that was - queued in the stream at the time of the call to ``record()`` has been - completed. - """ - hstream = stream.handle if stream else 0 - driver.cuEventRecord(self.handle, hstream) - - def synchronize(self): - """ - Synchronize the host thread for the completion of the event. - """ - driver.cuEventSynchronize(self.handle) - - def wait(self, stream=0): - """ - All future works submitted to stream will wait util the event completes. - """ - hstream = stream.handle if stream else 0 - flags = 0 - driver.cuStreamWaitEvent(hstream, self.handle, flags) - - def elapsed_time(self, evtend): - return event_elapsed_time(self, evtend) - - -def event_elapsed_time(evtstart, evtend): - ''' - Compute the elapsed time between two events in milliseconds. 
- ''' - msec = c_float() - driver.cuEventElapsedTime(byref(msec), evtstart.handle, evtend.handle) - return msec.value - - -class Module(object): - def __init__(self, context, handle, info_log, finalizer=None): - self.context = context - self.handle = handle - self.info_log = info_log - if finalizer is not None: - self._finalizer = utils.finalize(self, finalizer) - - def unload(self): - self.context.unload_module(self) - - def get_function(self, name): - handle = drvapi.cu_function() - driver.cuModuleGetFunction(byref(handle), self.handle, - name.encode('utf8')) - return Function(weakref.proxy(self), handle, name) - - def get_global_symbol(self, name): - ptr = drvapi.cu_device_ptr() - size = drvapi.c_size_t() - driver.cuModuleGetGlobal(byref(ptr), byref(size), self.handle, - name.encode('utf8')) - return MemoryPointer(self.context, ptr, size), size.value - - -FuncAttr = namedtuple("FuncAttr", ["regs", "shared", "local", "const", - "maxthreads"]) - - -class Function(object): - griddim = 1, 1, 1 - blockdim = 1, 1, 1 - stream = 0 - sharedmem = 0 - - def __init__(self, module, handle, name): - self.module = module - self.handle = handle - self.name = name - self.attrs = self._read_func_attr_all() - - def __repr__(self): - return "" % self.name - - def cache_config(self, prefer_equal=False, prefer_cache=False, - prefer_shared=False): - prefer_equal = prefer_equal or (prefer_cache and prefer_shared) - if prefer_equal: - flag = enums.CU_FUNC_CACHE_PREFER_EQUAL - elif prefer_cache: - flag = enums.CU_FUNC_CACHE_PREFER_L1 - elif prefer_shared: - flag = enums.CU_FUNC_CACHE_PREFER_SHARED - else: - flag = enums.CU_FUNC_CACHE_PREFER_NONE - driver.cuFuncSetCacheConfig(self.handle, flag) - - def configure(self, griddim, blockdim, sharedmem=0, stream=0): - while len(griddim) < 3: - griddim += (1,) - - while len(blockdim) < 3: - blockdim += (1,) - - inst = copy.copy(self) # shallow clone the object - inst.griddim = griddim - inst.blockdim = blockdim - inst.sharedmem = sharedmem - if 
stream: - inst.stream = stream - else: - inst.stream = 0 - return inst - - def __call__(self, *args): - ''' - *args -- Must be either ctype objects of DevicePointer instances. - ''' - if self.stream: - streamhandle = self.stream.handle - else: - streamhandle = None - - launch_kernel(self.handle, self.griddim, self.blockdim, - self.sharedmem, streamhandle, args) - - @property - def device(self): - return self.module.context.device - - def _read_func_attr(self, attrid): - """ - Read CUfunction attributes - """ - retval = c_int() - driver.cuFuncGetAttribute(byref(retval), attrid, self.handle) - return retval.value - - def _read_func_attr_all(self): - nregs = self._read_func_attr(enums.CU_FUNC_ATTRIBUTE_NUM_REGS) - cmem = self._read_func_attr(enums.CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES) - lmem = self._read_func_attr(enums.CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES) - smem = self._read_func_attr(enums.CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES) - maxtpb = self._read_func_attr( - enums.CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK) - return FuncAttr(regs=nregs, const=cmem, local=lmem, shared=smem, - maxthreads=maxtpb) - - -def launch_kernel(cufunc_handle, griddim, blockdim, sharedmem, hstream, args): - gx, gy, gz = griddim - bx, by, bz = blockdim - - param_vals = [] - for arg in args: - if is_device_memory(arg): - param_vals.append(addressof(device_ctypes_pointer(arg))) - else: - param_vals.append(addressof(arg)) - - params = (c_void_p * len(param_vals))(*param_vals) - - driver.cuLaunchKernel(cufunc_handle, - gx, gy, gz, - bx, by, bz, - sharedmem, - hstream, - params, - None) - - -FILE_EXTENSION_MAP = { - 'o': enums.CU_JIT_INPUT_OBJECT, - 'ptx': enums.CU_JIT_INPUT_PTX, - 'a': enums.CU_JIT_INPUT_LIBRARY, - 'cubin': enums.CU_JIT_INPUT_CUBIN, - 'fatbin': enums.CU_JIT_INPUT_FATBINAR, -} - - -class Linker(object): - def __init__(self): - logsz = int(os.environ.get('NUMBAPRO_CUDA_LOG_SIZE', 1024)) - linkerinfo = (c_char * logsz)() - linkererrors = (c_char * logsz)() - - options = { - 
enums.CU_JIT_INFO_LOG_BUFFER: addressof(linkerinfo), - enums.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: c_void_p(logsz), - enums.CU_JIT_ERROR_LOG_BUFFER: addressof(linkererrors), - enums.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: c_void_p(logsz), - enums.CU_JIT_LOG_VERBOSE: c_void_p(1), - } - - raw_keys = list(options.keys()) + [enums.CU_JIT_TARGET_FROM_CUCONTEXT] - raw_values = list(options.values()) - del options - - option_keys = (drvapi.cu_jit_option * len(raw_keys))(*raw_keys) - option_vals = (c_void_p * len(raw_values))(*raw_values) - - self.handle = handle = drvapi.cu_link_state() - driver.cuLinkCreate(len(raw_keys), option_keys, option_vals, - byref(self.handle)) - - utils.finalize(self, driver.cuLinkDestroy, handle) - - self.linker_info_buf = linkerinfo - self.linker_errors_buf = linkererrors - - self._keep_alive = [linkerinfo, linkererrors, option_keys, option_vals] - - @property - def info_log(self): - return self.linker_info_buf.value.decode('utf8') - - @property - def error_log(self): - return self.linker_errors_buf.value.decode('utf8') - - def add_ptx(self, ptx, name=''): - ptxbuf = c_char_p(ptx) - namebuf = c_char_p(name.encode('utf8')) - self._keep_alive += [ptxbuf, namebuf] - try: - driver.cuLinkAddData(self.handle, enums.CU_JIT_INPUT_PTX, - ptxbuf, len(ptx), namebuf, 0, None, None) - except CudaAPIError as e: - raise LinkerError("%s\n%s" % (e, self.error_log)) - - def add_file(self, path, kind): - pathbuf = c_char_p(path.encode("utf8")) - self._keep_alive.append(pathbuf) - - try: - driver.cuLinkAddFile(self.handle, kind, pathbuf, 0, None, None) - except CudaAPIError as e: - raise LinkerError("%s\n%s" % (e, self.error_log)) - - def add_file_guess_ext(self, path): - ext = path.rsplit('.', 1)[1] - kind = FILE_EXTENSION_MAP[ext] - self.add_file(path, kind) - - def complete(self): - ''' - Returns (cubin, size) - cubin is a pointer to a internal buffer of cubin owned - by the linker; thus, it should be loaded before the linker - is destroyed. 
- ''' - cubin = c_void_p(0) - size = c_size_t(0) - - try: - driver.cuLinkComplete(self.handle, byref(cubin), byref(size)) - except CudaAPIError as e: - raise LinkerError("%s\n%s" % (e, self.error_log)) - - size = size.value - assert size > 0, 'linker returned a zero sized cubin' - del self._keep_alive[:] - return cubin, size - - -# ----------------------------------------------------------------------------- - - -def _device_pointer_attr(devmem, attr, odata): - """Query attribute on the device pointer - """ - error = driver.cuPointerGetAttribute(byref(odata), attr, - device_ctypes_pointer(devmem)) - driver.check_error(error, "Failed to query pointer attribute") - - -def device_pointer_type(devmem): - """Query the device pointer type: host, device, array, unified? - """ - ptrtype = c_int(0) - _device_pointer_attr(devmem, enums.CU_POINTER_ATTRIBUTE_MEMORY_TYPE, - ptrtype) - map = { - enums.CU_MEMORYTYPE_HOST: 'host', - enums.CU_MEMORYTYPE_DEVICE: 'device', - enums.CU_MEMORYTYPE_ARRAY: 'array', - enums.CU_MEMORYTYPE_UNIFIED: 'unified', - } - return map[ptrtype.value] - - -def get_devptr_for_active_ctx(ptr): - """Query the device pointer usable in the current context from an arbitrary - pointer. - """ - devptr = c_void_p(0) - attr = enums.CU_POINTER_ATTRIBUTE_DEVICE_POINTER - driver.cuPointerGetAttribute(byref(devptr), attr, ptr) - return devptr - - -def device_extents(devmem): - """Find the extents (half open begin and end pointer) of the underlying - device memory allocation. - - NOTE: it always returns the extents of the allocation but the extents - of the device memory view that can be a subsection of the entire allocation. - """ - s = drvapi.cu_device_ptr() - n = c_size_t() - devptr = device_ctypes_pointer(devmem) - driver.cuMemGetAddressRange(byref(s), byref(n), devptr) - s, n = s.value, n.value - return s, s + n - - -def device_memory_size(devmem): - """Check the memory size of the device memory. - The result is cached in the device memory object. 
- It may query the driver for the memory size of the device memory allocation. - """ - sz = getattr(devmem, '_cuda_memsize_', None) - if sz is None: - s, e = device_extents(devmem) - sz = e - s - devmem._cuda_memsize_ = sz - assert sz > 0, "zero length array" - return sz - - -def host_pointer(obj): - """ - NOTE: The underlying data pointer from the host data buffer is used and - it should not be changed until the operation which can be asynchronous - completes. - """ - if isinstance(obj, (int, long)): - return obj - - forcewritable = isinstance(obj, np.void) - return mviewbuf.memoryview_get_buffer(obj, forcewritable) - - -def host_memory_extents(obj): - "Returns (start, end) the start and end pointer of the array (half open)." - return mviewbuf.memoryview_get_extents(obj) - - -def memory_size_from_info(shape, strides, itemsize): - """Get the byte size of a contiguous memory buffer given the shape, strides - and itemsize. - """ - assert len(shape) == len(strides), "# dim mismatch" - ndim = len(shape) - s, e = mviewbuf.memoryview_get_extents_info(shape, strides, ndim, itemsize) - return e - s - - -def host_memory_size(obj): - "Get the size of the memory" - s, e = host_memory_extents(obj) - assert e >= s, "memory extend of negative size" - return e - s - - -def device_pointer(obj): - "Get the device pointer as an integer" - return device_ctypes_pointer(obj).value - - -def device_ctypes_pointer(obj): - "Get the ctypes object for the device pointer" - if obj is None: - return c_void_p(0) - require_device_memory(obj) - return obj.device_ctypes_pointer - - -def is_device_memory(obj): - """All CUDA memory object is recognized as an instance with the attribute - "__cuda_memory__" defined and its value evaluated to True. - - All CUDA memory object should also define an attribute named - "device_pointer" which value is an int(or long) object carrying the pointer - value of the device memory address. This is not tested in this method. 
- """ - return getattr(obj, '__cuda_memory__', False) - - -def require_device_memory(obj): - """A sentry for methods that accept CUDA memory object. - """ - if not is_device_memory(obj): - raise Exception("Not a CUDA memory object.") - - -def device_memory_depends(devmem, *objs): - """Add dependencies to the device memory. - - Mainly used for creating structures that points to other device memory, - so that the referees are not GC and released. - """ - depset = getattr(devmem, "_depends_", []) - depset.extend(objs) - - -def host_to_device(dst, src, size, stream=0): - """ - NOTE: The underlying data pointer from the host data buffer is used and - it should not be changed until the operation which can be asynchronous - completes. - """ - varargs = [] - - if stream: - assert isinstance(stream, Stream) - fn = driver.cuMemcpyHtoDAsync - varargs.append(stream.handle) - else: - fn = driver.cuMemcpyHtoD - - fn(device_pointer(dst), host_pointer(src), size, *varargs) - - -def device_to_host(dst, src, size, stream=0): - """ - NOTE: The underlying data pointer from the host data buffer is used and - it should not be changed until the operation which can be asynchronous - completes. - """ - varargs = [] - - if stream: - assert isinstance(stream, Stream) - fn = driver.cuMemcpyDtoHAsync - varargs.append(stream.handle) - else: - fn = driver.cuMemcpyDtoH - - fn(host_pointer(dst), device_pointer(src), size, *varargs) - - -def device_to_device(dst, src, size, stream=0): - """ - NOTE: The underlying data pointer from the host data buffer is used and - it should not be changed until the operation which can be asynchronous - completes. - """ - varargs = [] - - if stream: - assert isinstance(stream, Stream) - fn = driver.cuMemcpyDtoDAsync - varargs.append(stream.handle) - else: - fn = driver.cuMemcpyDtoD - - fn(device_pointer(dst), device_pointer(src), size, *varargs) - - -def device_memset(dst, val, size, stream=0): - """Memset on the device. 
- If stream is not zero, asynchronous mode is used. - - dst: device memory - val: byte value to be written - size: number of byte to be written - stream: a CUDA stream - """ - varargs = [] - - if stream: - assert isinstance(stream, Stream) - fn = driver.cuMemsetD8Async - varargs.append(stream.handle) - else: - fn = driver.cuMemsetD8 - - fn(device_pointer(dst), val, size, *varargs) - - -def profile_start(): - ''' - Enable profile collection in the current context. - ''' - driver.cuProfilerStart() - - -def profile_stop(): - ''' - Disable profile collection in the current context. - ''' - driver.cuProfilerStop() - - -@contextlib.contextmanager -def profiling(): - """ - Context manager that enables profiling on entry and disables profiling on - exit. - """ - profile_start() - yield - profile_stop() diff --git a/numba/numba/cuda/cudadrv/drvapi.py b/numba/numba/cuda/cudadrv/drvapi.py deleted file mode 100644 index 0efbfff7f..000000000 --- a/numba/numba/cuda/cudadrv/drvapi.py +++ /dev/null @@ -1,348 +0,0 @@ -from __future__ import print_function, absolute_import, division -from ctypes import * - -from . 
import _extras - -cu_device = c_int -cu_device_attribute = c_int # enum -cu_context = c_void_p # an opaque handle -cu_module = c_void_p # an opaque handle -cu_jit_option = c_int # enum -cu_jit_input_type = c_int # enum -cu_function = c_void_p # an opaque handle -cu_device_ptr = c_size_t # defined as unsigned int on 32-bit - # and unsigned long long on 64-bit machine -cu_stream = c_void_p # an opaque handle -cu_event = c_void_p -cu_link_state = c_void_p -cu_function_attribute = c_int -cu_ipc_mem_handle = (c_byte * _extras.CUDA_IPC_HANDLE_SIZE) # 64 bytes wide - -cu_occupancy_b2d_size = CFUNCTYPE(c_size_t, c_int) - - -API_PROTOTYPES = { -# CUresult cuInit(unsigned int Flags); -'cuInit' : (c_int, c_uint), - -# CUresult cuDriverGetVersion ( int* driverVersion ) -'cuDriverGetVersion': (c_int, POINTER(c_int)), - -# CUresult cuDeviceGetCount(int *count); -'cuDeviceGetCount': (c_int, POINTER(c_int)), - -# CUresult cuDeviceGet(CUdevice *device, int ordinal); -'cuDeviceGet': (c_int, POINTER(cu_device), c_int), - -# CUresult cuDeviceGetName ( char* name, int len, CUdevice dev ) -'cuDeviceGetName': (c_int, c_char_p, c_int, cu_device), - -# CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, -# CUdevice dev); -'cuDeviceGetAttribute': (c_int, POINTER(c_int), cu_device_attribute, - cu_device), - -# CUresult cuDeviceComputeCapability(int *major, int *minor, -# CUdevice dev); -'cuDeviceComputeCapability': (c_int, POINTER(c_int), POINTER(c_int), - cu_device), - -# CUresult cuDevicePrimaryCtxGetState ( CUdevice dev, unsigned int* flags, int* active ) -'cuDevicePrimaryCtxGetState': (c_int, - cu_device, POINTER(c_uint), POINTER(c_int)), - -# CUresult cuDevicePrimaryCtxRelease ( CUdevice dev ) -'cuDevicePrimaryCtxRelease': (c_int, cu_device), - -# CUresult cuDevicePrimaryCtxReset ( CUdevice dev ) -'cuDevicePrimaryCtxReset': (c_int, cu_device), - -# CUresult cuDevicePrimaryCtxRetain ( CUcontext* pctx, CUdevice dev ) -'cuDevicePrimaryCtxRetain': (c_int, POINTER(cu_context), 
cu_device), - -# CUresult cuDevicePrimaryCtxSetFlags ( CUdevice dev, unsigned int flags ) -'cuDevicePrimaryCtxSetFlags': (c_int, cu_device, c_uint), - -# CUresult cuCtxCreate(CUcontext *pctx, unsigned int flags, -# CUdevice dev); -'cuCtxCreate': (c_int, POINTER(cu_context), c_uint, cu_device), - -# CUresult cuCtxGetDevice ( CUdevice * device ) -'cuCtxGetDevice': (c_int, POINTER(cu_device)), - -# CUresult cuCtxGetCurrent (CUcontext *pctx); -'cuCtxGetCurrent': (c_int, POINTER(cu_context)), - -# CUresult cuCtxPushCurrent (CUcontext pctx); -'cuCtxPushCurrent': (c_int, cu_context), - -# CUresult cuCtxPopCurrent (CUcontext *pctx); -'cuCtxPopCurrent': (c_int, POINTER(cu_context)), - -# CUresult cuCtxDestroy(CUcontext pctx); -'cuCtxDestroy': (c_int, cu_context), - -# CUresult cuModuleLoadDataEx(CUmodule *module, const void *image, -# unsigned int numOptions, -# CUjit_option *options, -# void **optionValues); -'cuModuleLoadDataEx': (c_int, cu_module, c_void_p, c_uint, - POINTER(cu_jit_option), POINTER(c_void_p)), - -# CUresult cuModuleUnload(CUmodule hmod); -'cuModuleUnload': (c_int, cu_module), - -# CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, -# const char *name); -'cuModuleGetFunction': (c_int, cu_function, cu_module, c_char_p), - -# CUresult cuModuleGetGlobal ( CUdeviceptr* dptr, size_t* bytes, CUmodule -# hmod, const char* name ) -'cuModuleGetGlobal': (c_int, POINTER(cu_device_ptr), POINTER(c_size_t), - cu_module, c_char_p), - -# CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, -# CUfunc_cache config); -'cuFuncSetCacheConfig': (c_int, cu_function, c_uint), - -# CUresult cuMemAlloc(CUdeviceptr *dptr, size_t bytesize); -'cuMemAlloc': (c_int, POINTER(cu_device_ptr), c_size_t), - -# CUresult cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N) -'cuMemsetD8': (c_int, cu_device_ptr, c_uint8, c_size_t), - -# CUresult cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, -# size_t N, CUstream hStream); -'cuMemsetD8Async': (c_int, - 
cu_device_ptr, c_uint8, c_size_t, cu_stream), - -# CUresult cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, -# size_t ByteCount); -'cuMemcpyHtoD': (c_int, cu_device_ptr, c_void_p, c_size_t), - -# CUresult cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, -# size_t ByteCount, CUstream hStream); -'cuMemcpyHtoDAsync': (c_int, cu_device_ptr, c_void_p, c_size_t, - cu_stream), - -# CUresult cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, -# size_t ByteCount); -'cuMemcpyDtoD': (c_int, cu_device_ptr, cu_device_ptr, c_size_t), - -# CUresult cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, -# size_t ByteCount, CUstream hStream); -'cuMemcpyDtoDAsync': (c_int, cu_device_ptr, cu_device_ptr, c_size_t, - cu_stream), - - -# CUresult cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, -# size_t ByteCount); -'cuMemcpyDtoH': (c_int, c_void_p, cu_device_ptr, c_size_t), - -# CUresult cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, -# size_t ByteCount, CUstream hStream); -'cuMemcpyDtoHAsync': (c_int, c_void_p, cu_device_ptr, c_size_t, - cu_stream), - -# CUresult cuMemFree(CUdeviceptr dptr); -'cuMemFree': (c_int, cu_device_ptr), - -# CUresult cuStreamCreate(CUstream *phStream, unsigned int Flags); -'cuStreamCreate': (c_int, POINTER(cu_stream), c_uint), - -# CUresult cuStreamDestroy(CUstream hStream); -'cuStreamDestroy': (c_int, cu_stream), - -# CUresult cuStreamSynchronize(CUstream hStream); -'cuStreamSynchronize': (c_int, cu_stream), - -# CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, -# unsigned int gridDimY, -# unsigned int gridDimZ, -# unsigned int blockDimX, -# unsigned int blockDimY, -# unsigned int blockDimZ, -# unsigned int sharedMemBytes, -# CUstream hStream, void **kernelParams, -# void ** extra) -'cuLaunchKernel': (c_int, cu_function, c_uint, c_uint, c_uint, - c_uint, c_uint, c_uint, c_uint, cu_stream, - POINTER(c_void_p), POINTER(c_void_p)), - -# CUresult cuMemHostAlloc ( void ** pp, -# size_t bytesize, -# 
unsigned int Flags -# ) -'cuMemHostAlloc': (c_int, c_void_p, c_size_t, c_uint), - -# CUresult cuMemFreeHost ( void * p ) -'cuMemFreeHost': (c_int, c_void_p), - -# CUresult cuMemHostRegister(void * p, -# size_t bytesize, -# unsigned int Flags) -'cuMemHostRegister': (c_int, c_void_p, c_size_t, c_uint), - -# CUresult cuMemHostUnregister(void * p) -'cuMemHostUnregister': (c_int, c_void_p), - -# CUresult cuMemHostGetDevicePointer(CUdeviceptr * pdptr, -# void * p, -# unsigned int Flags) -'cuMemHostGetDevicePointer': (c_int, POINTER(cu_device_ptr), - c_void_p, c_uint), - -# CUresult cuMemGetInfo(size_t * free, size_t * total) -'cuMemGetInfo' : (c_int, POINTER(c_size_t), POINTER(c_size_t)), - -# CUresult cuEventCreate ( CUevent * phEvent, -# unsigned int Flags ) -'cuEventCreate': (c_int, POINTER(cu_event), c_uint), - -# CUresult cuEventDestroy ( CUevent hEvent ) -'cuEventDestroy': (c_int, cu_event), - -# CUresult cuEventElapsedTime ( float * pMilliseconds, -# CUevent hStart, -# CUevent hEnd ) -'cuEventElapsedTime': (c_int, POINTER(c_float), cu_event, cu_event), - -# CUresult cuEventQuery ( CUevent hEvent ) -'cuEventQuery': (c_int, cu_event), - -# CUresult cuEventRecord ( CUevent hEvent, -# CUstream hStream ) -'cuEventRecord': (c_int, cu_event, cu_stream), - -# CUresult cuEventSynchronize ( CUevent hEvent ) -'cuEventSynchronize': (c_int, cu_event), - - -# CUresult cuStreamWaitEvent ( CUstream hStream, -# CUevent hEvent, -# unsigned int Flags ) -'cuStreamWaitEvent': (c_int, cu_stream, cu_event, c_uint), - -# CUresult cuPointerGetAttribute (void *data, CUpointer_attribute attribute, CUdeviceptr ptr) -'cuPointerGetAttribute': (c_int, c_void_p, c_uint, cu_device_ptr), - -# CUresult cuMemGetAddressRange ( CUdeviceptr * pbase, -# size_t * psize, -# CUdeviceptr dptr -# ) -'cuMemGetAddressRange': (c_int, - POINTER(cu_device_ptr), - POINTER(c_size_t), - cu_device_ptr), - -# CUresult cuMemHostGetFlags ( unsigned int * pFlags, -# void * p ) -'cuMemHostGetFlags': (c_int, - 
POINTER(c_uint), - c_void_p), - -# CUresult cuCtxSynchronize ( void ) -'cuCtxSynchronize' : (c_int,), - -# CUresult -# cuLinkCreate(unsigned int numOptions, CUjit_option *options, -# void **optionValues, CUlinkState *stateOut); -'cuLinkCreate': (c_int, - c_uint, POINTER(cu_jit_option), - POINTER(c_void_p), POINTER(cu_link_state)), - -# CUresult -# cuLinkAddData(CUlinkState state, CUjitInputType type, void *data, -# size_t size, const char *name, unsigned -# int numOptions, CUjit_option *options, -# void **optionValues); -'cuLinkAddData': (c_int, - cu_link_state, cu_jit_input_type, c_void_p, - c_size_t, c_char_p, c_uint, POINTER(cu_jit_option), - POINTER(c_void_p)), - -# CUresult -# cuLinkAddFile(CUlinkState state, CUjitInputType type, -# const char *path, unsigned int numOptions, -# CUjit_option *options, void **optionValues); - -'cuLinkAddFile': (c_int, - cu_link_state, cu_jit_input_type, c_char_p, c_uint, - POINTER(cu_jit_option), POINTER(c_void_p)), - -# CUresult CUDAAPI -# cuLinkComplete(CUlinkState state, void **cubinOut, size_t *sizeOut) -'cuLinkComplete': (c_int, - cu_link_state, POINTER(c_void_p), POINTER(c_size_t)), - -# CUresult CUDAAPI -# cuLinkDestroy(CUlinkState state) -'cuLinkDestroy': (c_int, cu_link_state), - - -# cuProfilerInitialize ( const char* configFile, const char* -# outputFile, CUoutput_mode outputMode ) -# 'cuProfilerInitialize': (c_int, c_char_p, c_char_p, cu_output_mode), - -# cuProfilerStart ( void ) -'cuProfilerStart': (c_int,), - -# cuProfilerStop ( void ) -'cuProfilerStop': (c_int,), - -# CUresult cuFuncGetAttribute ( int* pi, CUfunction_attribute attrib, -# CUfunction hfunc ) -'cuFuncGetAttribute': (c_int, - POINTER(c_int), cu_function_attribute, cu_function), - -# CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, -# CUfunction func, -# int blockSize, -# size_t dynamicSMemSize); -'cuOccupancyMaxActiveBlocksPerMultiprocessor': (c_int, - POINTER(c_int), cu_function, c_size_t, c_uint), - -# CUresult CUDAAPI 
cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, -# CUfunction func, -# int blockSize, -# size_t dynamicSMemSize, -# unsigned int flags); -'cuOccupancyMaxActiveBlocksPerMultiprocessor': (c_int, - POINTER(c_int), cu_function, c_size_t, c_uint), - -# CUresult CUDAAPI cuOccupancyMaxPotentialBlockSize(int *minGridSize, int *blockSize, -# CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, -# size_t dynamicSMemSize, int blockSizeLimit); -'cuOccupancyMaxPotentialBlockSize': (c_int, - POINTER(c_int), POINTER(c_int), cu_function, cu_occupancy_b2d_size, c_size_t, c_int), - -# CUresult CUDAAPI cuOccupancyMaxPotentialBlockSizeWithFlags(int *minGridSize, int *blockSize, -# CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, -# size_t dynamicSMemSize, int blockSizeLimit, unsigned int flags); -'cuOccupancyMaxPotentialBlockSizeWithFlags': (c_int, - POINTER(c_int), POINTER(c_int), cu_function, cu_occupancy_b2d_size, c_size_t, c_int, c_uint), - -# CUresult cuIpcGetMemHandle ( CUipcMemHandle* pHandle, CUdeviceptr dptr ) -'cuIpcGetMemHandle': (c_int, - POINTER(cu_ipc_mem_handle), cu_device_ptr), - -# CUresult cuIpcOpenMemHandle ( CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned int Flags ) - -'cuIpcOpenMemHandle': (c_int, - POINTER(cu_device_ptr), cu_ipc_mem_handle, c_uint), - -# CUresult cuIpcCloseMemHandle ( CUdeviceptr dptr ) - -'cuIpcCloseMemHandle': (c_int, - cu_device_ptr), - -# CUresult cuCtxEnablePeerAccess ( CUcontext peerContext, unsigned int Flags ) -'cuCtxEnablePeerAccess': (c_int, - cu_context, c_int), - -# CUresult cuDeviceCanAccessPeer ( int* canAccessPeer, -# CUdevice dev, CUdevice peerDev ) -'cuDeviceCanAccessPeer': (c_int, - POINTER(c_int), cu_device, cu_device), - -} diff --git a/numba/numba/cuda/cudadrv/enums.py b/numba/numba/cuda/cudadrv/enums.py deleted file mode 100644 index 665a49aa4..000000000 --- a/numba/numba/cuda/cudadrv/enums.py +++ /dev/null @@ -1,432 +0,0 @@ -""" -Enum values for CUDA driver -""" -from 
__future__ import print_function, absolute_import, division - - -CUDA_SUCCESS = 0 -CUDA_ERROR_INVALID_VALUE = 1 -CUDA_ERROR_OUT_OF_MEMORY = 2 -CUDA_ERROR_NOT_INITIALIZED = 3 -CUDA_ERROR_DEINITIALIZED = 4 -CUDA_ERROR_PROFILER_DISABLED = 5 -CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6 -CUDA_ERROR_PROFILER_ALREADY_STARTED = 7 -CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8 -CUDA_ERROR_NO_DEVICE = 100 -CUDA_ERROR_INVALID_DEVICE = 101 -CUDA_ERROR_INVALID_IMAGE = 200 -CUDA_ERROR_INVALID_CONTEXT = 201 -CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202 -CUDA_ERROR_MAP_FAILED = 205 -CUDA_ERROR_UNMAP_FAILED = 206 -CUDA_ERROR_ARRAY_IS_MAPPED = 207 -CUDA_ERROR_ALREADY_MAPPED = 208 -CUDA_ERROR_NO_BINARY_FOR_GPU = 209 -CUDA_ERROR_ALREADY_ACQUIRED = 210 -CUDA_ERROR_NOT_MAPPED = 211 -CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212 -CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213 -CUDA_ERROR_ECC_UNCORRECTABLE = 214 -CUDA_ERROR_UNSUPPORTED_LIMIT = 215 -CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216 -CUDA_ERROR_INVALID_SOURCE = 300 -CUDA_ERROR_FILE_NOT_FOUND = 301 -CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302 -CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303 -CUDA_ERROR_OPERATING_SYSTEM = 304 -CUDA_ERROR_INVALID_HANDLE = 400 -CUDA_ERROR_NOT_FOUND = 500 -CUDA_ERROR_NOT_READY = 600 -CUDA_ERROR_LAUNCH_FAILED = 700 -CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701 -CUDA_ERROR_LAUNCH_TIMEOUT = 702 -CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703 -CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704 -CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705 -CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708 -CUDA_ERROR_CONTEXT_IS_DESTROYED = 709 -CUDA_ERROR_ASSERT = 710 -CUDA_ERROR_TOO_MANY_PEERS = 711 -CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712 -CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713 -CUDA_ERROR_HARDWARE_STACK_ERROR = 714 -CUDA_ERROR_ILLEGAL_INSTRUCTION = 715 -CUDA_ERROR_MISALIGNED_ADDRESS = 716 -CUDA_ERROR_INVALID_ADDRESS_SPACE = 717 -CUDA_ERROR_INVALID_PC = 718 -CUDA_ERROR_LAUNCH_FAILED = 719 -CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720 -CUDA_ERROR_NOT_PERMITTED = 800 
-CUDA_ERROR_NOT_SUPPORTED = 801 -CUDA_ERROR_UNKNOWN = 999 - - -# no preference for shared memory or L1 (default) -CU_FUNC_CACHE_PREFER_NONE = 0x00 -# prefer larger shared memory and smaller L1 cache -CU_FUNC_CACHE_PREFER_SHARED = 0x01 -# prefer larger L1 cache and smaller shared memory -CU_FUNC_CACHE_PREFER_L1 = 0x02 -# prefer equal sized L1 cache and shared memory -CU_FUNC_CACHE_PREFER_EQUAL = 0x03 - -# Automatic scheduling -CU_CTX_SCHED_AUTO = 0x00 -# Set spin as default scheduling -CU_CTX_SCHED_SPIN = 0x01 -# Set yield as default scheduling -CU_CTX_SCHED_YIELD = 0x02 -# Set blocking synchronization as default scheduling -CU_CTX_SCHED_BLOCKING_SYNC = 0x04 - -CU_CTX_SCHED_MASK = 0x07 - -# Support mapped pinned allocations -CU_CTX_MAP_HOST = 0x08 -# Keep local memory allocation after launch -CU_CTX_LMEM_RESIZE_TO_MAX = 0x10 - -CU_CTX_FLAGS_MASK = 0x1f - - - -# If set, host memory is portable between CUDA contexts. -# Flag for cuMemHostAlloc() -CU_MEMHOSTALLOC_PORTABLE = 0x01 - -# If set, host memory is mapped into CUDA address space and -# cuMemHostGetDevicePointer() may be called on the host pointer. -# Flag for cuMemHostAlloc() -CU_MEMHOSTALLOC_DEVICEMAP = 0x02 - -# If set, host memory is allocated as write-combined - fast to write, -# faster to DMA, slow to read except via SSE4 streaming load instruction -# (MOVNTDQA). -# Flag for cuMemHostAlloc() -CU_MEMHOSTALLOC_WRITECOMBINED = 0x04 - -# If set, host memory is portable between CUDA contexts. -# Flag for cuMemHostRegister() -CU_MEMHOSTREGISTER_PORTABLE = 0x01 - -# If set, host memory is mapped into CUDA address space and -# cuMemHostGetDevicePointer() may be called on the host pointer. -# Flag for cuMemHostRegister() -CU_MEMHOSTREGISTER_DEVICEMAP = 0x02 - - -# Default event flag -CU_EVENT_DEFAULT = 0x0 -# Event uses blocking synchronization -CU_EVENT_BLOCKING_SYNC = 0x1 -# Event will not record timing data -CU_EVENT_DISABLE_TIMING = 0x2 -# Event is suitable for interprocess use. 
CU_EVENT_DISABLE_TIMING must be set -CU_EVENT_INTERPROCESS = 0x4 - -# The CUcontext on which a pointer was allocated or registered -CU_POINTER_ATTRIBUTE_CONTEXT = 1 -# The CUmemorytype describing the physical location of a pointer -CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2 -# The address at which a pointer's memory may be accessed on the device -CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3 -# The address at which a pointer's memory may be accessed on the host -CU_POINTER_ATTRIBUTE_HOST_POINTER = 4 -# A pair of tokens for use with the nv-p2p.h Linux kernel interface -CU_POINTER_ATTRIBUTE_P2P_TOKENS = 5 - -# Host memory -CU_MEMORYTYPE_HOST = 0x01 -# Device memory -CU_MEMORYTYPE_DEVICE = 0x02 -# Array memory -CU_MEMORYTYPE_ARRAY = 0x03 -# Unified device or host memory -CU_MEMORYTYPE_UNIFIED = 0x04 - - - -# Compiled device-class-specific device code -# Applicable options: none -CU_JIT_INPUT_CUBIN = 0 - -# PTX source code -# Applicable options: PTX compiler options -CU_JIT_INPUT_PTX = 1 - -# Bundle of multiple cubins and/or PTX of some device code -# Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY -CU_JIT_INPUT_FATBINAR = 2 - -# Host object with embedded device code -# Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY -CU_JIT_INPUT_OBJECT = 3 - -# Archive of host objects with embedded device code -# Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY -CU_JIT_INPUT_LIBRARY = 4 - - - -# Max number of registers that a thread may use. -# Option type: unsigned int -# Applies to: compiler only - -CU_JIT_MAX_REGISTERS = 0 - - -# IN: Specifies minimum number of threads per block to target compilation -# for -# OUT: Returns the number of threads the compiler actually targeted. -# This restricts the resource utilization fo the compiler (e.g. max -# registers) such that a block with the given number of threads should be -# able to launch based on register limitations. 
Note, this option does not -# currently take into account any other resource limitations, such as -# shared memory utilization. -# Cannot be combined with ::CU_JIT_TARGET. -# Option type: unsigned int -# Applies to: compiler only - -CU_JIT_THREADS_PER_BLOCK = 1 - - -# Overwrites the option value with the total wall clock time, in -# milliseconds, spent in the compiler and linker -# Option type: float -# Applies to: compiler and linker - -CU_JIT_WALL_TIME = 2 - - -# Pointer to a buffer in which to print any log messages -# that are informational in nature (the buffer size is specified via -# option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES) -# Option type: char * -# Applies to: compiler and linker - -CU_JIT_INFO_LOG_BUFFER = 3 - - -# IN: Log buffer size in bytes. Log messages will be capped at this size -# (including null terminator) -# OUT: Amount of log buffer filled with messages -# Option type: unsigned int -# Applies to: compiler and linker - -CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4 - - -# Pointer to a buffer in which to print any log messages that -# reflect errors (the buffer size is specified via option -# ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES) -# Option type: char * -# Applies to: compiler and linker - -CU_JIT_ERROR_LOG_BUFFER = 5 - - -# IN: Log buffer size in bytes. Log messages will be capped at this size -# (including null terminator) -# OUT: Amount of log buffer filled with messages -# Option type: unsigned int -# Applies to: compiler and linker - -CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6 - - -# Level of optimizations to apply to generated code (0 - 4), with 4 -# being the default and highest level of optimizations. -# Option type: unsigned int -# Applies to: compiler only - -CU_JIT_OPTIMIZATION_LEVEL = 7 - - -# No option value required. 
Determines the target based on the current -# attached context (default) -# Option type: No option value needed -# Applies to: compiler and linker - -CU_JIT_TARGET_FROM_CUCONTEXT = 8 - - -# Target is chosen based on supplied ::CUjit_target. Cannot be -# combined with ::CU_JIT_THREADS_PER_BLOCK. -# Option type: unsigned int for enumerated type ::CUjit_target -# Applies to: compiler and linker - -CU_JIT_TARGET = 9 - - -# Specifies choice of fallback strategy if matching cubin is not found. -# Choice is based on supplied ::CUjit_fallback. -# Option type: unsigned int for enumerated type ::CUjit_fallback -# Applies to: compiler only - -CU_JIT_FALLBACK_STRATEGY = 10 - - -# Specifies whether to create debug information in output (-g) -# (0: false, default) -# Option type: int -# Applies to: compiler and linker - -CU_JIT_GENERATE_DEBUG_INFO = 11 - - -# Generate verbose log messages (0: false, default) -# Option type: int -# Applies to: compiler and linker - -CU_JIT_LOG_VERBOSE = 12 - - -# Generate line number information (-lineinfo) (0: false, default) -# Option type: int -# Applies to: compiler only - -CU_JIT_GENERATE_LINE_INFO = 13 - - -# Specifies whether to enable caching explicitly (-dlcm) -# Choice is based on supplied ::CUjit_cacheMode_enum. 
-# Option type: unsigned int for enumerated type ::CUjit_cacheMode_enum -# Applies to: compiler only - -CU_JIT_CACHE_MODE = 14 - - -# Device attributes - - -CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1 -CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2 -CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3 -CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4 -CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5 -CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6 -CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7 -CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8 -CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9 -CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10 -CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11 -CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12 -CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13 -CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14 -CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15 -CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16 -CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17 -CU_DEVICE_ATTRIBUTE_INTEGRATED = 18 -CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19 -CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_WIDTH = 21 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_WIDTH = 22 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_HEIGHT = 23 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_WIDTH = 24 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_HEIGHT = 25 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_DEPTH = 26 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_WIDTH = 27 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_HEIGHT = 28 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_LAYERS = 29 -CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30 -CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31 -CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32 -CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33 -CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34 -CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35 -CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36 -CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37 -CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38 -CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTI_PROCESSOR = 39 -CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40 -CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41 
-CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_LAYERED_WIDTH = 42 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_LAYERED_LAYERS = 43 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_GATHER_WIDTH = 45 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_GATHER_HEIGHT = 46 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_WIDTH_ALT = 47 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_HEIGHT_ALT = 48 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_DEPTH_ALT = 49 -CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50 -CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_WIDTH = 52 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_LAYERED_WIDTH = 53 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_LAYERED_LAYERS = 54 -CU_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_WIDTH = 55 -CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_WIDTH = 56 -CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_HEIGHT = 57 -CU_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_WIDTH = 58 -CU_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_HEIGHT = 59 -CU_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_DEPTH = 60 -CU_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_LAYERED_WIDTH = 61 -CU_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_LAYERED_LAYERS = 62 -CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_WIDTH = 63 -CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_HEIGHT = 64 -CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_LAYERS = 65 -CU_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_WIDTH = 66 -CU_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_LAYERED_WIDTH = 67 -CU_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_LAYERED_LAYERS = 68 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_LINEAR_WIDTH = 69 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LINEAR_WIDTH = 70 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LINEAR_HEIGHT = 71 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LINEAR_PITCH = 72 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_MIPMAPPED_WIDTH = 73 -CU_DEVICE_ATTRIBUTE_MAX_MAX_TEXTURE_2D_MIPMAPPED_HEIGHT = 74 -CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75 -CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76 -CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_MIPMAPPED_WIDTH = 77 -CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78 -CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79 
-CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80 -CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81 -CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82 -CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83 -CU_DEVICE_ATTRIBUTE_IS_MULTI_GPU_BOARD = 84 -CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85 -CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86 -CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87 -CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88 -CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89 -CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90 -CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91 -CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95 -CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96 -CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97 - - -# CUfunction_attribute - -# The maximum number of threads per block, beyond which a launch of the -# function would fail. This number depends on both the function and the -# device on which the function is currently loaded. -CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0 - -# The size in bytes of statically-allocated shared memory required by -# this function. This does not include dynamically-allocated shared -# memory requested by the user at runtime. -CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1 - -# The size in bytes of user-allocated constant memory required by this -# function. -CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2 - -# The size in bytes of local memory used by each thread of this function. -CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3 - -# The number of registers used by each thread of this function. -CU_FUNC_ATTRIBUTE_NUM_REGS = 4 - -# The PTX virtual architecture version for which the function was -# compiled. This value is the major PTX version * 10 + the minor PTX -# version, so a PTX version 1.3 function would return the value 13. -# Note that this may return the undefined value of 0 for cubins -# compiled prior to CUDA 3.0. 
-CU_FUNC_ATTRIBUTE_PTX_VERSION = 5 - -# The binary architecture version for which the function was compiled. -# This value is the major binary version * 10 + the minor binary version, -# so a binary version 1.3 function would return the value 13. Note that -# this will return a value of 10 for legacy cubins that do not have a -# properly-encoded binary architecture version. -CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6 diff --git a/numba/numba/cuda/cudadrv/error.py b/numba/numba/cuda/cudadrv/error.py deleted file mode 100644 index 8c0a9f7f5..000000000 --- a/numba/numba/cuda/cudadrv/error.py +++ /dev/null @@ -1,18 +0,0 @@ -from __future__ import print_function, absolute_import, division - - -class CudaDriverError(Exception): - pass - - -class CudaSupportError(ImportError): - pass - - -class NvvmError(Exception): - def __str__(self): - return '\n'.join(map(str, self.args)) - - -class NvvmSupportError(ImportError): - pass diff --git a/numba/numba/cuda/cudadrv/libs.py b/numba/numba/cuda/cudadrv/libs.py deleted file mode 100644 index 2b2ed50d5..000000000 --- a/numba/numba/cuda/cudadrv/libs.py +++ /dev/null @@ -1,96 +0,0 @@ -from __future__ import print_function -import re -import os -import sys -import ctypes -import platform -from numba.findlib import find_lib, find_file - -if sys.platform == 'win32': - _dllopener = ctypes.WinDLL -elif sys.platform == 'darwin': - _dllopener = ctypes.CDLL -else: - _dllopener = ctypes.CDLL - - -def get_libdevice(arch): - libdir = (os.environ.get('NUMBAPRO_LIBDEVICE') or - os.environ.get('NUMBAPRO_CUDALIB')) - - pat = r'libdevice\.%s(\.\d+)*\.bc$' % arch - candidates = find_file(re.compile(pat), libdir) - - if not candidates: - # CUDA 9 switches to fat library, with no arch in name - pat = r'libdevice(\.\d+)*\.bc$' - candidates = find_file(re.compile(pat), libdir) - - return max(candidates) if candidates else None - - -def open_libdevice(arch): - with open(get_libdevice(arch), 'rb') as bcfile: - return bcfile.read() - - -def get_cudalib(lib, 
platform=None): - if lib == 'nvvm' and os.environ.get('NUMBAPRO_NVVM'): - return os.environ.get('NUMBAPRO_NVVM') - libdir = os.environ.get('NUMBAPRO_CUDALIB') - candidates = find_lib(lib, libdir, platform) - return max(candidates) if candidates else None - - -def open_cudalib(lib, ccc=False): - path = get_cudalib(lib) - if path is None: - raise OSError('library %s not found' % lib) - if ccc: - return ctypes.CDLL(path) - return _dllopener(path) - - -def test(_platform=None, print_paths=True): - failed = False - libs = 'cublas cusparse cufft curand nvvm'.split() - for lib in libs: - path = get_cudalib(lib, _platform) - print('Finding', lib) - if path: - if print_paths: - print('\tlocated at', path) - else: - print('\tnamed ', os.path.basename(path)) - else: - print('\tERROR: can\'t locate lib') - failed = True - - if not failed and _platform in (None, sys.platform): - try: - print('\ttrying to open library', end='...') - open_cudalib(lib, ccc=True) - print('\tok') - except OSError as e: - print('\tERROR: failed to open %s:\n%s' % (lib, e)) - # NOTE: ignore failure of dlopen on cuBlas on OSX 10.5 - failed = True if not _if_osx_10_5() else False - - archs = 'compute_20', 'compute_30', 'compute_35', 'compute_50' - for arch in archs: - print('\tfinding libdevice for', arch, end='...') - path = get_libdevice(arch) - if path: - print('\tok') - else: - print('\tERROR: can\'t open libdevice for %s' % arch) - failed = True - return not failed - - -def _if_osx_10_5(): - if sys.platform == 'darwin': - vers = tuple(map(int, platform.mac_ver()[0].split('.'))) - if vers < (10, 6): - return True - return False diff --git a/numba/numba/cuda/cudadrv/ndarray.py b/numba/numba/cuda/cudadrv/ndarray.py deleted file mode 100644 index b8db583cb..000000000 --- a/numba/numba/cuda/cudadrv/ndarray.py +++ /dev/null @@ -1,22 +0,0 @@ -from __future__ import print_function, absolute_import, division - -from . 
import devices, driver -from numba.targets.registry import cpu_target - - -def _calc_array_sizeof(ndim): - """ - Use the ABI size in the CPU target - """ - ctx = cpu_target.target_context - return ctx.calc_array_sizeof(ndim) - - -def ndarray_device_allocate_data(ary): - """ - Allocate gpu data buffer - """ - datasize = driver.host_memory_size(ary) - # allocate - gpu_data = devices.get_context().memalloc(datasize) - return gpu_data diff --git a/numba/numba/cuda/cudadrv/nvvm.py b/numba/numba/cuda/cudadrv/nvvm.py deleted file mode 100644 index 32a515654..000000000 --- a/numba/numba/cuda/cudadrv/nvvm.py +++ /dev/null @@ -1,662 +0,0 @@ -""" -This is a direct translation of nvvm.h -""" -from __future__ import print_function, absolute_import, division -import sys, logging, re -from ctypes import (c_void_p, c_int, POINTER, c_char_p, c_size_t, byref, - c_char) - -import threading - -from llvmlite import ir - -from numba import config -from .error import NvvmError, NvvmSupportError -from .libs import get_libdevice, open_libdevice, open_cudalib - - -logger = logging.getLogger(__name__) - -ADDRSPACE_GENERIC = 0 -ADDRSPACE_GLOBAL = 1 -ADDRSPACE_SHARED = 3 -ADDRSPACE_CONSTANT = 4 -ADDRSPACE_LOCAL = 5 - -# Opaque handle for comilation unit -nvvm_program = c_void_p - -# Result code -nvvm_result = c_int - -RESULT_CODE_NAMES = ''' -NVVM_SUCCESS -NVVM_ERROR_OUT_OF_MEMORY -NVVM_ERROR_PROGRAM_CREATION_FAILURE -NVVM_ERROR_IR_VERSION_MISMATCH -NVVM_ERROR_INVALID_INPUT -NVVM_ERROR_INVALID_PROGRAM -NVVM_ERROR_INVALID_IR -NVVM_ERROR_INVALID_OPTION -NVVM_ERROR_NO_MODULE_IN_PROGRAM -NVVM_ERROR_COMPILATION -'''.split() - -for i, k in enumerate(RESULT_CODE_NAMES): - setattr(sys.modules[__name__], k, i) - - -def is_available(): - """ - Return if libNVVM is available - """ - try: - NVVM() - except NvvmSupportError: - return False - else: - return True - - -_nvvm_lock = threading.Lock() - -class NVVM(object): - '''Process-wide singleton. 
- ''' - _PROTOTYPES = { - - # nvvmResult nvvmVersion(int *major, int *minor) - 'nvvmVersion': (nvvm_result, POINTER(c_int), POINTER(c_int)), - - # nvvmResult nvvmCreateProgram(nvvmProgram *cu) - 'nvvmCreateProgram': (nvvm_result, POINTER(nvvm_program)), - - # nvvmResult nvvmDestroyProgram(nvvmProgram *cu) - 'nvvmDestroyProgram': (nvvm_result, POINTER(nvvm_program)), - - # nvvmResult nvvmAddModuleToProgram(nvvmProgram cu, const char *buffer, - # size_t size, const char *name) - 'nvvmAddModuleToProgram': ( - nvvm_result, nvvm_program, c_char_p, c_size_t, c_char_p), - - # nvvmResult nvvmCompileProgram(nvvmProgram cu, int numOptions, - # const char **options) - 'nvvmCompileProgram': ( - nvvm_result, nvvm_program, c_int, POINTER(c_char_p)), - - # nvvmResult nvvmGetCompiledResultSize(nvvmProgram cu, - # size_t *bufferSizeRet) - 'nvvmGetCompiledResultSize': ( - nvvm_result, nvvm_program, POINTER(c_size_t)), - - # nvvmResult nvvmGetCompiledResult(nvvmProgram cu, char *buffer) - 'nvvmGetCompiledResult': (nvvm_result, nvvm_program, c_char_p), - - # nvvmResult nvvmGetProgramLogSize(nvvmProgram cu, - # size_t *bufferSizeRet) - 'nvvmGetProgramLogSize': (nvvm_result, nvvm_program, POINTER(c_size_t)), - - # nvvmResult nvvmGetProgramLog(nvvmProgram cu, char *buffer) - 'nvvmGetProgramLog': (nvvm_result, nvvm_program, c_char_p), - } - - # Singleton reference - __INSTANCE = None - - def __new__(cls): - with _nvvm_lock: - if cls.__INSTANCE is None: - cls.__INSTANCE = inst = object.__new__(cls) - try: - inst.driver = open_cudalib('nvvm', ccc=True) - except OSError as e: - cls.__INSTANCE = None - errmsg = ("libNVVM cannot be found. 
Do `conda install " - "cudatoolkit`:\n%s") - raise NvvmSupportError(errmsg % e) - - # Find & populate functions - for name, proto in inst._PROTOTYPES.items(): - func = getattr(inst.driver, name) - func.restype = proto[0] - func.argtypes = proto[1:] - setattr(inst, name, func) - - return cls.__INSTANCE - - def get_version(self): - major = c_int() - minor = c_int() - err = self.nvvmVersion(byref(major), byref(minor)) - self.check_error(err, 'Failed to get version.') - return major.value, minor.value - - def check_error(self, error, msg, exit=False): - if error: - exc = NvvmError(msg, RESULT_CODE_NAMES[error]) - if exit: - print(exc) - sys.exit(1) - else: - raise exc - - -class CompilationUnit(object): - def __init__(self): - self.driver = NVVM() - self._handle = nvvm_program() - err = self.driver.nvvmCreateProgram(byref(self._handle)) - self.driver.check_error(err, 'Failed to create CU') - - def __del__(self): - driver = NVVM() - err = driver.nvvmDestroyProgram(byref(self._handle)) - driver.check_error(err, 'Failed to destroy CU', exit=True) - - def add_module(self, buffer): - """ - Add a module level NVVM IR to a compilation unit. - - The buffer should contain an NVVM module IR either in the bitcode - representation (LLVM3.0) or in the text representation. 
- """ - err = self.driver.nvvmAddModuleToProgram(self._handle, buffer, - len(buffer), None) - self.driver.check_error(err, 'Failed to add module') - - def compile(self, **options): - """Perform Compliation - - The valid compiler options are - - * - -g (enable generation of debugging information) - * - -opt= - * - 0 (disable optimizations) - * - 3 (default, enable optimizations) - * - -arch= - * - compute_20 (default) - * - compute_30 - * - compute_35 - * - -ftz= - * - 0 (default, preserve denormal values, when performing - * single-precision floating-point operations) - * - 1 (flush denormal values to zero, when performing - * single-precision floating-point operations) - * - -prec-sqrt= - * - 0 (use a faster approximation for single-precision - * floating-point square root) - * - 1 (default, use IEEE round-to-nearest mode for - * single-precision floating-point square root) - * - -prec-div= - * - 0 (use a faster approximation for single-precision - * floating-point division and reciprocals) - * - 1 (default, use IEEE round-to-nearest mode for - * single-precision floating-point division and reciprocals) - * - -fma= - * - 0 (disable FMA contraction) - * - 1 (default, enable FMA contraction) - * - """ - - # stringify options - opts = [] - if 'debug' in options: - if options.pop('debug'): - opts.append('-g') - - if options.get('opt'): - opts.append('-opt=%d' % options.pop('opt')) - - if options.get('arch'): - opts.append('-arch=%s' % options.pop('arch')) - - other_options = ( - 'ftz', - 'prec_sqrt', - 'prec_div', - 'fma', - ) - - for k in other_options: - if k in options: - v = int(bool(options.pop(k))) - opts.append('-%s=%d' % (k.replace('_', '-'), v)) - - # If there are any option left - if options: - optstr = ', '.join(map(repr, options.keys())) - raise NvvmError("unsupported option {0}".format(optstr)) - - # compile - c_opts = (c_char_p * len(opts))(*[c_char_p(x.encode('utf8')) - for x in opts]) - err = self.driver.nvvmCompileProgram(self._handle, len(opts), 
c_opts) - self._try_error(err, 'Failed to compile\n') - - # get result - reslen = c_size_t() - err = self.driver.nvvmGetCompiledResultSize(self._handle, byref(reslen)) - - self._try_error(err, 'Failed to get size of compiled result.') - - ptxbuf = (c_char * reslen.value)() - err = self.driver.nvvmGetCompiledResult(self._handle, ptxbuf) - self._try_error(err, 'Failed to get compiled result.') - - # get log - self.log = self.get_log() - - return ptxbuf[:] - - def _try_error(self, err, msg): - self.driver.check_error(err, "%s\n%s" % (msg, self.get_log())) - - def get_log(self): - reslen = c_size_t() - err = self.driver.nvvmGetProgramLogSize(self._handle, byref(reslen)) - self.driver.check_error(err, 'Failed to get compilation log size.') - - if reslen.value > 1: - logbuf = (c_char * reslen.value)() - err = self.driver.nvvmGetProgramLog(self._handle, logbuf) - self.driver.check_error(err, 'Failed to get compilation log.') - - return logbuf.value.decode('utf8') # populate log attribute - - return '' - - -data_layout = { - 32: ('e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-' - 'f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64'), - 64: ('e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-' - 'f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64')} - -default_data_layout = data_layout[tuple.__itemsize__ * 8] - - -try: - NVVM_VERSION = NVVM().get_version() -except: - # the CUDA driver may not be present - NVVM_VERSION = (0, 0) - -# List of supported compute capability in sorted order -if NVVM_VERSION < (1, 4): - # CUDA 8.0 - SUPPORTED_CC = (2, 0), (2, 1), (3, 0), (3, 5), (5, 0), (5, 2), (5, 3), (6, 0), (6, 1), (6, 2) -else: - # CUDA 9.0 and later - SUPPORTED_CC = (3, 0), (3, 5), (5, 0), (5, 2), (5, 3), (6, 0), (6, 1), (6, 2), (7, 0) - - -def _find_arch(mycc): - for i, cc in enumerate(SUPPORTED_CC): - if cc == mycc: - # Matches - return cc - elif cc > mycc: - # Exceeded - if i == 0: - # CC lower than supported - 
raise NvvmSupportError("GPU compute capability %d.%d is " - "not supported (requires >=2.0)" % mycc) - else: - # return the previous CC - return SUPPORTED_CC[i - 1] - - # CC higher than supported - return SUPPORTED_CC[-1] # Choose the highest - - -def get_arch_option(major, minor): - """Matches with the closest architecture option - """ - if config.FORCE_CUDA_CC: - arch = config.FORCE_CUDA_CC - else: - arch = _find_arch((major, minor)) - return 'compute_%d%d' % arch - - -MISSING_LIBDEVICE_MSG = ''' -Please define environment variable NUMBAPRO_LIBDEVICE=/path/to/libdevice -/path/to/libdevice -- is the path to the directory containing the libdevice.*.bc -files in the installation of CUDA. (requires CUDA >=8.0) -''' - -MISSING_LIBDEVICE_FILE_MSG = '''Missing libdevice file for {arch}. -Please ensure you have package cudatoolkit 8.0. -Install package by: - - conda install cudatoolkit=8.0 -''' - - -class LibDevice(object): - _cache_ = {} - _known_arch = [ - "compute_20", - "compute_30", - "compute_35", - "compute_50", - ] - - def __init__(self, arch): - """ - arch --- must be result from get_arch_option() - """ - if arch not in self._cache_: - arch = self._get_closest_arch(arch) - if get_libdevice(arch) is None: - raise RuntimeError(MISSING_LIBDEVICE_FILE_MSG.format(arch=arch)) - self._cache_[arch] = open_libdevice(arch) - - self.arch = arch - self.bc = self._cache_[arch] - - def _get_closest_arch(self, arch): - res = self._known_arch[0] - for potential in self._known_arch: - if arch >= potential: - res = potential - return res - - def get(self): - return self.bc - - -ir_numba_cas_hack = """ -define internal i32 @___numba_cas_hack(i32* %ptr, i32 %cmp, i32 %val) alwaysinline { - %out = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %val monotonic - ret i32 %out -} -""" - -# Translation of code from CUDA Programming Guide v6.5, section B.12 -ir_numba_atomic_double_add = """ -define internal double @___numba_atomic_double_add(double* %ptr, double %val) alwaysinline { -entry: - 
%iptr = bitcast double* %ptr to i64* - %old2 = load volatile i64, i64* %iptr - br label %attempt - -attempt: - %old = phi i64 [ %old2, %entry ], [ %cas, %attempt ] - %dold = bitcast i64 %old to double - %dnew = fadd double %dold, %val - %new = bitcast double %dnew to i64 - %cas = cmpxchg volatile i64* %iptr, i64 %old, i64 %new monotonic - %repeat = icmp ne i64 %cas, %old - br i1 %repeat, label %attempt, label %done - -done: - %result = bitcast i64 %old to double - ret double %result -} -""" - -ir_numba_atomic_max = """ -define internal {T} @___numba_atomic_{T}_max({T}* %ptr, {T} %val) alwaysinline {{ -entry: - %ptrval = load volatile {T}, {T}* %ptr - ; Check if val is a NaN and return *ptr early if so - %valnan = fcmp uno {T} %val, %val - br i1 %valnan, label %done, label %lt_check - -lt_check: - %dold = phi {T} [ %ptrval, %entry ], [ %dcas, %attempt ] - ; Continue attempts if dold < val or dold is NaN (using ult semantics) - %lt = fcmp ult {T} %dold, %val - br i1 %lt, label %attempt, label %done - -attempt: - ; Attempt to swap in the larger value - %iold = bitcast {T} %dold to {Ti} - %iptr = bitcast {T}* %ptr to {Ti}* - %ival = bitcast {T} %val to {Ti} - %cas = cmpxchg volatile {Ti}* %iptr, {Ti} %iold, {Ti} %ival monotonic - %dcas = bitcast {Ti} %cas to {T} - br label %lt_check - -done: - ; Return max - %ret = phi {T} [ %ptrval, %entry ], [ %dold, %lt_check ] - ret {T} %ret -}} -""" - - -ir_numba_atomic_min = """ -define internal {T} @___numba_atomic_{T}_min({T}* %ptr, {T} %val) alwaysinline{{ -entry: - %ptrval = load volatile {T}, {T}* %ptr - ; Check if val is a NaN and return *ptr early if so - %valnan = fcmp uno {T} %val, %val - br i1 %valnan, label %done, label %gt_check - -gt_check: - %dold = phi {T} [ %ptrval, %entry ], [ %dcas, %attempt ] - ; Continue attempts if dold > val or dold is NaN (using ugt semantics) - %lt = fcmp ugt {T} %dold, %val - br i1 %lt, label %attempt, label %done - -attempt: - ; Attempt to swap in the smaller value - %iold = bitcast {T} 
%dold to {Ti} - %iptr = bitcast {T}* %ptr to {Ti}* - %ival = bitcast {T} %val to {Ti} - %cas = cmpxchg volatile {Ti}* %iptr, {Ti} %iold, {Ti} %ival monotonic - %dcas = bitcast {Ti} %cas to {T} - br label %gt_check - -done: - ; Return min - %ret = phi {T} [ %ptrval, %entry ], [ %dold, %gt_check ] - ret {T} %ret -}} -""" - - -def _replace_datalayout(llvmir): - """ - Find the line containing the datalayout and replace it - """ - lines = llvmir.splitlines() - for i, ln in enumerate(lines): - if ln.startswith("target datalayout"): - tmp = 'target datalayout = "{0}"' - lines[i] = tmp.format(default_data_layout) - break - return '\n'.join(lines) - - -def llvm_to_ptx(llvmir, **opts): - cu = CompilationUnit() - libdevice = LibDevice(arch=opts.get('arch', 'compute_20')) - # New LLVM generate a shorthand for datalayout that NVVM does not know - llvmir = _replace_datalayout(llvmir) - # Replace with our cmpxchg and atomic implementations because LLVM 3.5 has - # a new semantic for cmpxchg. - replacements = [ - ('declare i32 @___numba_cas_hack(i32*, i32, i32)', - ir_numba_cas_hack), - ('declare double @___numba_atomic_double_add(double*, double)', - ir_numba_atomic_double_add), - ('declare float @___numba_atomic_float_max(float*, float)', - ir_numba_atomic_max.format(T='float', Ti='i32')), - ('declare double @___numba_atomic_double_max(double*, double)', - ir_numba_atomic_max.format(T='double', Ti='i64')), - ('declare float @___numba_atomic_float_min(float*, float)', - ir_numba_atomic_min.format(T='float', Ti='i32')), - ('declare double @___numba_atomic_double_min(double*, double)', - ir_numba_atomic_min.format(T='double', Ti='i64')), - ] - - for decl, fn in replacements: - llvmir = llvmir.replace(decl, fn) - - llvmir = llvm39_to_34_ir(llvmir) - cu.add_module(llvmir.encode('utf8')) - cu.add_module(libdevice.get()) - - ptx = cu.compile(**opts) - # XXX remove debug_pubnames seems to be necessary sometimes - return patch_ptx_debug_pubnames(ptx) - - -def 
patch_ptx_debug_pubnames(ptx): - """ - Patch PTX to workaround .debug_pubnames NVVM error:: - - ptxas fatal : Internal error: overlapping non-identical data - - """ - while True: - # Repeatedly remove debug_pubnames sections - start = ptx.find(b'.section .debug_pubnames') - if start < 0: - break - stop = ptx.find(b'}', start) - if stop < 0: - raise ValueError('missing "}"') - ptx = ptx[:start] + ptx[stop + 1:] - return ptx - - -re_metadata_def = re.compile(r"\!\d+\s*=") -re_metadata_correct_usage = re.compile(r"metadata\s*\![{'\"0-9]") -re_metadata_ref = re.compile(r"\!\d+") -re_metadata_debuginfo = re.compile(r"\!{i32 \d, \!\"Debug Info Version\", i32 \d}".replace(' ', r'\s+')) - -re_attributes_def = re.compile(r"^attributes #\d+ = \{ ([\w\s]+)\ }") -supported_attributes = {'alwaysinline', 'cold', 'inlinehint', 'minsize', - 'noduplicate', 'noinline', 'noreturn', 'nounwind', - 'optnone', 'optisze', 'readnone', 'readonly'} - -re_getelementptr = re.compile(r"\bgetelementptr\s(?:inbounds )?\(?") - -re_load = re.compile(r"=\s*\bload\s(?:\bvolatile\s)?") - -re_call = re.compile(r"(call\s[^@]+\))(\s@)") -re_range = re.compile(r"\s*!range\s+!\d+") - -re_type_tok = re.compile(r"[,{}()[\]]") - -re_annotations = re.compile(r"\bnonnull\b") - -re_unsupported_keywords = re.compile(r"\b(local_unnamed_addr|writeonly)\b") - - -def llvm39_to_34_ir(ir): - """ - Convert LLVM 3.9 IR for LLVM 3.4. 
- """ - def parse_out_leading_type(s): - par_level = 0 - pos = 0 - # Parse out the first (which may be an aggregate type) - while True: - m = re_type_tok.search(s, pos) - if m is None: - # End of line - raise RuntimeError("failed parsing leading type: %s" % (s,)) - break - pos = m.end() - tok = m.group(0) - if tok == ',': - if par_level == 0: - # End of operand - break - elif tok in '{[(': - par_level += 1 - elif tok in ')]}': - par_level -= 1 - return s[pos:].lstrip() - - buf = [] - for line in ir.splitlines(): - - # Fix llvm.dbg.cu - if line.startswith('!numba.llvm.dbg.cu'): - line = line.replace('!numba.llvm.dbg.cu', '!llvm.dbg.cu') - - # We insert a dummy inlineasm to put debuginfo - if (line.lstrip().startswith('tail call void asm sideeffect "// dbg') and - '!numba.dbg' in line): - # Fix the metadata - line = line.replace('!numba.dbg', '!dbg') - if re_metadata_def.match(line): - # Rewrite metadata since LLVM 3.7 dropped the "metadata" type prefix. - if None is re_metadata_correct_usage.search(line): - # Reintroduce the "metadata" prefix - line = line.replace('!{', 'metadata !{') - line = line.replace('!"', 'metadata !"') - - assigpos = line.find('=') - lhs, rhs = line[:assigpos + 1], line[assigpos + 1:] - - # Fix metadata reference - def fix_metadata_ref(m): - return 'metadata ' + m.group(0) - line = ' '.join((lhs, re_metadata_ref.sub(fix_metadata_ref, rhs))) - if line.startswith('source_filename ='): - continue # skip line - if re_unsupported_keywords.search(line) is not None: - line = re_unsupported_keywords.sub(lambda m: '', line) - - if line.startswith('attributes #'): - # Remove function attributes unsupported pre-3.8 - m = re_attributes_def.match(line) - attrs = m.group(1).split() - attrs = ' '.join(a for a in attrs if a in supported_attributes) - line = line.replace(m.group(1), attrs) - if 'getelementptr ' in line: - # Rewrite "getelementptr ty, ty* ptr, ..." - # to "getelementptr ty *ptr, ..." 
- m = re_getelementptr.search(line) - if m is None: - raise RuntimeError("failed parsing getelementptr: %s" % (line,)) - pos = m.end() - line = line[:pos] + parse_out_leading_type(line[pos:]) - if 'load ' in line: - # Rewrite "load ty, ty* ptr" - # to "load ty *ptr" - m = re_load.search(line) - if m: - pos = m.end() - line = line[:pos] + parse_out_leading_type(line[pos:]) - if 'call ' in line: - # Rewrite "call ty (...) @foo" - # to "call ty (...)* @foo" - line = re_call.sub(r"\1*\2", line) - - # no !range metadata on calls - line = re_range.sub('', line).rstrip(',') - - # Remove unknown annotations - line = re_annotations.sub('', line) - - buf.append(line) - - return '\n'.join(buf) - - -def set_cuda_kernel(lfunc): - from llvmlite.llvmpy.core import MetaData, MetaDataString, Constant, Type - - m = lfunc.module - - ops = lfunc, MetaDataString.get(m, "kernel"), Constant.int(Type.int(), 1) - md = MetaData.get(m, ops) - - nmd = m.get_or_insert_named_metadata('nvvm.annotations') - nmd.add(md) - - # set nvvm ir version - i32 = ir.IntType(32) - md_ver = m.add_metadata([i32(1), i32(2), i32(2), i32(0)]) - m.add_named_metadata('nvvmir.version', md_ver) - - -def fix_data_layout(module): - module.data_layout = default_data_layout diff --git a/numba/numba/cuda/cudaimpl.py b/numba/numba/cuda/cudaimpl.py deleted file mode 100644 index beb248370..000000000 --- a/numba/numba/cuda/cudaimpl.py +++ /dev/null @@ -1,601 +0,0 @@ -from __future__ import print_function, absolute_import, division - -from functools import reduce -import operator - -from llvmlite.llvmpy.core import Type -import llvmlite.llvmpy.core as lc -import llvmlite.binding as ll - -from numba.targets.imputils import Registry -from numba import cgutils -from numba import types -from .cudadrv import nvvm -from . 
import nvvmutils, stubs - -registry = Registry() -lower = registry.lower - - -@lower('ptx.grid.1d', types.intp) -def ptx_grid1d(context, builder, sig, args): - assert len(args) == 1 - return nvvmutils.get_global_id(builder, dim=1) - - -@lower('ptx.grid.2d', types.intp) -def ptx_grid2d(context, builder, sig, args): - assert len(args) == 1 - r1, r2 = nvvmutils.get_global_id(builder, dim=2) - return cgutils.pack_array(builder, [r1, r2]) - - -@lower('ptx.grid.3d', types.intp) -def ptx_grid3d(context, builder, sig, args): - assert len(args) == 1 - r1, r2, r3 = nvvmutils.get_global_id(builder, dim=3) - return cgutils.pack_array(builder, [r1, r2, r3]) - - -@lower('ptx.gridsize.1d', types.intp) -def ptx_gridsize1d(context, builder, sig, args): - assert len(args) == 1 - ntidx = nvvmutils.call_sreg(builder, "ntid.x") - nctaidx = nvvmutils.call_sreg(builder, "nctaid.x") - - res = builder.mul(ntidx, nctaidx) - return res - - -@lower('ptx.gridsize.2d', types.intp) -def ptx_gridsize2d(context, builder, sig, args): - assert len(args) == 1 - ntidx = nvvmutils.call_sreg(builder, "ntid.x") - nctaidx = nvvmutils.call_sreg(builder, "nctaid.x") - - ntidy = nvvmutils.call_sreg(builder, "ntid.y") - nctaidy = nvvmutils.call_sreg(builder, "nctaid.y") - - r1 = builder.mul(ntidx, nctaidx) - r2 = builder.mul(ntidy, nctaidy) - return cgutils.pack_array(builder, [r1, r2]) - - -@lower('ptx.gridsize.3d', types.intp) -def ptx_gridsize3d(context, builder, sig, args): - assert len(args) == 1 - ntidx = nvvmutils.call_sreg(builder, "ntid.x") - nctaidx = nvvmutils.call_sreg(builder, "nctaid.x") - - ntidy = nvvmutils.call_sreg(builder, "ntid.y") - nctaidy = nvvmutils.call_sreg(builder, "nctaid.y") - - ntidz = nvvmutils.call_sreg(builder, "ntid.z") - nctaidz = nvvmutils.call_sreg(builder, "nctaid.z") - - r1 = builder.mul(ntidx, nctaidx) - r2 = builder.mul(ntidy, nctaidy) - r3 = builder.mul(ntidz, nctaidz) - return cgutils.pack_array(builder, [r1, r2, r3]) - - -# 
----------------------------------------------------------------------------- - -def ptx_sreg_template(sreg): - def ptx_sreg_impl(context, builder, sig, args): - assert not args - return nvvmutils.call_sreg(builder, sreg) - - return ptx_sreg_impl - - -# Dynamic create all special register -for sreg in nvvmutils.SREG_MAPPING.keys(): - lower(sreg)(ptx_sreg_template(sreg)) - - -# ----------------------------------------------------------------------------- - -@lower('ptx.cmem.arylike', types.Array) -def ptx_cmem_arylike(context, builder, sig, args): - lmod = builder.module - [arr] = args - flat = arr.flatten(order='A') - aryty = sig.return_type - dtype = aryty.dtype - - if isinstance(dtype, types.Complex): - elemtype = (types.float32 - if dtype == types.complex64 - else types.float64) - constvals = [] - for i in range(flat.size): - elem = flat[i] - real = context.get_constant(elemtype, elem.real) - imag = context.get_constant(elemtype, elem.imag) - constvals.extend([real, imag]) - - elif dtype in types.number_domain: - constvals = [context.get_constant(dtype, flat[i]) - for i in range(flat.size)] - - else: - raise TypeError("unsupport type: %s" % dtype) - - constary = lc.Constant.array(constvals[0].type, constvals) - - addrspace = nvvm.ADDRSPACE_CONSTANT - gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem", - addrspace=addrspace) - gv.linkage = lc.LINKAGE_INTERNAL - gv.global_constant = True - gv.initializer = constary - - # Convert to generic address-space - conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace) - addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace)) - genptr = builder.call(conv, [addrspaceptr]) - - # Create array object - ary = context.make_array(aryty)(context, builder) - kshape = [context.get_constant(types.intp, s) for s in arr.shape] - kstrides = [context.get_constant(types.intp, s) for s in arr.strides] - context.populate_array(ary, - data=builder.bitcast(genptr, ary.data.type), - 
shape=cgutils.pack_array(builder, kshape), - strides=cgutils.pack_array(builder, kstrides), - itemsize=ary.itemsize, - parent=ary.parent, - meminfo=None) - - return ary._getvalue() - - -_unique_smem_id = 0 - - -def _get_unique_smem_id(name): - """Due to bug with NVVM invalid internalizing of shared memory in the - PTX output. We can't mark shared memory to be internal. We have to - ensure unique name is generated for shared memory symbol. - """ - global _unique_smem_id - _unique_smem_id += 1 - return "{0}_{1}".format(name, _unique_smem_id) - - -@lower('ptx.smem.alloc', types.intp, types.Any) -def ptx_smem_alloc_intp(context, builder, sig, args): - length, dtype = args - return _generic_array(context, builder, shape=(length,), dtype=dtype, - symbol_name=_get_unique_smem_id('_cudapy_smem'), - addrspace=nvvm.ADDRSPACE_SHARED, - can_dynsized=True) - - -@lower('ptx.smem.alloc', types.UniTuple, types.Any) -def ptx_smem_alloc_array(context, builder, sig, args): - shape, dtype = args - return _generic_array(context, builder, shape=shape, dtype=dtype, - symbol_name=_get_unique_smem_id('_cudapy_smem'), - addrspace=nvvm.ADDRSPACE_SHARED, - can_dynsized=True) - - -@lower('ptx.lmem.alloc', types.intp, types.Any) -def ptx_lmem_alloc_intp(context, builder, sig, args): - length, dtype = args - return _generic_array(context, builder, shape=(length,), dtype=dtype, - symbol_name='_cudapy_lmem', - addrspace=nvvm.ADDRSPACE_LOCAL, - can_dynsized=False) - - -@lower('ptx.lmem.alloc', types.UniTuple, types.Any) -def ptx_lmem_alloc_array(context, builder, sig, args): - shape, dtype = args - return _generic_array(context, builder, shape=shape, dtype=dtype, - symbol_name='_cudapy_lmem', - addrspace=nvvm.ADDRSPACE_LOCAL, - can_dynsized=False) - - -@lower(stubs.syncthreads) -def ptx_syncthreads(context, builder, sig, args): - assert not args - fname = 'llvm.nvvm.barrier0' - lmod = builder.module - fnty = Type.function(Type.void(), ()) - sync = lmod.get_or_insert_function(fnty, name=fname) - 
builder.call(sync, ()) - return context.get_dummy_value() - - -@lower(stubs.syncthreads_count, types.i4) -def ptx_syncthreads_count(context, builder, sig, args): - fname = 'llvm.nvvm.barrier0.popc' - lmod = builder.module - fnty = Type.function(Type.int(32), (Type.int(32),)) - sync = lmod.get_or_insert_function(fnty, name=fname) - return builder.call(sync, args) - - -@lower(stubs.syncthreads_and, types.i4) -def ptx_syncthreads_and(context, builder, sig, args): - fname = 'llvm.nvvm.barrier0.and' - lmod = builder.module - fnty = Type.function(Type.int(32), (Type.int(32),)) - sync = lmod.get_or_insert_function(fnty, name=fname) - return builder.call(sync, args) - - -@lower(stubs.syncthreads_or, types.i4) -def ptx_syncthreads_or(context, builder, sig, args): - fname = 'llvm.nvvm.barrier0.or' - lmod = builder.module - fnty = Type.function(Type.int(32), (Type.int(32),)) - sync = lmod.get_or_insert_function(fnty, name=fname) - return builder.call(sync, args) - - -@lower(stubs.threadfence_block) -def ptx_threadfence_block(context, builder, sig, args): - assert not args - fname = 'llvm.nvvm.membar.cta' - lmod = builder.module - fnty = Type.function(Type.void(), ()) - sync = lmod.get_or_insert_function(fnty, name=fname) - builder.call(sync, ()) - return context.get_dummy_value() - - -@lower(stubs.threadfence_system) -def ptx_threadfence_system(context, builder, sig, args): - assert not args - fname = 'llvm.nvvm.membar.sys' - lmod = builder.module - fnty = Type.function(Type.void(), ()) - sync = lmod.get_or_insert_function(fnty, name=fname) - builder.call(sync, ()) - return context.get_dummy_value() - - -@lower(stubs.threadfence) -def ptx_threadfence_device(context, builder, sig, args): - assert not args - fname = 'llvm.nvvm.membar.gl' - lmod = builder.module - fnty = Type.function(Type.void(), ()) - sync = lmod.get_or_insert_function(fnty, name=fname) - builder.call(sync, ()) - return context.get_dummy_value() - - -@lower(stubs.syncwarp, types.i4) -def ptx_warp_sync(context, 
builder, sig, args): - fname = 'llvm.nvvm.bar.warp.sync' - lmod = builder.module - fnty = Type.function(Type.void(), (Type.int(32),)) - sync = lmod.get_or_insert_function(fnty, name=fname) - builder.call(sync, args) - return context.get_dummy_value() - - -@lower(stubs.shfl_sync_intrinsic, types.i4, types.i4, types.i4, types.i4, types.i4) -@lower(stubs.shfl_sync_intrinsic, types.i4, types.i4, types.i8, types.i4, types.i4) -@lower(stubs.shfl_sync_intrinsic, types.i4, types.i4, types.f4, types.i4, types.i4) -@lower(stubs.shfl_sync_intrinsic, types.i4, types.i4, types.f8, types.i4, types.i4) -def ptx_shfl_sync_i32(context, builder, sig, args): - """ - The NVVM intrinsic for shfl only supports i32, but the cuda intrinsic function supports - both 32 and 64 bit ints and floats, so for feature parity, i64, f32, and f64 are implemented. - Floats by way of bitcasting the float to an int, then shuffling, then bitcasting back. - And 64-bit values by packing them into 2 32bit values, shuffling thoose, and then packing back together. 
- """ - mask, mode, value, index, clamp = args - value_type = sig.args[2] - if value_type in types.real_domain: - value = builder.bitcast(value, Type.int(value_type.bitwidth)) - fname = 'llvm.nvvm.shfl.sync.i32' - lmod = builder.module - fnty = Type.function( - Type.struct((Type.int(32), Type.int(1))), - (Type.int(32), Type.int(32), Type.int(32), Type.int(32), Type.int(32)) - ) - func = lmod.get_or_insert_function(fnty, name=fname) - if value_type.bitwidth == 32: - ret = builder.call(func, (mask, mode, value, index, clamp)) - if value_type == types.float32: - rv = builder.extract_value(ret, 0) - pred = builder.extract_value(ret, 1) - fv = builder.bitcast(rv, Type.float()) - ret = cgutils.make_anonymous_struct(builder, (fv, pred)) - else: - value1 = builder.trunc(value, Type.int(32)) - value_lshr = builder.lshr(value, context.get_constant(types.i8, 32)) - value2 = builder.trunc(value_lshr, Type.int(32)) - ret1 = builder.call(func, (mask, mode, value1, index, clamp)) - ret2 = builder.call(func, (mask, mode, value2, index, clamp)) - rv1 = builder.extract_value(ret1, 0) - rv2 = builder.extract_value(ret2, 0) - pred = builder.extract_value(ret1, 1) - rv1_64 = builder.zext(rv1, Type.int(64)) - rv2_64 = builder.zext(rv2, Type.int(64)) - rv_shl = builder.shl(rv2_64, context.get_constant(types.i8, 32)) - rv = builder.or_(rv_shl, rv1_64) - if value_type == types.float64: - rv = builder.bitcast(rv, Type.double()) - ret = cgutils.make_anonymous_struct(builder, (rv, pred)) - return ret - - -@lower(stubs.vote_sync_intrinsic, types.i4, types.i4, types.boolean) -def ptx_vote_sync(context, builder, sig, args): - fname = 'llvm.nvvm.vote.sync' - lmod = builder.module - fnty = Type.function(Type.struct((Type.int(32), Type.int(1))), - (Type.int(32), Type.int(32), Type.int(1))) - func = lmod.get_or_insert_function(fnty, name=fname) - return builder.call(func, args) - - -@lower(stubs.match_any_sync, types.i4, types.i4) -@lower(stubs.match_any_sync, types.i4, types.i8) 
-@lower(stubs.match_any_sync, types.i4, types.f4) -@lower(stubs.match_any_sync, types.i4, types.f8) -def ptx_match_any_sync(context, builder, sig, args): - mask, value = args - width = sig.args[1].bitwidth - if sig.args[1] in types.real_domain: - value = builder.bitcast(value, Type.int(width)) - fname = 'llvm.nvvm.match.any.sync.i{}'.format(width) - lmod = builder.module - fnty = Type.function(Type.int(32), (Type.int(32), Type.int(width))) - func = lmod.get_or_insert_function(fnty, name=fname) - return builder.call(func, (mask, value)) - - -@lower(stubs.match_all_sync, types.i4, types.i4) -@lower(stubs.match_all_sync, types.i4, types.i8) -@lower(stubs.match_all_sync, types.i4, types.f4) -@lower(stubs.match_all_sync, types.i4, types.f8) -def ptx_match_all_sync(context, builder, sig, args): - mask, value = args - width = sig.args[1].bitwidth - if sig.args[1] in types.real_domain: - value = builder.bitcast(value, Type.int(width)) - fname = 'llvm.nvvm.match.all.sync.i{}'.format(width) - lmod = builder.module - fnty = Type.function(Type.struct((Type.int(32), Type.int(1))), - (Type.int(32), Type.int(width))) - func = lmod.get_or_insert_function(fnty, name=fname) - return builder.call(func, (mask, value)) - - -@lower(stubs.popc, types.Any) -def ptx_popc(context, builder, sig, args): - return builder.ctpop(args[0]) - - -@lower(stubs.brev, types.u4) -def ptx_brev_u4(context, builder, sig, args): - # FIXME the llvm.bitreverse.i32 intrinsic isn't supported by nvcc - # return builder.bitreverse(args[0]) - - fn = builder.module.get_or_insert_function( - lc.Type.function(lc.Type.int(32), (lc.Type.int(32),)), - '__nv_brev') - return builder.call(fn, args) - - -@lower(stubs.brev, types.u8) -def ptx_brev_u8(context, builder, sig, args): - # FIXME the llvm.bitreverse.i64 intrinsic isn't supported by nvcc - # return builder.bitreverse(args[0]) - - fn = builder.module.get_or_insert_function( - lc.Type.function(lc.Type.int(64), (lc.Type.int(64),)), - '__nv_brevll') - return 
builder.call(fn, args) - - -@lower(stubs.clz, types.Any) -def ptx_clz(context, builder, sig, args): - return builder.ctlz( - args[0], - context.get_constant(types.boolean, 0)) - - -@lower(stubs.ffs, types.Any) -def ptx_ffs(context, builder, sig, args): - return builder.cttz( - args[0], - context.get_constant(types.boolean, 0)) - - -@lower(stubs.selp, types.Any, types.Any, types.Any) -def ptx_selp(context, builder, sig, args): - test, a, b = args - return builder.select(test, a, b) - - -def _normalize_indices(context, builder, indty, inds): - """ - Convert integer indices into tuple of intp - """ - if indty in types.integer_domain: - indty = types.UniTuple(dtype=indty, count=1) - indices = [inds] - else: - indices = cgutils.unpack_tuple(builder, inds, count=len(indty)) - indices = [context.cast(builder, i, t, types.intp) - for t, i in zip(indty, indices)] - return indty, indices - - -def _atomic_dispatcher(dispatch_fn): - def imp(context, builder, sig, args): - # The common argument handling code - aryty, indty, valty = sig.args - ary, inds, val = args - dtype = aryty.dtype - - indty, indices = _normalize_indices(context, builder, indty, inds) - - if dtype != valty: - raise TypeError("expect %s but got %s" % (dtype, valty)) - - if aryty.ndim != len(indty): - raise TypeError("indexing %d-D array with %d-D index" % - (aryty.ndim, len(indty))) - - lary = context.make_array(aryty)(context, builder, ary) - ptr = cgutils.get_item_pointer(builder, aryty, lary, indices) - # dispatcher to implementation base on dtype - return dispatch_fn(context, builder, dtype, ptr, val) - return imp - - -@lower(stubs.atomic.add, types.Array, types.intp, types.Any) -@lower(stubs.atomic.add, types.Array, types.UniTuple, types.Any) -@lower(stubs.atomic.add, types.Array, types.Tuple, types.Any) -@_atomic_dispatcher -def ptx_atomic_add_tuple(context, builder, dtype, ptr, val): - if dtype == types.float32: - lmod = builder.module - return builder.call(nvvmutils.declare_atomic_add_float32(lmod), 
(ptr, val)) - elif dtype == types.float64: - lmod = builder.module - return builder.call(nvvmutils.declare_atomic_add_float64(lmod), (ptr, val)) - else: - return builder.atomic_rmw('add', ptr, val, 'monotonic') - - -@lower(stubs.atomic.max, types.Array, types.intp, types.Any) -@lower(stubs.atomic.max, types.Array, types.Tuple, types.Any) -@lower(stubs.atomic.max, types.Array, types.UniTuple, types.Any) -@_atomic_dispatcher -def ptx_atomic_max(context, builder, dtype, ptr, val): - lmod = builder.module - if dtype == types.float64: - return builder.call(nvvmutils.declare_atomic_max_float64(lmod), (ptr, val)) - elif dtype == types.float32: - return builder.call(nvvmutils.declare_atomic_max_float32(lmod), (ptr, val)) - elif dtype in (types.int32, types.int64): - return builder.atomic_rmw('max', ptr, val, ordering='monotonic') - elif dtype in (types.uint32, types.uint64): - return builder.atomic_rmw('umax', ptr, val, ordering='monotonic') - else: - raise TypeError('Unimplemented atomic max with %s array' % dtype) - - -@lower(stubs.atomic.min, types.Array, types.intp, types.Any) -@lower(stubs.atomic.min, types.Array, types.Tuple, types.Any) -@lower(stubs.atomic.min, types.Array, types.UniTuple, types.Any) -@_atomic_dispatcher -def ptx_atomic_min(context, builder, dtype, ptr, val): - lmod = builder.module - if dtype == types.float64: - return builder.call(nvvmutils.declare_atomic_min_float64(lmod), (ptr, val)) - elif dtype == types.float32: - return builder.call(nvvmutils.declare_atomic_min_float32(lmod), (ptr, val)) - elif dtype in (types.int32, types.int64): - return builder.atomic_rmw('min', ptr, val, ordering='monotonic') - elif dtype in (types.uint32, types.uint64): - return builder.atomic_rmw('umin', ptr, val, ordering='monotonic') - else: - raise TypeError('Unimplemented atomic min with %s array' % dtype) - - -@lower(stubs.atomic.compare_and_swap, types.Array, types.Any, types.Any) -def ptx_atomic_cas_tuple(context, builder, sig, args): - aryty, oldty, valty = 
sig.args - ary, old, val = args - dtype = aryty.dtype - - lary = context.make_array(aryty)(context, builder, ary) - zero = context.get_constant(types.intp, 0) - ptr = cgutils.get_item_pointer(builder, aryty, lary, (zero,)) - if aryty.dtype == types.int32: - lmod = builder.module - return builder.call(nvvmutils.declare_atomic_cas_int32(lmod), (ptr, old, val)) - else: - raise TypeError('Unimplemented atomic compare_and_swap with %s array' % dtype) - - -# ----------------------------------------------------------------------------- - - -def _get_target_data(context): - return ll.create_target_data(nvvm.data_layout[context.address_size]) - - -def _generic_array(context, builder, shape, dtype, symbol_name, addrspace, - can_dynsized=False): - elemcount = reduce(operator.mul, shape) - lldtype = context.get_data_type(dtype) - laryty = Type.array(lldtype, elemcount) - - if addrspace == nvvm.ADDRSPACE_LOCAL: - # Special case local addrespace allocation to use alloca - # NVVM is smart enough to only use local memory if no register is - # available - dataptr = cgutils.alloca_once(builder, laryty, name=symbol_name) - else: - lmod = builder.module - - # Create global variable in the requested address-space - gvmem = lmod.add_global_variable(laryty, symbol_name, addrspace) - # Specify alignment to avoid misalignment bug - gvmem.align = context.get_abi_sizeof(lldtype) - - if elemcount <= 0: - if can_dynsized: # dynamic shared memory - gvmem.linkage = lc.LINKAGE_EXTERNAL - else: - raise ValueError("array length <= 0") - else: - ## Comment out the following line to workaround a NVVM bug - ## which generates a invalid symbol name when the linkage - ## is internal and in some situation. 
- ## See _get_unique_smem_id() - # gvmem.linkage = lc.LINKAGE_INTERNAL - - gvmem.initializer = lc.Constant.undef(laryty) - - if dtype not in types.number_domain: - raise TypeError("unsupported type: %s" % dtype) - - # Convert to generic address-space - conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace) - addrspaceptr = gvmem.bitcast(Type.pointer(Type.int(8), addrspace)) - dataptr = builder.call(conv, [addrspaceptr]) - - return _make_array(context, builder, dataptr, dtype, shape) - - -def _make_array(context, builder, dataptr, dtype, shape, layout='C'): - ndim = len(shape) - # Create array object - aryty = types.Array(dtype=dtype, ndim=ndim, layout='C') - ary = context.make_array(aryty)(context, builder) - - targetdata = _get_target_data(context) - lldtype = context.get_data_type(dtype) - itemsize = lldtype.get_abi_size(targetdata) - # Compute strides - rstrides = [itemsize] - for i, lastsize in enumerate(reversed(shape[1:])): - rstrides.append(lastsize * rstrides[-1]) - strides = [s for s in reversed(rstrides)] - - kshape = [context.get_constant(types.intp, s) for s in shape] - kstrides = [context.get_constant(types.intp, s) for s in strides] - - context.populate_array(ary, - data=builder.bitcast(dataptr, ary.data.type), - shape=cgutils.pack_array(builder, kshape), - strides=cgutils.pack_array(builder, kstrides), - itemsize=context.get_constant(types.intp, itemsize), - meminfo=None) - return ary._getvalue() diff --git a/numba/numba/cuda/cudamath.py b/numba/numba/cuda/cudamath.py deleted file mode 100644 index 12e98810d..000000000 --- a/numba/numba/cuda/cudamath.py +++ /dev/null @@ -1,97 +0,0 @@ -from __future__ import print_function, absolute_import, division -import math -from numba import types, utils -from numba.typing.templates import (AttributeTemplate, ConcreteTemplate, - signature, Registry) - -registry = Registry() -infer_global = registry.register_global - - -@infer_global(math.acos) -@infer_global(math.acosh) -@infer_global(math.asin) 
-@infer_global(math.asinh) -@infer_global(math.atan) -@infer_global(math.atanh) -@infer_global(math.ceil) -@infer_global(math.cos) -@infer_global(math.cosh) -@infer_global(math.degrees) -@infer_global(math.erf) -@infer_global(math.erfc) -@infer_global(math.exp) -@infer_global(math.expm1) -@infer_global(math.fabs) -@infer_global(math.floor) -@infer_global(math.gamma) -@infer_global(math.lgamma) -@infer_global(math.log) -@infer_global(math.log10) -@infer_global(math.log1p) -@infer_global(math.radians) -@infer_global(math.sin) -@infer_global(math.sinh) -@infer_global(math.sqrt) -@infer_global(math.tan) -@infer_global(math.tanh) -@infer_global(math.trunc) -class Math_unary(ConcreteTemplate): - cases = [ - signature(types.float64, types.int64), - signature(types.float64, types.uint64), - signature(types.float32, types.float32), - signature(types.float64, types.float64), - ] - - -@infer_global(math.atan2) -class Math_atan2(ConcreteTemplate): - key = math.atan2 - cases = [ - signature(types.float64, types.int64, types.int64), - signature(types.float64, types.uint64, types.uint64), - signature(types.float32, types.float32, types.float32), - signature(types.float64, types.float64, types.float64), - ] - - -@infer_global(math.hypot) -class Math_hypot(ConcreteTemplate): - key = math.hypot - cases = [ - signature(types.float64, types.int64, types.int64), - signature(types.float64, types.uint64, types.uint64), - signature(types.float32, types.float32, types.float32), - signature(types.float64, types.float64, types.float64), - ] - - -@infer_global(math.copysign) -@infer_global(math.fmod) -class Math_binary(ConcreteTemplate): - cases = [ - signature(types.float32, types.float32, types.float32), - signature(types.float64, types.float64, types.float64), - ] - - -@infer_global(math.pow) -class Math_pow(ConcreteTemplate): - cases = [ - signature(types.float32, types.float32, types.float32), - signature(types.float64, types.float64, types.float64), - signature(types.float32, 
types.float32, types.int32), - signature(types.float64, types.float64, types.int32), - ] - - -@infer_global(math.isinf) -@infer_global(math.isnan) -class Math_isnan(ConcreteTemplate): - cases = [ - signature(types.boolean, types.int64), - signature(types.boolean, types.uint64), - signature(types.boolean, types.float32), - signature(types.boolean, types.float64), - ] diff --git a/numba/numba/cuda/decorators.py b/numba/numba/cuda/decorators.py deleted file mode 100644 index 53fac2962..000000000 --- a/numba/numba/cuda/decorators.py +++ /dev/null @@ -1,127 +0,0 @@ -from __future__ import print_function, absolute_import, division -from numba import config, sigutils, types -from warnings import warn -from .compiler import (compile_kernel, compile_device, declare_device_function, - AutoJitCUDAKernel, compile_device_template) -from .simulator.kernel import FakeCUDAKernel - - -def jitdevice(func, link=[], debug=None, inline=False): - """Wrapper for device-jit. - """ - debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug - if link: - raise ValueError("link keyword invalid for device function") - return compile_device_template(func, debug=debug, inline=inline) - - -def jit(func_or_sig=None, argtypes=None, device=False, inline=False, bind=True, - link=[], debug=None, **kws): - """ - JIT compile a python function conforming to the CUDA Python specification. - If a signature is supplied, then a function is returned that takes a - function to compile. If - - :param func_or_sig: A function to JIT compile, or a signature of a function - to compile. If a function is supplied, then an :class:`AutoJitCUDAKernel` - is returned. If a signature is supplied, then a function which takes a - function to compile and returns an :class:`AutoJitCUDAKernel` is - returned. - - .. note:: A kernel cannot have any return value. - :type func_or_sig: function or numba.typing.Signature - :param device: Indicates whether this is a device function. 
- :type device: bool - :param bind: Force binding to CUDA context immediately - :type bind: bool - :param link: A list of files containing PTX source to link with the function - :type link: list - :param debug: If True, check for exceptions thrown when executing the - kernel. Since this degrades performance, this should only be used for - debugging purposes. Defaults to False. (The default value can be - overriden by setting environment variable ``NUMBA_CUDA_DEBUGINFO=1``.) - :param fastmath: If true, enables flush-to-zero and fused-multiply-add, - disables precise division and square root. This parameter has no effect - on device function, whose fastmath setting depends on the kernel function - from which they are called. - """ - debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug - - if link and config.ENABLE_CUDASIM: - raise NotImplementedError('Cannot link PTX in the simulator') - - fastmath = kws.get('fastmath', False) - if argtypes is None and not sigutils.is_signature(func_or_sig): - if func_or_sig is None: - if config.ENABLE_CUDASIM: - def autojitwrapper(func): - return FakeCUDAKernel(func, device=device, fastmath=fastmath, - debug=debug) - else: - def autojitwrapper(func): - return jit(func, device=device, bind=bind, debug=debug, - **kws) - - return autojitwrapper - # func_or_sig is a function - else: - if config.ENABLE_CUDASIM: - return FakeCUDAKernel(func_or_sig, device=device, fastmath=fastmath, - debug=debug) - elif device: - return jitdevice(func_or_sig, debug=debug, **kws) - else: - targetoptions = kws.copy() - targetoptions['debug'] = debug - return AutoJitCUDAKernel(func_or_sig, bind=bind, targetoptions=targetoptions) - - else: - if config.ENABLE_CUDASIM: - def jitwrapper(func): - return FakeCUDAKernel(func, device=device, fastmath=fastmath, - debug=debug) - return jitwrapper - - restype, argtypes = convert_types(func_or_sig, argtypes) - - if restype and not device and restype != types.void: - raise TypeError("CUDA kernel must have 
void return type.") - - def kernel_jit(func): - kernel = compile_kernel(func, argtypes, link=link, debug=debug, - inline=inline, fastmath=fastmath) - - # Force compilation for the current context - if bind: - kernel.bind() - - return kernel - - def device_jit(func): - return compile_device(func, restype, argtypes, inline=inline, - debug=debug) - - if device: - return device_jit - else: - return kernel_jit - - -def autojit(*args, **kwargs): - warn('autojit is deprecated and will be removed in a future release. Use jit instead.') - return jit(*args, **kwargs) - - -def declare_device(name, restype=None, argtypes=None): - restype, argtypes = convert_types(restype, argtypes) - return declare_device_function(name, restype, argtypes) - - -def convert_types(restype, argtypes): - # eval type string - if sigutils.is_signature(restype): - assert argtypes is None - argtypes, restype = sigutils.normalize_signature(restype) - - return restype, argtypes - diff --git a/numba/numba/cuda/descriptor.py b/numba/numba/cuda/descriptor.py deleted file mode 100644 index b92d79e5c..000000000 --- a/numba/numba/cuda/descriptor.py +++ /dev/null @@ -1,14 +0,0 @@ -from __future__ import print_function, division, absolute_import -from numba.targets.descriptors import TargetDescriptor -from numba.targets.options import TargetOptions -from .target import CUDATargetContext, CUDATypingContext - - -class CPUTargetOptions(TargetOptions): - OPTIONS = {} - - -class CUDATargetDesc(TargetDescriptor): - options = CPUTargetOptions - typingctx = CUDATypingContext() - targetctx = CUDATargetContext(typingctx) diff --git a/numba/numba/cuda/device_init.py b/numba/numba/cuda/device_init.py deleted file mode 100644 index eea458b70..000000000 --- a/numba/numba/cuda/device_init.py +++ /dev/null @@ -1,51 +0,0 @@ -from __future__ import print_function, absolute_import, division - -# Re export -from .stubs import (threadIdx, blockIdx, blockDim, gridDim, laneid, - warpsize, syncthreads, syncthreads_count, syncwarp, - 
syncthreads_and, syncthreads_or, shared, local, - const, grid, gridsize, atomic, shfl_sync_intrinsic, - vote_sync_intrinsic, match_any_sync, match_all_sync, - threadfence_block, threadfence_system, - threadfence, selp, popc, brev, clz, ffs) -from .cudadrv.error import CudaSupportError -from .cudadrv import nvvm -from . import initialize -from .errors import KernelRuntimeError - -from .decorators import jit, autojit, declare_device -from .api import * -from .api import _auto_device - -from .kernels import reduction -reduce = Reduce = reduction.Reduce - -from .intrinsic_wrapper import (all_sync, any_sync, eq_sync, ballot_sync, - shfl_sync, shfl_up_sync, shfl_down_sync, - shfl_xor_sync) - - -def is_available(): - """Returns a boolean to indicate the availability of a CUDA GPU. - - This will initialize the driver if it hasn't been initialized. - """ - # whilst `driver.is_available` will init the driver itself, - # the driver initialization may raise and as a result break - # test discovery/orchestration as `cuda.is_available` is often - # used as a guard for whether to run a CUDA test, the try/except - # below is to handle this case. - driver_is_available = False - try: - driver_is_available = driver.driver.is_available - except CudaSupportError: - pass - - return driver_is_available and nvvm.is_available() - -def cuda_error(): - """Returns None or an exception if the CUDA driver fails to initialize. 
- """ - return driver.driver.initialization_error - -initialize.initialize_all() diff --git a/numba/numba/cuda/dispatcher.py b/numba/numba/cuda/dispatcher.py deleted file mode 100644 index a3863623a..000000000 --- a/numba/numba/cuda/dispatcher.py +++ /dev/null @@ -1,231 +0,0 @@ -from __future__ import absolute_import, print_function - -import numpy as np - -from numba.targets.descriptors import TargetDescriptor -from numba.targets.options import TargetOptions -from numba import cuda -from numba.cuda import jit, autojit -from numba.cuda.cudadrv import devicearray -from .descriptor import CUDATargetDesc -from numba.npyufunc.deviceufunc import (UFuncMechanism, GenerializedUFunc, - GUFuncCallSteps) - - -class CUDADispatcher(object): - targetdescr = CUDATargetDesc - - def __init__(self, py_func, locals={}, targetoptions={}): - assert not locals - self.py_func = py_func - self.targetoptions = targetoptions - self.doc = py_func.__doc__ - self._compiled = None - - def compile(self, sig, locals={}, **targetoptions): - assert self._compiled is None - assert not locals - options = self.targetoptions.copy() - options.update(targetoptions) - kernel = jit(sig, **options)(self.py_func) - self._compiled = kernel - if hasattr(kernel, "_npm_context_"): - self._npm_context_ = kernel._npm_context_ - - @property - def compiled(self): - if self._compiled is None: - self._compiled = autojit(self.py_func, **self.targetoptions) - return self._compiled - - def __call__(self, *args, **kws): - return self.compiled(*args, **kws) - - def disable_compile(self, val=True): - """Disable the compilation of new signatures at call time. - """ - # Do nothing - pass - - def configure(self, *args, **kws): - return self.compiled.configure(*args, **kws) - - def __getitem__(self, *args): - return self.compiled.__getitem__(*args) - - def __getattr__(self, key): - return getattr(self.compiled, key) - - -class CUDAUFuncDispatcher(object): - """ - Invoke the CUDA ufunc specialization for the given inputs. 
- """ - - def __init__(self, types_to_retty_kernels): - self.functions = types_to_retty_kernels - self._maxblocksize = 0 # ignored - - @property - def max_blocksize(self): - return self._maxblocksize - - @max_blocksize.setter - def max_blocksize(self, blksz): - self._max_blocksize = blksz - - def __call__(self, *args, **kws): - """ - *args: numpy arrays or DeviceArrayBase (created by cuda.to_device). - Cannot mix the two types in one call. - - **kws: - stream -- cuda stream; when defined, asynchronous mode is used. - out -- output array. Can be a numpy array or DeviceArrayBase - depending on the input arguments. Type must match - the input arguments. - """ - return CUDAUFuncMechanism.call(self.functions, args, kws) - - def reduce(self, arg, stream=0): - assert len(list(self.functions.keys())[0]) == 2, "must be a binary " \ - "ufunc" - assert arg.ndim == 1, "must use 1d array" - - n = arg.shape[0] - gpu_mems = [] - - if n == 0: - raise TypeError("Reduction on an empty array.") - elif n == 1: # nothing to do - return arg[0] - - # always use a stream - stream = stream or cuda.stream() - with stream.auto_synchronize(): - # transfer memory to device if necessary - if devicearray.is_cuda_ndarray(arg): - mem = arg - else: - mem = cuda.to_device(arg, stream) - # do reduction - out = self.__reduce(mem, gpu_mems, stream) - # use a small buffer to store the result element - buf = np.array((1,), dtype=arg.dtype) - out.copy_to_host(buf, stream=stream) - - return buf[0] - - def __reduce(self, mem, gpu_mems, stream): - n = mem.shape[0] - if n % 2 != 0: # odd? - fatcut, thincut = mem.split(n - 1) - # prevent freeing during async mode - gpu_mems.append(fatcut) - gpu_mems.append(thincut) - # execute the kernel - out = self.__reduce(fatcut, gpu_mems, stream) - gpu_mems.append(out) - return self(out, thincut, out=out, stream=stream) - else: # even? 
- left, right = mem.split(n // 2) - # prevent freeing during async mode - gpu_mems.append(left) - gpu_mems.append(right) - # execute the kernel - self(left, right, out=left, stream=stream) - if n // 2 > 1: - return self.__reduce(left, gpu_mems, stream) - else: - return left - - -class _CUDAGUFuncCallSteps(GUFuncCallSteps): - __slots__ = [ - '_stream', - ] - - def is_device_array(self, obj): - return cuda.is_cuda_array(obj) - - def as_device_array(self, obj): - return cuda.as_cuda_array(obj) - - def to_device(self, hostary): - return cuda.to_device(hostary, stream=self._stream) - - def to_host(self, devary, hostary): - out = devary.copy_to_host(hostary, stream=self._stream) - return out - - def device_array(self, shape, dtype): - return cuda.device_array(shape=shape, dtype=dtype, stream=self._stream) - - def prepare_inputs(self): - self._stream = self.kwargs.get('stream', 0) - - def launch_kernel(self, kernel, nelem, args): - kernel.forall(nelem, stream=self._stream)(*args) - - -class CUDAGenerializedUFunc(GenerializedUFunc): - @property - def _call_steps(self): - return _CUDAGUFuncCallSteps - - def _broadcast_scalar_input(self, ary, shape): - return devicearray.DeviceNDArray(shape=shape, - strides=(0,), - dtype=ary.dtype, - gpu_data=ary.gpu_data) - - def _broadcast_add_axis(self, ary, newshape): - newax = len(newshape) - len(ary.shape) - # Add 0 strides for missing dimension - newstrides = (0,) * newax + ary.strides - return devicearray.DeviceNDArray(shape=newshape, - strides=newstrides, - dtype=ary.dtype, - gpu_data=ary.gpu_data) - - -class CUDAUFuncMechanism(UFuncMechanism): - """ - Provide OpenCL specialization - """ - DEFAULT_STREAM = 0 - ARRAY_ORDER = 'A' - - def launch(self, func, count, stream, args): - func.forall(count, stream=stream)(*args) - - def is_device_array(self, obj): - return cuda.is_cuda_array(obj) - - def as_device_array(self, obj): - return cuda.as_cuda_array(obj) - - def to_device(self, hostary, stream): - return cuda.to_device(hostary, 
stream=stream) - - def to_host(self, devary, stream): - return devary.copy_to_host(stream=stream) - - def device_array(self, shape, dtype, stream): - return cuda.device_array(shape=shape, dtype=dtype, stream=stream) - - def broadcast_device(self, ary, shape): - ax_differs = [ax for ax in range(len(shape)) - if ax >= ary.ndim - or ary.shape[ax] != shape[ax]] - - missingdim = len(shape) - len(ary.shape) - strides = [0] * missingdim + list(ary.strides) - - for ax in ax_differs: - strides[ax] = 0 - - return devicearray.DeviceNDArray(shape=shape, - strides=strides, - dtype=ary.dtype, - gpu_data=ary.gpu_data) diff --git a/numba/numba/cuda/errors.py b/numba/numba/cuda/errors.py deleted file mode 100644 index 9bd75770d..000000000 --- a/numba/numba/cuda/errors.py +++ /dev/null @@ -1,41 +0,0 @@ -from __future__ import print_function, absolute_import - -import numbers - - -class KernelRuntimeError(RuntimeError): - def __init__(self, msg, tid=None, ctaid=None): - self.tid = tid - self.ctaid = ctaid - self.msg = msg - t = ("An exception was raised in thread=%s block=%s\n" - "\t%s") - msg = t % (self.tid, self.ctaid, self.msg) - super(KernelRuntimeError, self).__init__(msg) - - -def normalize_kernel_dimensions(griddim, blockdim): - """ - Normalize and validate the user-supplied kernel dimensions. 
- """ - - def check_dim(dim, name): - if not isinstance(dim, (tuple, list)): - dim = [dim] - else: - dim = list(dim) - if len(dim) > 3: - raise ValueError('%s must be a sequence of 1, 2 or 3 integers, got %r' - % (name, dim)) - for v in dim: - if not isinstance(v, numbers.Integral): - raise TypeError('%s must be a sequence of integers, got %r' - % (name, dim)) - while len(dim) < 3: - dim.append(1) - return dim - - griddim = check_dim(griddim, 'griddim') - blockdim = check_dim(blockdim, 'blockdim') - - return griddim, blockdim diff --git a/numba/numba/cuda/initialize.py b/numba/numba/cuda/initialize.py deleted file mode 100644 index 4b4878367..000000000 --- a/numba/numba/cuda/initialize.py +++ /dev/null @@ -1,11 +0,0 @@ -from __future__ import absolute_import, print_function - - -def init_jit(): - from numba.cuda.dispatcher import CUDADispatcher - return CUDADispatcher - -def initialize_all(): - from numba.targets.registry import dispatcher_registry - dispatcher_registry.ondemand['gpu'] = init_jit - dispatcher_registry.ondemand['cuda'] = init_jit diff --git a/numba/numba/cuda/intrinsic_wrapper.py b/numba/numba/cuda/intrinsic_wrapper.py deleted file mode 100644 index d81bf69a0..000000000 --- a/numba/numba/cuda/intrinsic_wrapper.py +++ /dev/null @@ -1,78 +0,0 @@ -from __future__ import absolute_import -from .decorators import jit -import numba - - -@jit(device=True) -def all_sync(mask, predicate): - """ - If for all threads in the masked warp the predicate is true, then - a non-zero value is returned, otherwise 0 is returned. - """ - return numba.cuda.vote_sync_intrinsic(mask, 0, predicate)[1] - - -@jit(device=True) -def any_sync(mask, predicate): - """ - If for any thread in the masked warp the predicate is true, then - a non-zero value is returned, otherwise 0 is returned. 
- """ - return numba.cuda.vote_sync_intrinsic(mask, 1, predicate)[1] - - -@jit(device=True) -def eq_sync(mask, predicate): - """ - If for all threads in the masked warp the boolean predicate is the same, - then a non-zero value is returned, otherwise 0 is returned. - """ - return numba.cuda.vote_sync_intrinsic(mask, 2, predicate)[1] - - -@jit(device=True) -def ballot_sync(mask, predicate): - """ - Returns a mask of all threads in the warp whoose predicate is true, - and are within the given mask. - """ - return numba.cuda.vote_sync_intrinsic(mask, 3, predicate)[0] - - -@jit(device=True) -def shfl_sync(mask, value, src_lane): - """ - Shuffles value across the masked warp and returns the value - from src_lane. If this is outside the warp, then the - given value is returned. - """ - return numba.cuda.shfl_sync_intrinsic(mask, 0, value, src_lane, 0x1f)[0] - - -@jit(device=True) -def shfl_up_sync(mask, value, delta): - """ - Shuffles value across the masked warp and returns the value - from (laneid - delta). If this is outside the warp, then the - given value is returned. - """ - return numba.cuda.shfl_sync_intrinsic(mask, 1, value, delta, 0)[0] - - -@jit(device=True) -def shfl_down_sync(mask, value, delta): - """ - Shuffles value across the masked warp and returns the value - from (laneid + delta). If this is outside the warp, then the - given value is returned. - """ - return numba.cuda.shfl_sync_intrinsic(mask, 2, value, delta, 0x1f)[0] - - -@jit(device=True) -def shfl_xor_sync(mask, value, lane_mask): - """ - Shuffles value across the masked warp and returns the value - from (laneid ^ lane_mask). 
- """ - return numba.cuda.shfl_sync_intrinsic(mask, 3, value, lane_mask, 0x1f)[0] diff --git a/numba/numba/cuda/kernels/__init__.py b/numba/numba/cuda/kernels/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/numba/numba/cuda/kernels/reduction.py b/numba/numba/cuda/kernels/reduction.py deleted file mode 100644 index 84904e6d4..000000000 --- a/numba/numba/cuda/kernels/reduction.py +++ /dev/null @@ -1,263 +0,0 @@ -""" -A library written in CUDA Python for generating reduction kernels -""" -from __future__ import division - -from numba.numpy_support import from_dtype - - -_WARPSIZE = 32 -_NUMWARPS = 4 - - -def _gpu_reduce_factory(fn, nbtype): - from numba import cuda - - reduce_op = cuda.jit(device=True)(fn) - inner_sm_size = _WARPSIZE + 1 # plus one to avoid SM collision - max_blocksize = _NUMWARPS * _WARPSIZE - - @cuda.jit(device=True) - def inner_warp_reduction(sm_partials, init): - """ - Compute reduction within a single warp - """ - tid = cuda.threadIdx.x - warpid = tid // _WARPSIZE - laneid = tid % _WARPSIZE - - sm_this = sm_partials[warpid, :] - sm_this[laneid] = init - # XXX expect warp synchronization - width = _WARPSIZE // 2 - while width: - if laneid < width: - old = sm_this[laneid] - sm_this[laneid] = reduce_op(old, sm_this[laneid + width]) - width //= 2 - # XXX expect warp synchronization - - @cuda.jit(device=True) - def device_reduce_full_block(arr, partials, sm_partials): - """ - Partially reduce `arr` into `partials` using `sm_partials` as working - space. The algorithm goes like: - - array chunks of 128: | 0 | 128 | 256 | 384 | 512 | - block-0: | x | | | x | | - block-1: | | x | | | x | - block-2: | | | x | | | - - The array is divided into chunks of 128 (size of a threadblock). - The threadblocks consumes the chunks in roundrobin scheduling. - First, a threadblock loads a chunk into temp memory. Then, all - subsequent chunks are combined into the temp memory. - - Once all chunks are processed. 
Inner-block reduction is performed - on the temp memory. So that, there will just be one scalar result - per block. The result from each block is stored to `partials` at - the dedicated slot. - """ - tid = cuda.threadIdx.x - blkid = cuda.blockIdx.x - blksz = cuda.blockDim.x - gridsz = cuda.gridDim.x - - # block strided loop to compute the reduction - start = tid + blksz * blkid - stop = arr.size - step = blksz * gridsz - - # load first value - tmp = arr[start] - # loop over all values in block-stride - for i in range(start + step, stop, step): - tmp = reduce_op(tmp, arr[i]) - - cuda.syncthreads() - # inner-warp reduction - inner_warp_reduction(sm_partials, tmp) - - cuda.syncthreads() - # at this point, only the first slot for each warp in tsm_partials - # is valid. - - # finish up block reduction - # warning: this is assuming 4 warps. - # assert numwarps == 4 - if tid < 2: - sm_partials[tid, 0] = reduce_op(sm_partials[tid, 0], - sm_partials[tid + 2, 0]) - if tid == 0: - partials[blkid] = reduce_op(sm_partials[0, 0], sm_partials[1, 0]) - - @cuda.jit(device=True) - def device_reduce_partial_block(arr, partials, sm_partials): - """ - This computes reduction on `arr`. - This device function must be used by 1 threadblock only. - The blocksize must match `arr.size` and must not be greater than 128. 
- """ - tid = cuda.threadIdx.x - blkid = cuda.blockIdx.x - blksz = cuda.blockDim.x - warpid = tid // _WARPSIZE - laneid = tid % _WARPSIZE - - size = arr.size - # load first value - tid = cuda.threadIdx.x - value = arr[tid] - sm_partials[warpid, laneid] = value - - cuda.syncthreads() - - if (warpid + 1) * _WARPSIZE < size: - # fully populated warps - inner_warp_reduction(sm_partials, value) - else: - # partially populated warps - # NOTE: this uses a very inefficient sequential algorithm - if laneid == 0: - sm_this = sm_partials[warpid, :] - base = warpid * _WARPSIZE - for i in range(1, size - base): - sm_this[0] = reduce_op(sm_this[0], sm_this[i]) - - cuda.syncthreads() - # finish up - if tid == 0: - num_active_warps = (blksz + _WARPSIZE - 1) // _WARPSIZE - - result = sm_partials[0, 0] - for i in range(1, num_active_warps): - result = reduce_op(result, sm_partials[i, 0]) - - partials[blkid] = result - - def gpu_reduce_block_strided(arr, partials, init, use_init): - """ - Perform reductions on *arr* and writing out partial reduction result - into *partials*. The length of *partials* is determined by the - number of threadblocks. The initial value is set with *init*. - - Launch config: - - Blocksize must be mutiple of warpsize and it is limited to 4 warps. - """ - tid = cuda.threadIdx.x - - sm_partials = cuda.shared.array((_NUMWARPS, inner_sm_size), - dtype=nbtype) - if cuda.blockDim.x == max_blocksize: - device_reduce_full_block(arr, partials, sm_partials) - else: - device_reduce_partial_block(arr, partials, sm_partials) - # deal with the initializer - if use_init and tid == 0 and cuda.blockIdx.x == 0: - partials[0] = reduce_op(partials[0], init) - - return cuda.jit(gpu_reduce_block_strided) - - -class Reduce(object): - _cache = {} - - def __init__(self, functor): - """Create a reduction object that reduces values using a given binary - function. The binary function is compiled once and cached inside this - object. 
Keeping this object alive will prevent re-compilation. - - :param binop: A function to be compiled as a CUDA device function that - will be used as the binary operation for reduction on a - CUDA device. Internally, it is compiled using - ``cuda.jit(device=True)``. - """ - self._functor = functor - - def _compile(self, dtype): - key = self._functor, dtype - if key in self._cache: - kernel = self._cache[key] - else: - kernel = _gpu_reduce_factory(self._functor, from_dtype(dtype)) - self._cache[key] = kernel - return kernel - - def __call__(self, arr, size=None, res=None, init=0, stream=0): - """Performs a full reduction. - - :param arr: A host or device array. If a device array is given, the - reduction is performed inplace and the values in the array - are overwritten. If a host array is given, it is copied to - the device automatically. - :param size: Optional integer specifying the number of elements in - ``arr`` to reduce. If this parameter is not specified, the - entire array is reduced. - :param res: Optional device array into which to write the reduction - result to. The result is written into the first element of - this array. If this parameter is specified, then no - communication of the reduction output takes place from the - device to the host. - :param init: Optional initial value for the reduction, the type of which - must match ``arr.dtype``. - :param stream: Optional CUDA stream in which to perform the reduction. - If no stream is specified, the default stream of 0 is - used. - :return: If ``res`` is specified, ``None`` is returned. Otherwise, the - result of the reduction is returned. 
- """ - from numba import cuda - - # ensure 1d array - if arr.ndim != 1: - raise TypeError("only support 1D array") - - # adjust array size - if size is not None: - arr = arr[:size] - - init = arr.dtype.type(init) # ensure the right type - - # return `init` if `arr` is empty - if arr.size < 1: - return init - - kernel = self._compile(arr.dtype) - - # Perform the reduction on the GPU - blocksize = _NUMWARPS * _WARPSIZE - size_full = (arr.size // blocksize) * blocksize - size_partial = arr.size - size_full - full_blockct = min(size_full // blocksize, _WARPSIZE * 2) - - # allocate size of partials array - partials_size = full_blockct - if size_partial: - partials_size += 1 - partials = cuda.device_array(shape=partials_size, dtype=arr.dtype) - - if size_full: - # kernel for the fully populated threadblocks - kernel[full_blockct, blocksize, stream](arr[:size_full], - partials[:full_blockct], - init, - True) - - if size_partial: - # kernel for partially populated threadblocks - kernel[1, size_partial, stream](arr[size_full:], - partials[full_blockct:], - init, - not full_blockct) - - if partials.size > 1: - # finish up - kernel[1, partials_size, stream](partials, partials, init, False) - - # handle return value - if res is not None: - res[:1].copy_to_device(partials[:1], stream=stream) - return - else: - return partials[0] diff --git a/numba/numba/cuda/kernels/transpose.py b/numba/numba/cuda/kernels/transpose.py deleted file mode 100644 index 4dd50b988..000000000 --- a/numba/numba/cuda/kernels/transpose.py +++ /dev/null @@ -1,65 +0,0 @@ -from numba import cuda -from numba.cuda.cudadrv.driver import driver -from numba import numpy_support as nps -import math - -def transpose(a, b=None): - """Compute the transpose of 'a' and store it into 'b', if given, - and return it. If 'b' is not given, allocate a new array - and return that. 
- - This implements the algorithm documented in - http://devblogs.nvidia.com/parallelforall/efficient-matrix-transpose-cuda-cc/ - - :param a: an `np.ndarray` or a `DeviceNDArrayBase` subclass. If already on - the device its stream will be used to perform the transpose (and to copy - `b` to the device if necessary). - """ - - # prefer `a`'s stream if - stream = getattr(a, 'stream', 0) - - if not b: - cols, rows = a.shape - strides = a.dtype.itemsize * cols, a.dtype.itemsize - b = cuda.cudadrv.devicearray.DeviceNDArray( - (rows, cols), - strides, - dtype=a.dtype, - stream=stream) - - dt=nps.from_dtype(a.dtype) - - tpb = driver.get_device().MAX_THREADS_PER_BLOCK - # we need to factor available threads into x and y axis - tile_width = int(math.pow(2, math.log(tpb, 2)/2)) - tile_height = int(tpb / tile_width) - - tile_shape=(tile_height, tile_width + 1) - - @cuda.jit - def kernel(input, output): - - tile = cuda.shared.array(shape=tile_shape, dtype=dt) - - tx = cuda.threadIdx.x - ty = cuda.threadIdx.y - bx = cuda.blockIdx.x * cuda.blockDim.x - by = cuda.blockIdx.y * cuda.blockDim.y - x = by + tx - y = bx + ty - - if by+ty < input.shape[0] and bx+tx < input.shape[1]: - tile[ty, tx] = input[by+ty, bx+tx] - cuda.syncthreads() - if y < output.shape[0] and x < output.shape[1]: - output[y, x] = tile[tx, ty] - - - # one block per tile, plus one for remainders - blocks = int(b.shape[0]/tile_height + 1), int(b.shape[1]/tile_width + 1) - # one thread per tile element - threads = tile_height, tile_width - kernel[blocks, threads, stream](a, b) - - return b diff --git a/numba/numba/cuda/libdevice.py b/numba/numba/cuda/libdevice.py deleted file mode 100644 index 4327fc1ce..000000000 --- a/numba/numba/cuda/libdevice.py +++ /dev/null @@ -1,125 +0,0 @@ -from __future__ import print_function, absolute_import, division -import sys -import math -from llvmlite.llvmpy.core import Type -from numba import cgutils, types -from numba.targets.imputils import Registry - -registry = Registry() 
-lower = registry.lower - -float_set = types.float32, types.float64 - - -def bool_implement(nvname, ty): - def core(context, builder, sig, args): - assert sig.return_type == types.boolean, nvname - fty = context.get_value_type(ty) - lmod = builder.module - fnty = Type.function(Type.int(), [fty]) - fn = lmod.get_or_insert_function(fnty, name=nvname) - result = builder.call(fn, args) - return context.cast(builder, result, types.int32, types.boolean) - - return core - - - -def unary_implement(nvname, ty): - def core(context, builder, sig, args): - fty = context.get_value_type(ty) - lmod = builder.module - fnty = Type.function(fty, [fty]) - fn = lmod.get_or_insert_function(fnty, name=nvname) - return builder.call(fn, args) - - return core - - -def binary_implement(nvname, ty): - def core(context, builder, sig, args): - fty = context.get_value_type(ty) - lmod = builder.module - fnty = Type.function(fty, [fty, fty]) - fn = lmod.get_or_insert_function(fnty, name=nvname) - return builder.call(fn, args) - - return core - - -def powi_implement(nvname): - def core(context, builder, sig, args): - [base, pow] = args - [basety, powty] = sig.args - lmod = builder.module - fty = context.get_value_type(basety) - ity = context.get_value_type(types.int32) - fnty = Type.function(fty, [fty, ity]) - fn = lmod.get_or_insert_function(fnty, name=nvname) - return builder.call(fn, [base, pow]) - - - return core - - -lower(math.pow, types.float32, types.int32)(powi_implement('__nv_powif')) -lower(math.pow, types.float64, types.int32)(powi_implement('__nv_powi')) - - -booleans = [] -booleans += [('__nv_isnand', '__nv_isnanf', math.isnan)] -booleans += [('__nv_isinfd', '__nv_isinff', math.isinf)] - -unarys = [] -unarys += [('__nv_ceil', '__nv_ceilf', math.ceil)] -unarys += [('__nv_floor', '__nv_floorf', math.floor)] -unarys += [('__nv_fabs', '__nv_fabsf', math.fabs)] -unarys += [('__nv_exp', '__nv_expf', math.exp)] -unarys += [('__nv_expm1', '__nv_expm1f', math.expm1)] -unarys += [('__nv_erf', 
'__nv_erff', math.erf)] -unarys += [('__nv_erfc', '__nv_erfcf', math.erfc)] -unarys += [('__nv_tgamma', '__nv_tgammaf', math.gamma)] -unarys += [('__nv_lgamma', '__nv_lgammaf', math.lgamma)] -unarys += [('__nv_sqrt', '__nv_sqrtf', math.sqrt)] -unarys += [('__nv_log', '__nv_logf', math.log)] -unarys += [('__nv_log10', '__nv_log10f', math.log10)] -unarys += [('__nv_log1p', '__nv_log1pf', math.log1p)] -unarys += [('__nv_acosh', '__nv_acoshf', math.acosh)] -unarys += [('__nv_acos', '__nv_acosf', math.acos)] -unarys += [('__nv_cos', '__nv_cosf', math.cos)] -unarys += [('__nv_cosh', '__nv_coshf', math.cosh)] -unarys += [('__nv_asinh', '__nv_asinhf', math.asinh)] -unarys += [('__nv_asin', '__nv_asinf', math.asin)] -unarys += [('__nv_sin', '__nv_sinf', math.sin)] -unarys += [('__nv_sinh', '__nv_sinhf', math.sinh)] -unarys += [('__nv_atan', '__nv_atanf', math.atan)] -unarys += [('__nv_atanh', '__nv_atanhf', math.atanh)] -unarys += [('__nv_tan', '__nv_tanf', math.tan)] -unarys += [('__nv_tanh', '__nv_tanhf', math.tanh)] - -binarys = [] -binarys += [('__nv_copysign', '__nv_copysignf', math.copysign)] -binarys += [('__nv_atan2', '__nv_atan2f', math.atan2)] -binarys += [('__nv_pow', '__nv_powf', math.pow)] -binarys += [('__nv_fmod', '__nv_fmodf', math.fmod)] -binarys += [('__nv_hypot', '__nv_hypotf', math.hypot)] - - -for name64, name32, key in booleans: - impl64 = bool_implement(name64, types.float64) - lower(key, types.float64)(impl64) - impl32 = bool_implement(name32, types.float32) - lower(key, types.float32)(impl32) - - -for name64, name32, key in unarys: - impl64 = unary_implement(name64, types.float64) - lower(key, types.float64)(impl64) - impl32 = unary_implement(name32, types.float32) - lower(key, types.float32)(impl32) - -for name64, name32, key in binarys: - impl64 = binary_implement(name64, types.float64) - lower(key, types.float64, types.float64)(impl64) - impl32 = binary_implement(name32, types.float32) - lower(key, types.float32, types.float32)(impl32) diff --git 
a/numba/numba/cuda/nvvmutils.py b/numba/numba/cuda/nvvmutils.py deleted file mode 100644 index 2635b7f1a..000000000 --- a/numba/numba/cuda/nvvmutils.py +++ /dev/null @@ -1,160 +0,0 @@ -from __future__ import print_function, absolute_import, division -import itertools -import llvmlite.llvmpy.core as lc -from .cudadrv import nvvm -from numba import cgutils - - -def declare_atomic_cas_int32(lmod): - fname = '___numba_cas_hack' - fnty = lc.Type.function(lc.Type.int(32), - (lc.Type.pointer(lc.Type.int(32)), lc.Type.int(32), lc.Type.int(32))) - return lmod.get_or_insert_function(fnty, fname) - - -def declare_atomic_add_float32(lmod): - fname = 'llvm.nvvm.atomic.load.add.f32.p0f32' - fnty = lc.Type.function(lc.Type.float(), - (lc.Type.pointer(lc.Type.float(), 0), lc.Type.float())) - return lmod.get_or_insert_function(fnty, name=fname) - - -def declare_atomic_add_float64(lmod): - fname = '___numba_atomic_double_add' - fnty = lc.Type.function(lc.Type.double(), - (lc.Type.pointer(lc.Type.double()), lc.Type.double())) - return lmod.get_or_insert_function(fnty, fname) - - -def declare_atomic_max_float32(lmod): - fname = '___numba_atomic_float_max' - fnty = lc.Type.function(lc.Type.float(), - (lc.Type.pointer(lc.Type.float()), lc.Type.float())) - return lmod.get_or_insert_function(fnty, fname) - - -def declare_atomic_max_float64(lmod): - fname = '___numba_atomic_double_max' - fnty = lc.Type.function(lc.Type.double(), - (lc.Type.pointer(lc.Type.double()), lc.Type.double())) - return lmod.get_or_insert_function(fnty, fname) - - -def declare_atomic_min_float32(lmod): - fname = '___numba_atomic_float_min' - fnty = lc.Type.function(lc.Type.float(), - (lc.Type.pointer(lc.Type.float()), lc.Type.float())) - return lmod.get_or_insert_function(fnty, fname) - - -def declare_atomic_min_float64(lmod): - fname = '___numba_atomic_double_min' - fnty = lc.Type.function(lc.Type.double(), - (lc.Type.pointer(lc.Type.double()), lc.Type.double())) - return lmod.get_or_insert_function(fnty, fname) - 
- -def insert_addrspace_conv(lmod, elemtype, addrspace): - addrspacename = { - nvvm.ADDRSPACE_SHARED: 'shared', - nvvm.ADDRSPACE_LOCAL: 'local', - nvvm.ADDRSPACE_CONSTANT: 'constant', - }[addrspace] - tyname = str(elemtype) - tyname = {'float': 'f32', 'double': 'f64'}.get(tyname, tyname) - s2g_name_fmt = 'llvm.nvvm.ptr.' + addrspacename + '.to.gen.p0%s.p%d%s' - s2g_name = s2g_name_fmt % (tyname, addrspace, tyname) - elem_ptr_ty = lc.Type.pointer(elemtype) - elem_ptr_ty_addrspace = lc.Type.pointer(elemtype, addrspace) - s2g_fnty = lc.Type.function(elem_ptr_ty, - [elem_ptr_ty_addrspace]) - return lmod.get_or_insert_function(s2g_fnty, s2g_name) - - -def declare_string(builder, value): - lmod = builder.basic_block.function.module - cval = lc.Constant.stringz(value) - gl = lmod.add_global_variable(cval.type, name="_str", - addrspace=nvvm.ADDRSPACE_CONSTANT) - gl.linkage = lc.LINKAGE_INTERNAL - gl.global_constant = True - gl.initializer = cval - - charty = lc.Type.int(8) - constcharptrty = lc.Type.pointer(charty, nvvm.ADDRSPACE_CONSTANT) - charptr = builder.bitcast(gl, constcharptrty) - - conv = insert_addrspace_conv(lmod, charty, nvvm.ADDRSPACE_CONSTANT) - return builder.call(conv, [charptr]) - -def declare_vprint(lmod): - voidptrty = lc.Type.pointer(lc.Type.int(8)) - # NOTE: the second argument to vprintf() points to the variable-length - # array of arguments (after the format) - vprintfty = lc.Type.function(lc.Type.int(), [voidptrty, voidptrty]) - vprintf = lmod.get_or_insert_function(vprintfty, "vprintf") - return vprintf - -# ----------------------------------------------------------------------------- - -SREG_MAPPING = { - 'tid.x': 'llvm.nvvm.read.ptx.sreg.tid.x', - 'tid.y': 'llvm.nvvm.read.ptx.sreg.tid.y', - 'tid.z': 'llvm.nvvm.read.ptx.sreg.tid.z', - - 'ntid.x': 'llvm.nvvm.read.ptx.sreg.ntid.x', - 'ntid.y': 'llvm.nvvm.read.ptx.sreg.ntid.y', - 'ntid.z': 'llvm.nvvm.read.ptx.sreg.ntid.z', - - 'ctaid.x': 'llvm.nvvm.read.ptx.sreg.ctaid.x', - 'ctaid.y': 
'llvm.nvvm.read.ptx.sreg.ctaid.y', - 'ctaid.z': 'llvm.nvvm.read.ptx.sreg.ctaid.z', - - 'nctaid.x': 'llvm.nvvm.read.ptx.sreg.nctaid.x', - 'nctaid.y': 'llvm.nvvm.read.ptx.sreg.nctaid.y', - 'nctaid.z': 'llvm.nvvm.read.ptx.sreg.nctaid.z', - - 'warpsize': 'llvm.nvvm.read.ptx.sreg.warpsize', - 'laneid': 'llvm.nvvm.read.ptx.sreg.laneid', -} - - -def call_sreg(builder, name): - module = builder.module - fnty = lc.Type.function(lc.Type.int(), ()) - fn = module.get_or_insert_function(fnty, name=SREG_MAPPING[name]) - return builder.call(fn, ()) - - -class SRegBuilder(object): - def __init__(self, builder): - self.builder = builder - - def tid(self, xyz): - return call_sreg(self.builder, 'tid.%s' % xyz) - - def ctaid(self, xyz): - return call_sreg(self.builder, 'ctaid.%s' % xyz) - - def ntid(self, xyz): - return call_sreg(self.builder, 'ntid.%s' % xyz) - - def nctaid(self, xyz): - return call_sreg(self.builder, 'nctaid.%s' % xyz) - - def getdim(self, xyz): - tid = self.tid(xyz) - ntid = self.ntid(xyz) - nctaid = self.ctaid(xyz) - res = self.builder.add(self.builder.mul(ntid, nctaid), tid) - return res - - -def get_global_id(builder, dim): - sreg = SRegBuilder(builder) - it = (sreg.getdim(xyz) for xyz in 'xyz') - seq = list(itertools.islice(it, None, dim)) - if dim == 1: - return seq[0] - else: - return seq diff --git a/numba/numba/cuda/printimpl.py b/numba/numba/cuda/printimpl.py deleted file mode 100644 index c7fe99f09..000000000 --- a/numba/numba/cuda/printimpl.py +++ /dev/null @@ -1,78 +0,0 @@ -from __future__ import print_function, absolute_import, division - -from llvmlite.llvmpy.core import Type, Constant - -from numba import types, typing, cgutils, utils -from numba.targets.imputils import Registry -from . 
import nvvmutils - -registry = Registry() -lower = registry.lower - -voidptr = Type.pointer(Type.int(8)) - - -# NOTE: we don't use @lower here since print_item() doesn't return a LLVM value - -@utils.singledispatch -def print_item(ty, context, builder, val): - """ - Handle printing of a single value of the given Numba type. - A (format string, [list of arguments]) is returned that will allow - forming the final printf()-like call. - """ - raise NotImplementedError("printing unimplemented for values of type %s" - % (ty,)) - - -@print_item.register(types.Integer) -def int_print_impl(ty, context, builder, val): - if ty in types.unsigned_domain: - rawfmt = "%llu" - dsttype = types.uint64 - else: - rawfmt = "%lld" - dsttype = types.int64 - fmt = context.insert_string_const_addrspace(builder, rawfmt) - lld = context.cast(builder, val, ty, dsttype) - return rawfmt, [lld] - -@print_item.register(types.Float) -def real_print_impl(ty, context, builder, val): - lld = context.cast(builder, val, ty, types.float64) - return "%f", [lld] - -@print_item.register(types.Const) -def const_print_impl(ty, context, builder, sigval): - pyval = ty.value - assert isinstance(pyval, str) # Ensured by lowering - rawfmt = "%s" - val = context.insert_string_const_addrspace(builder, pyval) - return rawfmt, [val] - - -@lower(print, types.VarArg(types.Any)) -def print_varargs(context, builder, sig, args): - """This function is a generic 'print' wrapper for arbitrary types. 
- It dispatches to the appropriate 'print' implementations above - depending on the detected real types in the signature.""" - - vprint = nvvmutils.declare_vprint(builder.module) - - formats = [] - values = [] - - for i, (argtype, argval) in enumerate(zip(sig.args, args)): - argfmt, argvals = print_item(argtype, context, builder, argval) - formats.append(argfmt) - values.extend(argvals) - - rawfmt = " ".join(formats) + "\n" - fmt = context.insert_string_const_addrspace(builder, rawfmt) - array = cgutils.make_anonymous_struct(builder, values) - arrayptr = cgutils.alloca_once_value(builder, array) - - vprint = nvvmutils.declare_vprint(builder.module) - builder.call(vprint, (fmt, builder.bitcast(arrayptr, voidptr))) - - return context.get_dummy_value() diff --git a/numba/numba/cuda/random.py b/numba/numba/cuda/random.py deleted file mode 100644 index 9089a7a74..000000000 --- a/numba/numba/cuda/random.py +++ /dev/null @@ -1,282 +0,0 @@ -from __future__ import print_function, absolute_import -import math - -from numba import cuda, float32, float64, uint32, int64, uint64, from_dtype,\ - jit - -import numpy as np - -# This implementation is based upon the xoroshiro128+ and splitmix64 algorithms -# described at: -# -# http://xoroshiro.di.unimi.it/ -# -# and originally implemented by David Blackman and Sebastiano Vigna. -# -# The implementations below are based on the C source code: -# -# * http://xoroshiro.di.unimi.it/xoroshiro128plus.c -# * http://xoroshiro.di.unimi.it/splitmix64.c -# -# Splitmix64 is used to generate the initial state of the xoroshiro128+ -# generator to ensure that small seeds don't result in predictable output. - -# **WARNING**: There is a lot of verbose casting in this file to ensure that -# NumPy casting conventions (which cast uint64 [op] int32 to float64) don't -# turn integers into floats when using these functions in the CUDA simulator. 
-# -# There are also no function type signatures to ensure that compilation is -# deferred so that import is quick, and Sphinx autodoc works. We are also -# using the CPU @jit decorator everywhere to create functions that work as -# both CPU and CUDA device functions. - -xoroshiro128p_dtype = np.dtype([('s0', np.uint64), ('s1', np.uint64)], align=True) -xoroshiro128p_type = from_dtype(xoroshiro128p_dtype) - - -@jit -def init_xoroshiro128p_state(states, index, seed): - '''Use SplitMix64 to generate an xoroshiro128p state from 64-bit seed. - - This ensures that manually set small seeds don't result in a predictable - initial sequence from the random number generator. - - :type states: 1D array, dtype=xoroshiro128p_dtype - :param states: array of RNG states - :type index: uint64 - :param index: offset in states to update - :type seed: int64 - :param seed: seed value to use when initializing state - ''' - index = int64(index) - seed = uint64(seed) - - z = seed + uint64(0x9E3779B97F4A7C15) - z = (z ^ (z >> uint32(30))) * uint64(0xBF58476D1CE4E5B9) - z = (z ^ (z >> uint32(27))) * uint64(0x94D049BB133111EB) - z = z ^ (z >> uint32(31)) - - states[index]['s0'] = z - states[index]['s1'] = z - - -@jit -def rotl(x, k): - '''Left rotate x by k bits.''' - x = uint64(x) - k = uint32(k) - return (x << k) | (x >> uint32(64 - k)) - - -@jit -def xoroshiro128p_next(states, index): - '''Return the next random uint64 and advance the RNG in states[index]. 
- - :type states: 1D array, dtype=xoroshiro128p_dtype - :param states: array of RNG states - :type index: int64 - :param index: offset in states to update - :rtype: uint64 - ''' - index = int64(index) - s0 = states[index]['s0'] - s1 = states[index]['s1'] - result = s0 + s1 - - s1 ^= s0 - states[index]['s0'] = uint64(rotl(s0, uint32(55))) ^ s1 ^ (s1 << uint32(14)) - states[index]['s1'] = uint64(rotl(s1, uint32(36))) - - return result - - -XOROSHIRO128P_JUMP = (uint64(0xbeac0467eba5facb), uint64(0xd86b048b86aa9922)) - - -@jit -def xoroshiro128p_jump(states, index): - '''Advance the RNG in ``states[index]`` by 2**64 steps. - - :type states: 1D array, dtype=xoroshiro128p_dtype - :param states: array of RNG states - :type index: int64 - :param index: offset in states to update - ''' - index = int64(index) - - s0 = uint64(0) - s1 = uint64(0) - - for i in range(2): - for b in range(64): - if XOROSHIRO128P_JUMP[i] & (uint64(1) << uint32(b)): - s0 ^= states[index]['s0'] - s1 ^= states[index]['s1'] - xoroshiro128p_next(states, index) - - states[index]['s0'] = s0 - states[index]['s1'] = s1 - - -@jit -def uint64_to_unit_float64(x): - '''Convert uint64 to float64 value in the range [0.0, 1.0)''' - x = uint64(x) - return (x >> uint32(11)) * (float64(1) / (uint64(1) << uint32(53))) - - -@jit -def uint64_to_unit_float32(x): - '''Convert uint64 to float32 value in the range [0.0, 1.0)''' - x = uint64(x) - return float32(uint64_to_unit_float64(x)) - - -@jit -def xoroshiro128p_uniform_float32(states, index): - '''Return a float32 in range [0.0, 1.0) and advance ``states[index]``. - - :type states: 1D array, dtype=xoroshiro128p_dtype - :param states: array of RNG states - :type index: int64 - :param index: offset in states to update - :rtype: float32 - ''' - index = int64(index) - return uint64_to_unit_float32(xoroshiro128p_next(states, index)) - - -@jit -def xoroshiro128p_uniform_float64(states, index): - '''Return a float64 in range [0.0, 1.0) and advance ``states[index]``. 
- - :type states: 1D array, dtype=xoroshiro128p_dtype - :param states: array of RNG states - :type index: int64 - :param index: offset in states to update - :rtype: float64 - ''' - index = int64(index) - return uint64_to_unit_float64(xoroshiro128p_next(states, index)) - - -TWO_PI_FLOAT32 = np.float32(2 * math.pi) -TWO_PI_FLOAT64 = np.float64(2 * math.pi) - - -@jit -def xoroshiro128p_normal_float32(states, index): - '''Return a normally distributed float32 and advance ``states[index]``. - - The return value is drawn from a Gaussian of mean=0 and sigma=1 using the - Box-Muller transform. This advances the RNG sequence by two steps. - - :type states: 1D array, dtype=xoroshiro128p_dtype - :param states: array of RNG states - :type index: int64 - :param index: offset in states to update - :rtype: float32 - ''' - index = int64(index) - - u1 = xoroshiro128p_uniform_float32(states, index) - u2 = xoroshiro128p_uniform_float32(states, index) - - z0 = math.sqrt(-float32(2.0) * math.log(u1)) * math.cos(TWO_PI_FLOAT32 * u2) - # discarding second normal value - # z1 = math.sqrt(-float32(2.0) * math.log(u1)) * math.sin(TWO_PI_FLOAT32 * u2) - return z0 - - -@jit -def xoroshiro128p_normal_float64(states, index): - '''Return a normally distributed float32 and advance ``states[index]``. - - The return value is drawn from a Gaussian of mean=0 and sigma=1 using the - Box-Muller transform. This advances the RNG sequence by two steps. 
- - :type states: 1D array, dtype=xoroshiro128p_dtype - :param states: array of RNG states - :type index: int64 - :param index: offset in states to update - :rtype: float64 - ''' - index = int64(index) - - u1 = xoroshiro128p_uniform_float32(states, index) - u2 = xoroshiro128p_uniform_float32(states, index) - - z0 = math.sqrt(-float64(2.0) * math.log(u1)) * math.cos(TWO_PI_FLOAT64 * u2) - # discarding second normal value - # z1 = math.sqrt(-float64(2.0) * math.log(u1)) * math.sin(TWO_PI_FLOAT64 * u2) - return z0 - - -@jit -def init_xoroshiro128p_states_cpu(states, seed, subsequence_start): - n = states.shape[0] - seed = uint64(seed) - subsequence_start = uint64(subsequence_start) - - if n >= 1: - init_xoroshiro128p_state(states, 0, seed) - - # advance to starting subsequence number - for _ in range(subsequence_start): - xoroshiro128p_jump(states, 0) - - # populate the rest of the array - for i in range(1, n): - states[i] = states[i - 1] # take state of previous generator - xoroshiro128p_jump(states, i) # and jump forward 2**64 steps - - -def init_xoroshiro128p_states(states, seed, subsequence_start=0, stream=0): - '''Initialize RNG states on the GPU for parallel generators. - - This intializes the RNG states so that each state in the array corresponds - subsequences in the separated by 2**64 steps from each other in the main - sequence. Therefore, as long no CUDA thread requests more than 2**64 - random numbers, all of the RNG states produced by this function are - guaranteed to be independent. - - The subsequence_start parameter can be used to advance the first RNG state - by a multiple of 2**64 steps. 
- - :type states: 1D DeviceNDArray, dtype=xoroshiro128p_dtype - :param states: array of RNG states - :type seed: uint64 - :param seed: starting seed for list of generators - ''' - - # Initialization on CPU is much faster than the GPU - states_cpu = np.empty(shape=states.shape, dtype=xoroshiro128p_dtype) - init_xoroshiro128p_states_cpu(states_cpu, seed, subsequence_start) - - states.copy_to_device(states_cpu, stream=stream) - - -def create_xoroshiro128p_states(n, seed, subsequence_start=0, stream=0): - '''Returns a new device array initialized for n random number generators. - - This intializes the RNG states so that each state in the array corresponds - subsequences in the separated by 2**64 steps from each other in the main - sequence. Therefore, as long no CUDA thread requests more than 2**64 - random numbers, all of the RNG states produced by this function are - guaranteed to be independent. - - The subsequence_start parameter can be used to advance the first RNG state - by a multiple of 2**64 steps. 
- - :type n: int - :param n: number of RNG states to create - :type seed: uint64 - :param seed: starting seed for list of generators - :type subsequence_start: uint64 - :param subsequence_start: - :type stream: CUDA stream - :param stream: stream to run initialization kernel on - ''' - states = cuda.device_array(n, dtype=xoroshiro128p_dtype, stream=stream) - init_xoroshiro128p_states(states, seed, subsequence_start, stream) - return states diff --git a/numba/numba/cuda/simulator/__init__.py b/numba/numba/cuda/simulator/__init__.py deleted file mode 100644 index 6deac6930..000000000 --- a/numba/numba/cuda/simulator/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -from __future__ import absolute_import - -from .api import * -from .reduction import Reduce -from .cudadrv.devicearray import (device_array, device_array_like, pinned, - pinned_array, to_device, auto_device) -from .cudadrv import devicearray -from .cudadrv.devices import require_context, gpus -from .cudadrv.devices import get_context as current_context - -reduce = Reduce - -# Ensure that any user code attempting to import cudadrv etc. gets the -# simulator's version and not the real version if the simulator is enabled. -from numba import config -if config.ENABLE_CUDASIM: - import sys - from . import cudadrv - sys.modules['numba.cuda.cudadrv'] = cudadrv - sys.modules['numba.cuda.cudadrv.devicearray'] = cudadrv.devicearray - sys.modules['numba.cuda.cudadrv.devices'] = cudadrv.devices - sys.modules['numba.cuda.cudadrv.driver'] = cudadrv.driver - sys.modules['numba.cuda.cudadrv.drvapi'] = cudadrv.drvapi - sys.modules['numba.cuda.cudadrv.nvvm'] = cudadrv.nvvm - - from . 
import compiler - sys.modules['numba.cuda.compiler'] = compiler diff --git a/numba/numba/cuda/simulator/api.py b/numba/numba/cuda/simulator/api.py deleted file mode 100644 index 87b41d348..000000000 --- a/numba/numba/cuda/simulator/api.py +++ /dev/null @@ -1,95 +0,0 @@ -''' -Contains CUDA API functions -''' -from __future__ import absolute_import - -from contextlib import contextmanager -from .cudadrv.devices import require_context, reset, gpus -from .kernel import FakeCUDAKernel -from numba.typing import Signature -from warnings import warn -from ..args import In, Out, InOut - - -def select_device(dev=0): - assert dev == 0, 'Only a single device supported by the simulator' - - -class stream(object): - ''' - The stream API is supported in the simulator - however, all execution - occurs synchronously, so synchronization requires no operation. - ''' - @contextmanager - def auto_synchronize(self): - yield - - def synchronize(self): - pass - - -def synchronize(): - pass - -def close(): - gpus.closed = True - - -def declare_device(*args, **kwargs): - pass - - -def detect(): - print('Found 1 CUDA devices') - print('id %d %20s %40s' % (0, 'SIMULATOR', '[SUPPORTED]')) - print('%40s: 5.2' % 'compute capability') - - -def list_devices(): - return gpus - - -# Events - -class Event(object): - ''' - The simulator supports the event API, but they do not record timing info, - and all simulation is synchronous. Execution time is not recorded. 
- ''' - def record(self, stream=0): - pass - - def wait(self, stream=0): - pass - - def synchronize(self): - pass - - def elapsed_time(self, event): - warn('Simulator timings are bogus') - return 0.0 - -event = Event - - -def jit(fn_or_sig=None, device=False, debug=False, argtypes=None, inline=False, restype=None, - fastmath=False, link=None): - if link is not None: - raise NotImplementedError('Cannot link PTX in the simulator') - # Check for first argument specifying types - in that case the - # decorator is not being passed a function - if fn_or_sig is None or isinstance(fn_or_sig, (str, tuple, Signature)): - def jitwrapper(fn): - return FakeCUDAKernel(fn, - device=device, - fastmath=fastmath) - return jitwrapper - return FakeCUDAKernel(fn_or_sig, device=device) - -autojit = jit - - -@contextmanager -def defer_cleanup(): - # No effect for simulator - yield diff --git a/numba/numba/cuda/simulator/compiler.py b/numba/numba/cuda/simulator/compiler.py deleted file mode 100644 index 35d5903af..000000000 --- a/numba/numba/cuda/simulator/compiler.py +++ /dev/null @@ -1,6 +0,0 @@ -''' -The compiler is not implemented in the simulator. This module provides a stub -to allow tests to import successfully. -''' - -compile_kernel = None diff --git a/numba/numba/cuda/simulator/cudadrv/__init__.py b/numba/numba/cuda/simulator/cudadrv/__init__.py deleted file mode 100644 index ee3f3c3bf..000000000 --- a/numba/numba/cuda/simulator/cudadrv/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from . import devicearray, devices, driver, drvapi, nvvm diff --git a/numba/numba/cuda/simulator/cudadrv/devicearray.py b/numba/numba/cuda/simulator/cudadrv/devicearray.py deleted file mode 100644 index 3cf768e63..000000000 --- a/numba/numba/cuda/simulator/cudadrv/devicearray.py +++ /dev/null @@ -1,230 +0,0 @@ -''' -The Device Array API is not implemented in the simulator. This module provides -stubs to allow tests to import correctly. 
-''' -from contextlib import contextmanager -from warnings import warn - -import numpy as np - -from numba import six, types, numpy_support - -DeviceRecord = None -from_record_like = None - - -def is_cuda_ndarray(obj): - return getattr(obj, '__cuda_ndarray__', False) - - -errmsg_contiguous_buffer = ("Array contains non-contiguous buffer and cannot " - "be transferred as a single memory region. Please " - "ensure contiguous buffer with numpy " - ".ascontiguousarray()") - - -class FakeShape(tuple): - ''' - The FakeShape class is used to provide a shape which does not allow negative - indexing, similar to the shape in CUDA Python. (Numpy shape arrays allow - negative indexing) - ''' - def __getitem__(self, k): - if isinstance(k, six.integer_types) and k < 0: - raise IndexError('tuple index out of range') - return super(FakeShape, self).__getitem__(k) - - -class FakeCUDAArray(object): - ''' - Implements the interface of a DeviceArray/DeviceRecord, but mostly just - wraps a NumPy array. - ''' - - __cuda_ndarray__ = True # There must be gpu_data attribute - - - def __init__(self, ary, stream=0): - self._ary = ary.reshape(1) if ary.ndim == 0 else ary - self.stream = stream - - @property - def alloc_size(self): - return self._ary.nbytes - - def __getattr__(self, attrname): - try: - attr = getattr(self._ary, attrname) - return attr - except AttributeError as e: - six.raise_from(AttributeError("Wrapped array has no attribute '%s'" - % attrname), e) - - def bind(self, stream=0): - return FakeCUDAArray(self._ary, stream) - - @property - def T(self): - return self.transpose() - - def transpose(self, axes=None): - return FakeCUDAArray(np.transpose(self._ary, axes=axes)) - - def __getitem__(self, idx): - item = self._ary.__getitem__(idx) - if isinstance(item, np.ndarray): - return FakeCUDAArray(item, stream=self.stream) - return item - - def __setitem__(self, idx, val): - return self._ary.__setitem__(idx, val) - - def copy_to_host(self, ary=None, stream=0): - if ary is None: - 
ary = np.empty_like(self._ary) - # NOTE: np.copyto() introduced in Numpy 1.7 - try: - np.copyto(ary, self._ary) - except AttributeError: - ary[:] = self._ary - return ary - - def copy_to_device(self, ary, stream=0): - ''' - Copy from the provided array into this array. - - This may be less forgiving than the CUDA Python implementation, which - will copy data up to the length of the smallest of the two arrays, - whereas this expects the size of the arrays to be equal. - ''' - sentry_contiguous(self) - if isinstance(ary, FakeCUDAArray): - sentry_contiguous(ary) - - if self.flags['C_CONTIGUOUS'] != ary.flags['C_CONTIGUOUS']: - raise ValueError("Can't copy %s-contiguous array to a %s-contiguous array" % ( - 'C' if ary.flags['C_CONTIGUOUS'] else 'F', - 'C' if self.flags['C_CONTIGUOUS'] else 'F', - )) - else: - ary = np.array( - ary, - order='C' if self.flags['C_CONTIGUOUS'] else 'F', - subok=True, - copy=False) - try: - np.copyto(self._ary, ary) - except AttributeError: - self._ary[:] = ary - - def to_host(self): - warn('to_host() is deprecated and will be removed') - raise NotImplementedError - - @property - def shape(self): - return FakeShape(self._ary.shape) - - def ravel(self, *args, **kwargs): - return FakeCUDAArray(self._ary.ravel(*args, **kwargs)) - - def reshape(self, *args, **kwargs): - return FakeCUDAArray(self._ary.reshape(*args, **kwargs)) - - def is_c_contiguous(self): - return self._ary.flags.c_contiguous - - def is_f_contiguous(self): - return self._ary.flags.f_contiguous - - def __str__(self): - return str(self._ary) - - def __repr__(self): - return repr(self._ary) - - def __len__(self): - return len(self._ary) - - def split(self, section, stream=0): - return [ - FakeCUDAArray(a) - for a in np.split(self._ary, range(section, len(self), section)) - ] - -def sentry_contiguous(ary): - if not ary.flags['C_CONTIGUOUS'] and not ary.flags['F_CONTIGUOUS']: - if ary.strides[0] == 0: - # Broadcasted, ensure inner contiguous - return sentry_contiguous(ary[0]) - 
else: - raise ValueError(errmsg_contiguous_buffer) - - -def to_device(ary, stream=0, copy=True, to=None): - sentry_contiguous(ary) - if to is None: - return FakeCUDAArray(np.array( - ary, - order='C' if ary.flags['C_CONTIGUOUS'] else 'F', copy=True)) - else: - to.copy_to_device(ary, stream=stream) - - -@contextmanager -def pinned(arg): - yield - - -def pinned_array(shape, dtype=np.float, strides=None, order='C'): - return np.ndarray(shape=shape, strides=strides, dtype=dtype, order=order) - - -def device_array(*args, **kwargs): - stream = kwargs.pop('stream') if 'stream' in kwargs else 0 - return FakeCUDAArray(np.ndarray(*args, **kwargs), stream=stream) - - -def device_array_like(ary, stream=0): - return FakeCUDAArray(np.empty_like(ary)) - - -def auto_device(ary, stream=0, copy=True): - if isinstance(ary, FakeCUDAArray): - return ary, False - - if not isinstance(ary, np.void): - ary = np.array( - ary, - copy=False, - subok=True) - return to_device(ary, stream, copy), True - - -def is_cuda_ndarray(obj): - "Check if an object is a CUDA ndarray" - return getattr(obj, '__cuda_ndarray__', False) - - -def verify_cuda_ndarray_interface(obj): - "Verify the CUDA ndarray interface for an obj" - require_cuda_ndarray(obj) - - def requires_attr(attr, typ): - if not hasattr(obj, attr): - raise AttributeError(attr) - if not isinstance(getattr(obj, attr), typ): - raise AttributeError('%s must be of type %s' % (attr, typ)) - - requires_attr('shape', tuple) - requires_attr('strides', tuple) - requires_attr('dtype', np.dtype) - requires_attr('size', six.integer_types) - - -def require_cuda_ndarray(obj): - "Raises ValueError is is_cuda_ndarray(obj) evaluates False" - if not is_cuda_ndarray(obj): - raise ValueError('require an cuda ndarray object') - - diff --git a/numba/numba/cuda/simulator/cudadrv/devices.py b/numba/numba/cuda/simulator/cudadrv/devices.py deleted file mode 100644 index 647c16b37..000000000 --- a/numba/numba/cuda/simulator/cudadrv/devices.py +++ /dev/null @@ -1,86 +0,0 
@@ -from collections import namedtuple - -_MemoryInfo = namedtuple("_MemoryInfo", "free,total") - -class FakeCUDAContext(object): - ''' - This stub implements functionality only for simulating a single GPU - at the moment. - ''' - def __init__(self, device): - self._device = device - - def __enter__(self): - pass - - def __exit__(self, exc_type, exc_val, exc_tb): - pass - - def __str__(self): - return "".format(self=self) - - @property - def id(self): - return self._device - - @property - def compute_capability(self): - return (5, 2) - - def reset(self): - pass - - def get_memory_info(self): - """ - Cross-platform free / total host memory is hard without external dependencies, - e.g. `psutil` - so return infinite memory to maintain API type compatibility - """ - return _MemoryInfo(float('inf'), float('inf')) - - -class FakeDeviceList(object): - ''' - This stub implements a device list containing a single GPU. It also - keeps track of the GPU status, i.e. whether the context is closed or not, - which may have been set by the user calling reset() - ''' - def __init__(self): - self.lst = (FakeCUDAContext(0),) - self.closed = False - - def __getitem__(self, devnum): - self.closed = False - return self.lst[devnum] - - def __str__(self): - return ', '.join([str(d) for d in self.lst]) - - def __iter__(self): - return iter(self.lst) - - def __len__(self): - return len(self.lst) - - @property - def current(self): - if self.closed: - return None - return self.lst[0] - - -gpus = FakeDeviceList() - - -def reset(): - gpus[0].closed = True - - -def get_context(devnum=0): - return FakeCUDAContext(devnum) - - -def require_context(func): - ''' - In the simulator, a context is always "available", so this is a no-op. 
- ''' - return func diff --git a/numba/numba/cuda/simulator/cudadrv/driver.py b/numba/numba/cuda/simulator/cudadrv/driver.py deleted file mode 100644 index fea20dcfa..000000000 --- a/numba/numba/cuda/simulator/cudadrv/driver.py +++ /dev/null @@ -1,15 +0,0 @@ -''' -Most of the driver API is unsupported in the simulator, but some stubs are -provided to allow tests to import correctly. -''' - -host_to_device = None -device_to_host = None - -class FakeDriver(object): - def get_device_count(self): - return 1 - -driver = FakeDriver() - -Linker = None diff --git a/numba/numba/cuda/simulator/cudadrv/drvapi.py b/numba/numba/cuda/simulator/cudadrv/drvapi.py deleted file mode 100644 index 44c697f37..000000000 --- a/numba/numba/cuda/simulator/cudadrv/drvapi.py +++ /dev/null @@ -1,4 +0,0 @@ -''' -drvapi is not implemented in the simulator, but this module exists to allow -tests to import correctly. -''' diff --git a/numba/numba/cuda/simulator/cudadrv/nvvm.py b/numba/numba/cuda/simulator/cudadrv/nvvm.py deleted file mode 100644 index ce1b3897b..000000000 --- a/numba/numba/cuda/simulator/cudadrv/nvvm.py +++ /dev/null @@ -1,23 +0,0 @@ -''' -NVVM is not supported in the simulator, but stubs are provided to allow tests -to import correctly. 
-''' - -class NvvmSupportError(ImportError): - pass - -class NVVM(object): - def __init__(self): - raise NvvmSupportError('NVVM not supported in the simulator') - -CompilationUnit = None -llvm_to_ptx = None -set_cuda_kernel = None -fix_data_layout = None -get_arch_option = None -SUPPORTED_CC = None -LibDevice = None -NvvmError = None - -def is_available(): - return False diff --git a/numba/numba/cuda/simulator/kernel.py b/numba/numba/cuda/simulator/kernel.py deleted file mode 100644 index c0738a5f8..000000000 --- a/numba/numba/cuda/simulator/kernel.py +++ /dev/null @@ -1,265 +0,0 @@ -from __future__ import print_function - -from contextlib import contextmanager -import sys -import threading - -import numpy as np - -from numba import six -from numba.six import reraise -from .cudadrv.devicearray import to_device, auto_device -from .kernelapi import Dim3, FakeCUDAModule, swapped_cuda_module -from ..errors import normalize_kernel_dimensions -from ..args import wrap_arg, ArgHint - - -""" -Global variable to keep track of the current "kernel context", i.e the -FakeCUDAModule. We only support one kernel launch at a time. -No support for concurrent kernel launch. -""" -_kernel_context = None - - -@contextmanager -def _push_kernel_context(mod): - """ - Push the current kernel context. - """ - global _kernel_context - assert _kernel_context is None, "conrrent simulated kernel not supported" - _kernel_context = mod - try: - yield - finally: - _kernel_context = None - - -def _get_kernel_context(): - """ - Get the current kernel context. This is usually done by a device function. - """ - return _kernel_context - - -class FakeCUDAKernel(object): - ''' - Wraps a @cuda.jit-ed function. - ''' - - def __init__(self, fn, device, fastmath=False, extensions=[]): - self.fn = fn - self._device = device - self._fastmath = fastmath - self.extensions = list(extensions) # defensive copy - # Initial configuration: 1 block, 1 thread, stream 0, no dynamic shared - # memory. 
- self[1, 1, 0, 0] - - def __call__(self, *args): - if self._device: - with swapped_cuda_module(self.fn, _get_kernel_context()): - return self.fn(*args) - - fake_cuda_module = FakeCUDAModule(self.grid_dim, self.block_dim, - self.dynshared_size) - with _push_kernel_context(fake_cuda_module): - # fake_args substitutes all numpy arrays for FakeCUDAArrays - # because they implement some semantics differently - retr = [] - - def fake_arg(arg): - # map the arguments using any extension you've registered - _, arg = six.moves.reduce( - lambda ty_val, extension: extension.prepare_args( - *ty_val, - stream=0, - retr=retr), - self.extensions, - (None, arg) - ) - - if isinstance(arg, np.ndarray) and arg.ndim > 0: - return wrap_arg(arg).to_device(retr) - elif isinstance(arg, ArgHint): - return arg.to_device(retr) - else: - return arg - - fake_args = [fake_arg(arg) for arg in args] - with swapped_cuda_module(self.fn, fake_cuda_module): - # Execute one block at a time - for grid_point in np.ndindex(*self.grid_dim): - bm = BlockManager(self.fn, self.grid_dim, self.block_dim) - bm.run(grid_point, *fake_args) - - for wb in retr: - wb() - - - def __getitem__(self, configuration): - self.grid_dim, self.block_dim = \ - normalize_kernel_dimensions(*configuration[:2]) - - if len(configuration) == 4: - self.dynshared_size = configuration[3] - - return self - - def bind(self): - pass - - def forall(self, ntasks, tpb=0, stream=0, sharedmem=0): - return self[ntasks, 1, stream, sharedmem] - - @property - def ptx(self): - ''' - Required in order to proceed through some tests, but serves no functional - purpose. - ''' - res = '.const' - res += '\n.local' - if self._fastmath: - res += '\ndiv.full.ftz.f32' - return res - - - - -# Thread emulation - - -class BlockThread(threading.Thread): - ''' - Manages the execution of a function for a single CUDA thread. 
- ''' - def __init__(self, f, manager, blockIdx, threadIdx): - super(BlockThread, self).__init__(target=f) - self.syncthreads_event = threading.Event() - self.syncthreads_blocked = False - self._manager = manager - self.blockIdx = Dim3(*blockIdx) - self.threadIdx = Dim3(*threadIdx) - self.exception = None - self.daemon = True - self.abort = False - blockDim = Dim3(*self._manager._block_dim) - self.thread_id = self.threadIdx.x + blockDim.x * (self.threadIdx.y + blockDim.y * self.threadIdx.z) - - def run(self): - try: - super(BlockThread, self).run() - except Exception as e: - tid = 'tid=%s' % list(self.threadIdx) - ctaid = 'ctaid=%s' % list(self.blockIdx) - if str(e) == '': - msg = '%s %s' % (tid, ctaid) - else: - msg = '%s %s: %s' % (tid, ctaid, e) - tb = sys.exc_info()[2] - self.exception = (type(e), type(e)(msg), tb) - - def syncthreads(self): - - if self.abort: - raise RuntimeError("abort flag set on syncthreads call") - - self.syncthreads_blocked = True - self.syncthreads_event.wait() - self.syncthreads_event.clear() - - if self.abort: - raise RuntimeError("abort flag set on syncthreads clear") - - def syncthreads_count(self, value): - self._manager.block_state[self.threadIdx.x, self.threadIdx.y, self.threadIdx.z] = value - self.syncthreads() - count = np.count_nonzero(self._manager.block_state) - self.syncthreads() - return count - - def syncthreads_and(self, value): - self._manager.block_state[self.threadIdx.x, self.threadIdx.y, self.threadIdx.z] = value - self.syncthreads() - test = np.all(self._manager.block_state) - self.syncthreads() - return 1 if test else 0 - - def syncthreads_or(self, value): - self._manager.block_state[self.threadIdx.x, self.threadIdx.y, self.threadIdx.z] = value - self.syncthreads() - test = np.any(self._manager.block_state) - self.syncthreads() - return 1 if test else 0 - - def __str__(self): - return 'Thread <<<%s, %s>>>' % (self.blockIdx, self.threadIdx) - - -class BlockManager(object): - ''' - Manages the execution of a thread 
block. - - When run() is called, all threads are started. Each thread executes until it - hits syncthreads(), at which point it sets its own syncthreads_blocked to - True so that the BlockManager knows it is blocked. It then waits on its - syncthreads_event. - - The BlockManager polls threads to determine if they are blocked in - syncthreads(). If it finds a blocked thread, it adds it to the set of - blocked threads. When all threads are blocked, it unblocks all the threads. - The thread are unblocked by setting their syncthreads_blocked back to False - and setting their syncthreads_event. - - The polling continues until no threads are alive, when execution is - complete. - ''' - def __init__(self, f, grid_dim, block_dim): - self._grid_dim = grid_dim - self._block_dim = block_dim - self._f = f - self.block_state = np.zeros(block_dim, dtype=np.bool) - - def run(self, grid_point, *args): - # Create all threads - threads = set() - livethreads = set() - blockedthreads = set() - for block_point in np.ndindex(*self._block_dim): - def target(): - self._f(*args) - t = BlockThread(target, self, grid_point, block_point) - t.start() - threads.add(t) - livethreads.add(t) - - # Potential optimisations: - # 1. Continue the while loop immediately after finding a blocked thread - # 2. Don't poll already-blocked threads - while livethreads: - for t in livethreads: - if t.syncthreads_blocked: - blockedthreads.add(t) - elif t.exception: - - # Abort all other simulator threads on exception, - # do *not* join immediately to facilitate debugging. 
- for t_other in threads: - t_other.abort = True - t_other.syncthreads_blocked = False - t_other.syncthreads_event.set() - - reraise(*(t.exception)) - if livethreads == blockedthreads: - for t in blockedthreads: - t.syncthreads_blocked = False - t.syncthreads_event.set() - blockedthreads = set() - livethreads = set([ t for t in livethreads if t.is_alive() ]) - # Final check for exceptions in case any were set prior to thread - # finishing, before we could check it - for t in threads: - if t.exception: - reraise(*(t.exception)) diff --git a/numba/numba/cuda/simulator/kernelapi.py b/numba/numba/cuda/simulator/kernelapi.py deleted file mode 100644 index eeb6d3ca3..000000000 --- a/numba/numba/cuda/simulator/kernelapi.py +++ /dev/null @@ -1,280 +0,0 @@ -''' -Implements the cuda module as called from within an executing kernel -(@cuda.jit-decorated function). -''' - -from contextlib import contextmanager -import sys -import threading -import traceback - -import numpy as np - -from numba import numpy_support - - -class Dim3(object): - ''' - Used to implement thread/block indices/dimensions - ''' - def __init__(self, x, y, z): - self.x = x - self.y = y - self.z = z - - def __str__(self): - return '(%s, %s, %s)' % (self.x, self.y, self.z) - - def __repr__(self): - return 'Dim3(%s, %s, %s)' % (self.x, self.y, self.z) - - def __iter__(self): - yield self.x - yield self.y - yield self.z - - -class FakeCUDALocal(object): - ''' - CUDA Local arrays - ''' - def array(self, shape, dtype): - dtype = numpy_support.as_dtype(dtype) - return np.empty(shape, dtype) - - -class FakeCUDAConst(object): - ''' - CUDA Const arrays - ''' - def array_like(self, ary): - return ary - - -class FakeCUDAShared(object): - ''' - CUDA Shared arrays. - - Limitations: assumes that only one call to cuda.shared.array is on a line, - and that that line is only executed once per thread. i.e.:: - - a = cuda.shared.array(...); b = cuda.shared.array(...) 
- - will erroneously alias a and b, and:: - - for i in range(10): - sharedarrs[i] = cuda.shared.array(...) - - will alias all arrays created at that point (though it is not certain that - this would be supported by Numba anyway). - ''' - - def __init__(self, dynshared_size): - self._allocations = {} - self._dynshared_size = dynshared_size - self._dynshared = np.zeros(dynshared_size, dtype=np.byte) - - def array(self, shape, dtype): - dtype = numpy_support.as_dtype(dtype) - # Dynamic shared memory is requested with size 0 - this all shares the - # same underlying memory - if shape == 0: - # Count must be the maximum number of whole elements that fit in the - # buffer (Numpy complains if the buffer is not a multiple of the - # element size) - count = self._dynshared_size // dtype.itemsize - return np.frombuffer(self._dynshared.data, dtype=dtype, count=count) - - # Otherwise, identify allocations by source file and line number - # We pass the reference frame explicitly to work around - # http://bugs.python.org/issue25108 - stack = traceback.extract_stack(sys._getframe()) - caller = stack[-2][0:2] - res = self._allocations.get(caller) - if res is None: - res = np.empty(shape, dtype) - self._allocations[caller] = res - return res - -addlock = threading.Lock() -maxlock = threading.Lock() -minlock = threading.Lock() -caslock = threading.Lock() - - -class FakeCUDAAtomic(object): - def add(self, array, index, val): - with addlock: - array[index] += val - - def max(self, array, index, val): - with maxlock: - # CUDA Python's semantics for max differ from Numpy's Python's, - # so we have special handling here (CUDA Python treats NaN as - # missing data). - if np.isnan(array[index]): - array[index] = val - elif np.isnan(val): - return - array[index] = max(array[index], val) - - def min(self, array, index, val): - with minlock: - # CUDA Python's semantics for min differ from Numpy's Python's, - # so we have special handling here (CUDA Python treats NaN as - # missing data). 
- if np.isnan(array[index]): - array[index] = val - elif np.isnan(val): - return - array[index] = min(array[index], val) - - def compare_and_swap(self, array, old, val): - with caslock: - index = (0,) * array.ndim - loaded = array[index] - if loaded == old: - array[index] = val - return loaded - - -class FakeCUDAModule(object): - ''' - An instance of this class will be injected into the __globals__ for an - executing function in order to implement calls to cuda.*. This will fail to - work correctly if the user code does:: - - from numba import cuda as something_else - - In other words, the CUDA module must be called cuda. - ''' - - def __init__(self, grid_dim, block_dim, dynshared_size): - self.gridDim = Dim3(*grid_dim) - self.blockDim = Dim3(*block_dim) - self._local = FakeCUDALocal() - self._shared = FakeCUDAShared(dynshared_size) - self._const = FakeCUDAConst() - self._atomic = FakeCUDAAtomic() - - @property - def local(self): - return self._local - - @property - def shared(self): - return self._shared - - @property - def const(self): - return self._const - - @property - def atomic(self): - return self._atomic - - @property - def threadIdx(self): - return threading.current_thread().threadIdx - - @property - def blockIdx(self): - return threading.current_thread().blockIdx - - @property - def warpsize(self): - return 32 - - @property - def laneid(self): - return threading.current_thread().thread_id % 32 - - def syncthreads(self): - threading.current_thread().syncthreads() - - def threadfence(self): - # No-op - pass - - def threadfence_block(self): - # No-op - pass - - def threadfence_system(self): - # No-op - pass - - def syncthreads_count(self, val): - return threading.current_thread().syncthreads_count(val) - - def syncthreads_and(self, val): - return threading.current_thread().syncthreads_and(val) - - def syncthreads_or(self, val): - return threading.current_thread().syncthreads_or(val) - - def popc(self, val): - return bin(val).count("1") - - def brev(self, 
val): - return int('{:032b}'.format(val)[::-1], 2) - - def clz(self, val): - s = '{:032b}'.format(val) - return len(s) - len(s.lstrip('0')) - - def ffs(self, val): - s = '{:032b}'.format(val) - return len(s) - len(s.rstrip('0')) - - def selp(self, a, b, c): - return b if a else c - - def grid(self, n): - bdim = self.blockDim - bid = self.blockIdx - tid = self.threadIdx - x = bid.x * bdim.x + tid.x - if n == 1: - return x - y = bid.y * bdim.y + tid.y - if n == 2: - return (x, y) - z = bid.z * bdim.z + tid.z - if n == 3: - return (x, y, z) - - raise RuntimeError("Global ID has 1-3 dimensions. %d requested" % n) - - def gridsize(self, n): - bdim = self.blockDim - gdim = self.gridDim - x = bdim.x * gdim.x - if n == 1: - return x - y = bdim.y * gdim.y - if n == 2: - return (x, y) - z = bdim.z * gdim.z - if n == 3: - return (x, y, z) - - raise RuntimeError("Global grid has 1-3 dimensions. %d requested" % n) - - -@contextmanager -def swapped_cuda_module(fn, fake_cuda_module): - from numba import cuda - - fn_globs = fn.__globals__ - # get all globals that is the "cuda" module - orig = dict((k, v) for k, v in fn_globs.items() if v is cuda) - # build replacement dict - repl = dict((k, fake_cuda_module) for k, v in orig.items()) - # replace - fn_globs.update(repl) - try: - yield - finally: - # revert - fn_globs.update(orig) diff --git a/numba/numba/cuda/simulator/reduction.py b/numba/numba/cuda/simulator/reduction.py deleted file mode 100644 index d6189661a..000000000 --- a/numba/numba/cuda/simulator/reduction.py +++ /dev/null @@ -1,13 +0,0 @@ -from numba.six.moves import reduce as pyreduce - -def Reduce(func): - def reduce_wrapper(seq, res=None, init=0): - r = pyreduce(func, seq, init) - if res is not None: - res[0] = r - return None - else: - return r - return reduce_wrapper - -reduce = Reduce diff --git a/numba/numba/cuda/simulator_init.py b/numba/numba/cuda/simulator_init.py deleted file mode 100644 index 52d327f45..000000000 --- a/numba/numba/cuda/simulator_init.py +++ 
/dev/null @@ -1,19 +0,0 @@ -from __future__ import absolute_import -from .simulator import * - - -def is_available(): - """Returns a boolean to indicate the availability of a CUDA GPU. - """ - # Simulator is always available - return True - - -def cuda_error(): - """Returns None or an exception if the CUDA driver fails to initialize. - """ - # Simulator never fails to initialize - return None - - - diff --git a/numba/numba/cuda/stubs.py b/numba/numba/cuda/stubs.py deleted file mode 100644 index 2df3e56fa..000000000 --- a/numba/numba/cuda/stubs.py +++ /dev/null @@ -1,472 +0,0 @@ -""" -This scripts specifies all PTX special objects. -""" -from __future__ import print_function, absolute_import, division -import operator -import numpy -import llvmlite.llvmpy.core as lc -from numba import types, ir, typing, macro -from .cudadrv import nvvm - - -class Stub(object): - '''A stub object to represent special objects which is meaningless - outside the context of CUDA-python. - ''' - _description_ = '' - __slots__ = () # don't allocate __dict__ - - def __new__(cls): - raise NotImplementedError("%s is not instantiable" % cls) - - def __repr__(self): - return self._description_ - -#------------------------------------------------------------------------------- -# SREG - -SREG_SIGNATURE = typing.signature(types.int32) - - -class threadIdx(Stub): - ''' - The thread indices in the current thread block, accessed through the - attributes ``x``, ``y``, and ``z``. Each index is an integer spanning the - range from 0 inclusive to the corresponding value of the attribute in - :attr:`numba.cuda.blockDim` exclusive. - ''' - _description_ = '' - - x = macro.Macro('tid.x', SREG_SIGNATURE) - y = macro.Macro('tid.y', SREG_SIGNATURE) - z = macro.Macro('tid.z', SREG_SIGNATURE) - - -class blockIdx(Stub): - ''' - The block indices in the grid of thread blocks, accessed through the - attributes ``x``, ``y``, and ``z``. 
Each index is an integer spanning the - range from 0 inclusive to the corresponding value of the attribute in - :attr:`numba.cuda.gridDim` exclusive. - ''' - _description_ = '' - - x = macro.Macro('ctaid.x', SREG_SIGNATURE) - y = macro.Macro('ctaid.y', SREG_SIGNATURE) - z = macro.Macro('ctaid.z', SREG_SIGNATURE) - - -class blockDim(Stub): - ''' - The shape of a block of threads, as declared when instantiating the - kernel. This value is the same for all threads in a given kernel, even - if they belong to different blocks (i.e. each block is "full"). - ''' - x = macro.Macro('ntid.x', SREG_SIGNATURE) - y = macro.Macro('ntid.y', SREG_SIGNATURE) - z = macro.Macro('ntid.z', SREG_SIGNATURE) - - -class gridDim(Stub): - ''' - The shape of the grid of blocks, accressed through the attributes ``x``, - ``y``, and ``z``. - ''' - _description_ = '' - x = macro.Macro('nctaid.x', SREG_SIGNATURE) - y = macro.Macro('nctaid.y', SREG_SIGNATURE) - z = macro.Macro('nctaid.z', SREG_SIGNATURE) - - -warpsize = macro.Macro('warpsize', SREG_SIGNATURE) -laneid = macro.Macro('laneid', SREG_SIGNATURE) - -#------------------------------------------------------------------------------- -# Grid Macro - -def _ptx_grid1d(): pass - - -def _ptx_grid2d(): pass - - -def grid_expand(ndim): - """grid(ndim) - - Return the absolute position of the current thread in the entire - grid of blocks. *ndim* should correspond to the number of dimensions - declared when instantiating the kernel. If *ndim* is 1, a single integer - is returned. If *ndim* is 2 or 3, a tuple of the given number of - integers is returned. - - Computation of the first integer is as follows:: - - cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x - - and is similar for the other two indices, but using the ``y`` and ``z`` - attributes. 
- """ - if ndim == 1: - fname = "ptx.grid.1d" - restype = types.int32 - elif ndim == 2: - fname = "ptx.grid.2d" - restype = types.UniTuple(types.int32, 2) - elif ndim == 3: - fname = "ptx.grid.3d" - restype = types.UniTuple(types.int32, 3) - else: - raise ValueError('argument can only be 1, 2, 3') - - return ir.Intrinsic(fname, typing.signature(restype, types.intp), - args=[ndim]) - -grid = macro.Macro('ptx.grid', grid_expand, callable=True) - -#------------------------------------------------------------------------------- -# Gridsize Macro - -def gridsize_expand(ndim): - """ - Return the absolute size (or shape) in threads of the entire grid of - blocks. *ndim* should correspond to the number of dimensions declared when - instantiating the kernel. - - Computation of the first integer is as follows:: - - cuda.blockDim.x * cuda.gridDim.x - - and is similar for the other two indices, but using the ``y`` and ``z`` - attributes. - """ - if ndim == 1: - fname = "ptx.gridsize.1d" - restype = types.int32 - elif ndim == 2: - fname = "ptx.gridsize.2d" - restype = types.UniTuple(types.int32, 2) - elif ndim == 3: - fname = "ptx.gridsize.3d" - restype = types.UniTuple(types.int32, 3) - else: - raise ValueError('argument can only be 1, 2 or 3') - - return ir.Intrinsic(fname, typing.signature(restype, types.intp), - args=[ndim]) - - -gridsize = macro.Macro('ptx.gridsize', gridsize_expand, callable=True) - -#------------------------------------------------------------------------------- -# syncthreads - -class syncthreads(Stub): - ''' - Synchronize all threads in the same thread block. This function implements - the same pattern as barriers in traditional multi-threaded programming: this - function waits until all threads in the block call it, at which point it - returns control to all its callers. 
- ''' - _description_ = '' - - -class syncthreads_count(Stub): - ''' - syncthreads_count(predictate) - - An extension to numba.cuda.syncthreads where the return value is a count - of the threads where predicate is true. - ''' - _description_ = '' - - -class syncthreads_and(Stub): - ''' - syncthreads_and(predictate) - - An extension to numba.cuda.syncthreads where 1 is returned if predicate is - true for all threads or 0 otherwise. - ''' - _description_ = '' - - -class syncthreads_or(Stub): - ''' - syncthreads_or(predictate) - - An extension to numba.cuda.syncthreads where 1 is returned if predicate is - true for any thread or 0 otherwise. - ''' - _description_ = '' - - -# ------------------------------------------------------------------------------- -# warp level operations - -class syncwarp(Stub): - ''' - syncwarp(mask) - - Synchronizes a masked subset of threads in a warp. - ''' - _description_ = '' - - -class shfl_sync_intrinsic(Stub): - ''' - shfl_sync_intrinsic(mask, mode, value, mode_offset, clamp) - - Nvvm intrinsic for shuffling data across a warp - docs.nvidia.com/cuda/nvvm-ir-spec/index.html#nvvm-intrin-warp-level-datamove - ''' - _description_ = '' - - -class vote_sync_intrinsic(Stub): - ''' - vote_sync_intrinsic(mask, mode, predictate) - - Nvvm intrinsic for performing a reduce and broadcast across a warp - docs.nvidia.com/cuda/nvvm-ir-spec/index.html#nvvm-intrin-warp-level-vote - ''' - _description_ = '' - - -class match_any_sync(Stub): - ''' - match_any_sync(mask, value) - - Nvvm intrinsic for performing a compare and broadcast across a warp. - Returns a mask of threads that have same value as the given value from within the masked warp. - ''' - _description_ = '' - - -class match_all_sync(Stub): - ''' - match_all_sync(mask, value) - - Nvvm intrinsic for performing a compare and broadcast across a warp. 
- Returns a tuple of (mask, pred), where mask is a mask of threads that have - same value as the given value from within the masked warp, if they - all have the same value, otherwise it is 0. Pred is a boolean of whether - or not all threads in the mask warp have the same warp. - ''' - _description_ = '' - - -# ------------------------------------------------------------------------------- -# memory fences - -class threadfence_block(Stub): - ''' - A memory fence at thread block level - ''' - _description_ = '' - - -class threadfence_system(Stub): - ''' - A memory fence at system level: across devices - ''' - _description_ = '' - - -class threadfence(Stub): - ''' - A memory fence at device level - ''' - _description_ = '' - - -# ------------------------------------------------------------------------------- -# shared - -def _legalize_shape(shape): - if isinstance(shape, tuple): - return shape - elif isinstance(shape, int): - return (shape,) - else: - raise TypeError("invalid type for shape; got {0}".format(type(shape))) - - -def shared_array(shape, dtype): - shape = _legalize_shape(shape) - ndim = len(shape) - fname = "ptx.smem.alloc" - restype = types.Array(dtype, ndim, 'C') - sig = typing.signature(restype, types.UniTuple(types.intp, ndim), types.Any) - return ir.Intrinsic(fname, sig, args=(shape, dtype)) - - -class shared(Stub): - """ - Shared memory namespace. - """ - _description_ = '' - - array = macro.Macro('shared.array', shared_array, callable=True, - argnames=['shape', 'dtype']) - ''' - Allocate a shared array of the given *shape* and *type*. *shape* is either - an integer or a tuple of integers representing the array's dimensions. - *type* is a :ref:`Numba type ` of the elements needing to be - stored in the array. - - The returned array-like object can be read and written to like any normal - device array (e.g. through indexing). 
- ''' - - -#------------------------------------------------------------------------------- -# local array - - -def local_array(shape, dtype): - shape = _legalize_shape(shape) - ndim = len(shape) - fname = "ptx.lmem.alloc" - restype = types.Array(dtype, ndim, 'C') - sig = typing.signature(restype, types.UniTuple(types.intp, ndim), types.Any) - return ir.Intrinsic(fname, sig, args=(shape, dtype)) - - -class local(Stub): - ''' - Local memory namespace. - ''' - _description_ = '' - - array = macro.Macro('local.array', local_array, callable=True, - argnames=['shape', 'dtype']) - ''' - Allocate a local array of the given *shape* and *type*. The array is private - to the current thread, and resides in global memory. An array-like object is - returned which can be read and written to like any standard array (e.g. - through indexing). - ''' - -#------------------------------------------------------------------------------- -# const array - - -def const_array_like(ndarray): - fname = "ptx.cmem.arylike" - - from .descriptor import CUDATargetDesc - aryty = CUDATargetDesc.typingctx.resolve_argument_type(ndarray) - - sig = typing.signature(aryty, aryty) - return ir.Intrinsic(fname, sig, args=[ndarray]) - - -class const(Stub): - ''' - Constant memory namespace. - ''' - _description_ = '' - - array_like = macro.Macro('const.array_like', const_array_like, - callable=True, argnames=['ary']) - ''' - Create a const array from *ary*. The resulting const array will have the - same shape, type, and values as *ary*. - ''' - -#------------------------------------------------------------------------------- -# bit manipulation - -class popc(Stub): - """ - popc(val) - - Returns the number of set bits in the given value. - """ - - -class brev(Stub): - """ - brev(val) - - Reverse the bitpattern of an integer value; for example 0b10110110 - becomes 0b01101101. - """ - - -class clz(Stub): - """ - clz(val) - - Counts the number of leading zeros in a value. 
- """ - - -class ffs(Stub): - """ - ffs(val) - - Find the position of the least significant bit set to 1 in an integer. - """ - -#------------------------------------------------------------------------------- -# comparison and selection instructions - -class selp(Stub): - """ - selp(a, b, c) - - Select between source operands, based on the value of the predicate source operand. - """ - -#------------------------------------------------------------------------------- -# atomic - -class atomic(Stub): - """Namespace for atomic operations - """ - _description_ = '' - - class add(Stub): - """add(ary, idx, val) - - Perform atomic ary[idx] += val. Supported on int32, float32, and - float64 operands only. - - Returns the old value at the index location as if it is loaded - atomically. - """ - - class max(Stub): - """max(ary, idx, val) - - Perform atomic ary[idx] = max(ary[idx], val). NaN is treated as a - missing value, so max(NaN, n) == max(n, NaN) == n. Note that this - differs from Python and Numpy behaviour, where max(a, b) is always - a when either a or b is a NaN. - - Supported on int32, int64, uint32, uint64, float32, float64 operands only. - - Returns the old value at the index location as if it is loaded - atomically. - """ - - class min(Stub): - """min(ary, idx, val) - - Perform atomic ary[idx] = min(ary[idx], val). NaN is treated as a - missing value, so min(NaN, n) == min(n, NaN) == n. Note that this - differs from Python and Numpy behaviour, where min(a, b) is always - a when either a or b is a NaN. - - Supported on int32, int64, uint32, uint64, float32, float64 operands only. - """ - - class compare_and_swap(Stub): - """compare_and_swap(ary, old, val) - - Conditionally assign ``val`` to the first element of an 1D array ``ary`` - if the current value matches ``old``. - - Returns the current value as if it is loaded atomically. 
- """ diff --git a/numba/numba/cuda/target.py b/numba/numba/cuda/target.py deleted file mode 100644 index 54b4b4d40..000000000 --- a/numba/numba/cuda/target.py +++ /dev/null @@ -1,255 +0,0 @@ -from __future__ import print_function, absolute_import -import re -from llvmlite.llvmpy.core import (Type, Builder, LINKAGE_INTERNAL, - Constant, ICMP_EQ) -import llvmlite.llvmpy.core as lc -import llvmlite.binding as ll - -from numba import typing, types, cgutils, debuginfo, dispatcher -from numba.utils import cached_property -from numba.targets.base import BaseContext -from numba.targets.callconv import MinimalCallConv -from numba.targets import cmathimpl, operatorimpl -from numba.typing import cmathdecl, operatordecl - -from numba import itanium_mangler -from .cudadrv import nvvm -from . import codegen, nvvmutils -from .decorators import jitdevice - - -# ----------------------------------------------------------------------------- -# Typing - - -class CUDATypingContext(typing.BaseContext): - def load_additional_registries(self): - from . import cudadecl, cudamath - - self.install_registry(cudadecl.registry) - self.install_registry(cudamath.registry) - self.install_registry(cmathdecl.registry) - self.install_registry(operatordecl.registry) - - def resolve_value_type(self, val): - # treat dispatcher object as another device function - if isinstance(val, dispatcher.Dispatcher): - try: - # use cached device function - val = val.__cudajitdevice - except AttributeError: - if not val._can_compile: - raise ValueError('using cpu function on device ' - 'but its compilation is disabled') - jd = jitdevice(val, debug=val.targetoptions.get('debug')) - # cache the device function for future use and to avoid - # duplicated copy of the same function. 
- val.__cudajitdevice = jd - val = jd - - # continue with parent logic - return super(CUDATypingContext, self).resolve_value_type(val) - -# ----------------------------------------------------------------------------- -# Implementation - -VALID_CHARS = re.compile(r'[^a-z0-9]', re.I) - - -class CUDATargetContext(BaseContext): - implement_powi_as_math_call = True - strict_alignment = True - DIBuilder = debuginfo.NvvmDIBuilder - - # Overrides - def create_module(self, name): - return self._internal_codegen._create_empty_module(name) - - def init(self): - self._internal_codegen = codegen.JITCUDACodegen("numba.cuda.jit") - self._target_data = ll.create_target_data(nvvm.default_data_layout) - - def load_additional_registries(self): - from . import cudaimpl, printimpl, libdevice - self.install_registry(cudaimpl.registry) - self.install_registry(printimpl.registry) - self.install_registry(libdevice.registry) - self.install_registry(cmathimpl.registry) - self.install_registry(operatorimpl.registry) - - def codegen(self): - return self._internal_codegen - - @property - def target_data(self): - return self._target_data - - @cached_property - def call_conv(self): - return CUDACallConv(self) - - def mangler(self, name, argtypes): - return itanium_mangler.mangle(name, argtypes) - - def prepare_cuda_kernel(self, codelib, fname, argtypes, debug): - """ - Adapt a code library ``codelib`` with the numba compiled CUDA kernel - with name ``fname`` and arguments ``argtypes`` for NVVM. - A new library is created with a wrapper function that can be used as - the kernel entry point for the given kernel. - - Returns the new code library and the wrapper function. 
- """ - library = self.codegen().create_library('') - library.add_linking_library(codelib) - wrapper = self.generate_kernel_wrapper(library, fname, argtypes, - debug=debug) - nvvm.fix_data_layout(library._final_module) - return library, wrapper - - def generate_kernel_wrapper(self, library, fname, argtypes, debug): - """ - Generate the kernel wrapper in the given ``library``. - The function being wrapped have the name ``fname`` and argument types - ``argtypes``. The wrapper function is returned. - """ - arginfo = self.get_arg_packer(argtypes) - argtys = list(arginfo.argument_types) - wrapfnty = Type.function(Type.void(), argtys) - wrapper_module = self.create_module("cuda.kernel.wrapper") - fnty = Type.function(Type.int(), - [self.call_conv.get_return_type(types.pyobject)] + argtys) - func = wrapper_module.add_function(fnty, name=fname) - - prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy') - wrapfn = wrapper_module.add_function(wrapfnty, name=prefixed) - builder = Builder(wrapfn.append_basic_block('')) - - # Define error handling variables - def define_error_gv(postfix): - gv = wrapper_module.add_global_variable(Type.int(), - name=wrapfn.name + postfix) - gv.initializer = Constant.null(gv.type.pointee) - return gv - - gv_exc = define_error_gv("__errcode__") - gv_tid = [] - gv_ctaid = [] - for i in 'xyz': - gv_tid.append(define_error_gv("__tid%s__" % i)) - gv_ctaid.append(define_error_gv("__ctaid%s__" % i)) - - callargs = arginfo.from_arguments(builder, wrapfn.args) - status, _ = self.call_conv.call_function( - builder, func, types.void, argtypes, callargs) - - - if debug: - # Check error status - with cgutils.if_likely(builder, status.is_ok): - builder.ret_void() - - with builder.if_then(builder.not_(status.is_python_exc)): - # User exception raised - old = Constant.null(gv_exc.type.pointee) - - # Use atomic cmpxchg to prevent rewriting the error status - # Only the first error is recorded - - casfnty = lc.Type.function(old.type, [gv_exc.type, 
old.type, - old.type]) - - casfn = wrapper_module.add_function(casfnty, - name="___numba_cas_hack") - xchg = builder.call(casfn, [gv_exc, old, status.code]) - changed = builder.icmp(ICMP_EQ, xchg, old) - - # If the xchange is successful, save the thread ID. - sreg = nvvmutils.SRegBuilder(builder) - with builder.if_then(changed): - for dim, ptr, in zip("xyz", gv_tid): - val = sreg.tid(dim) - builder.store(val, ptr) - - for dim, ptr, in zip("xyz", gv_ctaid): - val = sreg.ctaid(dim) - builder.store(val, ptr) - - builder.ret_void() - - nvvm.set_cuda_kernel(wrapfn) - library.add_ir_module(wrapper_module) - library.finalize() - wrapfn = library.get_function(wrapfn.name) - return wrapfn - - def make_constant_array(self, builder, typ, ary): - """ - Return dummy value. - - XXX: We should be able to move cuda.const.array_like into here. - """ - - a = self.make_array(typ)(self, builder) - return a._getvalue() - - def insert_const_string(self, mod, string): - """ - Unlike the parent version. This returns a a pointer in the constant - addrspace. - """ - text = Constant.stringz(string) - name = '$'.join(["__conststring__", - itanium_mangler.mangle_identifier(string)]) - # Try to reuse existing global - gv = mod.globals.get(name) - if gv is None: - # Not defined yet - gv = mod.add_global_variable(text.type, name=name, - addrspace=nvvm.ADDRSPACE_CONSTANT) - gv.linkage = LINKAGE_INTERNAL - gv.global_constant = True - gv.initializer = text - - # Cast to a i8* pointer - charty = gv.type.pointee.element - return Constant.bitcast(gv, - charty.as_pointer(nvvm.ADDRSPACE_CONSTANT)) - - def insert_string_const_addrspace(self, builder, string): - """ - Insert a constant string in the constant addresspace and return a - generic i8 pointer to the data. - - This function attempts to deduplicate. 
- """ - lmod = builder.module - gv = self.insert_const_string(lmod, string) - return self.insert_addrspace_conv(builder, gv, - nvvm.ADDRSPACE_CONSTANT) - - def insert_addrspace_conv(self, builder, ptr, addrspace): - """ - Perform addrspace conversion according to the NVVM spec - """ - lmod = builder.module - base_type = ptr.type.pointee - conv = nvvmutils.insert_addrspace_conv(lmod, base_type, addrspace) - return builder.call(conv, [ptr]) - - def optimize_function(self, func): - """Run O1 function passes - """ - pass - ## XXX skipped for now - # fpm = lp.FunctionPassManager.new(func.module) - # - # lp.PassManagerBuilder.new().populate(fpm) - # - # fpm.initialize() - # fpm.run(func) - # fpm.finalize() - - -class CUDACallConv(MinimalCallConv): - pass diff --git a/numba/numba/cuda/testing.py b/numba/numba/cuda/testing.py deleted file mode 100644 index 2e8a97817..000000000 --- a/numba/numba/cuda/testing.py +++ /dev/null @@ -1,81 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import contextlib -import io -import os -import sys - -from numba import config, unittest_support as unittest -from numba.tests.support import captured_stdout, SerialMixin - - -class CUDATestCase(SerialMixin, unittest.TestCase): - def tearDown(self): - from numba.cuda.cudadrv.devices import reset - - reset() - - -def skip_on_cudasim(reason): - return unittest.skipIf(config.ENABLE_CUDASIM, reason) - - -def skip_unless_cudasim(reason): - return unittest.skipUnless(config.ENABLE_CUDASIM, reason) - - -@contextlib.contextmanager -def redirect_fd(fd): - """ - Temporarily redirect *fd* to a pipe's write end and return a file object - wrapping the pipe's read end. 
- """ - save = os.dup(fd) - r, w = os.pipe() - try: - os.dup2(w, fd) - yield io.open(r, "r") - finally: - os.close(w) - os.dup2(save, fd) - os.close(save) - - -class CUDATextCapture(object): - - def __init__(self, stream): - self._stream = stream - - def getvalue(self): - return self._stream.read() - - -class PythonTextCapture(object): - - def __init__(self, stream): - self._stream = stream - - def getvalue(self): - return self._stream.getvalue() - - -@contextlib.contextmanager -def captured_cuda_stdout(): - """ - Return a minimal stream-like object capturing the text output of - either CUDA or the simulator. - """ - # Prevent accidentally capturing previously output text - sys.stdout.flush() - - if config.ENABLE_CUDASIM: - # The simulator calls print() on Python stdout - with captured_stdout() as stream: - yield PythonTextCapture(stream) - else: - # The CUDA runtime writes onto the system stdout - from numba import cuda - fd = sys.__stdout__.fileno() - with redirect_fd(fd) as stream: - yield CUDATextCapture(stream) - cuda.synchronize() diff --git a/numba/numba/cuda/tests/__init__.py b/numba/numba/cuda/tests/__init__.py deleted file mode 100644 index 84bb37aa6..000000000 --- a/numba/numba/cuda/tests/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -from numba.testing import unittest -from numba.testing import load_testsuite -from numba import cuda -from os.path import dirname, join - - -def load_tests(loader, tests, pattern): - - suite = unittest.TestSuite() - this_dir = dirname(__file__) - suite.addTests(load_testsuite(loader, join(this_dir, 'nocuda'))) - if cuda.is_available(): - suite.addTests(load_testsuite(loader, join(this_dir, 'cudasim'))) - gpus = cuda.list_devices() - if gpus and gpus[0].compute_capability >= (2, 0): - suite.addTests(load_testsuite(loader, join(this_dir, 'cudadrv'))) - suite.addTests(load_testsuite(loader, join(this_dir, 'cudapy'))) - else: - print("skipped CUDA tests because GPU CC < 2.0") - else: - print("skipped CUDA tests") - return suite 
diff --git a/numba/numba/cuda/tests/cudadrv/__init__.py b/numba/numba/cuda/tests/cudadrv/__init__.py deleted file mode 100644 index 0465337eb..000000000 --- a/numba/numba/cuda/tests/cudadrv/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from numba.testing import load_testsuite -import os - - -def load_tests(loader, tests, pattern): - return load_testsuite(loader, os.path.dirname(__file__)) diff --git a/numba/numba/cuda/tests/cudadrv/data/__init__.py b/numba/numba/cuda/tests/cudadrv/data/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/numba/numba/cuda/tests/cudadrv/data/jitlink.cu b/numba/numba/cuda/tests/cudadrv/data/jitlink.cu deleted file mode 100644 index 3ba4f7969..000000000 --- a/numba/numba/cuda/tests/cudadrv/data/jitlink.cu +++ /dev/null @@ -1,13 +0,0 @@ -// compile with: -// -// nvcc -gencode arch=compute_20,code=compute_20 -ptx jitlink.cu -o jitlink.ptx -// -// - -extern "C"{ - __device__ - int bar(int* out, int a) { - *out = a * 2; - return 0; - } -} diff --git a/numba/numba/cuda/tests/cudadrv/data/jitlink.ptx b/numba/numba/cuda/tests/cudadrv/data/jitlink.ptx deleted file mode 100644 index 8cc1aa6d6..000000000 --- a/numba/numba/cuda/tests/cudadrv/data/jitlink.ptx +++ /dev/null @@ -1,30 +0,0 @@ -// -// Generated by NVIDIA NVVM Compiler -// Compiler built on Tue Apr 1 03:34:02 2014 (1396341242) -// Cuda compilation tools, release 6.0, V6.0.1 -// - -.version 4.0 -.target sm_20 -.address_size 64 - - -.visible .func (.param .b32 func_retval0) bar( - .param .b64 bar_param_0, - .param .b32 bar_param_1 -) -{ - .reg .s32 %r<4>; - .reg .s64 %rd<2>; - - - ld.param.u64 %rd1, [bar_param_0]; - ld.param.u32 %r1, [bar_param_1]; - shl.b32 %r2, %r1, 1; - st.u32 [%rd1], %r2; - mov.u32 %r3, 0; - st.param.b32 [func_retval0+0], %r3; - ret; -} - - diff --git a/numba/numba/cuda/tests/cudadrv/test_array_attr.py b/numba/numba/cuda/tests/cudadrv/test_array_attr.py deleted file mode 100644 index 95c0e884a..000000000 --- 
a/numba/numba/cuda/tests/cudadrv/test_array_attr.py +++ /dev/null @@ -1,81 +0,0 @@ -import numpy as np -from numba import cuda -from numba.cuda.testing import unittest, SerialMixin - - -class TestArrayAttr(SerialMixin, unittest.TestCase): - def test_contigous_2d(self): - ary = np.arange(10) - cary = ary.reshape(2, 5) - fary = np.asfortranarray(cary) - - dcary = cuda.to_device(cary) - dfary = cuda.to_device(fary) - self.assertTrue(dcary.is_c_contiguous()) - self.assertTrue(not dfary.is_c_contiguous()) - self.assertTrue(not dcary.is_f_contiguous()) - self.assertTrue(dfary.is_f_contiguous()) - - def test_contigous_3d(self): - ary = np.arange(20) - cary = ary.reshape(2, 5, 2) - fary = np.asfortranarray(cary) - - dcary = cuda.to_device(cary) - dfary = cuda.to_device(fary) - self.assertTrue(dcary.is_c_contiguous()) - self.assertTrue(not dfary.is_c_contiguous()) - self.assertTrue(not dcary.is_f_contiguous()) - self.assertTrue(dfary.is_f_contiguous()) - - def test_contigous_4d(self): - ary = np.arange(60) - cary = ary.reshape(2, 5, 2, 3) - fary = np.asfortranarray(cary) - - dcary = cuda.to_device(cary) - dfary = cuda.to_device(fary) - self.assertTrue(dcary.is_c_contiguous()) - self.assertTrue(not dfary.is_c_contiguous()) - self.assertTrue(not dcary.is_f_contiguous()) - self.assertTrue(dfary.is_f_contiguous()) - - def test_ravel_c(self): - ary = np.arange(60) - reshaped = ary.reshape(2, 5, 2, 3) - expect = reshaped.ravel(order='C') - dary = cuda.to_device(reshaped) - dflat = dary.ravel() - flat = dflat.copy_to_host() - self.assertTrue(flat.ndim == 1) - self.assertTrue(np.all(expect == flat)) - - def test_ravel_f(self): - ary = np.arange(60) - reshaped = np.asfortranarray(ary.reshape(2, 5, 2, 3)) - expect = reshaped.ravel(order='F') - dary = cuda.to_device(reshaped) - dflat = dary.ravel(order='F') - flat = dflat.copy_to_host() - self.assertTrue(flat.ndim == 1) - self.assertTrue(np.all(expect == flat)) - - def test_reshape_c(self): - ary = np.arange(10) - expect = 
ary.reshape(2, 5) - dary = cuda.to_device(ary) - dary_reshaped = dary.reshape(2, 5) - got = dary_reshaped.copy_to_host() - self.assertTrue(np.all(expect == got)) - - def test_reshape_f(self): - ary = np.arange(10) - expect = ary.reshape(2, 5, order='F') - dary = cuda.to_device(ary) - dary_reshaped = dary.reshape(2, 5, order='F') - got = dary_reshaped.copy_to_host() - self.assertTrue(np.all(expect == got)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_context_stack.py b/numba/numba/cuda/tests/cudadrv/test_context_stack.py deleted file mode 100644 index 5b7dee008..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_context_stack.py +++ /dev/null @@ -1,42 +0,0 @@ -from __future__ import print_function - -import numbers - -from numba import cuda -from numba.cuda.testing import unittest, SerialMixin - - -class TestContextStack(SerialMixin, unittest.TestCase): - def setUp(self): - # Reset before testing - cuda.close() - - def test_gpus_current(self): - self.assertIs(cuda.gpus.current, None) - with cuda.gpus[0]: - self.assertEqual(cuda.gpus.current.id, 0) - - def test_gpus_len(self): - self.assertGreater(len(cuda.gpus), 0) - - def test_gpus_iter(self): - gpulist = list(cuda.gpus) - self.assertGreater(len(gpulist), 0) - - -class TestContextAPI(SerialMixin, unittest.TestCase): - - def test_context_memory(self): - mem = cuda.current_context().get_memory_info() - - self.assertIsInstance(mem.free, numbers.Number) - self.assertEquals(mem.free, mem[0]) - - self.assertIsInstance(mem.total, numbers.Number) - self.assertEquals(mem.total, mem[1]) - - self.assertLessEqual(mem.free, mem.total) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py b/numba/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py deleted file mode 100644 index d5fb364ca..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +++ /dev/null @@ -1,277 +0,0 @@ -from __future__ 
import print_function - -from itertools import product - -import numpy as np - -from numba import cuda -from numba.cuda.testing import unittest, SerialMixin - - -class CudaArrayIndexing(SerialMixin, unittest.TestCase): - def test_index_1d(self): - arr = np.arange(10) - darr = cuda.to_device(arr) - for i in range(arr.size): - self.assertEqual(arr[i], darr[i]) - - def test_index_2d(self): - arr = np.arange(3 * 4).reshape(3, 4) - darr = cuda.to_device(arr) - - for i in range(arr.shape[0]): - for j in range(arr.shape[1]): - self.assertEqual(arr[i, j], darr[i, j]) - - def test_index_3d(self): - arr = np.arange(3 * 4 * 5).reshape(3, 4, 5) - darr = cuda.to_device(arr) - - for i in range(arr.shape[0]): - for j in range(arr.shape[1]): - for k in range(arr.shape[2]): - self.assertEqual(arr[i, j, k], darr[i, j, k]) - - -class CudaArrayStridedSlice(SerialMixin, unittest.TestCase): - - def test_strided_index_1d(self): - arr = np.arange(10) - darr = cuda.to_device(arr) - for i in range(arr.size): - np.testing.assert_equal(arr[i::2], darr[i::2].copy_to_host()) - - def test_strided_index_2d(self): - arr = np.arange(6 * 7).reshape(6, 7) - darr = cuda.to_device(arr) - - for i in range(arr.shape[0]): - for j in range(arr.shape[1]): - np.testing.assert_equal(arr[i::2, j::2], - darr[i::2, j::2].copy_to_host()) - - def test_strided_index_3d(self): - arr = np.arange(6 * 7 * 8).reshape(6, 7, 8) - darr = cuda.to_device(arr) - - for i in range(arr.shape[0]): - for j in range(arr.shape[1]): - for k in range(arr.shape[2]): - np.testing.assert_equal(arr[i::2, j::2, k::2], - darr[i::2, j::2, k::2].copy_to_host()) - - -class CudaArraySlicing(SerialMixin, unittest.TestCase): - def test_prefix_1d(self): - arr = np.arange(5) - darr = cuda.to_device(arr) - for i in range(arr.size): - expect = arr[i:] - got = darr[i:].copy_to_host() - self.assertTrue(np.all(expect == got)) - - def test_prefix_2d(self): - arr = np.arange(3 ** 2).reshape(3, 3) - darr = cuda.to_device(arr) - for i in 
range(arr.shape[0]): - for j in range(arr.shape[1]): - expect = arr[i:, j:] - sliced = darr[i:, j:] - self.assertEqual(expect.shape, sliced.shape) - self.assertEqual(expect.strides, sliced.strides) - got = sliced.copy_to_host() - self.assertTrue(np.all(expect == got)) - - def test_select_3d_first_two_dim(self): - arr = np.arange(3 * 4 * 5).reshape(3, 4, 5) - darr = cuda.to_device(arr) - # Select first dimension - for i in range(arr.shape[0]): - expect = arr[i] - sliced = darr[i] - self.assertEqual(expect.shape, sliced.shape) - self.assertEqual(expect.strides, sliced.strides) - got = sliced.copy_to_host() - self.assertTrue(np.all(expect == got)) - # Select second dimension - for i in range(arr.shape[0]): - for j in range(arr.shape[1]): - expect = arr[i, j] - sliced = darr[i, j] - self.assertEqual(expect.shape, sliced.shape) - self.assertEqual(expect.strides, sliced.strides) - got = sliced.copy_to_host() - self.assertTrue(np.all(expect == got)) - - def test_select_f(self): - a = np.arange(5 * 6 * 7).reshape(5, 6, 7, order='F') - da = cuda.to_device(a) - - for i in range(a.shape[0]): - for j in range(a.shape[1]): - self.assertTrue(np.all(da[i, j, :].copy_to_host() == a[i, j, :])) - for j in range(a.shape[2]): - self.assertTrue(np.all(da[i, :, j].copy_to_host() == a[i, :, j])) - for i in range(a.shape[1]): - for j in range(a.shape[2]): - self.assertTrue(np.all(da[:, i, j].copy_to_host() == a[:, i, j])) - - def test_select_c(self): - a = np.arange(5 * 6 * 7).reshape(5, 6, 7, order='C') - da = cuda.to_device(a) - - for i in range(a.shape[0]): - for j in range(a.shape[1]): - self.assertTrue(np.all(da[i, j, :].copy_to_host() == a[i, j, :])) - for j in range(a.shape[2]): - self.assertTrue(np.all(da[i, :, j].copy_to_host() == a[i, :, j])) - for i in range(a.shape[1]): - for j in range(a.shape[2]): - self.assertTrue(np.all(da[:, i, j].copy_to_host() == a[:, i, j])) - - def test_prefix_select(self): - arr = np.arange(5 * 7).reshape(5, 7, order='F') - - darr = 
cuda.to_device(arr) - self.assertTrue(np.all(darr[:1, 1].copy_to_host() == arr[:1, 1])) - - def test_negative_slicing_1d(self): - arr = np.arange(10) - darr = cuda.to_device(arr) - for i, j in product(range(-10, 10), repeat=2): - np.testing.assert_array_equal(arr[i:j], - darr[i:j].copy_to_host()) - - def test_negative_slicing_2d(self): - arr = np.arange(12).reshape(3, 4) - darr = cuda.to_device(arr) - for x, y, w, s in product(range(-4, 4), repeat=4): - np.testing.assert_array_equal(arr[x:y, w:s], - darr[x:y, w:s].copy_to_host()) - - def test_empty_slice_1d(self): - arr = np.arange(5) - darr = cuda.to_device(arr) - for i in range(darr.shape[0]): - np.testing.assert_array_equal(darr[i:i].copy_to_host(), arr[i:i]) - # empty slice of empty slice - self.assertFalse(darr[:0][:0].copy_to_host()) - # out-of-bound slice just produces empty slices - np.testing.assert_array_equal(darr[:0][:1].copy_to_host(), arr[:0][:1]) - np.testing.assert_array_equal(darr[:0][-1:].copy_to_host(), arr[:0][-1:]) - - def test_empty_slice_2d(self): - arr = np.arange(5 * 7).reshape(5, 7) - darr = cuda.to_device(arr) - np.testing.assert_array_equal(darr[:0].copy_to_host(), arr[:0]) - np.testing.assert_array_equal(darr[3, :0].copy_to_host(), arr[3, :0]) - # empty slice of empty slice - self.assertFalse(darr[:0][:0].copy_to_host()) - # out-of-bound slice just produces empty slices - np.testing.assert_array_equal(darr[:0][:1].copy_to_host(), arr[:0][:1]) - np.testing.assert_array_equal(darr[:0][-1:].copy_to_host(), arr[:0][-1:]) - - -class CudaArraySetting(SerialMixin, unittest.TestCase): - """ - Most of the slicing logic is tested in the cases above, so these - tests focus on the setting logic. 
- """ - - def test_scalar(self): - arr = np.arange(5 * 7).reshape(5, 7) - darr = cuda.to_device(arr) - arr[2, 2] = 500 - darr[2, 2] = 500 - np.testing.assert_array_equal(darr.copy_to_host(), arr) - - def test_rank(self): - arr = np.arange(5 * 7).reshape(5, 7) - darr = cuda.to_device(arr) - arr[2] = 500 - darr[2] = 500 - np.testing.assert_array_equal(darr.copy_to_host(), arr) - - def test_broadcast(self): - arr = np.arange(5 * 7).reshape(5, 7) - darr = cuda.to_device(arr) - arr[:, 2] = 500 - darr[:, 2] = 500 - np.testing.assert_array_equal(darr.copy_to_host(), arr) - - def test_array_assign_column(self): - arr = np.arange(5 * 7).reshape(5, 7) - darr = cuda.to_device(arr) - _400 = np.full(shape=7, fill_value=400) - arr[2] = _400 - darr[2] = _400 - np.testing.assert_array_equal(darr.copy_to_host(), arr) - - def test_array_assign_row(self): - arr = np.arange(5 * 7).reshape(5, 7) - darr = cuda.to_device(arr) - _400 = np.full(shape=5, fill_value=400) - arr[:, 2] = _400 - darr[:, 2] = _400 - np.testing.assert_array_equal(darr.copy_to_host(), arr) - - def test_array_assign_subarray(self): - arr = np.arange(5 * 6 * 7).reshape(5, 6, 7) - darr = cuda.to_device(arr) - _400 = np.full(shape=(6, 7), fill_value=400) - arr[2] = _400 - darr[2] = _400 - np.testing.assert_array_equal(darr.copy_to_host(), arr) - - def test_array_assign_deep_subarray(self): - arr = np.arange(5 * 6 * 7 * 8).reshape(5, 6, 7, 8) - darr = cuda.to_device(arr) - _400 = np.full(shape=(5, 6, 8), fill_value=400) - arr[:, :, 2] = _400 - darr[:, :, 2] = _400 - np.testing.assert_array_equal(darr.copy_to_host(), arr) - - def test_array_assign_all(self): - arr = np.arange(5 * 7).reshape(5, 7) - darr = cuda.to_device(arr) - _400 = np.full(shape=(5, 7), fill_value=400) - arr[:] = _400 - darr[:] = _400 - np.testing.assert_array_equal(darr.copy_to_host(), arr) - - def test_strides(self): - arr = np.ones(20) - darr = cuda.to_device(arr) - arr[::2] = 500 - darr[::2] = 500 - 
np.testing.assert_array_equal(darr.copy_to_host(), arr) - - def test_incompatible_highdim(self): - darr = cuda.to_device(np.arange(5 * 7)) - - with self.assertRaises(ValueError) as e: - darr[:] = np.ones(shape=(1, 2, 3)) - - self.assertIn( - member=str(e.exception), - container=[ - "Can't assign 3-D array to 1-D self", # device - "could not broadcast input array from shape (2,3) into shape (35)", # simulator - ]) - - def test_incompatible_shape(self): - darr = cuda.to_device(np.arange(5)) - - with self.assertRaises(ValueError) as e: - darr[:] = [1, 3] - - self.assertIn( - member=str(e.exception), - container=[ - "Can't copy sequence with size 2 to array axis 0 with dimension 5", # device - "cannot copy sequence with size 2 to array axis with dimension 5", # simulator - ]) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_cuda_auto_context.py b/numba/numba/cuda/tests/cudadrv/test_cuda_auto_context.py deleted file mode 100644 index db694633b..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +++ /dev/null @@ -1,21 +0,0 @@ -from __future__ import print_function, absolute_import -import numpy as np -from numba import cuda -from numba.cuda.testing import unittest, SerialMixin - - -class TestCudaAutoContext(SerialMixin, unittest.TestCase): - def test_auto_context(self): - """A problem was revealed by a customer that the use cuda.to_device - does not create a CUDA context. 
- This tests the problem - """ - A = np.arange(10, dtype=np.float32) - newA = np.empty_like(A) - dA = cuda.to_device(A) - - dA.copy_to_host(newA) - self.assertTrue(np.allclose(A, newA)) - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py b/numba/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py deleted file mode 100644 index 7ac81beff..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +++ /dev/null @@ -1,92 +0,0 @@ -import numpy as np -import ctypes -from numba.cuda.cudadrv.devicearray import (DeviceRecord, from_record_like, - auto_device) -from numba import cuda, numpy_support -from numba.cuda.testing import unittest, SerialMixin -from numba.cuda.testing import skip_on_cudasim -import numpy as np - -@skip_on_cudasim('Device Record API unsupported in the simulator') -class TestCudaDeviceRecord(SerialMixin, unittest.TestCase): - """ - Tests the DeviceRecord class with np.void host types. - """ - def setUp(self): - self._create_data(np.zeros) - - def _create_data(self, array_ctor): - self.dtype = np.dtype([('a', np.int32), ('b', np.float32)], align=True) - self.hostz = array_ctor(1, self.dtype)[0] - self.hostnz = array_ctor(1, self.dtype)[0] - self.hostnz['a'] = 10 - self.hostnz['b'] = 11.0 - - def _check_device_record(self, reference, rec): - self.assertEqual(rec.shape, tuple()) - self.assertEqual(rec.strides, tuple()) - self.assertEqual(rec.dtype, reference.dtype) - self.assertEqual(rec.alloc_size, reference.dtype.itemsize) - self.assertIsNotNone(rec.gpu_data) - self.assertNotEqual(rec.device_ctypes_pointer, ctypes.c_void_p(0)) - - numba_type = numpy_support.from_dtype(reference.dtype) - self.assertEqual(rec._numba_type_, numba_type) - - def test_device_record_interface(self): - hostrec = self.hostz.copy() - devrec = DeviceRecord(self.dtype) - self._check_device_record(hostrec, devrec) - - def test_device_record_copy(self): - hostrec = self.hostz.copy() - devrec = 
DeviceRecord(self.dtype) - devrec.copy_to_device(hostrec) - - # Copy back and check values are all zeros - hostrec2 = self.hostnz.copy() - devrec.copy_to_host(hostrec2) - np.testing.assert_equal(self.hostz, hostrec2) - - # Copy non-zero values to GPU and back and check values - hostrec3 = self.hostnz.copy() - devrec.copy_to_device(hostrec3) - - hostrec4 = self.hostz.copy() - devrec.copy_to_host(hostrec4) - np.testing.assert_equal(hostrec4, self.hostnz) - - def test_from_record_like(self): - # Create record from host record - hostrec = self.hostz.copy() - devrec = from_record_like(hostrec) - self._check_device_record(hostrec, devrec) - - # Create record from device record and check for distinct data - devrec2 = from_record_like(devrec) - self._check_device_record(devrec, devrec2) - self.assertNotEqual(devrec.gpu_data, devrec2.gpu_data) - - def test_auto_device(self): - # Create record from host record - hostrec = self.hostnz.copy() - devrec, new_gpu_obj = auto_device(hostrec) - self._check_device_record(hostrec, devrec) - self.assertTrue(new_gpu_obj) - - # Copy data back and check it is equal to auto_device arg - hostrec2 = self.hostz.copy() - devrec.copy_to_host(hostrec2) - np.testing.assert_equal(hostrec2, hostrec) - - -class TestCudaDeviceRecordWithRecord(TestCudaDeviceRecord): - """ - Tests the DeviceRecord class with np.record host types - """ - def setUp(self): - self._create_data(np.recarray) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_cuda_driver.py b/numba/numba/cuda/tests/cudadrv/test_cuda_driver.py deleted file mode 100644 index d74a34336..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_cuda_driver.py +++ /dev/null @@ -1,131 +0,0 @@ -from __future__ import print_function, absolute_import - -from ctypes import c_int, sizeof -from numba.cuda.cudadrv.driver import host_to_device, device_to_host -from numba.cuda.cudadrv import devices -from numba.cuda.testing import unittest, SerialMixin -from 
numba.cuda.testing import skip_on_cudasim - -ptx1 = ''' - .version 1.4 - .target sm_10, map_f64_to_f32 - - .entry _Z10helloworldPi ( - .param .u64 __cudaparm__Z10helloworldPi_A) - { - .reg .u32 %r<3>; - .reg .u64 %rd<6>; - .loc 14 4 0 -$LDWbegin__Z10helloworldPi: - .loc 14 6 0 - cvt.s32.u16 %r1, %tid.x; - ld.param.u64 %rd1, [__cudaparm__Z10helloworldPi_A]; - cvt.u64.u16 %rd2, %tid.x; - mul.lo.u64 %rd3, %rd2, 4; - add.u64 %rd4, %rd1, %rd3; - st.global.s32 [%rd4+0], %r1; - .loc 14 7 0 - exit; -$LDWend__Z10helloworldPi: - } // _Z10helloworldPi -''' - -ptx2 = ''' -.version 3.0 -.target sm_20 -.address_size 64 - - .file 1 "/tmp/tmpxft_000012c7_00000000-9_testcuda.cpp3.i" - .file 2 "testcuda.cu" - -.entry _Z10helloworldPi( - .param .u64 _Z10helloworldPi_param_0 -) -{ - .reg .s32 %r<3>; - .reg .s64 %rl<5>; - - - ld.param.u64 %rl1, [_Z10helloworldPi_param_0]; - cvta.to.global.u64 %rl2, %rl1; - .loc 2 6 1 - mov.u32 %r1, %tid.x; - mul.wide.u32 %rl3, %r1, 4; - add.s64 %rl4, %rl2, %rl3; - st.global.u32 [%rl4], %r1; - .loc 2 7 2 - ret; -} -''' - - -@skip_on_cudasim('CUDA Driver API unsupported in the simulator') -class TestCudaDriver(SerialMixin, unittest.TestCase): - def setUp(self): - self.assertTrue(len(devices.gpus) > 0) - self.context = devices.get_context() - device = self.context.device - ccmajor, _ = device.compute_capability - if ccmajor >= 2: - self.ptx = ptx2 - else: - self.ptx = ptx1 - - def tearDown(self): - del self.context - - def test_cuda_driver_basic(self): - module = self.context.create_module_ptx(self.ptx) - function = module.get_function('_Z10helloworldPi') - - array = (c_int * 100)() - - memory = self.context.memalloc(sizeof(array)) - - host_to_device(memory, array, sizeof(array)) - - function = function.configure((1,), (100,)) - function(memory) - - device_to_host(array, memory, sizeof(array)) - for i, v in enumerate(array): - self.assertEqual(i, v) - - module.unload() - - def test_cuda_driver_stream(self): - module = 
self.context.create_module_ptx(self.ptx) - function = module.get_function('_Z10helloworldPi') - - array = (c_int * 100)() - - stream = self.context.create_stream() - - with stream.auto_synchronize(): - memory = self.context.memalloc(sizeof(array)) - host_to_device(memory, array, sizeof(array), stream=stream) - - function = function.configure((1,), (100,), stream=stream) - function(memory) - - device_to_host(array, memory, sizeof(array), stream=stream) - - for i, v in enumerate(array): - self.assertEqual(i, v) - - def test_cuda_driver_occupancy(self): - module = self.context.create_module_ptx(self.ptx) - function = module.get_function('_Z10helloworldPi') - - value = self.context.get_active_blocks_per_multiprocessor(function, 128, 128) - print('active blocks:', value) - self.assertTrue(value > 0) - def b2d(bs): return bs - grid, block = self.context.get_max_potential_block_size(function, b2d, 128, 128) - print('grid size:', grid, ', block size:', block) - self.assertTrue(grid > 0) - self.assertTrue(block > 0) - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/cuda/tests/cudadrv/test_cuda_memory.py b/numba/numba/cuda/tests/cudadrv/test_cuda_memory.py deleted file mode 100644 index 8e9219dc2..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_cuda_memory.py +++ /dev/null @@ -1,154 +0,0 @@ -import ctypes - -import numpy as np - -from numba.cuda.cudadrv import driver, drvapi, devices -from numba.cuda.testing import unittest, CUDATestCase -from numba.utils import IS_PY3 -from numba.cuda.testing import skip_on_cudasim - - -@skip_on_cudasim('CUDA Memory API unsupported in the simulator') -class TestCudaMemory(CUDATestCase): - def setUp(self): - self.context = devices.get_context() - - def tearDown(self): - del self.context - super(TestCudaMemory, self).tearDown() - - def _template(self, obj): - self.assertTrue(driver.is_device_memory(obj)) - driver.require_device_memory(obj) - self.assertTrue(isinstance(obj.device_ctypes_pointer, - 
drvapi.cu_device_ptr)) - - def test_device_memory(self): - devmem = self.context.memalloc(1024) - self._template(devmem) - - def test_device_view(self): - devmem = self.context.memalloc(1024) - self._template(devmem.view(10)) - - def test_host_alloc(self): - devmem = self.context.memhostalloc(1024, mapped=True) - self._template(devmem) - - def test_pinned_memory(self): - ary = np.arange(10) - arybuf = ary if IS_PY3 else buffer(ary) - devmem = self.context.mempin(arybuf, ary.ctypes.data, - ary.size * ary.dtype.itemsize, - mapped=True) - self._template(devmem) - - def test_derived_pointer(self): - # Use MemoryPointer.view to create derived pointer - def check(m, offset): - # create view - v1 = m.view(offset) - self.assertEqual(v1.owner.handle.value, m.handle.value) - self.assertEqual(m.refct, 2) - self.assertEqual(v1.handle.value - offset, v1.owner.handle.value) - # create a view - v2 = v1.view(offset) - self.assertEqual(v2.owner.handle.value, m.handle.value) - self.assertEqual(v2.owner.handle.value, m.handle.value) - self.assertEqual(v2.handle.value - offset * 2, - v2.owner.handle.value) - self.assertEqual(m.refct, 3) - del v2 - self.assertEqual(m.refct, 2) - del v1 - self.assertEqual(m.refct, 1) - - m = self.context.memalloc(1024) - check(m=m, offset=0) - check(m=m, offset=1) - - -@skip_on_cudasim('CUDA Memory API unsupported in the simulator') -class TestCudaMemoryFunctions(CUDATestCase): - def setUp(self): - self.context = devices.get_context() - - def tearDown(self): - del self.context - super(TestCudaMemoryFunctions, self).tearDown() - - def test_memcpy(self): - hstary = np.arange(100, dtype=np.uint32) - hstary2 = np.arange(100, dtype=np.uint32) - sz = hstary.size * hstary.dtype.itemsize - devary = self.context.memalloc(sz) - - driver.host_to_device(devary, hstary, sz) - driver.device_to_host(hstary2, devary, sz) - - self.assertTrue(np.all(hstary == hstary2)) - - def test_memset(self): - dtype = np.dtype('uint32') - n = 10 - sz = dtype.itemsize * 10 - devary = 
self.context.memalloc(sz) - driver.device_memset(devary, 0xab, sz) - - hstary = np.empty(n, dtype=dtype) - driver.device_to_host(hstary, devary, sz) - - hstary2 = np.array([0xabababab] * n, dtype=np.dtype('uint32')) - self.assertTrue(np.all(hstary == hstary2)) - - def test_d2d(self): - hst = np.arange(100, dtype=np.uint32) - hst2 = np.empty_like(hst) - sz = hst.size * hst.dtype.itemsize - dev1 = self.context.memalloc(sz) - dev2 = self.context.memalloc(sz) - driver.host_to_device(dev1, hst, sz) - driver.device_to_device(dev2, dev1, sz) - driver.device_to_host(hst2, dev2, sz) - self.assertTrue(np.all(hst == hst2)) - - -@skip_on_cudasim('CUDA Memory API unsupported in the simulator') -class TestMVExtent(CUDATestCase): - def test_c_contiguous_array(self): - ary = np.arange(100) - arysz = ary.dtype.itemsize * ary.size - s, e = driver.host_memory_extents(ary) - self.assertTrue(ary.ctypes.data == s) - self.assertTrue(arysz == driver.host_memory_size(ary)) - - def test_f_contiguous_array(self): - ary = np.asfortranarray(np.arange(100).reshape(2, 50)) - arysz = ary.dtype.itemsize * np.prod(ary.shape) - s, e = driver.host_memory_extents(ary) - self.assertTrue(ary.ctypes.data == s) - self.assertTrue(arysz == driver.host_memory_size(ary)) - - def test_single_element_array(self): - ary = np.asarray(np.uint32(1234)) - arysz = ary.dtype.itemsize - s, e = driver.host_memory_extents(ary) - self.assertTrue(ary.ctypes.data == s) - self.assertTrue(arysz == driver.host_memory_size(ary)) - - def test_ctypes_struct(self): - class mystruct(ctypes.Structure): - _fields_ = [('x', ctypes.c_int), ('y', ctypes.c_int)] - - data = mystruct(x=123, y=432) - sz = driver.host_memory_size(data) - self.assertTrue(ctypes.sizeof(data) == sz) - - def test_ctypes_double(self): - data = ctypes.c_double(1.234) - sz = driver.host_memory_size(data) - self.assertTrue(ctypes.sizeof(data) == sz) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_cuda_ndarray.py 
b/numba/numba/cuda/tests/cudadrv/test_cuda_ndarray.py deleted file mode 100644 index c759f7b5e..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +++ /dev/null @@ -1,244 +0,0 @@ -import numpy as np -from numba.cuda.cudadrv import devicearray -from numba import cuda -from numba.cuda.testing import unittest, SerialMixin -from numba.cuda.testing import skip_on_cudasim - - -class TestCudaNDArray(SerialMixin, unittest.TestCase): - def test_device_array_interface(self): - dary = cuda.device_array(shape=100) - devicearray.verify_cuda_ndarray_interface(dary) - - ary = np.empty(100) - dary = cuda.to_device(ary) - devicearray.verify_cuda_ndarray_interface(dary) - - ary = np.asarray(1.234) - dary = cuda.to_device(ary) - self.assertEquals(dary.ndim, 1) - devicearray.verify_cuda_ndarray_interface(dary) - - def test_devicearray_no_copy(self): - array = np.arange(100, dtype=np.float32) - cuda.to_device(array, copy=False) - - def test_devicearray_shape(self): - ary = np.arange(2 * 3 * 4).reshape(2, 3, 4) - dary = cuda.to_device(ary) - self.assertEquals(ary.shape, dary.shape) - self.assertEquals(ary.shape[1:], dary.shape[1:]) - - def test_devicearray(self): - array = np.arange(100, dtype=np.int32) - original = array.copy() - gpumem = cuda.to_device(array) - array[:] = 0 - gpumem.copy_to_host(array) - - np.testing.assert_array_equal(array, original) - - def test_stream_bind(self): - stream = cuda.stream() - with stream.auto_synchronize(): - arr = cuda.device_array( - (3, 3), - dtype=np.float64, - stream=stream) - self.assertEqual(arr.bind(stream).stream, stream) - self.assertEqual(arr.stream, stream) - - def test_len_1d(self): - ary = np.empty((3,)) - dary = cuda.device_array(3) - self.assertEqual(len(ary), len(dary)) - - def test_len_2d(self): - ary = np.empty((3, 5)) - dary = cuda.device_array((3, 5)) - self.assertEqual(len(ary), len(dary)) - - def test_len_3d(self): - ary = np.empty((3, 5, 7)) - dary = cuda.device_array((3, 5, 7)) - self.assertEqual(len(ary), 
len(dary)) - - def test_devicearray_partition(self): - N = 100 - array = np.arange(N, dtype=np.int32) - original = array.copy() - gpumem = cuda.to_device(array) - left, right = gpumem.split(N // 2) - - array[:] = 0 - - self.assertTrue(np.all(array == 0)) - - right.copy_to_host(array[N//2:]) - left.copy_to_host(array[:N//2]) - - self.assertTrue(np.all(array == original)) - - def test_devicearray_replace(self): - N = 100 - array = np.arange(N, dtype=np.int32) - original = array.copy() - gpumem = cuda.to_device(array) - cuda.to_device(array * 2, to=gpumem) - gpumem.copy_to_host(array) - np.testing.assert_array_equal(array, original * 2) - - @skip_on_cudasim('This works in the simulator') - def test_devicearray_transpose_wrongdim(self): - gpumem = cuda.to_device(np.array(np.arange(12)).reshape(3, 4, 1)) - - with self.assertRaises(NotImplementedError) as e: - np.transpose(gpumem) - - self.assertEqual( - "transposing a non-2D DeviceNDArray isn't supported", - str(e.exception)) - - def test_devicearray_transpose_identity(self): - # any-shape identities should work - original = np.array(np.arange(24)).reshape(3, 4, 2) - array = np.transpose(cuda.to_device(original), axes=(0, 1, 2)).copy_to_host() - self.assertTrue(np.all(array == original)) - - def test_devicearray_transpose_duplicatedaxis(self): - gpumem = cuda.to_device(np.array(np.arange(12)).reshape(3, 4)) - - with self.assertRaises(ValueError) as e: - np.transpose(gpumem, axes=(0, 0)) - - self.assertIn( - str(e.exception), - container=[ - 'invalid axes list (0, 0)', # GPU - 'repeated axis in transpose', # sim - ]) - - def test_devicearray_transpose_wrongaxis(self): - gpumem = cuda.to_device(np.array(np.arange(12)).reshape(3, 4)) - - with self.assertRaises(ValueError) as e: - np.transpose(gpumem, axes=(0, 2)) - - self.assertIn( - str(e.exception), - container=[ - 'invalid axes list (0, 2)', # GPU - 'invalid axis for this array', - 'axis 2 is out of bounds for array of dimension 2', # sim - ]) - - def 
test_devicearray_transpose_ok(self): - original = np.array(np.arange(12)).reshape(3, 4) - array = np.transpose(cuda.to_device(original)).copy_to_host() - self.assertTrue(np.all(array == original.T)) - - def test_devicearray_transpose_T(self): - original = np.array(np.arange(12)).reshape(3, 4) - array = cuda.to_device(original).T.copy_to_host() - self.assertTrue(np.all(array == original.T)) - - def test_devicearray_contiguous_slice(self): - # memcpys are dumb ranges of bytes, so trying to - # copy to a non-contiguous range shouldn't work! - a = np.arange(25).reshape(5, 5, order='F') - s = np.full(fill_value=5, shape=(5,)) - - d = cuda.to_device(a) - a[2] = s - - # d is in F-order (not C-order), so d[2] is not contiguous - # (40-byte strides). This means we can't memcpy to it! - with self.assertRaises(ValueError) as e: - d[2].copy_to_device(s) - self.assertEqual( - devicearray.errmsg_contiguous_buffer, - str(e.exception)) - - # if d[2].copy_to_device(s), then this would pass: - # self.assertTrue((a == d.copy_to_host()).all()) - - def _test_devicearray_contiguous_host_copy(self, a_c, a_f): - """ - Checks host->device memcpys - """ - self.assertTrue(a_c.flags.c_contiguous) - self.assertTrue(a_f.flags.f_contiguous) - - for original, copy in [ - (a_f, a_f), - (a_f, a_c), - (a_c, a_f), - (a_c, a_c), - ]: - msg = '%s => %s' % ( - 'C' if original.flags.c_contiguous else 'F', - 'C' if copy.flags.c_contiguous else 'F', - ) - - d = cuda.to_device(original) - d.copy_to_device(copy) - self.assertTrue(np.all(d.copy_to_host() == a_c), msg=msg) - self.assertTrue(np.all(d.copy_to_host() == a_f), msg=msg) - - def test_devicearray_contiguous_copy_host_3d(self): - a_c = np.arange(5 * 5 * 5).reshape(5, 5, 5) - a_f = np.array(a_c, order='F') - self._test_devicearray_contiguous_host_copy(a_c, a_f) - - def test_devicearray_contiguous_copy_host_1d(self): - a_c = np.arange(5) - a_f = np.array(a_c, order='F') - self._test_devicearray_contiguous_host_copy(a_c, a_f) - - def 
test_devicearray_contiguous_copy_device(self): - a_c = np.arange(5 * 5 * 5).reshape(5, 5, 5) - a_f = np.array(a_c, order='F') - self.assertTrue(a_c.flags.c_contiguous) - self.assertTrue(a_f.flags.f_contiguous) - - d = cuda.to_device(a_c) - - with self.assertRaises(ValueError) as e: - d.copy_to_device(cuda.to_device(a_f)) - self.assertEqual( - "Can't copy F-contiguous array to a C-contiguous array", - str(e.exception)) - - d.copy_to_device(cuda.to_device(a_c)) - self.assertTrue(np.all(d.copy_to_host() == a_c)) - - d = cuda.to_device(a_f) - - with self.assertRaises(ValueError) as e: - d.copy_to_device(cuda.to_device(a_c)) - self.assertEqual( - "Can't copy C-contiguous array to a F-contiguous array", - str(e.exception)) - - d.copy_to_device(cuda.to_device(a_f)) - self.assertTrue(np.all(d.copy_to_host() == a_f)) - - def test_devicearray_contiguous_host_strided(self): - a_c = np.arange(10) - d = cuda.to_device(a_c) - arr = np.arange(20)[::2] - d.copy_to_device(arr) - np.testing.assert_array_equal(d.copy_to_host(), arr) - - def test_devicearray_contiguous_device_strided(self): - d = cuda.to_device(np.arange(20)) - arr = np.arange(20) - - with self.assertRaises(ValueError) as e: - d.copy_to_device(cuda.to_device(arr)[::2]) - self.assertEqual( - devicearray.errmsg_contiguous_buffer, - str(e.exception)) - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_deallocations.py b/numba/numba/cuda/tests/cudadrv/test_deallocations.py deleted file mode 100644 index 1beedcb50..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_deallocations.py +++ /dev/null @@ -1,177 +0,0 @@ -from __future__ import division - -from contextlib import contextmanager - -import numpy as np - -from numba import cuda, config -from numba.cuda.testing import unittest, skip_on_cudasim, SerialMixin -from numba.tests.support import captured_stderr - - -@skip_on_cudasim('not supported on CUDASIM') -class TestDeallocation(SerialMixin, unittest.TestCase): - def 
test_max_pending_count(self): - # get deallocation manager and flush it - deallocs = cuda.current_context().deallocations - deallocs.clear() - self.assertEqual(len(deallocs), 0) - # deallocate to maximum count - for i in range(config.CUDA_DEALLOCS_COUNT): - cuda.to_device(np.arange(1)) - self.assertEqual(len(deallocs), i + 1) - # one more to trigger .clear() - cuda.to_device(np.arange(1)) - self.assertEqual(len(deallocs), 0) - - def test_max_pending_bytes(self): - # get deallocation manager and flush it - ctx = cuda.current_context() - deallocs = ctx.deallocations - deallocs.clear() - self.assertEqual(len(deallocs), 0) - - mi = ctx.get_memory_info() - - max_pending = 10**6 # 1MB - old_ratio = config.CUDA_DEALLOCS_RATIO - try: - # change to a smaller ratio - config.CUDA_DEALLOCS_RATIO = max_pending / mi.total - self.assertEqual(deallocs._max_pending_bytes, max_pending) - - # deallocate half the max size - cuda.to_device(np.ones(max_pending // 2, dtype=np.int8)) - self.assertEqual(len(deallocs), 1) - - # deallocate another remaining - cuda.to_device(np.ones(max_pending - deallocs._size, dtype=np.int8)) - self.assertEqual(len(deallocs), 2) - - # another byte to trigger .clear() - cuda.to_device(np.ones(1, dtype=np.int8)) - self.assertEqual(len(deallocs), 0) - finally: - # restore old ratio - config.CUDA_DEALLOCS_RATIO = old_ratio - - -@skip_on_cudasim("defer_cleanup has no effect in CUDASIM") -class TestDeferCleanup(SerialMixin, unittest.TestCase): - def test_basic(self): - harr = np.arange(5) - darr1 = cuda.to_device(harr) - deallocs = cuda.current_context().deallocations - deallocs.clear() - self.assertEqual(len(deallocs), 0) - with cuda.defer_cleanup(): - darr2 = cuda.to_device(harr) - del darr1 - self.assertEqual(len(deallocs), 1) - del darr2 - self.assertEqual(len(deallocs), 2) - deallocs.clear() - self.assertEqual(len(deallocs), 2) - - deallocs.clear() - self.assertEqual(len(deallocs), 0) - - def test_nested(self): - harr = np.arange(5) - darr1 = 
cuda.to_device(harr) - deallocs = cuda.current_context().deallocations - deallocs.clear() - self.assertEqual(len(deallocs), 0) - with cuda.defer_cleanup(): - with cuda.defer_cleanup(): - darr2 = cuda.to_device(harr) - del darr1 - self.assertEqual(len(deallocs), 1) - del darr2 - self.assertEqual(len(deallocs), 2) - deallocs.clear() - self.assertEqual(len(deallocs), 2) - deallocs.clear() - self.assertEqual(len(deallocs), 2) - - deallocs.clear() - self.assertEqual(len(deallocs), 0) - - def test_exception(self): - harr = np.arange(5) - darr1 = cuda.to_device(harr) - deallocs = cuda.current_context().deallocations - deallocs.clear() - self.assertEqual(len(deallocs), 0) - - class CustomError(Exception): - pass - - with self.assertRaises(CustomError): - with cuda.defer_cleanup(): - darr2 = cuda.to_device(harr) - del darr2 - self.assertEqual(len(deallocs), 1) - deallocs.clear() - self.assertEqual(len(deallocs), 1) - raise CustomError - deallocs.clear() - self.assertEqual(len(deallocs), 0) - del darr1 - self.assertEqual(len(deallocs), 1) - deallocs.clear() - self.assertEqual(len(deallocs), 0) - - -class TestDeferCleanupAvail(SerialMixin, unittest.TestCase): - def test_context_manager(self): - # just make sure the API is available - with cuda.defer_cleanup(): - pass - - -@skip_on_cudasim('not supported on CUDASIM') -class TestDel(SerialMixin, unittest.TestCase): - """ - Ensure resources are deleted properly without ignored exception. 
- """ - @contextmanager - def check_ignored_exception(self, ctx): - with captured_stderr() as cap: - yield - ctx.deallocations.clear() - self.assertFalse(cap.getvalue()) - - def test_stream(self): - ctx = cuda.current_context() - stream = ctx.create_stream() - with self.check_ignored_exception(ctx): - del stream - - def test_event(self): - ctx = cuda.current_context() - event = ctx.create_event() - with self.check_ignored_exception(ctx): - del event - - def test_pinned_memory(self): - ctx = cuda.current_context() - mem = ctx.memhostalloc(32) - with self.check_ignored_exception(ctx): - del mem - - def test_mapped_memory(self): - ctx = cuda.current_context() - mem = ctx.memhostalloc(32, mapped=True) - with self.check_ignored_exception(ctx): - del mem - - def test_device_memory(self): - ctx = cuda.current_context() - mem = ctx.memalloc(32) - with self.check_ignored_exception(ctx): - del mem - - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/numba/numba/cuda/tests/cudadrv/test_detect.py b/numba/numba/cuda/tests/cudadrv/test_detect.py deleted file mode 100644 index bf7a12c49..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_detect.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import absolute_import, print_function -from numba import cuda -from numba.cuda.testing import unittest, SerialMixin -from numba.tests.support import captured_stdout - -class TestCudaDetect(SerialMixin, unittest.TestCase): - def test_cuda_detect(self): - # exercise the code path - with captured_stdout() as out: - cuda.detect() - output = out.getvalue() - self.assertIn('Found', output) - self.assertIn('CUDA devices', output) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_events.py b/numba/numba/cuda/tests/cudadrv/test_events.py deleted file mode 100644 index 6ee96754a..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_events.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import absolute_import, 
print_function -import numpy as np -from numba import cuda -from numba.cuda.testing import unittest, SerialMixin - - -class TestCudaEvent(SerialMixin, unittest.TestCase): - def test_event_elapsed(self): - N = 32 - dary = cuda.device_array(N, dtype=np.double) - evtstart = cuda.event() - evtend = cuda.event() - - evtstart.record() - cuda.to_device(np.arange(N), to=dary) - evtend.record() - evtend.wait() - evtend.synchronize() - # Exercise the code path - evtstart.elapsed_time(evtend) - - def test_event_elapsed_stream(self): - N = 32 - stream = cuda.stream() - dary = cuda.device_array(N, dtype=np.double) - evtstart = cuda.event() - evtend = cuda.event() - - evtstart.record(stream=stream) - cuda.to_device(np.arange(N), to=dary, stream=stream) - evtend.record(stream=stream) - evtend.wait(stream=stream) - evtend.synchronize() - # Exercise the code path - evtstart.elapsed_time(evtend) - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_host_alloc.py b/numba/numba/cuda/tests/cudadrv/test_host_alloc.py deleted file mode 100644 index 8249d1f34..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_host_alloc.py +++ /dev/null @@ -1,52 +0,0 @@ -from __future__ import print_function, division, absolute_import -import numpy as np -from numba.cuda.cudadrv import driver -from numba import cuda -from numba.cuda.testing import unittest, CUDATestCase -from numba.cuda.testing import skip_on_cudasim - - -@skip_on_cudasim('CUDA Driver API unsupported in the simulator') -class TestHostAlloc(CUDATestCase): - def test_host_alloc_driver(self): - n = 32 - mem = cuda.current_context().memhostalloc(n, mapped=True) - - dtype = np.dtype(np.uint8) - ary = np.ndarray(shape=n // dtype.itemsize, dtype=dtype, - buffer=mem) - - magic = 0xab - driver.device_memset(mem, magic, n) - - self.assertTrue(np.all(ary == magic)) - - ary.fill(n) - - recv = np.empty_like(ary) - - driver.device_to_host(recv, mem, ary.size) - - self.assertTrue(np.all(ary == recv)) - 
self.assertTrue(np.all(recv == n)) - - def test_host_alloc_pinned(self): - ary = cuda.pinned_array(10, dtype=np.uint32) - ary.fill(123) - self.assertTrue(all(ary == 123)) - devary = cuda.to_device(ary) - driver.device_memset(devary, 0, driver.device_memory_size(devary)) - self.assertTrue(all(ary == 123)) - devary.copy_to_host(ary) - self.assertTrue(all(ary == 0)) - - def test_host_alloc_mapped(self): - ary = cuda.mapped_array(10, dtype=np.uint32) - ary.fill(123) - self.assertTrue(all(ary == 123)) - driver.device_memset(ary, 0, driver.device_memory_size(ary)) - self.assertTrue(all(ary == 0)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_inline_ptx.py b/numba/numba/cuda/tests/cudadrv/test_inline_ptx.py deleted file mode 100644 index 040f7d793..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_inline_ptx.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from llvmlite.llvmpy.core import Module, Type, Builder, InlineAsm -from llvmlite import binding as ll - -from numba.cuda.cudadrv import nvvm -from numba.cuda.testing import unittest, CUDATestCase -from numba.cuda.testing import skip_on_cudasim - - -@skip_on_cudasim('Inline PTX cannot be used in the simulator') -class TestCudaInlineAsm(CUDATestCase): - def test_inline_rsqrt(self): - mod = Module(__name__) - fnty = Type.function(Type.void(), [Type.pointer(Type.float())]) - fn = mod.add_function(fnty, 'cu_rsqrt') - bldr = Builder(fn.append_basic_block('entry')) - - rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()]) - inlineasm = InlineAsm.get(rsqrt_approx_fnty, - 'rsqrt.approx.f32 $0, $1;', - '=f,f', side_effect=True) - val = bldr.load(fn.args[0]) - res = bldr.call(inlineasm, [val]) - - bldr.store(res, fn.args[0]) - bldr.ret_void() - - # generate ptx - nvvm.fix_data_layout(mod) - nvvm.set_cuda_kernel(fn) - nvvmir = str(mod) - ptx = nvvm.llvm_to_ptx(nvvmir) - self.assertTrue('rsqrt.approx.f32' in 
str(ptx)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_ir_patch.py b/numba/numba/cuda/tests/cudadrv/test_ir_patch.py deleted file mode 100644 index 6f5d8d5da..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_ir_patch.py +++ /dev/null @@ -1,28 +0,0 @@ -from __future__ import print_function, absolute_import, division - -from numba.cuda.testing import unittest -from numba.cuda.testing import skip_on_cudasim - - -@skip_on_cudasim('Linking unsupported in the simulator') -class TestIRPatch(unittest.TestCase): - def patch(self, ir): - # Import here to avoid error in CUDASIM - from numba.cuda.cudadrv.nvvm import llvm39_to_34_ir - - return llvm39_to_34_ir(ir) - - def test_load_rewrite(self): - text = "%myload = not really" - out = self.patch(text) - # No rewrite - self.assertEqual(text, out) - - text = "%myload = load i32, i32* val" - out = self.patch(text) - # Rewritten - self.assertEqual("%myload = load i32* val", out) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_linker.py b/numba/numba/cuda/tests/cudadrv/test_linker.py deleted file mode 100644 index 3323bc69c..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_linker.py +++ /dev/null @@ -1,42 +0,0 @@ -from __future__ import print_function, absolute_import, division -import os.path -import numpy as np -from numba.cuda.testing import unittest -from numba.cuda.testing import skip_on_cudasim -from numba.cuda.testing import SerialMixin -from numba.cuda.cudadrv.driver import Linker -from numba.cuda import require_context -from numba import cuda - -@skip_on_cudasim('Linking unsupported in the simulator') -class TestLinker(SerialMixin, unittest.TestCase): - - @require_context - def test_linker_basic(self): - '''Simply go through the constructor and destructor - ''' - linker = Linker() - del linker - - @require_context - def test_linking(self): - global bar # must be a global; other it is recognized as a freevar - bar = 
cuda.declare_device('bar', 'int32(int32)') - - link = os.path.join(os.path.dirname(__file__), 'data', 'jitlink.ptx') - - @cuda.jit('void(int32[:], int32[:])', link=[link]) - def foo(x, y): - i = cuda.grid(1) - x[i] += bar(y[i]) - - A = np.array([123]) - B = np.array([321]) - - foo(A, B) - - self.assertTrue(A[0] == 123 + 2 * 321) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_nvvm_driver.py b/numba/numba/cuda/tests/cudadrv/test_nvvm_driver.py deleted file mode 100644 index 3efefe2ee..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_nvvm_driver.py +++ /dev/null @@ -1,177 +0,0 @@ -from __future__ import absolute_import, print_function, division - -from llvmlite.llvmpy.core import Module, Type, Builder -from numba.cuda.cudadrv.nvvm import (NVVM, CompilationUnit, llvm_to_ptx, - set_cuda_kernel, fix_data_layout, - get_arch_option, SUPPORTED_CC) -from ctypes import c_size_t, c_uint64, sizeof -from numba.cuda.testing import unittest -from numba.cuda.cudadrv.nvvm import LibDevice, NvvmError -from numba.cuda.testing import skip_on_cudasim - -is64bit = sizeof(c_size_t) == sizeof(c_uint64) - - -@skip_on_cudasim('NVVM Driver unsupported in the simulator') -class TestNvvmDriver(unittest.TestCase): - def get_ptx(self): - nvvm = NVVM() - - if is64bit: - return gpu64 - else: - return gpu32 - - def test_nvvm_compile_simple(self): - nvvmir = self.get_ptx() - ptx = llvm_to_ptx(nvvmir).decode('utf8') - self.assertTrue('simple' in ptx) - self.assertTrue('ave' in ptx) - - def test_nvvm_from_llvm(self): - m = Module("test_nvvm_from_llvm") - fty = Type.function(Type.void(), [Type.int()]) - kernel = m.add_function(fty, name='mycudakernel') - bldr = Builder(kernel.append_basic_block('entry')) - bldr.ret_void() - set_cuda_kernel(kernel) - - fix_data_layout(m) - ptx = llvm_to_ptx(str(m)).decode('utf8') - self.assertTrue('mycudakernel' in ptx) - if is64bit: - self.assertTrue('.address_size 64' in ptx) - else: - 
self.assertTrue('.address_size 32' in ptx) - - def _test_nvvm_support(self, arch): - nvvmir = self.get_ptx() - compute_xx = 'compute_{0}{1}'.format(*arch) - ptx = llvm_to_ptx(nvvmir, arch=compute_xx, ftz=1, prec_sqrt=0, - prec_div=0).decode('utf8') - self.assertIn(".target sm_{0}{1}".format(*arch), ptx) - self.assertIn('simple', ptx) - self.assertIn('ave', ptx) - - def test_nvvm_support(self): - """Test supported CC by NVVM - """ - for arch in SUPPORTED_CC: - self._test_nvvm_support(arch=arch) - - @unittest.skipIf(True, "No new CC unknown to NVVM yet") - def test_nvvm_future_support(self): - """Test unsupported CC to help track the feature support - """ - # List known CC but unsupported by NVVM - future_archs = [ - # (5, 2), # for example - ] - for arch in future_archs: - pat = r"-arch=compute_{0}{1}".format(*arch) - with self.assertRaises(NvvmError) as raises: - self._test_nvvm_support(arch=arch) - self.assertIn(pat, raises.msg) - - -@skip_on_cudasim('NVVM Driver unsupported in the simulator') -class TestArchOption(unittest.TestCase): - def test_get_arch_option(self): - # Test returning the nearest lowest arch. - self.assertEqual(get_arch_option(3, 0), 'compute_30') - self.assertEqual(get_arch_option(3, 3), 'compute_30') - self.assertEqual(get_arch_option(3, 4), 'compute_30') - # Test known arch. 
- for arch in SUPPORTED_CC: - self.assertEqual(get_arch_option(*arch), 'compute_%d%d' % arch) - self.assertEqual(get_arch_option(1000, 0), - 'compute_%d%d' % SUPPORTED_CC[-1]) - - -@skip_on_cudasim('NVVM Driver unsupported in the simulator') -class TestLibDevice(unittest.TestCase): - def _libdevice_load(self, arch, expect): - libdevice = LibDevice(arch=arch) - self.assertEqual(libdevice.arch, expect) - - def test_libdevice_arch_fix(self): - self._libdevice_load('compute_20', 'compute_20') - self._libdevice_load('compute_21', 'compute_20') - self._libdevice_load('compute_30', 'compute_30') - self._libdevice_load('compute_35', 'compute_35') - self._libdevice_load('compute_52', 'compute_50') - - -gpu64 = ''' -target triple="nvptx64-" -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" - -define i32 @ave(i32 %a, i32 %b) { -entry: -%add = add nsw i32 %a, %b -%div = sdiv i32 %add, 2 -ret i32 %div -} - -define void @simple(i32* %data) { -entry: -%0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() -%1 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -%mul = mul i32 %0, %1 -%2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -%add = add i32 %mul, %2 -%call = call i32 @ave(i32 %add, i32 %add) -%idxprom = sext i32 %add to i64 -%arrayidx = getelementptr inbounds i32, i32* %data, i64 %idxprom -store i32 %call, i32* %arrayidx, align 4 -ret void -} - -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone - -declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() nounwind readnone - -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone - -!nvvm.annotations = !{!1} -!1 = metadata !{void (i32*)* @simple, metadata !"kernel", i32 1} -''' - -gpu32 = ''' -target triple="nvptx-" -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" - -define i32 @ave(i32 %a, i32 %b) { -entry: -%add = add nsw 
i32 %a, %b -%div = sdiv i32 %add, 2 -ret i32 %div -} - -define void @simple(i32* %data) { -entry: -%0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() -%1 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -%mul = mul i32 %0, %1 -%2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -%add = add i32 %mul, %2 -%call = call i32 @ave(i32 %add, i32 %add) -%idxprom = sext i32 %add to i64 -%arrayidx = getelementptr inbounds i32, i32* %data, i64 %idxprom -store i32 %call, i32* %arrayidx, align 4 -ret void -} - -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone - -declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() nounwind readnone - -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone - -!nvvm.annotations = !{!1} -!1 = metadata !{void (i32*)* @simple, metadata !"kernel", i32 1} - -''' - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_pinned.py b/numba/numba/cuda/tests/cudadrv/test_pinned.py deleted file mode 100644 index 40aff7caf..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_pinned.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import numpy as np - -from numba import cuda -from numba.cuda.testing import unittest, CUDATestCase - - -REPEAT = 25 - - -class TestPinned(CUDATestCase): - - def _run_copies(self, A): - A0 = np.copy(A) - - stream = cuda.stream() - ptr = cuda.to_device(A, copy=False, stream=stream) - ptr.copy_to_device(A, stream=stream) - ptr.copy_to_host(A, stream=stream) - stream.synchronize() - - self.assertTrue(np.allclose(A, A0)) - - def test_pinned(self): - A = np.arange(2*1024*1024) # 16 MB - with cuda.pinned(A): - self._run_copies(A) - - def test_unpinned(self): - A = np.arange(2*1024*1024) # 16 MB - self._run_copies(A) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/cuda/tests/cudadrv/test_profiler.py b/numba/numba/cuda/tests/cudadrv/test_profiler.py deleted file mode 100644 index 1c2887ad4..000000000 --- 
a/numba/numba/cuda/tests/cudadrv/test_profiler.py +++ /dev/null @@ -1,22 +0,0 @@ -from __future__ import absolute_import, print_function -import numba.unittest_support as unittest -from numba.cuda.testing import CUDATestCase -from numba import cuda -from numba.cuda.testing import skip_on_cudasim - - -@skip_on_cudasim('CUDA Profiler unsupported in the simulator') -class TestProfiler(CUDATestCase): - def test_profiling(self): - with cuda.profiling(): - a = cuda.device_array(10) - del a - - with cuda.profiling(): - a = cuda.device_array(100) - del a - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/cuda/tests/cudadrv/test_reset_device.py b/numba/numba/cuda/tests/cudadrv/test_reset_device.py deleted file mode 100644 index ba7cb6250..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_reset_device.py +++ /dev/null @@ -1,41 +0,0 @@ -from __future__ import print_function, absolute_import, division -import threading -from numba import cuda -from numba.cuda.cudadrv.driver import driver -from numba.cuda.testing import unittest, CUDATestCase - -try: - from Queue import Queue # Python 2 -except: - from queue import Queue # Python 3 - - -class TestResetDevice(CUDATestCase): - def test_reset_device(self): - - def newthread(exception_queue): - try: - devices = range(driver.get_device_count()) - for _ in range(2): - for d in devices: - cuda.select_device(d) - cuda.close() - except Exception as e: - exception_queue.put(e) - - # Do test on a separate thread so that we don't affect - # the current context in the main thread. 
- - exception_queue = Queue() - t = threading.Thread(target=newthread, args=(exception_queue,)) - t.start() - t.join() - - exceptions = [] - while not exception_queue.empty(): - exceptions.append(exception_queue.get()) - self.assertEqual(exceptions, []) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudadrv/test_select_device.py b/numba/numba/cuda/tests/cudadrv/test_select_device.py deleted file mode 100644 index 7fae3eda9..000000000 --- a/numba/numba/cuda/tests/cudadrv/test_select_device.py +++ /dev/null @@ -1,46 +0,0 @@ -# -# Test does not work on some cards. -# -from __future__ import print_function, absolute_import, division -import threading -try: - from Queue import Queue # Python 2 -except: - from queue import Queue # Python 3 - -import numpy as np -from numba import cuda -from numba.cuda.testing import unittest, CUDATestCase - - -def newthread(exception_queue): - try: - cuda.select_device(0) - stream = cuda.stream() - A = np.arange(100) - dA = cuda.to_device(A, stream=stream) - stream.synchronize() - del dA - del stream - cuda.close() - except Exception as e: - exception_queue.put(e) - - -class TestSelectDevice(CUDATestCase): - def test_select_device(self): - exception_queue = Queue() - for i in range(10): - t = threading.Thread(target=newthread, args=(exception_queue,)) - t.start() - t.join() - - exceptions = [] - while not exception_queue.empty(): - exceptions.append(exception_queue.get()) - self.assertEqual(exceptions, []) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/cuda/tests/cudapy/__init__.py b/numba/numba/cuda/tests/cudapy/__init__.py deleted file mode 100644 index 0465337eb..000000000 --- a/numba/numba/cuda/tests/cudapy/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from numba.testing import load_testsuite -import os - - -def load_tests(loader, tests, pattern): - return load_testsuite(loader, os.path.dirname(__file__)) diff --git a/numba/numba/cuda/tests/cudapy/test_alignment.py 
b/numba/numba/cuda/tests/cudapy/test_alignment.py deleted file mode 100644 index 877c5f290..000000000 --- a/numba/numba/cuda/tests/cudapy/test_alignment.py +++ /dev/null @@ -1,41 +0,0 @@ -import numpy as np -from numba import from_dtype, cuda -from numba import unittest_support as unittest -from numba.cuda.testing import skip_on_cudasim, SerialMixin - -class TestAlignment(SerialMixin, unittest.TestCase): - def test_record_alignment(self): - rec_dtype = np.dtype([('a', 'int32'), ('b', 'float64')], align=True) - rec = from_dtype(rec_dtype) - - @cuda.jit((rec[:],)) - def foo(a): - i = cuda.grid(1) - a[i].a = a[i].b - - a_recarray = np.recarray(3, dtype=rec_dtype) - for i in range(a_recarray.size): - a_rec = a_recarray[i] - a_rec.a = 0 - a_rec.b = (i + 1) * 123 - - foo[1, 3](a_recarray) - - self.assertTrue(np.all(a_recarray.a == a_recarray.b)) - - @skip_on_cudasim('Simulator does not check alignment') - def test_record_alignment_error(self): - rec_dtype = np.dtype([('a', 'int32'), ('b', 'float64')]) - rec = from_dtype(rec_dtype) - - with self.assertRaises(Exception) as raises: - @cuda.jit((rec[:],)) - def foo(a): - i = cuda.grid(1) - a[i].a = a[i].b - - self.assertTrue('type float64 is not aligned' in str(raises.exception)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_array.py b/numba/numba/cuda/tests/cudapy/test_array.py deleted file mode 100644 index d8084e0ec..000000000 --- a/numba/numba/cuda/tests/cudapy/test_array.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import numpy as np - -from numba.cuda.testing import unittest, SerialMixin -from numba import cuda - - -class TestCudaArray(SerialMixin, unittest.TestCase): - def test_gpu_array_zero_length(self): - x = np.arange(0) - dx = cuda.to_device(x) - hx = dx.copy_to_host() - self.assertEqual(x.shape, dx.shape) - self.assertEqual(x.size, dx.size) - self.assertEqual(x.shape, hx.shape) - self.assertEqual(x.size, 
hx.size) - - def test_gpu_array_strided(self): - - @cuda.jit('void(double[:])') - def kernel(x): - i = cuda.grid(1) - if i < x.shape[0]: - x[i] = i - - x = np.arange(10, dtype=np.double) - y = np.ndarray(shape=10 * 8, buffer=x, dtype=np.byte) - z = np.ndarray(9, buffer=y[4:-4], dtype=np.double) - kernel[10, 10](z) - self.assertTrue(np.allclose(z, list(range(9)))) - - def test_gpu_array_interleaved(self): - - @cuda.jit('void(double[:], double[:])') - def copykernel(x, y): - i = cuda.grid(1) - if i < x.shape[0]: - x[i] = i - y[i] = i - - x = np.arange(10, dtype=np.double) - y = x[:-1:2] - # z = x[1::2] - # n = y.size - try: - cuda.devicearray.auto_device(y) - except ValueError: - pass - else: - raise AssertionError("Should raise exception complaining the " - "contiguous-ness of the array.") - # Should we handle this use case? - # assert z.size == y.size - # copykernel[1, n](y, x) - # print(y, z) - # assert np.all(y == z) - # assert np.all(y == list(range(n))) - - def test_auto_device_const(self): - d, _ = cuda.devicearray.auto_device(2) - self.assertTrue(np.all(d.copy_to_host() == np.array(2))) - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_array_args.py b/numba/numba/cuda/tests/cudapy/test_array_args.py deleted file mode 100644 index b6ced8f6e..000000000 --- a/numba/numba/cuda/tests/cudapy/test_array_args.py +++ /dev/null @@ -1,29 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import numpy as np - -from numba import cuda -from numba.cuda.testing import unittest, SerialMixin - - -class TestCudaArrayArg(SerialMixin, unittest.TestCase): - def test_array_ary(self): - - @cuda.jit('double(double[:],int64)', device=True, inline=True) - def device_function(a, c): - return a[c] - - - @cuda.jit('void(double[:],double[:])') - def kernel(x, y): - i = cuda.grid(1) - y[i] = device_function(x, i) - - x = np.arange(10, dtype=np.double) - y = np.zeros_like(x) - kernel[10, 1](x, y) - 
self.assertTrue(np.all(x == y)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_array_methods.py b/numba/numba/cuda/tests/cudapy/test_array_methods.py deleted file mode 100644 index 3305f309c..000000000 --- a/numba/numba/cuda/tests/cudapy/test_array_methods.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import print_function, absolute_import, division - -from numba import unittest_support as unittest -import numpy as np -from numba import cuda -from numba.cuda.testing import SerialMixin - - -def reinterpret_array_type(byte_arr, start, stop, output): - # Tested with just one thread - val = byte_arr[start:stop].view(np.int32)[0] - output[0] = val - - -class TestCudaArrayMethods(SerialMixin, unittest.TestCase): - def test_reinterpret_array_type(self): - """ - Reinterpret byte array as int32 in the GPU. - """ - pyfunc = reinterpret_array_type - kernel = cuda.jit(pyfunc) - - byte_arr = np.arange(256, dtype=np.uint8) - itemsize = np.dtype(np.int32).itemsize - for start in range(0, 256, itemsize): - stop = start + itemsize - expect = byte_arr[start:stop].view(np.int32)[0] - - output = np.zeros(1, dtype=np.int32) - kernel[1, 1](byte_arr, start, stop, output) - - got = output[0] - self.assertEqual(expect, got) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_atomics.py b/numba/numba/cuda/tests/cudapy/test_atomics.py deleted file mode 100644 index ea749a8fa..000000000 --- a/numba/numba/cuda/tests/cudapy/test_atomics.py +++ /dev/null @@ -1,428 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import random -import numpy as np - -from numba import config -from numba import cuda, uint32, uint64, float32, float64 -from numba.cuda.testing import unittest, SerialMixin - - -def cc_X_or_above(major, minor): - if not config.ENABLE_CUDASIM: - return cuda.current_context().device.compute_capability >= (major, minor) - else: - return True - - -def 
skip_unless_cc_32(fn): - return unittest.skipUnless(cc_X_or_above(3, 2), "require cc >= 3.2")(fn) - -def skip_unless_cc_50(fn): - return unittest.skipUnless(cc_X_or_above(5, 0), "require cc >= 5.0")(fn) - - -def atomic_add(ary): - tid = cuda.threadIdx.x - sm = cuda.shared.array(32, uint32) - sm[tid] = 0 - cuda.syncthreads() - bin = ary[tid] % 32 - cuda.atomic.add(sm, bin, 1) - cuda.syncthreads() - ary[tid] = sm[tid] - - -def atomic_add2(ary): - tx = cuda.threadIdx.x - ty = cuda.threadIdx.y - sm = cuda.shared.array((4, 8), uint32) - sm[tx, ty] = ary[tx, ty] - cuda.syncthreads() - cuda.atomic.add(sm, (tx, ty), 1) - cuda.syncthreads() - ary[tx, ty] = sm[tx, ty] - - -def atomic_add3(ary): - tx = cuda.threadIdx.x - ty = cuda.threadIdx.y - sm = cuda.shared.array((4, 8), uint32) - sm[tx, ty] = ary[tx, ty] - cuda.syncthreads() - cuda.atomic.add(sm, (tx, uint64(ty)), 1) - cuda.syncthreads() - ary[tx, ty] = sm[tx, ty] - - -def atomic_add_float(ary): - tid = cuda.threadIdx.x - sm = cuda.shared.array(32, float32) - sm[tid] = 0 - cuda.syncthreads() - bin = int(ary[tid] % 32) - cuda.atomic.add(sm, bin, 1.0) - cuda.syncthreads() - ary[tid] = sm[tid] - - -def atomic_add_float_2(ary): - tx = cuda.threadIdx.x - ty = cuda.threadIdx.y - sm = cuda.shared.array((4, 8), float32) - sm[tx, ty] = ary[tx, ty] - cuda.syncthreads() - cuda.atomic.add(sm, (tx, ty), 1) - cuda.syncthreads() - ary[tx, ty] = sm[tx, ty] - - -def atomic_add_float_3(ary): - tx = cuda.threadIdx.x - ty = cuda.threadIdx.y - sm = cuda.shared.array((4, 8), float32) - sm[tx, ty] = ary[tx, ty] - cuda.syncthreads() - cuda.atomic.add(sm, (tx, uint64(ty)), 1) - cuda.syncthreads() - ary[tx, ty] = sm[tx, ty] - - -def atomic_add_double_global(idx, ary): - tid = cuda.threadIdx.x - bin = idx[tid] % 32 - cuda.atomic.add(ary, bin, 1.0) - - -def atomic_add_double_global_2(ary): - tx = cuda.threadIdx.x - ty = cuda.threadIdx.y - cuda.atomic.add(ary, (tx, ty), 1) - - -def atomic_add_double_global_3(ary): - tx = cuda.threadIdx.x - ty = 
cuda.threadIdx.y - cuda.atomic.add(ary, (tx, uint64(ty)), 1) - - -def atomic_add_double(idx, ary): - tid = cuda.threadIdx.x - sm = cuda.shared.array(32, float64) - sm[tid] = 0.0 - cuda.syncthreads() - bin = idx[tid] % 32 - cuda.atomic.add(sm, bin, 1.0) - cuda.syncthreads() - ary[tid] = sm[tid] - - -def atomic_add_double_2(ary): - tx = cuda.threadIdx.x - ty = cuda.threadIdx.y - sm = cuda.shared.array((4, 8), float64) - sm[tx, ty] = ary[tx, ty] - cuda.syncthreads() - cuda.atomic.add(sm, (tx, ty), 1) - cuda.syncthreads() - ary[tx, ty] = sm[tx, ty] - - -def atomic_add_double_3(ary): - tx = cuda.threadIdx.x - ty = cuda.threadIdx.y - sm = cuda.shared.array((4, 8), float64) - sm[tx, ty] = ary[tx, ty] - cuda.syncthreads() - cuda.atomic.add(sm, (tx, uint64(ty)), 1) - cuda.syncthreads() - ary[tx, ty] = sm[tx, ty] - - -def atomic_max(res, ary): - tx = cuda.threadIdx.x - bx = cuda.blockIdx.x - cuda.atomic.max(res, 0, ary[tx, bx]) - - -def atomic_min(res, ary): - tx = cuda.threadIdx.x - bx = cuda.blockIdx.x - cuda.atomic.min(res, 0, ary[tx, bx]) - - -def atomic_max_double_normalizedindex(res, ary): - tx = cuda.threadIdx.x - bx = cuda.blockIdx.x - cuda.atomic.max(res, 0, ary[tx, uint64(bx)]) - - -def atomic_max_double_oneindex(res, ary): - tx = cuda.threadIdx.x - cuda.atomic.max(res, 0, ary[tx]) - - -def atomic_max_double_shared(res, ary): - tid = cuda.threadIdx.x - smary = cuda.shared.array(32, float64) - smary[tid] = ary[tid] - smres = cuda.shared.array(1, float64) - if tid == 0: - smres[0] = res[0] - cuda.syncthreads() - cuda.atomic.max(smres, 0, smary[tid]) - cuda.syncthreads() - if tid == 0: - res[0] = smres[0] - - -def atomic_compare_and_swap(res, old, ary): - gid = cuda.grid(1) - if gid < res.size: - out = cuda.atomic.compare_and_swap(res[gid:], -99, ary[gid]) - old[gid] = out - - -class TestCudaAtomics(SerialMixin, unittest.TestCase): - def test_atomic_add(self): - ary = np.random.randint(0, 32, size=32).astype(np.uint32) - orig = ary.copy() - cuda_atomic_add = 
cuda.jit('void(uint32[:])')(atomic_add) - cuda_atomic_add[1, 32](ary) - - gold = np.zeros(32, dtype=np.uint32) - for i in range(orig.size): - gold[orig[i]] += 1 - - self.assertTrue(np.all(ary == gold)) - - def test_atomic_add2(self): - ary = np.random.randint(0, 32, size=32).astype(np.uint32).reshape(4, 8) - orig = ary.copy() - cuda_atomic_add2 = cuda.jit('void(uint32[:,:])')(atomic_add2) - cuda_atomic_add2[1, (4, 8)](ary) - self.assertTrue(np.all(ary == orig + 1)) - - def test_atomic_add3(self): - ary = np.random.randint(0, 32, size=32).astype(np.uint32).reshape(4, 8) - orig = ary.copy() - cuda_atomic_add3 = cuda.jit('void(uint32[:,:])')(atomic_add3) - cuda_atomic_add3[1, (4, 8)](ary) - - self.assertTrue(np.all(ary == orig + 1)) - - def test_atomic_add_float(self): - ary = np.random.randint(0, 32, size=32).astype(np.float32) - orig = ary.copy().astype(np.intp) - cuda_atomic_add_float = cuda.jit('void(float32[:])')(atomic_add_float) - cuda_atomic_add_float[1, 32](ary) - - gold = np.zeros(32, dtype=np.uint32) - for i in range(orig.size): - gold[orig[i]] += 1.0 - - self.assertTrue(np.all(ary == gold)) - - def test_atomic_add_float_2(self): - ary = np.random.randint(0, 32, size=32).astype(np.float32).reshape(4, 8) - orig = ary.copy() - cuda_atomic_add2 = cuda.jit('void(float32[:,:])')(atomic_add_float_2) - cuda_atomic_add2[1, (4, 8)](ary) - self.assertTrue(np.all(ary == orig + 1)) - - def test_atomic_add_float_3(self): - ary = np.random.randint(0, 32, size=32).astype(np.float32).reshape(4, 8) - orig = ary.copy() - cuda_atomic_add3 = cuda.jit('void(float32[:,:])')(atomic_add_float_3) - cuda_atomic_add3[1, (4, 8)](ary) - - self.assertTrue(np.all(ary == orig + 1)) - - @skip_unless_cc_50 - def test_atomic_add_double(self): - idx = np.random.randint(0, 32, size=32) - ary = np.zeros(32, np.float64) - cuda_func = cuda.jit('void(int64[:], float64[:])')(atomic_add_double) - cuda_func[1, 32](idx, ary) - - gold = np.zeros(32, dtype=np.uint32) - for i in range(idx.size): - 
gold[idx[i]] += 1.0 - - np.testing.assert_equal(ary, gold) - - def test_atomic_add_double_2(self): - ary = np.random.randint(0, 32, size=32).astype(np.float64).reshape(4, 8) - orig = ary.copy() - cuda_func = cuda.jit('void(float64[:,:])')(atomic_add_double_2) - cuda_func[1, (4, 8)](ary) - np.testing.assert_equal(ary, orig + 1) - - def test_atomic_add_double_3(self): - ary = np.random.randint(0, 32, size=32).astype(np.float64).reshape(4, 8) - orig = ary.copy() - cuda_func = cuda.jit('void(float64[:,:])')(atomic_add_double_3) - cuda_func[1, (4, 8)](ary) - - np.testing.assert_equal(ary, orig + 1) - - @skip_unless_cc_50 - def test_atomic_add_double_global(self): - idx = np.random.randint(0, 32, size=32) - ary = np.zeros(32, np.float64) - cuda_func = cuda.jit('void(int64[:], float64[:])')(atomic_add_double_global) - cuda_func[1, 32](idx, ary) - - gold = np.zeros(32, dtype=np.uint32) - for i in range(idx.size): - gold[idx[i]] += 1.0 - - np.testing.assert_equal(ary, gold) - - def test_atomic_add_double_global_2(self): - ary = np.random.randint(0, 32, size=32).astype(np.float64).reshape(4, 8) - orig = ary.copy() - cuda_func = cuda.jit('void(float64[:,:])')(atomic_add_double_global_2) - cuda_func[1, (4, 8)](ary) - np.testing.assert_equal(ary, orig + 1) - - def test_atomic_add_double_global_3(self): - ary = np.random.randint(0, 32, size=32).astype(np.float64).reshape(4, 8) - orig = ary.copy() - cuda_func = cuda.jit('void(float64[:,:])')(atomic_add_double_global_3) - cuda_func[1, (4, 8)](ary) - - np.testing.assert_equal(ary, orig + 1) - - def check_atomic_max(self, dtype, lo, hi): - vals = np.random.randint(lo, hi, size=(32, 32)).astype(dtype) - res = np.zeros(1, dtype=vals.dtype) - cuda_func = cuda.jit(atomic_max) - cuda_func[32, 32](res, vals) - gold = np.max(vals) - np.testing.assert_equal(res, gold) - - def test_atomic_max_int32(self): - self.check_atomic_max(dtype=np.int32, lo=-65535, hi=65535) - - def test_atomic_max_uint32(self): - 
self.check_atomic_max(dtype=np.uint32, lo=0, hi=65535) - - @skip_unless_cc_32 - def test_atomic_max_int64(self): - self.check_atomic_max(dtype=np.int64, lo=-65535, hi=65535) - - @skip_unless_cc_32 - def test_atomic_max_uint64(self): - self.check_atomic_max(dtype=np.uint64, lo=0, hi=65535) - - def test_atomic_max_float32(self): - self.check_atomic_max(dtype=np.float32, lo=-65535, hi=65535) - - def test_atomic_max_double(self): - self.check_atomic_max(dtype=np.float64, lo=-65535, hi=65535) - - def check_atomic_min(self, dtype, lo, hi): - vals = np.random.randint(lo, hi, size=(32, 32)).astype(dtype) - res = np.array([65535], dtype=vals.dtype) - cuda_func = cuda.jit(atomic_min) - cuda_func[32, 32](res, vals) - - gold = np.min(vals) - np.testing.assert_equal(res, gold) - - def test_atomic_min_int32(self): - self.check_atomic_min(dtype=np.int32, lo=-65535, hi=65535) - - def test_atomic_min_uint32(self): - self.check_atomic_min(dtype=np.uint32, lo=0, hi=65535) - - @skip_unless_cc_32 - def test_atomic_min_int64(self): - self.check_atomic_min(dtype=np.int64, lo=-65535, hi=65535) - - @skip_unless_cc_32 - def test_atomic_min_uint64(self): - self.check_atomic_min(dtype=np.uint64, lo=0, hi=65535) - - def test_atomic_min_float(self): - self.check_atomic_min(dtype=np.float32, lo=-65535, hi=65535) - - def test_atomic_min_double(self): - self.check_atomic_min(dtype=np.float64, lo=-65535, hi=65535) - - def test_atomic_max_double_normalizedindex(self): - vals = np.random.randint(0, 65535, size=(32, 32)).astype(np.float64) - res = np.zeros(1, np.float64) - cuda_func = cuda.jit('void(float64[:], float64[:,:])')( - atomic_max_double_normalizedindex) - cuda_func[32, 32](res, vals) - - gold = np.max(vals) - np.testing.assert_equal(res, gold) - - def test_atomic_max_double_oneindex(self): - vals = np.random.randint(0, 128, size=32).astype(np.float64) - res = np.zeros(1, np.float64) - cuda_func = cuda.jit('void(float64[:], float64[:])')( - atomic_max_double_oneindex) - cuda_func[1, 32](res, 
vals) - - gold = np.max(vals) - np.testing.assert_equal(res, gold) - - def test_atomic_max_nan_location(self): - vals = np.random.randint(0, 128, size=(1,1)).astype(np.float64) - gold = vals.copy().reshape(1) - res = np.zeros(1, np.float64) + np.nan - cuda_func = cuda.jit('void(float64[:], float64[:,:])')(atomic_max) - cuda_func[1, 1](res, vals) - - np.testing.assert_equal(res, gold) - - def test_atomic_max_nan_val(self): - res = np.random.randint(0, 128, size=1).astype(np.float64) - gold = res.copy() - vals = np.zeros((1, 1), np.float64) + np.nan - cuda_func = cuda.jit('void(float64[:], float64[:,:])')(atomic_max) - cuda_func[1, 1](res, vals) - - np.testing.assert_equal(res, gold) - - def test_atomic_max_double_shared(self): - vals = np.random.randint(0, 32, size=32).astype(np.float64) - res = np.zeros(1, np.float64) - cuda_func = cuda.jit('void(float64[:], float64[:])')(atomic_max_double_shared) - cuda_func[1, 32](res, vals) - - gold = np.max(vals) - np.testing.assert_equal(res, gold) - - def test_atomic_compare_and_swap(self): - n = 100 - res = [-99] * (n // 2) + [-1] * (n // 2) - random.shuffle(res) - res = np.asarray(res, dtype=np.int32) - out = np.zeros_like(res) - ary = np.random.randint(1, 10, size=res.size).astype(res.dtype) - - fill_mask = res == -99 - unfill_mask = res == -1 - - expect_res = np.zeros_like(res) - expect_res[fill_mask] = ary[fill_mask] - expect_res[unfill_mask] = -1 - - expect_out = np.zeros_like(out) - expect_out[fill_mask] = res[fill_mask] - expect_out[unfill_mask] = -1 - - cuda_func = cuda.jit(atomic_compare_and_swap) - cuda_func[10, 10](res, out, ary) - - np.testing.assert_array_equal(expect_res, res) - np.testing.assert_array_equal(expect_out, out) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_autojit.py b/numba/numba/cuda/tests/cudapy/test_autojit.py deleted file mode 100644 index ab349b52e..000000000 --- a/numba/numba/cuda/tests/cudapy/test_autojit.py +++ /dev/null @@ -1,26 +0,0 @@ 
-from __future__ import print_function, absolute_import, division -import numpy as np -from numba import cuda -from numba.cuda.testing import unittest, SerialMixin -from numba.cuda.testing import skip_on_cudasim - - -@skip_on_cudasim('Simulator does not have definitions attribute') -class TestCudaAutoJit(SerialMixin, unittest.TestCase): - def test_autojit(self): - @cuda.autojit - def what(a, b, c): - pass - - what(np.empty(1), 1.0, 21) - what(np.empty(1), 1.0, 21) - what(np.empty(1), np.empty(1, dtype=np.int32), 21) - what(np.empty(1), np.empty(1, dtype=np.int32), 21) - what(np.empty(1), 1.0, 21) - - self.assertTrue(len(what.definitions) == 2) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/cuda/tests/cudapy/test_blackscholes.py b/numba/numba/cuda/tests/cudapy/test_blackscholes.py deleted file mode 100644 index 3d4329e94..000000000 --- a/numba/numba/cuda/tests/cudapy/test_blackscholes.py +++ /dev/null @@ -1,128 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import numpy as np -import math -import time -from numba import cuda, double -from numba.cuda.testing import unittest, SerialMixin - - -RISKFREE = 0.02 -VOLATILITY = 0.30 - -A1 = 0.31938153 -A2 = -0.356563782 -A3 = 1.781477937 -A4 = -1.821255978 -A5 = 1.330274429 -RSQRT2PI = 0.39894228040143267793994605993438 - - -def cnd(d): - K = 1.0 / (1.0 + 0.2316419 * np.abs(d)) - ret_val = (RSQRT2PI * np.exp(-0.5 * d * d) * - (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))))) - return np.where(d > 0, 1.0 - ret_val, ret_val) - - -def black_scholes(callResult, putResult, stockPrice, optionStrike, optionYears, - Riskfree, Volatility): - S = stockPrice - X = optionStrike - T = optionYears - R = Riskfree - V = Volatility - sqrtT = np.sqrt(T) - d1 = (np.log(S / X) + (R + 0.5 * V * V) * T) / (V * sqrtT) - d2 = d1 - V * sqrtT - cndd1 = cnd(d1) - cndd2 = cnd(d2) - - expRT = np.exp(- R * T) - callResult[:] = (S * cndd1 - X * expRT * cndd2) - putResult[:] = (X * expRT * 
(1.0 - cndd2) - S * (1.0 - cndd1)) - - -def randfloat(rand_var, low, high): - return (1.0 - rand_var) * low + rand_var * high - - -class TestBlackScholes(SerialMixin, unittest.TestCase): - def test_blackscholes(self): - OPT_N = 400 - iterations = 2 - - stockPrice = randfloat(np.random.random(OPT_N), 5.0, 30.0) - optionStrike = randfloat(np.random.random(OPT_N), 1.0, 100.0) - optionYears = randfloat(np.random.random(OPT_N), 0.25, 10.0) - - callResultNumpy = np.zeros(OPT_N) - putResultNumpy = -np.ones(OPT_N) - - callResultNumbapro = np.zeros(OPT_N) - putResultNumbapro = -np.ones(OPT_N) - - # numpy - for i in range(iterations): - black_scholes(callResultNumpy, putResultNumpy, stockPrice, - optionStrike, optionYears, RISKFREE, VOLATILITY) - - - - @cuda.jit(argtypes=(double,), restype=double, device=True, inline=True) - def cnd_cuda(d): - K = 1.0 / (1.0 + 0.2316419 * math.fabs(d)) - ret_val = (RSQRT2PI * math.exp(-0.5 * d * d) * - (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))))) - if d > 0: - ret_val = 1.0 - ret_val - return ret_val - - - @cuda.jit(argtypes=(double[:], double[:], double[:], double[:], double[:], - double, double)) - def black_scholes_cuda(callResult, putResult, S, X, T, R, V): - i = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x - if i >= S.shape[0]: - return - sqrtT = math.sqrt(T[i]) - d1 = (math.log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i]) / (V * sqrtT) - d2 = d1 - V * sqrtT - cndd1 = cnd_cuda(d1) - cndd2 = cnd_cuda(d2) - - expRT = math.exp((-1. 
* R) * T[i]) - callResult[i] = (S[i] * cndd1 - X[i] * expRT * cndd2) - putResult[i] = (X[i] * expRT * (1.0 - cndd2) - S[i] * (1.0 - cndd1)) - - # numbapro - time0 = time.time() - blockdim = 512, 1 - griddim = int(math.ceil(float(OPT_N) / blockdim[0])), 1 - stream = cuda.stream() - d_callResult = cuda.to_device(callResultNumbapro, stream) - d_putResult = cuda.to_device(putResultNumbapro, stream) - d_stockPrice = cuda.to_device(stockPrice, stream) - d_optionStrike = cuda.to_device(optionStrike, stream) - d_optionYears = cuda.to_device(optionYears, stream) - time1 = time.time() - for i in range(iterations): - black_scholes_cuda[griddim, blockdim, stream]( - d_callResult, d_putResult, d_stockPrice, d_optionStrike, - d_optionYears, RISKFREE, VOLATILITY) - d_callResult.copy_to_host(callResultNumbapro, stream) - d_putResult.copy_to_host(putResultNumbapro, stream) - stream.synchronize() - - dt = (time1 - time0) - - delta = np.abs(callResultNumpy - callResultNumbapro) - L1norm = delta.sum() / np.abs(callResultNumpy).sum() - - max_abs_err = delta.max() - self.assertTrue(L1norm < 1e-13) - self.assertTrue(max_abs_err < 1e-13) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_boolean.py b/numba/numba/cuda/tests/cudapy/test_boolean.py deleted file mode 100644 index 3c788b02c..000000000 --- a/numba/numba/cuda/tests/cudapy/test_boolean.py +++ /dev/null @@ -1,25 +0,0 @@ -from __future__ import print_function, absolute_import -import numpy as np -from numba.cuda.testing import unittest, SerialMixin -from numba import cuda - - -def boolean_func(A, vertial): - if vertial: - A[0] = 123 - else: - A[0] = 321 - - -class TestCudaBoolean(SerialMixin, unittest.TestCase): - def test_boolean(self): - func = cuda.jit('void(float64[:], bool_)')(boolean_func) - A = np.array([0], dtype='float64') - func(A, True) - self.assertTrue(A[0] == 123) - func(A, False) - self.assertTrue(A[0] == 321) - - -if __name__ == '__main__': - unittest.main() diff --git 
a/numba/numba/cuda/tests/cudapy/test_casting.py b/numba/numba/cuda/tests/cudapy/test_casting.py deleted file mode 100644 index 33e63966f..000000000 --- a/numba/numba/cuda/tests/cudapy/test_casting.py +++ /dev/null @@ -1,74 +0,0 @@ -from numba import unittest_support as unittest -import numpy as np -from numba import cuda, types -import struct -from numba.cuda.testing import SerialMixin - - -def float_to_int(x): - return np.int32(x) - - -def int_to_float(x): - return np.float64(x) / 2 - - -def float_to_unsigned(x): - return types.uint32(x) - - -def float_to_complex(x): - return np.complex128(x) - - -class TestCasting(SerialMixin, unittest.TestCase): - def _create_wrapped(self, pyfunc, intype, outtype): - wrapped_func = cuda.jit(device=True)(pyfunc) - - @cuda.jit - def cuda_wrapper_fn(arg, res): - res[0] = wrapped_func(arg[0]) - - def wrapper_fn(arg): - argarray = np.zeros(1, dtype=intype) - argarray[0] = arg - resarray = np.zeros(1, dtype=outtype) - cuda_wrapper_fn(argarray, resarray) - return resarray[0] - - return wrapper_fn - - def test_float_to_int(self): - pyfunc = float_to_int - cfunc = self._create_wrapped(pyfunc, np.float32, np.int32) - - self.assertEqual(cfunc(12.3), pyfunc(12.3)) - self.assertEqual(cfunc(12.3), int(12.3)) - self.assertEqual(cfunc(-12.3), pyfunc(-12.3)) - self.assertEqual(cfunc(-12.3), int(-12.3)) - - def test_int_to_float(self): - pyfunc = int_to_float - cfunc = self._create_wrapped(pyfunc, np.int64, np.float64) - - self.assertEqual(cfunc(321), pyfunc(321)) - self.assertEqual(cfunc(321), 321. 
/ 2) - - def test_float_to_unsigned(self): - pyfunc = float_to_unsigned - cfunc = self._create_wrapped(pyfunc, np.float32, np.uint32) - - self.assertEqual(cfunc(3.21), pyfunc(3.21)) - self.assertEqual(cfunc(3.21), struct.unpack('I', struct.pack('i', - 3))[0]) - - def test_float_to_complex(self): - pyfunc = float_to_complex - cfunc = self._create_wrapped(pyfunc, np.float64, np.complex128) - - self.assertEqual(cfunc(-3.21), pyfunc(-3.21)) - self.assertEqual(cfunc(-3.21), -3.21 + 0j) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_complex.py b/numba/numba/cuda/tests/cudapy/test_complex.py deleted file mode 100644 index 6e5e90d46..000000000 --- a/numba/numba/cuda/tests/cudapy/test_complex.py +++ /dev/null @@ -1,255 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import cmath -import math -import itertools -import string -import sys -import textwrap - -import numpy as np - -from numba.cuda.testing import unittest, SerialMixin -from numba import cuda, types, utils, numpy_support -from numba.tests.support import TestCase, compile_function -from numba.tests.complex_usecases import * - - -def compile_scalar_func(pyfunc, argtypes, restype): - # First compile a scalar device function - assert not any(isinstance(tp, types.Array) for tp in argtypes) - assert not isinstance(restype, types.Array) - device_func = cuda.jit(restype(*argtypes), device=True)(pyfunc) - - kernel_types = [types.Array(tp, 1, "C") - for tp in [restype] + list(argtypes)] - - if len(argtypes) == 1: - def kernel_func(out, a): - i = cuda.grid(1) - if i < out.shape[0]: - out[i] = device_func(a[i]) - elif len(argtypes) == 2: - def kernel_func(out, a, b): - i = cuda.grid(1) - if i < out.shape[0]: - out[i] = device_func(a[i], b[i]) - else: - assert 0 - - kernel = cuda.jit(tuple(kernel_types))(kernel_func) - - def kernel_wrapper(values): - n = len(values) - inputs = [np.empty(n, dtype=numpy_support.as_dtype(tp)) - for tp in argtypes] - 
output = np.empty(n, dtype=numpy_support.as_dtype(restype)) - for i, vs in enumerate(values): - for v, inp in zip(vs, inputs): - inp[i] = v - args = [output] + inputs - kernel[int(math.ceil(n / 256)), 256](*args) - return list(output) - return kernel_wrapper - - -class BaseComplexTest(SerialMixin): - - def basic_values(self): - reals = [-0.0, +0.0, 1, -1, +1.5, -3.5, - float('-inf'), float('+inf'), float('nan')] - return [complex(x, y) for x, y in itertools.product(reals, reals)] - - def more_values(self): - reals = [0.0, +0.0, 1, -1, -math.pi, +math.pi, - float('-inf'), float('+inf'), float('nan')] - return [complex(x, y) for x, y in itertools.product(reals, reals)] - - def non_nan_values(self): - reals = [-0.0, +0.0, 1, -1, -math.pi, +math.pi, - float('inf'), float('-inf')] - return [complex(x, y) for x, y in itertools.product(reals, reals)] - - def run_func(self, pyfunc, sigs, values, ulps=1, ignore_sign_on_zero=False): - for sig in sigs: - if isinstance(sig, types.Type): - sig = sig, - if isinstance(sig, tuple): - # Assume return type is the type of first argument - sig = sig[0](*sig) - prec = ('single' - if sig.args[0] in (types.float32, types.complex64) - else 'double') - cudafunc = compile_scalar_func(pyfunc, sig.args, sig.return_type) - ok_values = [] - expected_list = [] - for args in values: - if not isinstance(args, (list, tuple)): - args = args, - try: - expected_list.append(pyfunc(*args)) - ok_values.append(args) - except ValueError as e: - self.assertIn("math domain error", str(e)) - continue - got_list = cudafunc(ok_values) - for got, expected, args in zip(got_list, expected_list, ok_values): - msg = 'for input %r with prec %r' % (args, prec) - self.assertPreciseEqual(got, expected, prec=prec, - ulps=ulps, - ignore_sign_on_zero=ignore_sign_on_zero, - msg=msg) - - run_unary = run_func - run_binary = run_func - - -class TestComplex(BaseComplexTest, TestCase): - - def check_real_image(self, pyfunc): - values = self.basic_values() - 
self.run_unary(pyfunc, - [tp.underlying_float(tp) - for tp in (types.complex64, types.complex128)], - values) - - def test_real(self): - self.check_real_image(real_usecase) - - def test_imag(self): - self.check_real_image(imag_usecase) - - def test_conjugate(self): - pyfunc = conjugate_usecase - values = self.basic_values() - self.run_unary(pyfunc, - [types.complex64, types.complex128], - values) - - -class TestCMath(BaseComplexTest, TestCase): - """ - Tests for cmath module support. - """ - - def check_predicate_func(self, pyfunc): - self.run_unary(pyfunc, - [types.boolean(tp) for tp in (types.complex128, types.complex64)], - self.basic_values()) - - def check_unary_func(self, pyfunc, ulps=1, values=None, - returns_float=False, ignore_sign_on_zero=False): - if returns_float: - def sig(tp): - return tp.underlying_float(tp) - else: - def sig(tp): - return tp(tp) - self.run_unary(pyfunc, [sig(types.complex128)], - values or self.more_values(), ulps=ulps, - ignore_sign_on_zero=ignore_sign_on_zero) - # Avoid discontinuities around pi when in single precision. - self.run_unary(pyfunc, [sig(types.complex64)], - values or self.basic_values(), ulps=ulps, - ignore_sign_on_zero=ignore_sign_on_zero) - - # Conversions - - def test_phase(self): - self.check_unary_func(phase_usecase, returns_float=True) - - def test_polar(self): - self.check_unary_func(polar_as_complex_usecase) - - def test_rect(self): - def do_test(tp, seed_values): - values = [(z.real, z.imag) for z in seed_values - if not math.isinf(z.imag) or z.real == 0] - float_type = tp.underlying_float - self.run_binary(rect_usecase, [tp(float_type, float_type)], - values) - do_test(types.complex128, self.more_values()) - # Avoid discontinuities around pi when in single precision. 
- do_test(types.complex64, self.basic_values()) - - # Classification - - def test_isnan(self): - self.check_predicate_func(isnan_usecase) - - def test_isinf(self): - self.check_predicate_func(isinf_usecase) - - @unittest.skipIf(utils.PYVERSION < (3, 2), "needs Python 3.2+") - def test_isfinite(self): - self.check_predicate_func(isfinite_usecase) - - # Power and logarithms - - def test_exp(self): - self.check_unary_func(exp_usecase, ulps=2) - - def test_log(self): - self.check_unary_func(log_usecase) - - def test_log_base(self): - values = list(itertools.product(self.more_values(), self.more_values())) - value_types = [(types.complex128, types.complex128), - (types.complex64, types.complex64)] - self.run_binary(log_base_usecase, value_types, values, - ulps=3) - - def test_log10(self): - self.check_unary_func(log10_usecase) - - def test_sqrt(self): - self.check_unary_func(sqrt_usecase) - - # Trigonometric functions - - def test_acos(self): - self.check_unary_func(acos_usecase, ulps=2) - - def test_asin(self): - self.check_unary_func(asin_usecase, ulps=2) - - def test_atan(self): - self.check_unary_func(atan_usecase, ulps=2, - values=self.non_nan_values()) - - def test_cos(self): - self.check_unary_func(cos_usecase, ulps=2) - - def test_sin(self): - # See test_sinh. 
- self.check_unary_func(sin_usecase, ulps=2) - - def test_tan(self): - self.check_unary_func(tan_usecase, ulps=2, - ignore_sign_on_zero=True) - - # Hyperbolic functions - - def test_acosh(self): - self.check_unary_func(acosh_usecase) - - def test_asinh(self): - self.check_unary_func(asinh_usecase, ulps=2) - - def test_atanh(self): - self.check_unary_func(atanh_usecase, ulps=2, - ignore_sign_on_zero=True) - - def test_cosh(self): - self.check_unary_func(cosh_usecase, ulps=2) - - def test_sinh(self): - self.check_unary_func(sinh_usecase, ulps=2) - - def test_tanh(self): - self.check_unary_func(tanh_usecase, ulps=2, - ignore_sign_on_zero=True) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_complex_kernel.py b/numba/numba/cuda/tests/cudapy/test_complex_kernel.py deleted file mode 100644 index daee93471..000000000 --- a/numba/numba/cuda/tests/cudapy/test_complex_kernel.py +++ /dev/null @@ -1,24 +0,0 @@ -from __future__ import print_function, absolute_import -import numpy as np -from numba import cuda -from numba.cuda.testing import unittest, SerialMixin - - -class TestCudaComplex(SerialMixin, unittest.TestCase): - def test_cuda_complex_arg(self): - @cuda.jit('void(complex128[:], complex128)') - def foo(a, b): - i = cuda.grid(1) - a[i] += b - - - a = np.arange(5, dtype=np.complex128) - a0 = a.copy() - foo[1, a.shape](a, 2j) - self.assertTrue(np.allclose(a, a0 + 2j)) - - -if __name__ == '__main__': - unittest.main() - - diff --git a/numba/numba/cuda/tests/cudapy/test_const_string.py b/numba/numba/cuda/tests/cudapy/test_const_string.py deleted file mode 100644 index 89f41c986..000000000 --- a/numba/numba/cuda/tests/cudapy/test_const_string.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import print_function - -import re -from numba.cuda.testing import unittest, skip_on_cudasim -from llvmlite import ir - - -@skip_on_cudasim("This is testing CUDA backend code generation") -class TestCudaConstString(unittest.TestCase): - 
def test_const_string(self): - # These imports is incompatible with CUDASIM - from numba.cuda.descriptor import CUDATargetDesc - from numba.cuda.cudadrv.nvvm import llvm_to_ptx, ADDRSPACE_CONSTANT - - targetctx = CUDATargetDesc.targetctx - mod = targetctx.create_module("") - textstring = 'A Little Brown Fox' - gv0 = targetctx.insert_const_string(mod, textstring) - gv1 = targetctx.insert_const_string(mod, textstring) - - res = re.findall(r"@\"__conststring__.*internal.*constant.*\[" - r"19\s+x\s+i8\]", str(mod)) - self.assertEqual(len(res), 1) - - fnty = ir.FunctionType(ir.IntType(8).as_pointer(), []) - - # Using insert_const_string - fn = mod.add_function(fnty, name="test_insert_const_string") - builder = ir.IRBuilder(fn.append_basic_block()) - res = targetctx.insert_addrspace_conv(builder, gv0, - addrspace=ADDRSPACE_CONSTANT) - builder.ret(res) - - matches = re.findall(r"@\"__conststring__.*internal.*constant.*\[" - r"19\s+x\s+i8\]", str(mod)) - self.assertEqual(len(matches), 1) - - # Using insert_string_const_addrspace - fn = mod.add_function(fnty, name="test_insert_string_const_addrspace") - builder = ir.IRBuilder(fn.append_basic_block()) - res = targetctx.insert_string_const_addrspace(builder, textstring) - builder.ret(res) - - matches = re.findall(r"@\"__conststring__.*internal.*constant.*\[" - r"19\s+x\s+i8\]", str(mod)) - self.assertEqual(len(matches), 1) - - ptx = llvm_to_ptx(str(mod)).decode('ascii') - matches = list(re.findall(r"\.const.*__conststring__", ptx)) - - self.assertEqual(len(matches), 1) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_constmem.py b/numba/numba/cuda/tests/cudapy/test_constmem.py deleted file mode 100644 index aa908ae0c..000000000 --- a/numba/numba/cuda/tests/cudapy/test_constmem.py +++ /dev/null @@ -1,60 +0,0 @@ -from __future__ import print_function - -import numpy as np - -from numba import cuda -from numba.cuda.testing import unittest, SerialMixin - - -CONST1D = np.arange(10, 
dtype=np.float64) / 2. -CONST2D = np.asfortranarray( - np.arange(100, dtype=np.int32).reshape(10, 10)) -CONST3D = ((np.arange(5*5*5, dtype=np.complex64).reshape(5, 5, 5) + 1j) / - 2j) - - -def cuconst(A): - C = cuda.const.array_like(CONST1D) - i = cuda.grid(1) - A[i] = C[i] - - -def cuconst2d(A): - C = cuda.const.array_like(CONST2D) - i, j = cuda.grid(2) - A[i, j] = C[i, j] - - -def cuconst3d(A): - C = cuda.const.array_like(CONST3D) - i = cuda.threadIdx.x - j = cuda.threadIdx.y - k = cuda.threadIdx.z - A[i, j, k] = C[i, j, k] - - -class TestCudaConstantMemory(SerialMixin, unittest.TestCase): - def test_const_array(self): - jcuconst = cuda.jit('void(float64[:])')(cuconst) - self.assertTrue('.const' in jcuconst.ptx) - A = np.empty_like(CONST1D) - jcuconst[2, 5](A) - self.assertTrue(np.all(A == CONST1D)) - - def test_const_array_2d(self): - jcuconst2d = cuda.jit('void(int32[:,:])')(cuconst2d) - self.assertTrue('.const' in jcuconst2d.ptx) - A = np.empty_like(CONST2D, order='C') - jcuconst2d[(2,2), (5,5)](A) - self.assertTrue(np.all(A == CONST2D)) - - def test_const_array_3d(self): - jcuconst3d = cuda.jit('void(complex64[:,:,:])')(cuconst3d) - self.assertTrue('.const' in jcuconst3d.ptx) - A = np.empty_like(CONST3D, order='F') - jcuconst3d[1, (5, 5, 5)](A) - self.assertTrue(np.all(A == CONST3D)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_cuda_array_interface.py b/numba/numba/cuda/tests/cudapy/test_cuda_array_interface.py deleted file mode 100644 index 0be2cc148..000000000 --- a/numba/numba/cuda/tests/cudapy/test_cuda_array_interface.py +++ /dev/null @@ -1,111 +0,0 @@ -import numpy as np - -from numba import vectorize, guvectorize -from numba import cuda -from numba.cuda.testing import unittest, CUDATestCase -from numba.cuda.testing import skip_on_cudasim - - -class MyArray(object): - def __init__(self, arr): - self._arr = arr - self.__cuda_array_interface__ = arr.__cuda_array_interface__ - - -@skip_on_cudasim('CUDA 
Array Interface is not supported in the simulator') -class TestCudaArrayInterface(CUDATestCase): - def test_as_cuda_array(self): - h_arr = np.arange(10) - self.assertFalse(cuda.is_cuda_array(h_arr)) - d_arr = cuda.to_device(h_arr) - self.assertTrue(cuda.is_cuda_array(d_arr)) - my_arr = MyArray(d_arr) - self.assertTrue(cuda.is_cuda_array(my_arr)) - wrapped = cuda.as_cuda_array(my_arr) - self.assertTrue(cuda.is_cuda_array(wrapped)) - # Their values must equal the original array - np.testing.assert_array_equal(wrapped.copy_to_host(), h_arr) - np.testing.assert_array_equal(d_arr.copy_to_host(), h_arr) - # d_arr and wrapped must be the same buffer - self.assertEqual(wrapped.device_ctypes_pointer.value, - d_arr.device_ctypes_pointer.value) - - def test_ownership(self): - # Get the deallocation queue - ctx = cuda.current_context() - deallocs = ctx.deallocations - # Flush all deallocations - deallocs.clear() - self.assertEqual(len(deallocs), 0) - # Make new device array - d_arr = cuda.to_device(np.arange(100)) - # Convert it - cvted = cuda.as_cuda_array(d_arr) - # Drop reference to the original object such that - # only `cvted` has a reference to it. 
- del d_arr - # There shouldn't be any new deallocations - self.assertEqual(len(deallocs), 0) - # Try to access the memory and verify its content - np.testing.assert_equal(cvted.copy_to_host(), np.arange(100)) - # Drop last reference to the memory - del cvted - self.assertEqual(len(deallocs), 1) - # Flush - deallocs.clear() - - def test_kernel_arg(self): - h_arr = np.arange(10) - d_arr = cuda.to_device(h_arr) - my_arr = MyArray(d_arr) - wrapped = cuda.as_cuda_array(my_arr) - - @cuda.jit - def mutate(arr, val): - arr[cuda.grid(1)] += val - - val = 7 - mutate.forall(wrapped.size)(wrapped, val) - - np.testing.assert_array_equal(wrapped.copy_to_host(), h_arr + val) - np.testing.assert_array_equal(d_arr.copy_to_host(), h_arr + val) - - def test_ufunc_arg(self): - @vectorize(['f8(f8, f8)'], target='cuda') - def vadd(a, b): - return a + b - - # Case 1: use custom array as argument - h_arr = np.random.random(10) - arr = MyArray(cuda.to_device(h_arr)) - val = 6 - out = vadd(arr, val) - np.testing.assert_array_equal(out.copy_to_host(), h_arr + val) - - # Case 2: use custom array as return - out = MyArray(cuda.device_array(h_arr.shape)) - returned = vadd(h_arr, val, out=out) - np.testing.assert_array_equal(returned.copy_to_host(), h_arr + val) - - def test_gufunc_arg(self): - @guvectorize(['(f8, f8, f8[:])'], '(),()->()', target='cuda') - def vadd(inp, val, out): - out[0] = inp + val - - # Case 1: use custom array as argument - h_arr = np.random.random(10) - arr = MyArray(cuda.to_device(h_arr)) - val = np.float64(7) - out = vadd(arr, val) - np.testing.assert_array_equal(out.copy_to_host(), h_arr + val) - - # Case 2: use custom array as return - out = MyArray(cuda.device_array(h_arr.shape)) - returned = vadd(h_arr, val, out=out) - np.testing.assert_array_equal(returned.copy_to_host(), h_arr + val) - self.assertEqual(returned.device_ctypes_pointer.value, - out._arr.device_ctypes_pointer.value) - - -if __name__ == '__main__': - unittest.main() diff --git 
a/numba/numba/cuda/tests/cudapy/test_cuda_autojit.py b/numba/numba/cuda/tests/cudapy/test_cuda_autojit.py deleted file mode 100644 index 195a5353c..000000000 --- a/numba/numba/cuda/tests/cudapy/test_cuda_autojit.py +++ /dev/null @@ -1,80 +0,0 @@ -from __future__ import print_function -from numba import unittest_support as unittest -from numba import cuda -import numpy as np -from numba.cuda.testing import SerialMixin - - -class TestCudaAutojit(SerialMixin, unittest.TestCase): - def test_device_array(self): - @cuda.autojit - def foo(x, y): - i = cuda.grid(1) - y[i] = x[i] - - x = np.arange(10) - y = np.empty_like(x) - - dx = cuda.to_device(x) - dy = cuda.to_device(y) - - foo[10, 1](dx, dy) - - dy.copy_to_host(y) - - self.assertTrue(np.all(x == y)) - - def test_device_auto_jit(self): - @cuda.jit(device=True) - def mapper(args): - a, b, c = args - return a + b + c - - - @cuda.jit(device=True) - def reducer(a, b): - return a + b - - - @cuda.jit - def driver(A, B): - i = cuda.grid(1) - if i < B.size: - args = A[i], A[i] + B[i], B[i] - B[i] = reducer(mapper(args), 1) - - A = np.arange(100, dtype=np.float32) - B = np.arange(100, dtype=np.float32) - - Acopy = A.copy() - Bcopy = B.copy() - - driver[1, 100](A, B) - - np.testing.assert_allclose(Acopy + Acopy + Bcopy + Bcopy + 1, B) - - def test_device_auto_jit_2(self): - @cuda.jit(device=True) - def inner(arg): - return arg + 1 - - @cuda.jit - def outer(argin, argout): - argout[0] = inner(argin[0]) + inner(2) - - a = np.zeros(1) - b = np.zeros(1) - - stream = cuda.stream() - d_a = cuda.to_device(a, stream) - d_b = cuda.to_device(b, stream) - - outer[1, 1, stream](d_a, d_b) - - d_b.copy_to_host(b, stream) - - self.assertEqual(b[0], (a[0] + 1) + (2 + 1)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_debug.py b/numba/numba/cuda/tests/cudapy/test_debug.py deleted file mode 100644 index 5fbbf1fc0..000000000 --- a/numba/numba/cuda/tests/cudapy/test_debug.py +++ /dev/null @@ -1,94 
+0,0 @@ -from __future__ import print_function, absolute_import - -import numpy as np - -from numba.cuda.testing import skip_on_cudasim, SerialMixin -from numba.tests.support import override_config, captured_stderr, captured_stdout -from numba import unittest_support as unittest -from numba import cuda, float64 - - -def simple_cuda(A, B): - i = cuda.grid(1) - B[i] = A[i] + 1.5 - - -@skip_on_cudasim('Simulator does not produce debug dumps') -class TestDebugOutput(SerialMixin, unittest.TestCase): - - def compile_simple_cuda(self): - with captured_stderr() as err: - with captured_stdout() as out: - cfunc = cuda.jit((float64[:], float64[:]))(simple_cuda) - # Call compiled function (to ensure PTX is generated) - # and sanity-check results. - A = np.linspace(0, 1, 10).astype(np.float64) - B = np.zeros_like(A) - cfunc[1, 10](A, B) - self.assertTrue(np.allclose(A + 1.5, B)) - # stderr shouldn't be affected by debug output - self.assertFalse(err.getvalue()) - return out.getvalue() - - def assert_fails(self, *args, **kwargs): - self.assertRaises(AssertionError, *args, **kwargs) - - def check_debug_output(self, out, enabled_dumps): - all_dumps = dict.fromkeys(['bytecode', 'cfg', 'ir', 'llvm', - 'assembly'], - False) - for name in enabled_dumps: - assert name in all_dumps - all_dumps[name] = True - for name, enabled in sorted(all_dumps.items()): - check_meth = getattr(self, '_check_dump_%s' % name) - if enabled: - check_meth(out) - else: - self.assertRaises(AssertionError, check_meth, out) - - def _check_dump_bytecode(self, out): - self.assertIn('BINARY_ADD', out) - - def _check_dump_cfg(self, out): - self.assertIn('CFG dominators', out) - - def _check_dump_ir(self, out): - self.assertIn('--IR DUMP: simple_cuda--', out) - self.assertIn('const(float, 1.5)', out) - - def _check_dump_llvm(self, out): - self.assertIn('--LLVM DUMP', out) - - def _check_dump_assembly(self, out): - self.assertIn('--ASSEMBLY simple_cuda', out) - self.assertIn('Generated by NVIDIA NVVM Compiler', out) 
- - def test_dump_bytecode(self): - with override_config('DUMP_BYTECODE', True): - out = self.compile_simple_cuda() - self.check_debug_output(out, ['bytecode']) - - def test_dump_ir(self): - with override_config('DUMP_IR', True): - out = self.compile_simple_cuda() - self.check_debug_output(out, ['ir']) - - def test_dump_cfg(self): - with override_config('DUMP_CFG', True): - out = self.compile_simple_cuda() - self.check_debug_output(out, ['cfg']) - - def test_dump_llvm(self): - with override_config('DUMP_LLVM', True): - out = self.compile_simple_cuda() - self.check_debug_output(out, ['llvm']) - - def test_dump_assembly(self): - with override_config('DUMP_ASSEMBLY', True): - out = self.compile_simple_cuda() - self.check_debug_output(out, ['assembly']) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_debuginfo.py b/numba/numba/cuda/tests/cudapy/test_debuginfo.py deleted file mode 100644 index bd598c998..000000000 --- a/numba/numba/cuda/tests/cudapy/test_debuginfo.py +++ /dev/null @@ -1,56 +0,0 @@ -from __future__ import print_function, absolute_import - -from numba.tests.support import override_config, TestCase -from numba.cuda.testing import skip_on_cudasim -from numba import unittest_support as unittest -from numba import cuda, types -from numba.cuda.testing import SerialMixin - - -@skip_on_cudasim('Simulator does not produce debug dumps') -class TestCudaDebugInfo(SerialMixin, TestCase): - """ - These tests only checks the compiled PTX for debuginfo section - """ - def _getasm(self, fn, sig): - fn.compile(sig) - return fn.inspect_asm(sig) - - def _check(self, fn, sig, expect): - asm = self._getasm(fn, sig=sig) - assertfn = self.assertIn if expect else self.assertNotIn - assertfn('.section .debug_info {', asm, msg=asm) - - def test_no_debuginfo_in_asm(self): - @cuda.jit(debug=False) - def foo(x): - x[0] = 1 - - self._check(foo, sig=(types.int32[:],), expect=False) - - def test_debuginfo_in_asm(self): - 
@cuda.jit(debug=True) - def foo(x): - x[0] = 1 - - self._check(foo, sig=(types.int32[:],), expect=True) - - def test_environment_override(self): - with override_config('CUDA_DEBUGINFO_DEFAULT', 1): - # Using default value - @cuda.jit - def foo(x): - x[0] = 1 - - self._check(foo, sig=(types.int32[:],), expect=True) - - # User override default value - @cuda.jit(debug=False) - def bar(x): - x[0] = 1 - - self._check(bar, sig=(types.int32[:],), expect=False) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_device_func.py b/numba/numba/cuda/tests/cudapy/test_device_func.py deleted file mode 100644 index 445fff6fd..000000000 --- a/numba/numba/cuda/tests/cudapy/test_device_func.py +++ /dev/null @@ -1,114 +0,0 @@ -from __future__ import print_function, absolute_import, division - - -import re -import types - -import numpy as np - -from numba.cuda.testing import unittest, skip_on_cudasim, SerialMixin -from numba import cuda, jit -from numba.errors import TypingError - - -class TestDeviceFunc(SerialMixin, unittest.TestCase): - - def test_use_add2f(self): - - @cuda.jit("float32(float32, float32)", device=True) - def add2f(a, b): - return a + b - - def use_add2f(ary): - i = cuda.grid(1) - ary[i] = add2f(ary[i], ary[i]) - - compiled = cuda.jit("void(float32[:])")(use_add2f) - - nelem = 10 - ary = np.arange(nelem, dtype=np.float32) - exp = ary + ary - compiled[1, nelem](ary) - - self.assertTrue(np.all(ary == exp), (ary, exp)) - - def test_indirect_add2f(self): - - @cuda.jit("float32(float32, float32)", device=True) - def add2f(a, b): - return a + b - - @cuda.jit("float32(float32, float32)", device=True) - def indirect(a, b): - return add2f(a, b) - - def indirect_add2f(ary): - i = cuda.grid(1) - ary[i] = indirect(ary[i], ary[i]) - - compiled = cuda.jit("void(float32[:])")(indirect_add2f) - - nelem = 10 - ary = np.arange(nelem, dtype=np.float32) - exp = ary + ary - compiled[1, nelem](ary) - - self.assertTrue(np.all(ary == exp), (ary, 
exp)) - - def _check_cpu_dispatcher(self, add): - @cuda.jit - def add_kernel(ary): - i = cuda.grid(1) - ary[i] = add(ary[i], 1) - - ary = np.arange(10) - expect = ary + 1 - add_kernel[1, ary.size](ary) - np.testing.assert_equal(expect, ary) - - def test_cpu_dispatcher(self): - # Test correct usage - @jit - def add(a, b): - return a + b - - self._check_cpu_dispatcher(add) - - @skip_on_cudasim('not supported in cudasim') - def test_cpu_dispatcher_invalid(self): - # Test invalid usage - # Explicit signature disables compilation, which also disable - # compiling on CUDA. - @jit('(i4, i4)') - def add(a, b): - return a + b - - # Check that the right error message is provided. - with self.assertRaises(TypingError) as raises: - self._check_cpu_dispatcher(add) - msg = "Untyped global name 'add':.*using cpu function on device" - expected = re.compile(msg) - self.assertTrue(expected.search(str(raises.exception)) is not None) - - def test_cpu_dispatcher_other_module(self): - @jit - def add(a, b): - return a + b - - mymod = types.ModuleType(name='mymod') - mymod.add = add - del add - - @cuda.jit - def add_kernel(ary): - i = cuda.grid(1) - ary[i] = mymod.add(ary[i], 1) - - ary = np.arange(10) - expect = ary + 1 - add_kernel[1, ary.size](ary) - np.testing.assert_equal(expect, ary) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_errors.py b/numba/numba/cuda/tests/cudapy/test_errors.py deleted file mode 100644 index 3028f0e85..000000000 --- a/numba/numba/cuda/tests/cudapy/test_errors.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import print_function, absolute_import - -import numpy as np - -from numba import cuda -from numba.cuda.testing import unittest - - -def noop(x): - pass - - -class TestJitErrors(unittest.TestCase): - """ - Test compile-time errors with @jit. 
- """ - - def test_too_many_dims(self): - kernfunc = cuda.jit(noop) - - with self.assertRaises(ValueError) as raises: - kernfunc[(1, 2, 3, 4), (5, 6)] - self.assertIn("griddim must be a sequence of 1, 2 or 3 integers, got [1, 2, 3, 4]", - str(raises.exception)) - - with self.assertRaises(ValueError) as raises: - kernfunc[(1, 2,), (3, 4, 5, 6)] - self.assertIn("blockdim must be a sequence of 1, 2 or 3 integers, got [3, 4, 5, 6]", - str(raises.exception)) - - def test_non_integral_dims(self): - kernfunc = cuda.jit(noop) - - with self.assertRaises(TypeError) as raises: - kernfunc[2.0, 3] - self.assertIn("griddim must be a sequence of integers, got [2.0]", - str(raises.exception)) - - with self.assertRaises(TypeError) as raises: - kernfunc[2, 3.0] - self.assertIn("blockdim must be a sequence of integers, got [3.0]", - str(raises.exception)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_exception.py b/numba/numba/cuda/tests/cudapy/test_exception.py deleted file mode 100644 index 6931b8784..000000000 --- a/numba/numba/cuda/tests/cudapy/test_exception.py +++ /dev/null @@ -1,107 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import numpy as np - -from numba import config, cuda, jit -from numba.cuda.testing import unittest, SerialMixin, skip_on_cudasim - - -def foo(ary): - x = cuda.threadIdx.x - if x == 1: - # NOTE: indexing with a out-of-bounds constant can fail at - # compile-time instead (because the getitem is rewritten as a static_getitem) - # XXX: -1 is actually a valid index for a non-empty tuple... 
- ary.shape[-x] - - -class TestException(SerialMixin, unittest.TestCase): - def test_exception(self): - unsafe_foo = cuda.jit(foo) - safe_foo = cuda.jit(debug=True)(foo) - - if not config.ENABLE_CUDASIM: - # Simulator throws exceptions regardless of debug - # setting - unsafe_foo[1, 2](np.array([0, 1])) - - with self.assertRaises(IndexError) as cm: - safe_foo[1, 2](np.array([0, 1])) - self.assertIn("tuple index out of range", str(cm.exception)) - - def test_user_raise(self): - @cuda.jit(debug=True) - def foo(do_raise): - if do_raise: - raise ValueError - - foo[1, 1](False) - with self.assertRaises(ValueError): - foo[1, 1](True) - - def case_raise_causing_warp_diverge(self, with_debug_mode): - """Testing issue #2655. - - Exception raising code can cause the compiler to miss location - of unifying branch target and resulting in unexpected warp - divergence. - """ - @cuda.jit(debug=with_debug_mode) - def problematic(x, y): - tid = cuda.threadIdx.x - ntid = cuda.blockDim.x - - if tid > 12: - for i in range(ntid): - y[i] += x[i] // y[i] - - cuda.syncthreads() - if tid < 17: - for i in range(ntid): - x[i] += x[i] // y[i] - - @cuda.jit - def oracle(x, y): - tid = cuda.threadIdx.x - ntid = cuda.blockDim.x - - if tid > 12: - for i in range(ntid): - if y[i] != 0: - y[i] += x[i] // y[i] - - cuda.syncthreads() - if tid < 17: - for i in range(ntid): - if y[i] != 0: - x[i] += x[i] // y[i] - - n = 32 - got_x = 1. / (np.arange(n) + 0.01) - got_y = 1. / (np.arange(n) + 0.01) - problematic[1, n](got_x, got_y) - - expect_x = 1. / (np.arange(n) + 0.01) - expect_y = 1. / (np.arange(n) + 0.01) - oracle[1, n](expect_x, expect_y) - - np.testing.assert_almost_equal(expect_x, got_x) - np.testing.assert_almost_equal(expect_y, got_y) - - def test_raise_causing_warp_diverge(self): - """Test case for issue #2655. 
- """ - self.case_raise_causing_warp_diverge(with_debug_mode=False) - - @skip_on_cudasim("failing case doesn't happen in CUDASIM") - @unittest.expectedFailure - def test_raise_causing_warp_diverge_failing(self): - """Test case for issue #2655. - - This test that the issue still exists in debug mode. - """ - self.case_raise_causing_warp_diverge(with_debug_mode=True) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_fastmath.py b/numba/numba/cuda/tests/cudapy/test_fastmath.py deleted file mode 100644 index 7de7c3a4a..000000000 --- a/numba/numba/cuda/tests/cudapy/test_fastmath.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import numpy as np - -from numba import unittest_support as unittest -from numba import cuda, float32 -from numba.cuda.testing import SerialMixin - - -class TestFastMathOption(SerialMixin, unittest.TestCase): - def test_kernel(self): - - def foo(arr, val): - i = cuda.grid(1) - if i < arr.size: - arr[i] = float32(i) / val - - fastver = cuda.jit("void(float32[:], float32)", fastmath=True)(foo) - precver = cuda.jit("void(float32[:], float32)")(foo) - - self.assertIn('div.full.ftz.f32', fastver.ptx) - self.assertNotIn('div.full.ftz.f32', precver.ptx) - - def test_device(self): - # fastmath option is ignored for device function - @cuda.jit("float32(float32, float32)", device=True) - def foo(a, b): - return a / b - - def bar(arr, val): - i = cuda.grid(1) - if i < arr.size: - arr[i] = foo(i, val) - - fastver = cuda.jit("void(float32[:], float32)", fastmath=True)(bar) - precver = cuda.jit("void(float32[:], float32)")(bar) - - self.assertIn('div.full.ftz.f32', fastver.ptx) - self.assertNotIn('div.full.ftz.f32', precver.ptx) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_forall.py b/numba/numba/cuda/tests/cudapy/test_forall.py deleted file mode 100644 index 4ff9facce..000000000 --- 
a/numba/numba/cuda/tests/cudapy/test_forall.py +++ /dev/null @@ -1,39 +0,0 @@ -from __future__ import print_function, absolute_import - -import numpy as np - -from numba import cuda -import numba.unittest_support as unittest -from numba.cuda.testing import SerialMixin - - -class TestForAll(SerialMixin, unittest.TestCase): - def test_forall_1(self): - @cuda.jit - def foo(x): - i = cuda.grid(1) - if i < x.size: - x[i] += 1 - - arr = np.arange(11) - orig = arr.copy() - foo.forall(arr.size)(arr) - np.testing.assert_array_almost_equal(arr, orig + 1) - - def test_forall_2(self): - @cuda.jit("void(float32, float32[:], float32[:])") - def bar(a, x, y): - i = cuda.grid(1) - if i < x.size: - y[i] = a * x[i] + y[i] - - x = np.arange(13, dtype=np.float32) - y = np.arange(13, dtype=np.float32) - oldy = y.copy() - a = 1.234 - bar.forall(y.size)(a, x, y) - np.testing.assert_array_almost_equal(y, a * x + oldy, decimal=3) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_freevar.py b/numba/numba/cuda/tests/cudapy/test_freevar.py deleted file mode 100644 index 24c18b873..000000000 --- a/numba/numba/cuda/tests/cudapy/test_freevar.py +++ /dev/null @@ -1,30 +0,0 @@ -from __future__ import print_function, absolute_import - -import numpy as np - -from numba import cuda -from numba.cuda.testing import unittest, SerialMixin - - -class TestFreeVar(SerialMixin, unittest.TestCase): - def test_freevar(self): - """Make sure we can compile the following kernel with freevar reference - in macros - """ - from numba import float32 - - size = 1024 - nbtype = float32 - @cuda.jit("(float32[::1], intp)") - def foo(A, i): - "Dummy function" - sdata = cuda.shared.array(size, # size is freevar - dtype=nbtype) # nbtype is freevar - A[i] = sdata[i] - - A = np.arange(2, dtype="float32") - foo(A, 0) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_globals.py b/numba/numba/cuda/tests/cudapy/test_globals.py deleted 
file mode 100644 index 07f8b7882..000000000 --- a/numba/numba/cuda/tests/cudapy/test_globals.py +++ /dev/null @@ -1,61 +0,0 @@ -from __future__ import absolute_import, print_function, division -import numpy as np -from numba import cuda, int32, float32 -from numba.cuda.testing import unittest, SerialMixin - -N = 100 - - -def simple_smem(ary): - sm = cuda.shared.array(N, int32) - i = cuda.grid(1) - if i == 0: - for j in range(N): - sm[j] = j - cuda.syncthreads() - ary[i] = sm[i] - - -S0 = 10 -S1 = 20 - - -def coop_smem2d(ary): - i, j = cuda.grid(2) - sm = cuda.shared.array((S0, S1), float32) - sm[i, j] = (i + 1) / (j + 1) - cuda.syncthreads() - ary[i, j] = sm[i, j] - - -class TestCudaTestGlobal(SerialMixin, unittest.TestCase): - def test_global_int_const(self): - """Test simple_smem - """ - compiled = cuda.jit("void(int32[:])")(simple_smem) - - nelem = 100 - ary = np.empty(nelem, dtype=np.int32) - compiled[1, nelem](ary) - - self.assertTrue(np.all(ary == np.arange(nelem, dtype=np.int32))) - - @unittest.SkipTest - def test_global_tuple_const(self): - """Test coop_smem2d - """ - compiled = cuda.jit("void(float32[:,:])")(coop_smem2d) - - shape = 10, 20 - ary = np.empty(shape, dtype=np.float32) - compiled[1, shape](ary) - - exp = np.empty_like(ary) - for i in range(ary.shape[0]): - for j in range(ary.shape[1]): - exp[i, j] = float(i + 1) / (j + 1) - self.assertTrue(np.allclose(ary, exp)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_gufunc.py b/numba/numba/cuda/tests/cudapy/test_gufunc.py deleted file mode 100644 index c5531a77b..000000000 --- a/numba/numba/cuda/tests/cudapy/test_gufunc.py +++ /dev/null @@ -1,303 +0,0 @@ -from __future__ import print_function, absolute_import - -import numpy as np -import numpy.core.umath_tests as ut - -from numba import void, float32, float64 -from numba import guvectorize -from numba import cuda -from numba import unittest_support as unittest -from numba.cuda.testing import 
skip_on_cudasim, SerialMixin - - -@skip_on_cudasim('ufunc API unsupported in the simulator') -class TestCUDAGufunc(SerialMixin, unittest.TestCase): - - def test_gufunc_small(self): - - @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])], - '(m,n),(n,p)->(m,p)', - target='cuda') - def matmulcore(A, B, C): - m, n = A.shape - n, p = B.shape - for i in range(m): - for j in range(p): - C[i, j] = 0 - for k in range(n): - C[i, j] += A[i, k] * B[k, j] - - - gufunc = matmulcore - gufunc.max_blocksize = 512 - - matrix_ct = 2 - A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, - 4) - B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, - 5) - - C = gufunc(A, B) - Gold = ut.matrix_multiply(A, B) - self.assertTrue(np.allclose(C, Gold)) - - def test_gufunc_auto_transfer(self): - - @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])], - '(m,n),(n,p)->(m,p)', - target='cuda') - def matmulcore(A, B, C): - m, n = A.shape - n, p = B.shape - for i in range(m): - for j in range(p): - C[i, j] = 0 - for k in range(n): - C[i, j] += A[i, k] * B[k, j] - - gufunc = matmulcore - gufunc.max_blocksize = 512 - - matrix_ct = 2 - A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, - 4) - B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, - 5) - - dB = cuda.to_device(B) - - C = gufunc(A, dB).copy_to_host() - Gold = ut.matrix_multiply(A, B) - self.assertTrue(np.allclose(C, Gold)) - - def test_gufunc(self): - - @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])], - '(m,n),(n,p)->(m,p)', - target='cuda') - def matmulcore(A, B, C): - m, n = A.shape - n, p = B.shape - for i in range(m): - for j in range(p): - C[i, j] = 0 - for k in range(n): - C[i, j] += A[i, k] * B[k, j] - - gufunc = matmulcore - gufunc.max_blocksize = 512 - - matrix_ct = 1001 # an odd number to test thread/block division in CUDA - A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, - 4) - B = 
np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, - 5) - - C = gufunc(A, B) - Gold = ut.matrix_multiply(A, B) - self.assertTrue(np.allclose(C, Gold)) - - def test_gufunc_hidim(self): - - @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])], - '(m,n),(n,p)->(m,p)', - target='cuda') - def matmulcore(A, B, C): - m, n = A.shape - n, p = B.shape - for i in range(m): - for j in range(p): - C[i, j] = 0 - for k in range(n): - C[i, j] += A[i, k] * B[k, j] - - gufunc = matmulcore - gufunc.max_blocksize = 512 - - matrix_ct = 100 # an odd number to test thread/block division in CUDA - A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(4, 25, 2, 4) - B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(4, 25, 4, 5) - - C = gufunc(A, B) - Gold = ut.matrix_multiply(A, B) - self.assertTrue(np.allclose(C, Gold)) - - def test_gufunc_new_axis(self): - - @guvectorize([void(float64[:, :], float64[:, :], float64[:, :])], - '(m,n),(n,p)->(m,p)', - target='cuda') - def matmulcore(A, B, C): - m, n = A.shape - n, p = B.shape - for i in range(m): - for j in range(p): - C[i, j] = 0 - for k in range(n): - C[i, j] += A[i, k] * B[k, j] - - gufunc = matmulcore - - X = np.random.randn(10, 3, 3) - Y = np.random.randn(3, 3) - - gold = ut.matrix_multiply(X, Y) - - res1 = gufunc(X, Y) - np.testing.assert_allclose(gold, res1) - - res2 = gufunc(X, np.tile(Y, (10, 1, 1))) - np.testing.assert_allclose(gold, res2) - - def test_gufunc_adjust_blocksize(self): - - @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])], - '(m,n),(n,p)->(m,p)', - target='cuda') - def matmulcore(A, B, C): - m, n = A.shape - n, p = B.shape - for i in range(m): - for j in range(p): - C[i, j] = 0 - for k in range(n): - C[i, j] += A[i, k] * B[k, j] - - gufunc = matmulcore - gufunc.max_blocksize = 512 - - matrix_ct = 1001 # an odd number to test thread/block division in CUDA - A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, - 4) - B = np.arange(matrix_ct 
* 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, - 5) - - gufunc.max_blocksize = 32 - C = gufunc(A, B) - Gold = ut.matrix_multiply(A, B) - self.assertTrue(np.allclose(C, Gold)) - - def test_gufunc_stream(self): - - @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])], - '(m,n),(n,p)->(m,p)', - target='cuda') - def matmulcore(A, B, C): - m, n = A.shape - n, p = B.shape - for i in range(m): - for j in range(p): - C[i, j] = 0 - for k in range(n): - C[i, j] += A[i, k] * B[k, j] - - gufunc = matmulcore - gufunc.max_blocksize = 512 - - #cuda.driver.flush_pending_free() - matrix_ct = 1001 # an odd number to test thread/block division in CUDA - A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, - 4) - B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, - 5) - - stream = cuda.stream() - dA = cuda.to_device(A, stream) - dB = cuda.to_device(B, stream) - - dC = cuda.device_array(shape=(1001, 2, 5), dtype=A.dtype, stream=stream) - dC = gufunc(dA, dB, out=dC, stream=stream) - C = dC.copy_to_host(stream=stream) - stream.synchronize() - - Gold = ut.matrix_multiply(A, B) - - self.assertTrue(np.allclose(C, Gold)) - - def test_copy(self): - - @guvectorize([void(float32[:], float32[:])], - '(x)->(x)', - target='cuda') - def copy(A, B): - for i in range(B.size): - B[i] = A[i] - - A = np.arange(10, dtype=np.float32) + 1 - B = np.zeros_like(A) - copy(A, out=B) - self.assertTrue(np.allclose(A, B)) - - def test_copy_odd(self): - - @guvectorize([void(float32[:], float32[:])], - '(x)->(x)', - target='cuda') - def copy(A, B): - for i in range(B.size): - B[i] = A[i] - - A = np.arange(11, dtype=np.float32) + 1 - B = np.zeros_like(A) - copy(A, out=B) - self.assertTrue(np.allclose(A, B)) - - def test_copy2d(self): - - @guvectorize([void(float32[:, :], float32[:, :])], - '(x, y)->(x, y)', - target='cuda') - def copy2d(A, B): - for x in range(B.shape[0]): - for y in range(B.shape[1]): - B[x, y] = A[x, y] - - A = np.arange(30, 
dtype=np.float32).reshape(5, 6) + 1 - B = np.zeros_like(A) - copy2d(A, out=B) - self.assertTrue(np.allclose(A, B)) - - def test_nopython_flag(self): - - def foo(A, B): - pass - - # nopython = True is fine - guvectorize([void(float32[:], float32[:])], '(x)->(x)', target='cuda', - nopython=True)(foo) - - # nopython = False is bad - with self.assertRaises(TypeError) as raises: - guvectorize([void(float32[:], float32[:])], '(x)->(x)', - target='cuda', nopython=False)(foo) - self.assertEqual("nopython flag must be True", str(raises.exception)) - - def test_invalid_flags(self): - # Check invalid flags - def foo(A, B): - pass - - with self.assertRaises(TypeError) as raises: - guvectorize([void(float32[:], float32[:])], '(x)->(x)', - target='cuda', what1=True, ever2=False)(foo) - head = "The following target options are not supported:" - msg = str(raises.exception) - self.assertEqual(msg[:len(head)], head) - items = msg[len(head):].strip().split(',') - items = [i.strip("'\" ") for i in items] - self.assertEqual(set(['what1', 'ever2']), set(items)) - - def test_duplicated_output(self): - @guvectorize([void(float32[:], float32[:])], '(x)->(x)', target='cuda') - def foo(inp, out): - pass # intentionally empty; never executed - - inp = out = np.zeros(10, dtype=np.float32) - with self.assertRaises(ValueError) as raises: - foo(inp, out, out=out) - self.assertEqual(str(raises.exception), - "cannot specify 'out' as both a positional and keyword argument") - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_gufunc_scalar.py b/numba/numba/cuda/tests/cudapy/test_gufunc_scalar.py deleted file mode 100644 index bc6905fb3..000000000 --- a/numba/numba/cuda/tests/cudapy/test_gufunc_scalar.py +++ /dev/null @@ -1,161 +0,0 @@ -"""Example: sum each row using guvectorize - -See Numpy documentation for detail about gufunc: - http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html -""" -from __future__ import absolute_import, 
print_function, division -import numpy as np -from numba import guvectorize, cuda -from numba import unittest_support as unittest -from numba.tests.support import TestCase -from numba.cuda.testing import skip_on_cudasim, SerialMixin - - -@skip_on_cudasim('ufunc API unsupported in the simulator') -class TestGUFuncScalar(SerialMixin, TestCase): - def test_gufunc_scalar_output(self): - # function type: - # - has no void return type - # - array argument is one dimenion fewer than the source array - # - scalar output is passed as a 1-element array. - # - # signature: (n)->() - # - the function takes an array of n-element and output a scalar. - - @guvectorize(['void(int32[:], int32[:])'], '(n)->()', target='cuda') - def sum_row(inp, out): - tmp = 0. - for i in range(inp.shape[0]): - tmp += inp[i] - out[0] = tmp - - # inp is (10000, 3) - # out is (10000) - # The outter (leftmost) dimension must match or numpy broadcasting is performed. - # But, broadcasting on CUDA arrays is not supported. - - inp = np.arange(300, dtype=np.int32).reshape(100, 3) - - # invoke on CUDA with manually managed memory - out1 = np.empty(100, dtype=inp.dtype) - out2 = np.empty(100, dtype=inp.dtype) - - dev_inp = cuda.to_device( - inp) # alloc and copy input data - dev_out1 = cuda.to_device(out1, copy=False) # alloc only - - sum_row(dev_inp, out=dev_out1) # invoke the gufunc - dev_out2 = sum_row(dev_inp) # invoke the gufunc - - dev_out1.copy_to_host(out1) # retrieve the result - dev_out2.copy_to_host(out2) # retrieve the result - - # verify result - for i in range(inp.shape[0]): - self.assertTrue(out1[i] == inp[i].sum()) - self.assertTrue(out2[i] == inp[i].sum()) - - def test_gufunc_scalar_output_bug(self): - # Issue 2812: Error due to using input argument types as output argument - @guvectorize(['void(int32, int32[:])'], '()->()', target='cuda') - def twice(inp, out): - out[0] = inp * 2 - - self.assertEqual(twice(10), 20) - arg = np.arange(10).astype(np.int32) - self.assertPreciseEqual(twice(arg), 
arg * 2) - - def test_gufunc_scalar_input_saxpy(self): - @guvectorize(['void(float32, float32[:], float32[:], float32[:])'], - '(),(t),(t)->(t)', target='cuda') - def saxpy(a, x, y, out): - for i in range(out.shape[0]): - out[i] = a * x[i] + y[i] - - A = np.float32(2) - X = np.arange(10, dtype=np.float32).reshape(5, 2) - Y = np.arange(10, dtype=np.float32).reshape(5, 2) - out = saxpy(A, X, Y) - - for j in range(5): - for i in range(2): - exp = A * X[j, i] + Y[j, i] - self.assertTrue(exp == out[j, i]) - - X = np.arange(10, dtype=np.float32) - Y = np.arange(10, dtype=np.float32) - out = saxpy(A, X, Y) - - for j in range(10): - exp = A * X[j] + Y[j] - self.assertTrue(exp == out[j], (exp, out[j])) - - A = np.arange(5, dtype=np.float32) - X = np.arange(10, dtype=np.float32).reshape(5, 2) - Y = np.arange(10, dtype=np.float32).reshape(5, 2) - out = saxpy(A, X, Y) - - for j in range(5): - for i in range(2): - exp = A[j] * X[j, i] + Y[j, i] - self.assertTrue(exp == out[j, i], (exp, out[j, i])) - - def test_gufunc_scalar_cast(self): - @guvectorize(['void(int32, int32[:], int32[:])'], '(),(t)->(t)', - target='cuda') - def foo(a, b, out): - for i in range(b.size): - out[i] = a * b[i] - - a = np.int64(2) # type does not match signature (int32) - b = np.arange(10).astype(np.int32) - out = foo(a, b) - np.testing.assert_equal(out, a * b) - - # test error - a = np.array(a) - da = cuda.to_device(a) - self.assertEqual(da.dtype, np.int64) - with self.assertRaises(TypeError) as raises: - foo(da, b) - - self.assertIn("does not support .astype()", str(raises.exception)) - - def test_gufunc_old_style_scalar_as_array(self): - # Example from issue #2579 - @guvectorize(['void(int32[:],int32[:],int32[:])'], '(n),()->(n)', - target='cuda') - def gufunc(x, y, res): - for i in range(x.shape[0]): - res[i] = x[i] + y[0] - - # Case 1 - a = np.array([1, 2, 3, 4], dtype=np.int32) - b = np.array([2], dtype=np.int32) - - res = np.zeros(4, dtype=np.int32) - - expected = res.copy() - expected = a + b - - 
gufunc(a, b, out=res) - - np.testing.assert_almost_equal(expected, res) - - # Case 2 - a = np.array([1, 2, 3, 4] * 2, dtype=np.int32).reshape(2, 4) - b = np.array([2, 10], dtype=np.int32) - - res = np.zeros((2, 4), dtype=np.int32) - - expected = res.copy() - expected[0] = a[0] + b[0] - expected[1] = a[1] + b[1] - - gufunc(a, b, res) - - np.testing.assert_almost_equal(expected, res) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_gufunc_scheduling.py b/numba/numba/cuda/tests/cudapy/test_gufunc_scheduling.py deleted file mode 100644 index 21214af11..000000000 --- a/numba/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +++ /dev/null @@ -1,96 +0,0 @@ -from __future__ import absolute_import, print_function, division -from numba.npyufunc.deviceufunc import GUFuncEngine -from numba import unittest_support as unittest - - -def template(signature, shapes, expects): - gufb = GUFuncEngine.from_signature(signature) - sch = gufb.schedule(shapes) - for k, v in expects.items(): - got = getattr(sch, k) - if got != v: - fmt = 'error for %s: got=%s but expect=%s' - raise AssertionError(fmt % (k, got, v)) - - -class TestGUFuncScheduling(unittest.TestCase): - def test_signature_1(self): - signature = '(m, n), (n, p) -> (m, p)' - shapes = (100, 4, 5), (1, 5, 7) - expects = dict( - ishapes=[(4, 5), (5, 7)], - oshapes=[(4, 7)], - loopdims=(100,), - pinned=[False, True] - ) - template(signature, shapes, expects) - - def test_signature_2(self): - signature = '(m, n), (n, p) -> (m, p)' - shapes = (100, 4, 5), (100, 5, 7) - expects = dict( - ishapes=[(4, 5), (5, 7)], - oshapes=[(4, 7)], - loopdims=(100,), - pinned=[False, False] - ) - template(signature, shapes, expects) - - def test_signature_3(self): - signature = '(m, n), (n, p) -> (m, p)' - shapes = (12, 34, 4, 5), (12, 34, 5, 7) - expects = dict( - ishapes=[(4, 5), (5, 7)], - oshapes=[(4, 7)], - loopdims=(12, 34), - pinned=[False, False] - ) - template(signature, shapes, expects) - - 
def test_signature_4(self): - signature = '(m, n), (n, p) -> (m, p)' - shapes = (4, 5), (5, 7) - expects = dict( - ishapes=[(4, 5), (5, 7)], - oshapes=[(4, 7)], - loopdims=(), - pinned=[False, False] - ) - template(signature, shapes, expects) - - def test_signature_5(self): - signature = '(a), (a) -> (a)' - shapes = (5,), (5,) - expects = dict( - ishapes=[(5,), (5,)], - oshapes=[(5,)], - loopdims=(), - pinned=[False, False] - ) - template(signature, shapes, expects) - - def test_signature_6(self): - signature = '(), () -> ()' - shapes = (5,), (5,) - expects = dict( - ishapes=[(), ()], - oshapes=[()], - loopdims=(5,), - pinned=[False, False] - ) - template(signature, shapes, expects) - - def test_signature_7(self): - signature = '(), () -> ()' - shapes = (5,), () - expects = dict( - ishapes=[(), ()], - oshapes=[()], - loopdims=(5,), - pinned=[False, True] - ) - template(signature, shapes, expects) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_idiv.py b/numba/numba/cuda/tests/cudapy/test_idiv.py deleted file mode 100644 index 635a9e4c1..000000000 --- a/numba/numba/cuda/tests/cudapy/test_idiv.py +++ /dev/null @@ -1,39 +0,0 @@ -from __future__ import print_function, division, absolute_import -import numpy as np -from numba import cuda, float32, float64, int32 -from numba.cuda.testing import unittest, SerialMixin - - -class TestCudaIDiv(SerialMixin, unittest.TestCase): - def test_inplace_div(self): - - @cuda.jit(argtypes=[float32[:, :], int32, int32]) - def div(grid, l_x, l_y): - for x in range(l_x): - for y in range(l_y): - grid[x, y] /= 2.0 - - x = np.ones((2, 2), dtype=np.float32) - grid = cuda.to_device(x) - div(grid, 2, 2) - y = grid.copy_to_host() - self.assertTrue(np.all(y == 0.5)) - - - def test_inplace_div_double(self): - - @cuda.jit(argtypes=[float64[:, :], int32, int32]) - def div_double(grid, l_x, l_y): - for x in range(l_x): - for y in range(l_y): - grid[x, y] /= 2.0 - - x = np.ones((2, 2), 
dtype=np.float64) - grid = cuda.to_device(x) - div_double(grid, 2, 2) - y = grid.copy_to_host() - self.assertTrue(np.all(y == 0.5)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_inspect.py b/numba/numba/cuda/tests/cudapy/test_inspect.py deleted file mode 100644 index c0807369c..000000000 --- a/numba/numba/cuda/tests/cudapy/test_inspect.py +++ /dev/null @@ -1,72 +0,0 @@ -from __future__ import print_function, division, absolute_import -from numba import cuda, float64, intp -from numba.cuda.testing import unittest, SerialMixin -from numba.cuda.testing import skip_on_cudasim -from numba.utils import StringIO - - -@skip_on_cudasim('Simulator does not generate code to be inspected') -class TestInspect(SerialMixin, unittest.TestCase): - def test_monotyped(self): - @cuda.jit("(float32, int32)") - def foo(x, y): - pass - - file = StringIO() - foo.inspect_types(file=file) - typeanno = file.getvalue() - # Function name in annotation - self.assertIn("foo", typeanno) - # Signature in annotation - self.assertIn("(float32, int32)", typeanno) - file.close() - # Function name in LLVM - self.assertIn("foo", foo.inspect_llvm()) - - asm = foo.inspect_asm() - - # Function name in PTX - self.assertIn("foo", asm) - # NVVM inserted comments in PTX - self.assertIn("Generated by NVIDIA NVVM Compiler", asm) - - def test_polytyped(self): - @cuda.jit - def foo(x, y): - pass - - foo(1, 1) - foo(1.2, 2.4) - - file = StringIO() - foo.inspect_types(file=file) - typeanno = file.getvalue() - file.close() - # Signature in annotation - self.assertIn("({0}, {0})".format(intp), typeanno) - self.assertIn("(float64, float64)", typeanno) - - # Signature in LLVM dict - llvmirs = foo.inspect_llvm() - self.assertEqual(2, len(llvmirs), ) - self.assertIn((intp, intp), llvmirs) - self.assertIn((float64, float64), llvmirs) - - # Function name in LLVM - self.assertIn("foo", llvmirs[intp, intp]) - self.assertIn("foo", llvmirs[float64, float64]) - - asmdict = 
foo.inspect_asm() - - # Signature in LLVM dict - self.assertEqual(2, len(asmdict), ) - self.assertIn((intp, intp), asmdict) - self.assertIn((float64, float64), asmdict) - - # NNVM inserted in PTX - self.assertIn("foo", asmdict[intp, intp]) - self.assertIn("foo", asmdict[float64, float64]) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_intrinsics.py b/numba/numba/cuda/tests/cudapy/test_intrinsics.py deleted file mode 100644 index 634babcfb..000000000 --- a/numba/numba/cuda/tests/cudapy/test_intrinsics.py +++ /dev/null @@ -1,365 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import numpy as np -import re -from numba import cuda, int32, float32 -from numba.cuda.testing import unittest, SerialMixin, skip_on_cudasim - - -def simple_threadidx(ary): - i = cuda.threadIdx.x - ary[0] = i - - -def fill_threadidx(ary): - i = cuda.threadIdx.x - ary[i] = i - - -def fill3d_threadidx(ary): - i = cuda.threadIdx.x - j = cuda.threadIdx.y - k = cuda.threadIdx.z - - ary[i, j, k] = (i + 1) * (j + 1) * (k + 1) - - -def simple_grid1d(ary): - i = cuda.grid(1) - ary[i] = i - - -def simple_grid2d(ary): - i, j = cuda.grid(2) - ary[i, j] = i + j - - -def simple_gridsize1d(ary): - i = cuda.grid(1) - x = cuda.gridsize(1) - if i == 0: - ary[0] = x - - -def simple_gridsize2d(ary): - i, j = cuda.grid(2) - x, y = cuda.gridsize(2) - if i == 0 and j == 0: - ary[0] = x - ary[1] = y - - -def intrinsic_forloop_step(c): - startX, startY = cuda.grid(2) - gridX = cuda.gridDim.x * cuda.blockDim.x - gridY = cuda.gridDim.y * cuda.blockDim.y - height, width = c.shape - - for x in range(startX, width, gridX): - for y in range(startY, height, gridY): - c[y, x] = x + y - - -def simple_popc(ary, c): - ary[0] = cuda.popc(c) - - -def simple_brev(ary, c): - ary[0] = cuda.brev(c) - - -def simple_clz(ary, c): - ary[0] = cuda.clz(c) - - -def simple_ffs(ary, c): - ary[0] = cuda.ffs(c) - - -def branching_with_ifs(a, b, c): - i = cuda.grid(1) - 
- if a[i] > 4: - if b % 2 == 0: - a[i] = c[i] - else: - a[i] = 13 - else: - a[i] = 3 - - -def branching_with_selps(a, b, c): - i = cuda.grid(1) - - inner = cuda.selp(b % 2 == 0, c[i], 13) - a[i] = cuda.selp(a[i] > 4, inner, 3) - - -def simple_laneid(ary): - i = cuda.grid(1) - ary[i] = cuda.laneid - - -def simple_warpsize(ary): - ary[0] = cuda.warpsize - - -class TestCudaIntrinsic(SerialMixin, unittest.TestCase): - def test_simple_threadidx(self): - compiled = cuda.jit("void(int32[:])")(simple_threadidx) - ary = np.ones(1, dtype=np.int32) - compiled(ary) - self.assertTrue(ary[0] == 0) - - def test_fill_threadidx(self): - compiled = cuda.jit("void(int32[:])")(fill_threadidx) - N = 10 - ary = np.ones(N, dtype=np.int32) - exp = np.arange(N, dtype=np.int32) - compiled[1, N](ary) - self.assertTrue(np.all(ary == exp)) - - def test_fill3d_threadidx(self): - X, Y, Z = 4, 5, 6 - - def c_contigous(): - compiled = cuda.jit("void(int32[:,:,::1])")(fill3d_threadidx) - ary = np.zeros((X, Y, Z), dtype=np.int32) - compiled[1, (X, Y, Z)](ary) - return ary - - def f_contigous(): - compiled = cuda.jit("void(int32[::1,:,:])")(fill3d_threadidx) - ary = np.asfortranarray(np.zeros((X, Y, Z), dtype=np.int32)) - compiled[1, (X, Y, Z)](ary) - return ary - - c_res = c_contigous() - f_res = f_contigous() - self.assertTrue(np.all(c_res == f_res)) - - def test_simple_grid1d(self): - compiled = cuda.jit("void(int32[::1])")(simple_grid1d) - ntid, nctaid = 3, 7 - nelem = ntid * nctaid - ary = np.empty(nelem, dtype=np.int32) - compiled[nctaid, ntid](ary) - self.assertTrue(np.all(ary == np.arange(nelem))) - - def test_simple_grid2d(self): - compiled = cuda.jit("void(int32[:,::1])")(simple_grid2d) - ntid = (4, 3) - nctaid = (5, 6) - shape = (ntid[0] * nctaid[0], ntid[1] * nctaid[1]) - ary = np.empty(shape, dtype=np.int32) - exp = ary.copy() - compiled[nctaid, ntid](ary) - - for i in range(ary.shape[0]): - for j in range(ary.shape[1]): - exp[i, j] = i + j - - self.assertTrue(np.all(ary == exp)) - - def 
test_simple_gridsize1d(self): - compiled = cuda.jit("void(int32[::1])")(simple_gridsize1d) - ntid, nctaid = 3, 7 - ary = np.zeros(1, dtype=np.int32) - compiled[nctaid, ntid](ary) - self.assertEqual(ary[0], nctaid * ntid) - - @skip_on_cudasim('Tests PTX emission') - def test_selp(self): - cu_branching_with_ifs = cuda.jit('void(i8[:], i8, i8[:])')(branching_with_ifs) - cu_branching_with_selps = cuda.jit('void(i8[:], i8, i8[:])')(branching_with_selps) - - n = 32 - b = 6 - c = np.full(shape=32, fill_value=17, dtype=np.int64) - - expected = c.copy() - expected[:5] = 3 - - a = np.arange(n, dtype=np.int64) - cu_branching_with_ifs[n, 1](a, b, c) - ptx = cu_branching_with_ifs.inspect_asm() - self.assertEqual(2, len(re.findall(r'\s+bra\s+', ptx))) - np.testing.assert_array_equal(a, expected, err_msg='branching') - - a = np.arange(n, dtype=np.int64) - cu_branching_with_selps[n, 1](a, b, c) - ptx = cu_branching_with_selps.inspect_asm() - self.assertEqual(0, len(re.findall(r'\s+bra\s+', ptx))) - np.testing.assert_array_equal(a, expected, err_msg='selp') - - def test_simple_gridsize2d(self): - compiled = cuda.jit("void(int32[::1])")(simple_gridsize2d) - ntid = (4, 3) - nctaid = (5, 6) - ary = np.zeros(2, dtype=np.int32) - compiled[nctaid, ntid](ary) - - self.assertEqual(ary[0], nctaid[0] * ntid[0]) - self.assertEqual(ary[1], nctaid[1] * ntid[1]) - - def test_intrinsic_forloop_step(self): - compiled = cuda.jit("void(float32[:,::1])")(intrinsic_forloop_step) - ntid = (4, 3) - nctaid = (5, 6) - shape = (ntid[0] * nctaid[0], ntid[1] * nctaid[1]) - ary = np.empty(shape, dtype=np.int32) - - compiled[nctaid, ntid](ary) - - gridX, gridY = shape - height, width = ary.shape - for i, j in zip(range(ntid[0]), range(ntid[1])): - startX, startY = gridX + i, gridY + j - for x in range(startX, width, gridX): - for y in range(startY, height, gridY): - self.assertTrue(ary[y, x] == x + y, (ary[y, x], x + y)) - - def test_3dgrid(self): - @cuda.jit - def foo(out): - x, y, z = cuda.grid(3) - a, b, c 
= cuda.gridsize(3) - out[x, y, z] = a * b * c - - arr = np.zeros(9 ** 3, dtype=np.int32).reshape(9, 9, 9) - foo[(3, 3, 3), (3, 3, 3)](arr) - - np.testing.assert_equal(arr, 9 ** 3) - - def test_3dgrid_2(self): - @cuda.jit - def foo(out): - x, y, z = cuda.grid(3) - a, b, c = cuda.gridsize(3) - grid_is_right = ( - x == cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x and - y == cuda.threadIdx.y + cuda.blockIdx.y * cuda.blockDim.y and - z == cuda.threadIdx.z + cuda.blockIdx.z * cuda.blockDim.z - ) - gridsize_is_right = (a == cuda.blockDim.x * cuda.gridDim.x and - b == cuda.blockDim.y * cuda.gridDim.y and - c == cuda.blockDim.z * cuda.gridDim.z) - out[x, y, z] = grid_is_right and gridsize_is_right - - x, y, z = (4 * 3, 3 * 2, 2 * 4) - arr = np.zeros((x * y * z), dtype=np.bool).reshape(x, y, z) - foo[(4, 3, 2), (3, 2, 4)](arr) - - self.assertTrue(np.all(arr)) - - def test_popc_u4(self): - compiled = cuda.jit("void(int32[:], uint32)")(simple_popc) - ary = np.zeros(1, dtype=np.int32) - compiled(ary, 0xF0) - self.assertEquals(ary[0], 4) - - def test_popc_u8(self): - compiled = cuda.jit("void(int32[:], uint64)")(simple_popc) - ary = np.zeros(1, dtype=np.int32) - compiled(ary, 0xF00000000000) - self.assertEquals(ary[0], 4) - - def test_brev_u4(self): - compiled = cuda.jit("void(uint32[:], uint32)")(simple_brev) - ary = np.zeros(1, dtype=np.uint32) - compiled(ary, 0x000030F0) - self.assertEquals(ary[0], 0x0F0C0000) - - @skip_on_cudasim('only get given a Python "int", assumes 32 bits') - def test_brev_u8(self): - compiled = cuda.jit("void(uint64[:], uint64)")(simple_brev) - ary = np.zeros(1, dtype=np.uint64) - compiled(ary, 0x000030F0000030F0) - self.assertEquals(ary[0], 0x0F0C00000F0C0000) - - def test_clz_i4(self): - compiled = cuda.jit("void(int32[:], int32)")(simple_clz) - ary = np.zeros(1, dtype=np.int32) - compiled(ary, 0x00100000) - self.assertEquals(ary[0], 11) - - def test_clz_u4(self): - """ - Although the CUDA Math API 
(http://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html) - only says int32 & int64 arguments are supported in C code, the LLVM - IR input supports i8, i16, i32 & i64 (LLVM doesn't have a concept of - unsigned integers, just unsigned operations on integers). - http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics - """ - compiled = cuda.jit("void(int32[:], uint32)")(simple_clz) - ary = np.zeros(1, dtype=np.uint32) - compiled(ary, 0x00100000) - self.assertEquals(ary[0], 11) - - def test_clz_i4_1s(self): - compiled = cuda.jit("void(int32[:], int32)")(simple_clz) - ary = np.zeros(1, dtype=np.int32) - compiled(ary, 0xFFFFFFFF) - self.assertEquals(ary[0], 0) - - def test_clz_i4_0s(self): - compiled = cuda.jit("void(int32[:], int32)")(simple_clz) - ary = np.zeros(1, dtype=np.int32) - compiled(ary, 0x0) - self.assertEquals(ary[0], 32, "CUDA semantics") - - @skip_on_cudasim('only get given a Python "int", assumes 32 bits') - def test_clz_i8(self): - compiled = cuda.jit("void(int32[:], int64)")(simple_clz) - ary = np.zeros(1, dtype=np.int32) - compiled(ary, 0x000000000010000) - self.assertEquals(ary[0], 47) - - def test_ffs_i4(self): - compiled = cuda.jit("void(int32[:], int32)")(simple_ffs) - ary = np.zeros(1, dtype=np.int32) - compiled(ary, 0x00100000) - self.assertEquals(ary[0], 20) - - def test_ffs_u4(self): - compiled = cuda.jit("void(int32[:], uint32)")(simple_ffs) - ary = np.zeros(1, dtype=np.uint32) - compiled(ary, 0x00100000) - self.assertEquals(ary[0], 20) - - def test_ffs_i4_1s(self): - compiled = cuda.jit("void(int32[:], int32)")(simple_ffs) - ary = np.zeros(1, dtype=np.int32) - compiled(ary, 0xFFFFFFFF) - self.assertEquals(ary[0], 0) - - def test_ffs_i4_0s(self): - compiled = cuda.jit("void(int32[:], int32)")(simple_ffs) - ary = np.zeros(1, dtype=np.int32) - compiled(ary, 0x0) - self.assertEquals(ary[0], 32, "CUDA semantics") - - @skip_on_cudasim('only get given a Python "int", assumes 32 bits') - def 
test_ffs_i8(self): - compiled = cuda.jit("void(int32[:], int64)")(simple_ffs) - ary = np.zeros(1, dtype=np.int32) - compiled(ary, 0x000000000010000) - self.assertEquals(ary[0], 16) - - def test_simple_laneid(self): - compiled = cuda.jit("void(int32[:])")(simple_laneid) - count = 2 - ary = np.zeros(count*32, dtype=np.int32) - exp = np.tile(np.arange(32, dtype=np.int32), count) - compiled[1, count*32](ary) - self.assertTrue(np.all(ary == exp)) - - def test_simple_warpsize(self): - compiled = cuda.jit("void(int32[:])")(simple_warpsize) - ary = np.zeros(1, dtype=np.int32) - compiled(ary) - self.assertEquals(ary[0], 32, "CUDA semantics") - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_ipc.py b/numba/numba/cuda/tests/cudapy/test_ipc.py deleted file mode 100644 index b6301f7fd..000000000 --- a/numba/numba/cuda/tests/cudapy/test_ipc.py +++ /dev/null @@ -1,277 +0,0 @@ -from __future__ import absolute_import, print_function, division - -import sys -import multiprocessing as mp -import traceback -import pickle - -import numpy as np - -from numba import cuda -from numba.cuda.cudadrv import drvapi, devicearray -from numba import unittest_support as unittest -from numba.cuda.testing import skip_on_cudasim, CUDATestCase - - -not_linux = not sys.platform.startswith('linux') -has_mp_get_context = hasattr(mp, 'get_context') - - -def core_ipc_handle_test(the_work, result_queue): - try: - arr = the_work() - except: - # FAILED. propagate the exception as a string - succ = False - out = traceback.format_exc() - else: - # OK. 
send the ndarray back - succ = True - out = arr - result_queue.put((succ, out)) - - -def base_ipc_handle_test(handle, size, result_queue): - def the_work(): - dtype = np.dtype(np.intp) - with cuda.open_ipc_array(handle, shape=size // dtype.itemsize, - dtype=dtype) as darr: - # copy the data to host - return darr.copy_to_host() - - core_ipc_handle_test(the_work, result_queue) - - -def serialize_ipc_handle_test(handle, result_queue): - def the_work(): - dtype = np.dtype(np.intp) - darr = handle.open_array(cuda.current_context(), - shape=handle.size // dtype.itemsize, - dtype=dtype) - # copy the data to host - arr = darr.copy_to_host() - handle.close() - return arr - - core_ipc_handle_test(the_work, result_queue) - - -def ipc_array_test(ipcarr, result_queue): - try: - with ipcarr as darr: - arr = darr.copy_to_host() - try: - # should fail to reopen - with ipcarr: - pass - except ValueError as e: - if str(e) != 'IpcHandle is already opened': - raise AssertionError('invalid exception message') - else: - raise AssertionError('did not raise on reopen') - - except: - # FAILED. propagate the exception as a string - succ = False - out = traceback.format_exc() - else: - # OK. 
send the ndarray back - succ = True - out = arr - result_queue.put((succ, out)) - - -@unittest.skipIf(not_linux, "IPC only supported on Linux") -@unittest.skipUnless(has_mp_get_context, "requires multiprocessing.get_context") -@skip_on_cudasim('Ipc not available in CUDASIM') -class TestIpcMemory(CUDATestCase): - def test_ipc_handle(self): - # prepare data for IPC - arr = np.arange(10, dtype=np.intp) - devarr = cuda.to_device(arr) - - # create IPC handle - ctx = cuda.current_context() - ipch = ctx.get_ipc_handle(devarr.gpu_data) - - # manually prepare for serialization as bytes - handle_bytes = bytes(ipch.handle) - size = ipch.size - - # spawn new process for testing - ctx = mp.get_context('spawn') - result_queue = ctx.Queue() - args = (handle_bytes, size, result_queue) - proc = ctx.Process(target=base_ipc_handle_test, args=args) - proc.start() - succ, out = result_queue.get() - if not succ: - self.fail(out) - else: - np.testing.assert_equal(arr, out) - proc.join(3) - - def test_ipc_handle_serialization(self): - # prepare data for IPC - arr = np.arange(10, dtype=np.intp) - devarr = cuda.to_device(arr) - - # create IPC handle - ctx = cuda.current_context() - ipch = ctx.get_ipc_handle(devarr.gpu_data) - - # pickle - buf = pickle.dumps(ipch) - ipch_recon = pickle.loads(buf) - self.assertIs(ipch_recon.base, None) - self.assertEqual(tuple(ipch_recon.handle), tuple(ipch.handle)) - self.assertEqual(ipch_recon.size, ipch.size) - - # spawn new process for testing - ctx = mp.get_context('spawn') - result_queue = ctx.Queue() - args = (ipch, result_queue) - proc = ctx.Process(target=serialize_ipc_handle_test, args=args) - proc.start() - succ, out = result_queue.get() - if not succ: - self.fail(out) - else: - np.testing.assert_equal(arr, out) - proc.join(3) - - def test_ipc_array(self): - # prepare data for IPC - arr = np.arange(10, dtype=np.intp) - devarr = cuda.to_device(arr) - ipch = devarr.get_ipc_handle() - - # spawn new process for testing - ctx = mp.get_context('spawn') - 
result_queue = ctx.Queue() - args = (ipch, result_queue) - proc = ctx.Process(target=ipc_array_test, args=args) - proc.start() - succ, out = result_queue.get() - if not succ: - self.fail(out) - else: - np.testing.assert_equal(arr, out) - proc.join(3) - - -@unittest.skipUnless(not_linux, "Only on OS other than Linux") -@skip_on_cudasim('Ipc not available in CUDASIM') -class TestIpcNotSupported(CUDATestCase): - def test_unsupported(self): - arr = np.arange(10, dtype=np.intp) - devarr = cuda.to_device(arr) - with self.assertRaises(OSError) as raises: - devarr.get_ipc_handle() - errmsg = str(raises.exception) - self.assertIn('OS does not support CUDA IPC', errmsg) - - -def staged_ipc_handle_test(handle, device_num, result_queue): - def the_work(): - with cuda.gpus[device_num]: - this_ctx = cuda.devices.get_context() - can_access = handle.can_access_peer(this_ctx) - print('can_access_peer {} {}'.format(this_ctx, can_access)) - deviceptr = handle.open_staged(this_ctx) - arrsize = handle.size // np.dtype(np.intp).itemsize - hostarray = np.zeros(arrsize, dtype=np.intp) - cuda.driver.device_to_host( - hostarray, deviceptr, size=handle.size, - ) - handle.close() - return hostarray - - core_ipc_handle_test(the_work, result_queue) - - -def staged_ipc_array_test(ipcarr, device_num, result_queue): - try: - with cuda.gpus[device_num]: - this_ctx = cuda.devices.get_context() - print(this_ctx.device) - with ipcarr as darr: - arr = darr.copy_to_host() - try: - # should fail to reopen - with ipcarr: - pass - except ValueError as e: - if str(e) != 'IpcHandle is already opened': - raise AssertionError('invalid exception message') - else: - raise AssertionError('did not raise on reopen') - except: - # FAILED. propagate the exception as a string - succ = False - out = traceback.format_exc() - else: - # OK. 
send the ndarray back - succ = True - out = arr - result_queue.put((succ, out)) - - -@unittest.skipIf(not_linux, "IPC only supported on Linux") -@unittest.skipUnless(has_mp_get_context, "requires multiprocessing.get_context") -@skip_on_cudasim('Ipc not available in CUDASIM') -class TestIpcStaged(CUDATestCase): - def test_staged(self): - # prepare data for IPC - arr = np.arange(10, dtype=np.intp) - devarr = cuda.to_device(arr) - - # spawn new process for testing - mpctx = mp.get_context('spawn') - result_queue = mpctx.Queue() - - # create IPC handle - ctx = cuda.current_context() - ipch = ctx.get_ipc_handle(devarr.gpu_data) - # pickle - buf = pickle.dumps(ipch) - ipch_recon = pickle.loads(buf) - self.assertIs(ipch_recon.base, None) - self.assertEqual(tuple(ipch_recon.handle), tuple(ipch.handle)) - self.assertEqual(ipch_recon.size, ipch.size) - - # Test on every CUDA devices - for device_num in range(len(cuda.gpus)): - args = (ipch, device_num, result_queue) - proc = mpctx.Process(target=staged_ipc_handle_test, args=args) - proc.start() - succ, out = result_queue.get() - proc.join(3) - if not succ: - self.fail(out) - else: - np.testing.assert_equal(arr, out) - - def test_ipc_array(self): - for device_num in range(len(cuda.gpus)): - # prepare data for IPC - arr = np.random.random(10) - devarr = cuda.to_device(arr) - ipch = devarr.get_ipc_handle() - - # spawn new process for testing - ctx = mp.get_context('spawn') - result_queue = ctx.Queue() - args = (ipch, device_num, result_queue) - proc = ctx.Process(target=staged_ipc_array_test, args=args) - proc.start() - succ, out = result_queue.get() - proc.join(3) - if not succ: - self.fail(out) - else: - np.testing.assert_equal(arr, out) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_lang.py b/numba/numba/cuda/tests/cudapy/test_lang.py deleted file mode 100644 index f0133a092..000000000 --- a/numba/numba/cuda/tests/cudapy/test_lang.py +++ /dev/null @@ -1,63 +0,0 @@ -""" -Test 
basic language features - -""" -from __future__ import print_function, absolute_import, division - -import numpy as np -from numba import cuda, float64 -from numba.cuda.testing import unittest, SerialMixin - - -class TestLang(SerialMixin, unittest.TestCase): - def test_enumerate(self): - tup = (1., 2.5, 3.) - - @cuda.jit("void(float64[:])") - def foo(a): - for i, v in enumerate(tup): - a[i] = v - - a = np.zeros(len(tup)) - foo(a) - self.assertTrue(np.all(a == tup)) - - def test_zip(self): - t1 = (1, 2, 3) - t2 = (4.5, 5.6, 6.7) - - @cuda.jit("void(float64[:])") - def foo(a): - c = 0 - for i, j in zip(t1, t2): - c += i + j - a[0] = c - - a = np.zeros(1) - foo(a) - b = np.array(t1) - c = np.array(t2) - self.assertTrue(np.all(a == (b + c).sum())) - - def test_issue_872(self): - ''' - Ensure that macro expansion works for more than one block (issue #872) - ''' - - @cuda.jit("void(float64[:,:])") - def macros_in_multiple_blocks(ary): - for i in range(2): - tx = cuda.threadIdx.x - for j in range(3): - ty = cuda.threadIdx.y - sm = cuda.shared.array((2, 3), float64) - sm[tx, ty] = 1.0 - ary[tx, ty] = sm[tx, ty] - - a = np.zeros((2, 3)) - macros_in_multiple_blocks[1, (2, 3)](a) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/cuda/tests/cudapy/test_laplace.py b/numba/numba/cuda/tests/cudapy/test_laplace.py deleted file mode 100644 index 99fc049ca..000000000 --- a/numba/numba/cuda/tests/cudapy/test_laplace.py +++ /dev/null @@ -1,126 +0,0 @@ -from __future__ import print_function, absolute_import, division -import numpy as np -import time -from numba import cuda, config, float64, void -from numba.cuda.testing import unittest, SerialMixin - -# NOTE: CUDA kernel does not return any value - -if config.ENABLE_CUDASIM: - tpb = 4 -else: - tpb = 16 -SM_SIZE = tpb, tpb - -class TestCudaLaplace(SerialMixin, unittest.TestCase): - def test_laplace_small(self): - - @cuda.jit(float64(float64, float64), device=True, inline=True) - def get_max(a, b): - if a > b: 
- return a - else: - return b - - @cuda.jit(void(float64[:, :], float64[:, :], float64[:, :])) - def jocabi_relax_core(A, Anew, error): - err_sm = cuda.shared.array(SM_SIZE, dtype=float64) - - ty = cuda.threadIdx.x - tx = cuda.threadIdx.y - bx = cuda.blockIdx.x - by = cuda.blockIdx.y - - n = A.shape[0] - m = A.shape[1] - - i, j = cuda.grid(2) - - err_sm[ty, tx] = 0 - if j >= 1 and j < n - 1 and i >= 1 and i < m - 1: - Anew[j, i] = 0.25 * ( A[j, i + 1] + A[j, i - 1] \ - + A[j - 1, i] + A[j + 1, i]) - err_sm[ty, tx] = Anew[j, i] - A[j, i] - - cuda.syncthreads() - - # max-reduce err_sm vertically - t = tpb // 2 - while t > 0: - if ty < t: - err_sm[ty, tx] = get_max(err_sm[ty, tx], err_sm[ty + t, tx]) - t //= 2 - cuda.syncthreads() - - # max-reduce err_sm horizontally - t = tpb // 2 - while t > 0: - if tx < t and ty == 0: - err_sm[ty, tx] = get_max(err_sm[ty, tx], err_sm[ty, tx + t]) - t //= 2 - cuda.syncthreads() - - if tx == 0 and ty == 0: - error[by, bx] = err_sm[0, 0] - - - - if config.ENABLE_CUDASIM: - NN, NM = 4, 4 - iter_max = 20 - else: - NN, NM = 256, 256 - iter_max = 1000 - - A = np.zeros((NN, NM), dtype=np.float64) - Anew = np.zeros((NN, NM), dtype=np.float64) - - n = NN - m = NM - - tol = 1.0e-6 - error = 1.0 - - for j in range(n): - A[j, 0] = 1.0 - Anew[j, 0] = 1.0 - - timer = time.time() - iter = 0 - - blockdim = (tpb, tpb) - griddim = (NN // blockdim[0], NM // blockdim[1]) - - error_grid = np.zeros(griddim) - - stream = cuda.stream() - - dA = cuda.to_device(A, stream) # to device and don't come back - dAnew = cuda.to_device(Anew, stream) # to device and don't come back - derror_grid = cuda.to_device(error_grid, stream) - - while error > tol and iter < iter_max: - self.assertTrue(error_grid.dtype == np.float64) - - jocabi_relax_core[griddim, blockdim, stream](dA, dAnew, derror_grid) - - derror_grid.copy_to_host(error_grid, stream=stream) - - - # error_grid is available on host - stream.synchronize() - - error = np.abs(error_grid).max() - - # swap dA and 
dAnew - tmp = dA - dA = dAnew - dAnew = tmp - - iter += 1 - - runtime = time.time() - timer - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_localmem.py b/numba/numba/cuda/tests/cudapy/test_localmem.py deleted file mode 100644 index 605beae56..000000000 --- a/numba/numba/cuda/tests/cudapy/test_localmem.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import numpy as np - -from numba import cuda, int32, complex128 -from numba.cuda.testing import unittest, SerialMixin - - -def culocal(A, B): - C = cuda.local.array(1000, dtype=int32) - for i in range(C.shape[0]): - C[i] = A[i] - for i in range(C.shape[0]): - B[i] = C[i] - - -def culocalcomplex(A, B): - C = cuda.local.array(100, dtype=complex128) - for i in range(C.shape[0]): - C[i] = A[i] - for i in range(C.shape[0]): - B[i] = C[i] - - -def culocal1tuple(A, B): - C = cuda.local.array((5,), dtype=int32) - for i in range(C.shape[0]): - C[i] = A[i] - for i in range(C.shape[0]): - B[i] = C[i] - - -class TestCudaLocalMem(SerialMixin, unittest.TestCase): - def test_local_array(self): - jculocal = cuda.jit('void(int32[:], int32[:])')(culocal) - self.assertTrue('.local' in jculocal.ptx) - A = np.arange(1000, dtype='int32') - B = np.zeros_like(A) - jculocal(A, B) - self.assertTrue(np.all(A == B)) - - def test_local_array_1_tuple(self): - """Ensure that the macro can be use with 1-tuple - """ - jculocal = cuda.jit('void(int32[:], int32[:])')(culocal1tuple) - # Don't check if .local is in the ptx because the optimizer - # may reduce it to registers. 
- A = np.arange(5, dtype='int32') - B = np.zeros_like(A) - jculocal(A, B) - self.assertTrue(np.all(A == B)) - - def test_local_array_complex(self): - sig = 'void(complex128[:], complex128[:])' - jculocalcomplex = cuda.jit(sig)(culocalcomplex) - # The local memory would be turned into register - # self.assertTrue('.local' in jculocalcomplex.ptx) - A = (np.arange(100, dtype='complex128') - 1) / 2j - B = np.zeros_like(A) - jculocalcomplex(A, B) - self.assertTrue(np.all(A == B)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_macro.py b/numba/numba/cuda/tests/cudapy/test_macro.py deleted file mode 100644 index b6644c4fe..000000000 --- a/numba/numba/cuda/tests/cudapy/test_macro.py +++ /dev/null @@ -1,96 +0,0 @@ -from __future__ import print_function, division, absolute_import -import numpy as np -from numba import cuda, float32 -from numba.errors import MacroError -from numba.cuda.testing import unittest, SerialMixin -from numba.cuda.testing import skip_on_cudasim - -GLOBAL_CONSTANT = 5 -GLOBAL_CONSTANT_2 = 6 -GLOBAL_CONSTANT_TUPLE = 5, 6 - - -def udt_global_constants(A): - sa = cuda.shared.array(shape=GLOBAL_CONSTANT, dtype=float32) - i = cuda.grid(1) - A[i] = sa[i] - - -def udt_global_build_tuple(A): - sa = cuda.shared.array(shape=(GLOBAL_CONSTANT, GLOBAL_CONSTANT_2), - dtype=float32) - i, j = cuda.grid(2) - A[i, j] = sa[i, j] - - -def udt_global_build_list(A): - sa = cuda.shared.array(shape=[GLOBAL_CONSTANT, GLOBAL_CONSTANT_2], - dtype=float32) - i, j = cuda.grid(2) - A[i, j] = sa[i, j] - - -def udt_global_constant_tuple(A): - sa = cuda.shared.array(shape=GLOBAL_CONSTANT_TUPLE, dtype=float32) - i, j = cuda.grid(2) - A[i, j] = sa[i, j] - - -def udt_invalid_1(A): - sa = cuda.shared.array(shape=A[0], dtype=float32) - i = cuda.grid(1) - A[i] = sa[i] - - -def udt_invalid_2(A): - sa = cuda.shared.array(shape=(1, A[0]), dtype=float32) - i, j = cuda.grid(2) - A[i, j] = sa[i, j] - - -class TestMacro(SerialMixin, 
unittest.TestCase): - def getarg(self): - return np.array(100, dtype=np.float32, ndmin=1) - - def getarg2(self): - return self.getarg().reshape(1,1) - - def test_global_constants(self): - udt = cuda.jit((float32[:],))(udt_global_constants) - udt(self.getarg()) - - def test_global_build_tuple(self): - udt = cuda.jit((float32[:, :],))(udt_global_build_tuple) - udt(self.getarg2()) - - @skip_on_cudasim('Simulator does not perform macro expansion') - def test_global_build_list(self): - with self.assertRaises(MacroError) as raises: - cuda.jit((float32[:, :],))(udt_global_build_list) - - self.assertIn("invalid type for shape; got {0}".format(list), - str(raises.exception)) - - def test_global_constant_tuple(self): - udt = cuda.jit((float32[:, :],))(udt_global_constant_tuple) - udt(self.getarg2()) - - @skip_on_cudasim("Can't check for constants in simulator") - def test_invalid_1(self): - with self.assertRaises(ValueError) as raises: - cuda.jit((float32[:],))(udt_invalid_1) - - self.assertIn("Argument 'shape' must be a constant at", - str(raises.exception)) - - @skip_on_cudasim("Can't check for constants in simulator") - def test_invalid_2(self): - with self.assertRaises(ValueError) as raises: - cuda.jit((float32[:, :],))(udt_invalid_2) - - self.assertIn("Argument 'shape' must be a constant at", - str(raises.exception)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_mandel.py b/numba/numba/cuda/tests/cudapy/test_mandel.py deleted file mode 100644 index 75a341360..000000000 --- a/numba/numba/cuda/tests/cudapy/test_mandel.py +++ /dev/null @@ -1,34 +0,0 @@ -from __future__ import print_function, absolute_import, division -from numba import cuda -from numba.cuda.testing import unittest - - -class TestCudaMandel(unittest.TestCase): - def test_mandel(self): - """Just make sure we can compile this - """ - - @cuda.jit('(uint32, float64, float64, float64, ' - 'float64, uint32, uint32, uint32)', device=True) - def mandel(tid, min_x, 
max_x, min_y, max_y, width, height, iters): - pixel_size_x = (max_x - min_x) / width - pixel_size_y = (max_y - min_y) / height - - x = tid % width - y = tid / width - - real = min_x + x * pixel_size_x - imag = min_y + y * pixel_size_y - - c = complex(real, imag) - z = 0.0j - - for i in range(iters): - z = z * z + c - if (z.real * z.real + z.imag * z.imag) >= 4: - return i - return iters - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_math.py b/numba/numba/cuda/tests/cudapy/test_math.py deleted file mode 100644 index 831ac0398..000000000 --- a/numba/numba/cuda/tests/cudapy/test_math.py +++ /dev/null @@ -1,523 +0,0 @@ -from __future__ import print_function, absolute_import, division -import sys -import numpy as np -from numba.cuda.testing import unittest, SerialMixin -from numba import cuda, float32, float64, int32 -import math - - -def math_acos(A, B): - i = cuda.grid(1) - B[i] = math.acos(A[i]) - - -def math_asin(A, B): - i = cuda.grid(1) - B[i] = math.asin(A[i]) - - -def math_atan(A, B): - i = cuda.grid(1) - B[i] = math.atan(A[i]) - - -def math_acosh(A, B): - i = cuda.grid(1) - B[i] = math.acosh(A[i]) - - -def math_asinh(A, B): - i = cuda.grid(1) - B[i] = math.asinh(A[i]) - - -def math_atanh(A, B): - i = cuda.grid(1) - B[i] = math.atanh(A[i]) - - -def math_cos(A, B): - i = cuda.grid(1) - B[i] = math.cos(A[i]) - - -def math_sin(A, B): - i = cuda.grid(1) - B[i] = math.sin(A[i]) - - -def math_tan(A, B): - i = cuda.grid(1) - B[i] = math.tan(A[i]) - - -def math_cosh(A, B): - i = cuda.grid(1) - B[i] = math.cosh(A[i]) - - -def math_sinh(A, B): - i = cuda.grid(1) - B[i] = math.sinh(A[i]) - - -def math_tanh(A, B): - i = cuda.grid(1) - B[i] = math.tanh(A[i]) - - -def math_atan2(A, B, C): - i = cuda.grid(1) - C[i] = math.atan2(A[i], B[i]) - - -def math_exp(A, B): - i = cuda.grid(1) - B[i] = math.exp(A[i]) - -def math_erf(A, B): - i = cuda.grid(1) - B[i] = math.erf(A[i]) - -def math_erfc(A, B): - i = cuda.grid(1) - B[i] = 
math.erfc(A[i]) - -def math_expm1(A, B): - i = cuda.grid(1) - B[i] = math.expm1(A[i]) - -def math_fabs(A, B): - i = cuda.grid(1) - B[i] = math.fabs(A[i]) - -def math_gamma(A, B): - i = cuda.grid(1) - B[i] = math.gamma(A[i]) - -def math_lgamma(A, B): - i = cuda.grid(1) - B[i] = math.lgamma(A[i]) - -def math_log(A, B): - i = cuda.grid(1) - B[i] = math.log(A[i]) - - -def math_log10(A, B): - i = cuda.grid(1) - B[i] = math.log10(A[i]) - - -def math_log1p(A, B): - i = cuda.grid(1) - B[i] = math.log1p(A[i]) - - -def math_sqrt(A, B): - i = cuda.grid(1) - B[i] = math.sqrt(A[i]) - - -def math_hypot(A, B, C): - i = cuda.grid(1) - C[i] = math.hypot(A[i], B[i]) - - -def math_pow(A, B, C): - i = cuda.grid(1) - C[i] = math.pow(A[i], B[i]) - - -def math_ceil(A, B): - i = cuda.grid(1) - B[i] = math.ceil(A[i]) - - -def math_floor(A, B): - i = cuda.grid(1) - B[i] = math.floor(A[i]) - - -def math_copysign(A, B, C): - i = cuda.grid(1) - C[i] = math.copysign(A[i], B[i]) - - -def math_fmod(A, B, C): - i = cuda.grid(1) - C[i] = math.fmod(A[i], B[i]) - - -def math_modf(A, B, C): - i = cuda.grid(1) - C[i] = math.modf(A[i], B[i]) - - -def math_isnan(A, B): - i = cuda.grid(1) - B[i] = math.isnan(A[i]) - - -def math_isinf(A, B): - i = cuda.grid(1) - B[i] = math.isinf(A[i]) - - -def math_pow_binop(A, B, C): - i = cuda.grid(1) - C[i] = A[i] ** B[i] - - -def math_mod_binop(A, B, C): - i = cuda.grid(1) - C[i] = A[i] % B[i] - - -class TestCudaMath(SerialMixin, unittest.TestCase): - def unary_template_float32(self, func, npfunc, start=0, stop=1): - self.unary_template(func, npfunc, np.float32, float32, start, stop) - - - def unary_template_float64(self, func, npfunc, start=0, stop=1): - self.unary_template(func, npfunc, np.float64, float64, start, stop) - - - def unary_template(self, func, npfunc, npdtype, npmtype, start, stop): - nelem = 50 - A = np.linspace(start, stop, nelem).astype(npdtype) - B = np.empty_like(A) - arytype = npmtype[::1] - cfunc = cuda.jit((arytype, arytype))(func) - cfunc[1, 
nelem](A, B) - self.assertTrue(np.allclose(npfunc(A), B)) - - def unary_bool_template_float32(self, func, npfunc, start=0, stop=1): - self.unary_template(func, npfunc, np.float32, float32, start, stop) - - - def unary_bool_template_float64(self, func, npfunc, start=0, stop=1): - self.unary_template(func, npfunc, np.float64, float64, start, stop) - - def unary_bool_template(self, func, npfunc, npdtype, npmtype, start, stop): - nelem = 50 - A = np.linspace(start, stop, nelem).astype(npdtype) - B = np.empty(A.shape, dtype=np.int32) - iarytype = npmtype[::1] - oarytype = int32[::1] - cfunc = cuda.jit((iarytype, oarytype))(func) - cfunc[1, nelem](A, B) - self.assertTrue(np.all(npfunc(A), B)) - - - def binary_template_float32(self, func, npfunc, start=0, stop=1): - self.binary_template(func, npfunc, np.float32, float32, start, stop) - - - def binary_template_float64(self, func, npfunc, start=0, stop=1): - self.binary_template(func, npfunc, np.float64, float64, start, stop) - - - def binary_template(self, func, npfunc, npdtype, npmtype, start, stop): - nelem = 50 - A = np.linspace(start, stop, nelem).astype(npdtype) - B = np.empty_like(A) - arytype = npmtype[::1] - cfunc = cuda.jit((arytype, arytype, arytype))(func) - cfunc.bind() - cfunc[1, nelem](A, A, B) - self.assertTrue(np.allclose(npfunc(A, A), B)) - - # Test helper for math functions when no ufunc exists - # and dtype specificity is required. 
- def _math_vectorize(self, mathfunc, x): - ret = np.zeros_like(x) - for k in range(len(x)): - ret[k] = mathfunc(x[k]) - return ret - - #------------------------------------------------------------------------------ - # test_math_acos - - def test_math_acos(self): - self.unary_template_float32(math_acos, np.arccos) - self.unary_template_float64(math_acos, np.arccos) - - #------------------------------------------------------------------------------ - # test_math_asin - - - def test_math_asin(self): - self.unary_template_float32(math_asin, np.arcsin) - self.unary_template_float64(math_asin, np.arcsin) - - #------------------------------------------------------------------------------ - # test_math_atan - - - def test_math_atan(self): - self.unary_template_float32(math_atan, np.arctan) - self.unary_template_float64(math_atan, np.arctan) - - #------------------------------------------------------------------------------ - # test_math_acosh - - - def test_math_acosh(self): - self.unary_template_float32(math_acosh, np.arccosh, start=1, stop=2) - self.unary_template_float64(math_acosh, np.arccosh, start=1, stop=2) - - #------------------------------------------------------------------------------ - # test_math_asinh - - - def test_math_asinh(self): - self.unary_template_float32(math_asinh, np.arcsinh) - self.unary_template_float64(math_asinh, np.arcsinh) - - #------------------------------------------------------------------------------ - # test_math_atanh - - - def test_math_atanh(self): - self.unary_template_float32(math_atanh, np.arctanh, start=0, stop=.9) - self.unary_template_float64(math_atanh, np.arctanh, start=0, stop=.9) - - - #------------------------------------------------------------------------------ - # test_math_cos - - - def test_math_cos(self): - self.unary_template_float32(math_cos, np.cos) - self.unary_template_float64(math_cos, np.cos) - - #------------------------------------------------------------------------------ - # test_math_sin - - - def 
test_math_sin(self): - self.unary_template_float32(math_sin, np.sin) - self.unary_template_float64(math_sin, np.sin) - - #------------------------------------------------------------------------------ - # test_math_tan - - - def test_math_tan(self): - self.unary_template_float32(math_tan, np.tan) - self.unary_template_float64(math_tan, np.tan) - - #------------------------------------------------------------------------------ - # test_math_cosh - - - def test_math_cosh(self): - self.unary_template_float32(math_cosh, np.cosh) - self.unary_template_float64(math_cosh, np.cosh) - - #------------------------------------------------------------------------------ - # test_math_sinh - - - def test_math_sinh(self): - self.unary_template_float32(math_sinh, np.sinh) - self.unary_template_float64(math_sinh, np.sinh) - - #------------------------------------------------------------------------------ - # test_math_tanh - - - def test_math_tanh(self): - self.unary_template_float32(math_tanh, np.tanh) - self.unary_template_float64(math_tanh, np.tanh) - - #------------------------------------------------------------------------------ - # test_math_atan2 - - - def test_math_atan2(self): - self.binary_template_float32(math_atan2, np.arctan2) - self.binary_template_float64(math_atan2, np.arctan2) - - #------------------------------------------------------------------------------ - # test_math_erf - - - def test_math_erf(self): - def ufunc(x): - return self._math_vectorize(math.erf, x) - self.unary_template_float32(math_erf, ufunc) - self.unary_template_float64(math_erf, ufunc) - - #------------------------------------------------------------------------------ - # test_math_erfc - - - def test_math_erfc(self): - def ufunc(x): - return self._math_vectorize(math.erfc, x) - self.unary_template_float32(math_erfc, ufunc) - self.unary_template_float64(math_erfc, ufunc) - - #------------------------------------------------------------------------------ - # test_math_exp - - - def 
test_math_exp(self): - self.unary_template_float32(math_exp, np.exp) - self.unary_template_float64(math_exp, np.exp) - - #------------------------------------------------------------------------------ - # test_math_expm1 - - def test_math_expm1(self): - self.unary_template_float32(math_expm1, np.expm1) - self.unary_template_float64(math_expm1, np.expm1) - - #------------------------------------------------------------------------------ - # test_math_fabs - - - def test_math_fabs(self): - self.unary_template_float32(math_fabs, np.fabs, start=-1) - self.unary_template_float64(math_fabs, np.fabs, start=-1) - - #------------------------------------------------------------------------------ - # test_math_gamma - - - def test_math_gamma(self): - def ufunc(x): - return self._math_vectorize(math.gamma, x) - self.unary_template_float32(math_gamma, ufunc, start=0.1) - self.unary_template_float64(math_gamma, ufunc, start=0.1) - - #------------------------------------------------------------------------------ - # test_math_lgamma - - - def test_math_lgamma(self): - def ufunc(x): - return self._math_vectorize(math.lgamma, x) - self.unary_template_float32(math_lgamma, ufunc, start=0.1) - self.unary_template_float64(math_lgamma, ufunc, start=0.1) - - #------------------------------------------------------------------------------ - # test_math_log - - - def test_math_log(self): - self.unary_template_float32(math_log, np.log, start=1) - self.unary_template_float64(math_log, np.log, start=1) - - #------------------------------------------------------------------------------ - # test_math_log10 - - - def test_math_log10(self): - self.unary_template_float32(math_log10, np.log10, start=1) - self.unary_template_float64(math_log10, np.log10, start=1) - - #------------------------------------------------------------------------------ - # test_math_log1p - - - def test_math_log1p(self): - self.unary_template_float32(math_log1p, np.log1p) - self.unary_template_float64(math_log1p, np.log1p) 
- - #------------------------------------------------------------------------------ - # test_math_sqrt - - - def test_math_sqrt(self): - self.unary_template_float32(math_sqrt, np.sqrt) - self.unary_template_float64(math_sqrt, np.sqrt) - - #------------------------------------------------------------------------------ - # test_math_hypot - - - def test_math_hypot(self): - self.binary_template_float32(math_hypot, np.hypot) - self.binary_template_float64(math_hypot, np.hypot) - - - #------------------------------------------------------------------------------ - # test_math_pow - - - def test_math_pow(self): - self.binary_template_float32(math_pow, np.power) - self.binary_template_float64(math_pow, np.power) - - - #------------------------------------------------------------------------------ - # test_math_pow_binop - - - def test_math_pow_binop(self): - self.binary_template_float32(math_pow_binop, np.power) - self.binary_template_float64(math_pow_binop, np.power) - - #------------------------------------------------------------------------------ - # test_math_ceil - - - def test_math_ceil(self): - self.unary_template_float32(math_ceil, np.ceil) - self.unary_template_float64(math_ceil, np.ceil) - - #------------------------------------------------------------------------------ - # test_math_floor - - - def test_math_floor(self): - self.unary_template_float32(math_floor, np.floor) - self.unary_template_float64(math_floor, np.floor) - - #------------------------------------------------------------------------------ - # test_math_copysign - - - def test_math_copysign(self): - self.binary_template_float32(math_copysign, np.copysign, start=-1) - self.binary_template_float64(math_copysign, np.copysign, start=-1) - - #------------------------------------------------------------------------------ - # test_math_fmod - - - def test_math_fmod(self): - self.binary_template_float32(math_fmod, np.fmod, start=1) - self.binary_template_float64(math_fmod, np.fmod, start=1) - - 
#------------------------------------------------------------------------------ - # test_math_mod_binop - - - def test_math_mod_binop(self): - self.binary_template_float32(math_mod_binop, np.fmod, start=1) - self.binary_template_float64(math_mod_binop, np.fmod, start=1) - - #------------------------------------------------------------------------------ - # test_math_isnan - - - def test_math_isnan(self): - self.unary_bool_template_float32(math_isnan, np.isnan) - self.unary_bool_template_float64(math_isnan, np.isnan) - - #------------------------------------------------------------------------------ - # test_math_isinf - - - def test_math_isinf(self): - self.unary_bool_template_float32(math_isinf, np.isinf) - self.unary_bool_template_float64(math_isinf, np.isinf) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/cuda/tests/cudapy/test_matmul.py b/numba/numba/cuda/tests/cudapy/test_matmul.py deleted file mode 100644 index 31ff2f947..000000000 --- a/numba/numba/cuda/tests/cudapy/test_matmul.py +++ /dev/null @@ -1,75 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import numpy as np - -from numba import cuda, config, float32 -from numba.cuda.testing import unittest, SerialMixin - -# Ensure the test takes a reasonable amount of time in the simulator -if config.ENABLE_CUDASIM: - bpg, tpb = 2, 8 -else: - bpg, tpb = 50, 32 - -n = bpg * tpb -SM_SIZE = (tpb, tpb) - - -class TestCudaMatMul(SerialMixin, unittest.TestCase): - - def test_func(self): - - @cuda.jit(argtypes=[float32[:, ::1], float32[:, ::1], float32[:, ::1]]) - def cu_square_matrix_mul(A, B, C): - sA = cuda.shared.array(shape=SM_SIZE, dtype=float32) - sB = cuda.shared.array(shape=(tpb, tpb), dtype=float32) - - tx = cuda.threadIdx.x - ty = cuda.threadIdx.y - bx = cuda.blockIdx.x - by = cuda.blockIdx.y - bw = cuda.blockDim.x - bh = cuda.blockDim.y - - x = tx + bx * bw - y = ty + by * bh - - acc = float32(0) # forces all the math to be f32 - for i in range(bpg): 
- if x < n and y < n: - sA[ty, tx] = A[y, tx + i * tpb] - sB[ty, tx] = B[ty + i * tpb, x] - - cuda.syncthreads() - - if x < n and y < n: - for j in range(tpb): - acc += sA[ty, j] * sB[j, tx] - - cuda.syncthreads() - - if x < n and y < n: - C[y, x] = acc - - np.random.seed(42) - A = np.array(np.random.random((n, n)), dtype=np.float32) - B = np.array(np.random.random((n, n)), dtype=np.float32) - C = np.empty_like(A) - - stream = cuda.stream() - with stream.auto_synchronize(): - dA = cuda.to_device(A, stream) - dB = cuda.to_device(B, stream) - dC = cuda.to_device(C, stream) - cu_square_matrix_mul[(bpg, bpg), (tpb, tpb), stream](dA, dB, dC) - dC.copy_to_host(C, stream) - - # Host compute - Cans = np.dot(A, B) - - # Check result - np.testing.assert_allclose(C, Cans, rtol=1e-5) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_montecarlo.py b/numba/numba/cuda/tests/cudapy/test_montecarlo.py deleted file mode 100644 index b646b75d5..000000000 --- a/numba/numba/cuda/tests/cudapy/test_montecarlo.py +++ /dev/null @@ -1,24 +0,0 @@ -from __future__ import print_function, absolute_import -import math -from numba import cuda -from numba.cuda.testing import unittest, SerialMixin - - -class TestCudaMonteCarlo(SerialMixin, unittest.TestCase): - def test_montecarlo(self): - """Just make sure we can compile this - """ - - @cuda.jit( - 'void(double[:], double[:], double, double, double, double[:])') - def step(last, paths, dt, c0, c1, normdist): - i = cuda.grid(1) - if i >= paths.shape[0]: - return - noise = normdist[i] - paths[i] = last[i] * math.exp(c0 * dt + c1 * noise) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/cuda/tests/cudapy/test_multigpu.py b/numba/numba/cuda/tests/cudapy/test_multigpu.py deleted file mode 100644 index 035928f66..000000000 --- a/numba/numba/cuda/tests/cudapy/test_multigpu.py +++ /dev/null @@ -1,121 +0,0 @@ -from numba import cuda -import numpy as np -from numba import 
unittest_support as unittest -from numba.cuda.testing import skip_on_cudasim, SerialMixin -import threading - - -class TestMultiGPUContext(SerialMixin, unittest.TestCase): - @unittest.skipIf(len(cuda.gpus) < 2, "need more than 1 gpus") - def test_multigpu_context(self): - @cuda.jit("void(float64[:], float64[:])") - def copy_plus_1(inp, out): - i = cuda.grid(1) - if i < out.size: - out[i] = inp[i] + 1 - - def check(inp, out): - np.testing.assert_equal(inp + 1, out) - - - N = 32 - A = np.arange(N, dtype=np.float64) - B = np.arange(N, dtype=np.float64) - - with cuda.gpus[0]: - copy_plus_1[1, N](A, B) - - check(A, B) - - copy_plus_1[1, N](A, B) - check(A, B) - - with cuda.gpus[0]: - A0 = np.arange(N, dtype=np.float64) - B0 = np.arange(N, dtype=np.float64) - copy_plus_1[1, N](A0, B0) - - with cuda.gpus[1]: - A1 = np.arange(N, dtype=np.float64) - B1 = np.arange(N, dtype=np.float64) - copy_plus_1[1, N](A1, B1) - - check(A0, B0) - check(A1, B1) - - A = np.arange(N, dtype=np.float64) - B = np.arange(N, dtype=np.float64) - copy_plus_1[1, N](A, B) - check(A, B) - - @skip_on_cudasim('Simulator does not support multiple threads') - def test_multithreaded(self): - def work(gpu, dA, results, ridx): - try: - with gpu: - arr = dA.copy_to_host() - - except BaseException as e: - results[ridx] = e - - else: - results[ridx] = np.all(arr == np.arange(10)) - - - dA = cuda.to_device(np.arange(10)) - - nthreads = 10 - results = [None] * nthreads - threads = [threading.Thread(target=work, args=(cuda.gpus.current, - dA, results, i)) - for i in range(nthreads)] - for th in threads: - th.start() - - for th in threads: - th.join() - - for r in results: - if isinstance(r, BaseException): - raise r - else: - self.assertTrue(r) - - - @unittest.skipIf(len(cuda.gpus) < 2, "need more than 1 gpus") - def test_with_context(self): - - @cuda.jit - def vector_add_scalar(arr, val): - i = cuda.grid(1) - if i < arr.size: - arr[i] += val - - - hostarr = np.arange(10, dtype=np.float32) - with cuda.gpus[0]: - 
arr1 = cuda.to_device(hostarr) - - with cuda.gpus[1]: - arr2 = cuda.to_device(hostarr) - - with cuda.gpus[0]: - vector_add_scalar[1, 10](arr1, 1) - - with cuda.gpus[1]: - vector_add_scalar[1, 10](arr2, 2) - - with cuda.gpus[0]: - np.testing.assert_equal(arr1.copy_to_host(), (hostarr + 1)) - - with cuda.gpus[1]: - np.testing.assert_equal(arr2.copy_to_host(), (hostarr + 2)) - - with cuda.gpus[0]: - # Transfer from GPU1 to GPU0 - arr1.copy_to_device(arr2) - np.testing.assert_equal(arr1.copy_to_host(), (hostarr + 2)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_multiprocessing.py b/numba/numba/cuda/tests/cudapy/test_multiprocessing.py deleted file mode 100644 index fb21bad7c..000000000 --- a/numba/numba/cuda/tests/cudapy/test_multiprocessing.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -import multiprocessing as mp - -import numpy as np - -from numba import cuda -from numba import unittest_support as unittest -from numba.cuda.testing import skip_on_cudasim, SerialMixin - -has_mp_get_context = hasattr(mp, 'get_context') -is_unix = os.name == 'posix' - - -def fork_test(q): - from numba.cuda.cudadrv.error import CudaDriverError - try: - cuda.to_device(np.arange(1)) - except CudaDriverError as e: - q.put(e) - else: - q.put(None) - - -@skip_on_cudasim('disabled for cudasim') -class TestMultiprocessing(SerialMixin, unittest.TestCase): - @unittest.skipUnless(has_mp_get_context, 'requires mp.get_context') - @unittest.skipUnless(is_unix, 'requires Unix') - def test_fork(self): - """ - Test fork detection. 
- """ - cuda.current_context() # force cuda initialize - # fork in process that also uses CUDA - ctx = mp.get_context('fork') - q = ctx.Queue() - proc = ctx.Process(target=fork_test, args=[q]) - proc.start() - exc = q.get() - proc.join() - # there should be an exception raised in the child process - self.assertIsNotNone(exc) - self.assertIn('CUDA initialized before forking', str(exc)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_multithreads.py b/numba/numba/cuda/tests/cudapy/test_multithreads.py deleted file mode 100644 index 7337241b2..000000000 --- a/numba/numba/cuda/tests/cudapy/test_multithreads.py +++ /dev/null @@ -1,98 +0,0 @@ -import traceback -import threading -import multiprocessing -import numpy as np -from numba import cuda -from numba import unittest_support as unittest -from numba.cuda.testing import skip_on_cudasim, SerialMixin - -try: - from concurrent.futures import ThreadPoolExecutor -except ImportError: - has_concurrent_futures = False -else: - has_concurrent_futures = True - - -has_mp_get_context = hasattr(multiprocessing, 'get_context') - - -def check_concurrent_compiling(): - @cuda.jit - def foo(x): - x[0] += 1 - - def use_foo(x): - foo(x) - return x - - arrays = [np.arange(10) for i in range(10)] - expected = np.arange(10) - expected[0] += 1 - with ThreadPoolExecutor(max_workers=4) as e: - for ary in e.map(use_foo, arrays): - np.testing.assert_equal(ary, expected) - - -def spawn_process_entry(q): - try: - check_concurrent_compiling() - except: - msg = traceback.format_exc() - q.put('\n'.join(['', '=' * 80, msg])) - else: - q.put(None) - - -@skip_on_cudasim('disabled for cudasim') -class TestMultiThreadCompiling(SerialMixin, unittest.TestCase): - - @unittest.skipIf(not has_concurrent_futures, "no concurrent.futures") - def test_concurrent_compiling(self): - check_concurrent_compiling() - - @unittest.skipIf(not has_mp_get_context, "no multiprocessing.get_context") - def 
test_spawn_concurrent_compilation(self): - # force CUDA context init - cuda.get_current_device() - # use "spawn" to avoid inheriting the CUDA context - ctx = multiprocessing.get_context('spawn') - - q = ctx.Queue() - p = ctx.Process(target=spawn_process_entry, args=(q,)) - p.start() - try: - err = q.get() - finally: - p.join() - if err is not None: - raise AssertionError(err) - self.assertEqual(p.exitcode, 0, 'test failed in child process') - - def test_invalid_context_error_with_d2h(self): - def d2h(arr, out): - out[:] = arr.copy_to_host() - - arr = np.arange(1, 4) - out = np.zeros_like(arr) - darr = cuda.to_device(arr) - th = threading.Thread(target=d2h, args=[darr, out]) - th.start() - th.join() - np.testing.assert_equal(arr, out) - - def test_invalid_context_error_with_d2d(self): - def d2d(dst, src): - dst.copy_to_device(src) - - arr = np.arange(100) - common = cuda.to_device(arr) - darr = cuda.to_device(np.zeros(common.shape, dtype=common.dtype)) - th = threading.Thread(target=d2d, args=[darr, common]) - th.start() - th.join() - np.testing.assert_equal(darr.copy_to_host(), arr) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_nondet.py b/numba/numba/cuda/tests/cudapy/test_nondet.py deleted file mode 100644 index 1b612eaa7..000000000 --- a/numba/numba/cuda/tests/cudapy/test_nondet.py +++ /dev/null @@ -1,51 +0,0 @@ -from __future__ import print_function, absolute_import -import numpy as np -from numba import cuda, float32 -from numba.cuda.testing import unittest, SerialMixin - - -def generate_input(n): - A = np.array(np.arange(n * n).reshape(n, n), dtype=np.float32) - B = np.array(np.arange(n) + 0, dtype=A.dtype) - return A, B - - -class TestCudaNonDet(SerialMixin, unittest.TestCase): - def test_for_pre(self): - """Test issue with loop not running due to bad sign-extension at the for loop - precondition. 
- """ - - @cuda.jit(argtypes=[float32[:, :], float32[:, :], float32[:]]) - def diagproduct(c, a, b): - startX, startY = cuda.grid(2) - gridX = cuda.gridDim.x * cuda.blockDim.x - gridY = cuda.gridDim.y * cuda.blockDim.y - height = c.shape[0] - width = c.shape[1] - - for x in range(startX, width, (gridX)): - for y in range(startY, height, (gridY)): - c[y, x] = a[y, x] * b[x] - - N = 8 - - A, B = generate_input(N) - - F = np.empty(A.shape, dtype=A.dtype) - - blockdim = (32, 8) - griddim = (1, 1) - - dA = cuda.to_device(A) - dB = cuda.to_device(B) - dF = cuda.to_device(F, copy=False) - diagproduct[griddim, blockdim](dF, dA, dB) - - E = np.dot(A, np.diag(B)) - np.testing.assert_array_almost_equal(dF.copy_to_host(), E) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/cuda/tests/cudapy/test_operator.py b/numba/numba/cuda/tests/cudapy/test_operator.py deleted file mode 100644 index f6b5fb191..000000000 --- a/numba/numba/cuda/tests/cudapy/test_operator.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import numpy as np -from numba.cuda.testing import unittest, SerialMixin -from numba import cuda -import operator - - -class TestOperatorModule(SerialMixin, unittest.TestCase): - """ - Test if operator module is supported by the CUDA target. 
- """ - def operator_template(self, op): - @cuda.jit - def foo(a, b): - i = 0 - a[i] = op(a[i], b[i]) - - a = np.ones(1) - b = np.ones(1) - res = a.copy() - foo[1, 1](res, b) - - np.testing.assert_equal(res, op(a, b)) - - def test_add(self): - self.operator_template(operator.add) - - def test_sub(self): - self.operator_template(operator.sub) - - def test_mul(self): - self.operator_template(operator.mul) - - def test_truediv(self): - self.operator_template(operator.truediv) - - def test_floordiv(self): - self.operator_template(operator.floordiv) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_powi.py b/numba/numba/cuda/tests/cudapy/test_powi.py deleted file mode 100644 index 130504432..000000000 --- a/numba/numba/cuda/tests/cudapy/test_powi.py +++ /dev/null @@ -1,52 +0,0 @@ -from __future__ import print_function, absolute_import -import math -import numpy as np -from numba import cuda, float64, int8, int32 -from numba.cuda.testing import unittest, SerialMixin - - -def cu_mat_power(A, power, power_A): - y, x = cuda.grid(2) - - m, n = power_A.shape - if x >= n or y >= m: - return - - power_A[y, x] = math.pow(A[y, x], int32(power)) - - -def cu_mat_power_binop(A, power, power_A): - y, x = cuda.grid(2) - - m, n = power_A.shape - if x >= n or y >= m: - return - - power_A[y, x] = A[y, x] ** power - - -class TestCudaPowi(SerialMixin, unittest.TestCase): - def test_powi(self): - dec = cuda.jit(argtypes=[float64[:, :], int8, float64[:, :]]) - kernel = dec(cu_mat_power) - - power = 2 - A = np.arange(10, dtype=np.float64).reshape(2, 5) - Aout = np.empty_like(A) - kernel[1, A.shape](A, power, Aout) - self.assertTrue(np.allclose(Aout, A ** power)) - - def test_powi_binop(self): - dec = cuda.jit(argtypes=[float64[:, :], int8, float64[:, :]]) - kernel = dec(cu_mat_power_binop) - - power = 2 - A = np.arange(10, dtype=np.float64).reshape(2, 5) - Aout = np.empty_like(A) - kernel[1, A.shape](A, power, Aout) - 
self.assertTrue(np.allclose(Aout, A ** power)) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/cuda/tests/cudapy/test_print.py b/numba/numba/cuda/tests/cudapy/test_print.py deleted file mode 100644 index 59513d127..000000000 --- a/numba/numba/cuda/tests/cudapy/test_print.py +++ /dev/null @@ -1,68 +0,0 @@ -from __future__ import print_function - -import numpy as np - -from numba import cuda -from numba import unittest_support as unittest -from numba.cuda.testing import captured_cuda_stdout, SerialMixin - - -def cuhello(): - i = cuda.grid(1) - print(i, 999) - print(-42) - - -def printfloat(): - i = cuda.grid(1) - print(i, 23, 34.75, 321) - - -def printstring(): - i = cuda.grid(1) - print(i, "hop!", 999) - - -def printempty(): - print() - - -class TestPrint(SerialMixin, unittest.TestCase): - - def test_cuhello(self): - jcuhello = cuda.jit('void()', debug=False)(cuhello) - with captured_cuda_stdout() as stdout: - jcuhello[2, 3]() - # The output of GPU threads is intermingled, but each print() - # call is still atomic - out = stdout.getvalue() - lines = sorted(out.splitlines(True)) - expected = ['-42\n'] * 6 + ['%d 999\n' % i for i in range(6)] - self.assertEqual(lines, expected) - - def test_printfloat(self): - jprintfloat = cuda.jit('void()', debug=False)(printfloat) - with captured_cuda_stdout() as stdout: - jprintfloat() - # CUDA and the simulator use different formats for float formatting - self.assertIn(stdout.getvalue(), ["0 23 34.750000 321\n", - "0 23 34.75 321\n"]) - - def test_printempty(self): - cufunc = cuda.jit('void()', debug=False)(printempty) - with captured_cuda_stdout() as stdout: - cufunc() - self.assertEqual(stdout.getvalue(), "\n") - - def test_string(self): - cufunc = cuda.jit('void()', debug=False)(printstring) - with captured_cuda_stdout() as stdout: - cufunc[1, 3]() - out = stdout.getvalue() - lines = sorted(out.splitlines(True)) - expected = ['%d hop! 
999\n' % i for i in range(3)] - self.assertEqual(lines, expected) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_py2_div_issue.py b/numba/numba/cuda/tests/cudapy/test_py2_div_issue.py deleted file mode 100644 index 545d0480a..000000000 --- a/numba/numba/cuda/tests/cudapy/test_py2_div_issue.py +++ /dev/null @@ -1,33 +0,0 @@ -from __future__ import print_function, absolute_import -import numpy as np -from numba import cuda, float32, int32 -from numba.cuda.testing import unittest, SerialMixin - - -class TestCudaPy2Div(SerialMixin, unittest.TestCase): - def test_py2_div_issue(self): - @cuda.jit(argtypes=[float32[:], float32[:], float32[:], int32]) - def preCalc(y, yA, yB, numDataPoints): - i = cuda.grid(1) - k = i % numDataPoints - - ans = float32(1.001 * float32(i)) - - y[i] = ans - yA[i] = ans * 1.0 - yB[i] = ans / 1.0 - - numDataPoints = 15 - - y = np.zeros(numDataPoints, dtype=np.float32) - yA = np.zeros(numDataPoints, dtype=np.float32) - yB = np.zeros(numDataPoints, dtype=np.float32) - z = 1.0 - preCalc[1, 15](y, yA, yB, numDataPoints) - - self.assertTrue(np.all(y == yA)) - self.assertTrue(np.all(y == yB)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_random.py b/numba/numba/cuda/tests/cudapy/test_random.py deleted file mode 100644 index 476a76691..000000000 --- a/numba/numba/cuda/tests/cudapy/test_random.py +++ /dev/null @@ -1,101 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import math - -import numpy as np - -from numba import cuda, config, float32 -from numba.cuda.testing import unittest -import numba.cuda.random -from numba.cuda.testing import skip_on_cudasim, SerialMixin - -from numba.cuda.random import \ - xoroshiro128p_uniform_float32, xoroshiro128p_normal_float32, \ - xoroshiro128p_uniform_float64, xoroshiro128p_normal_float64 - - -# Distributions -UNIFORM = 1 -NORMAL = 2 - - -@cuda.jit -def rng_kernel_float32(states, out, 
count, distribution): - thread_id = cuda.grid(1) - - for i in range(count): - if distribution == UNIFORM: - out[thread_id * count + i] = xoroshiro128p_uniform_float32(states, thread_id) - elif distribution == NORMAL: - out[thread_id * count + i] = xoroshiro128p_normal_float32(states, thread_id) - - -@cuda.jit -def rng_kernel_float64(states, out, count, distribution): - thread_id = cuda.grid(1) - - for i in range(count): - if distribution == UNIFORM: - out[thread_id * count + i] = xoroshiro128p_uniform_float64(states, thread_id) - elif distribution == NORMAL: - out[thread_id * count + i] = xoroshiro128p_normal_float64(states, thread_id) - - -class TestCudaRandomXoroshiro128p(SerialMixin, unittest.TestCase): - def test_create(self): - states = cuda.random.create_xoroshiro128p_states(10, seed=1) - s = states.copy_to_host() - self.assertEqual(len(np.unique(s)), 10) - - def test_create_subsequence_start(self): - states = cuda.random.create_xoroshiro128p_states(10, seed=1) - s1 = states.copy_to_host() - - states = cuda.random.create_xoroshiro128p_states(10, seed=1, - subsequence_start=3) - s2 = states.copy_to_host() - - # Starting seeds should match up with offset of 3 - np.testing.assert_array_equal(s1[3:], s2[:-3]) - - def test_create_stream(self): - stream = cuda.stream() - states = cuda.random.create_xoroshiro128p_states(10, seed=1, stream=stream) - s = states.copy_to_host() - self.assertEqual(len(np.unique(s)), 10) - - def check_uniform(self, kernel_func, dtype): - states = cuda.random.create_xoroshiro128p_states(32 * 2, seed=1) - out = np.zeros(2 * 32 * 32, dtype=np.float32) - - kernel_func[2, 32](states, out, 32, UNIFORM) - self.assertAlmostEqual(out.min(), 0.0, delta=1e-3) - self.assertAlmostEqual(out.max(), 1.0, delta=1e-3) - self.assertAlmostEqual(out.mean(), 0.5, delta=1.5e-2) - self.assertAlmostEqual(out.std(), 1.0/(2*math.sqrt(3)), delta=6e-3) - - def test_uniform_float32(self): - self.check_uniform(rng_kernel_float32, np.float32) - - @skip_on_cudasim('skip 
test for speed under cudasim') - def test_uniform_float64(self): - self.check_uniform(rng_kernel_float64, np.float64) - - def check_normal(self, kernel_func, dtype): - states = cuda.random.create_xoroshiro128p_states(32 * 2, seed=1) - out = np.zeros(2 * 32 * 32, dtype=dtype) - - kernel_func[2, 32](states, out, 32, NORMAL) - - self.assertAlmostEqual(out.mean(), 0.0, delta=4e-3) - self.assertAlmostEqual(out.std(), 1.0, delta=2e-3) - - def test_normal_float32(self): - self.check_normal(rng_kernel_float32, np.float32) - - @skip_on_cudasim('skip test for speed under cudasim') - def test_normal_float64(self): - self.check_normal(rng_kernel_float64, np.float64) - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_record_dtype.py b/numba/numba/cuda/tests/cudapy/test_record_dtype.py deleted file mode 100644 index b07770dd4..000000000 --- a/numba/numba/cuda/tests/cudapy/test_record_dtype.py +++ /dev/null @@ -1,289 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import sys - -import numpy as np -from numba import cuda, numpy_support, types -from numba import unittest_support as unittest -from numba.cuda.testing import skip_on_cudasim, SerialMixin - - -def set_a(ary, i, v): - ary[i].a = v - - -def set_b(ary, i, v): - ary[i].b = v - - -def set_c(ary, i, v): - ary[i].c = v - - -def set_record(ary, i, j): - ary[i] = ary[j] - - -def record_set_a(r, v): - r.a = v - - -def record_set_b(r, v): - r.b = v - - -def record_set_c(r, v): - r.c = v - - -def record_read_a(r, arr): - arr[0] = r.a - - -def record_read_b(r, arr): - arr[0] = r.b - - -def record_read_c(r, arr): - arr[0] = r.c - - -def record_write_array(r): - r.g = 2 - r.h[0] = 3.0 - r.h[1] = 4.0 - - -def record_write_2d_array(r): - r.i = 3 - r.j[0, 0] = 5.0 - r.j[0, 1] = 6.0 - r.j[1, 0] = 7.0 - r.j[1, 1] = 8.0 - r.j[2, 0] = 9.0 - r.j[2, 1] = 10.0 - - -def record_read_array(r, a): - a[0] = r.h[0] - a[1] = r.h[1] - - -def record_read_2d_array(r, a): - a[0, 0] 
= r.j[0, 0] - a[0, 1] = r.j[0, 1] - a[1, 0] = r.j[1, 0] - a[1, 1] = r.j[1, 1] - a[2, 0] = r.j[2, 0] - a[2, 1] = r.j[2, 1] - - -recordtype = np.dtype( - [ - ('a', np.float64), - ('b', np.int32), - ('c', np.complex64), - ('d', (np.str, 5)) - ], - align=True -) - -recordwitharray = np.dtype( - [ - ('g', np.int32), - ('h', np.float32, 2) - ], - align=True -) - -recordwith2darray = np.dtype([('i', np.int32), - ('j', np.float32, (3, 2))]) - - -class TestRecordDtype(SerialMixin, unittest.TestCase): - - def _createSampleArrays(self): - self.sample1d = np.recarray(3, dtype=recordtype) - self.samplerec1darr = np.recarray(1, dtype=recordwitharray)[0] - self.samplerec2darr = np.recarray(1, dtype=recordwith2darray)[0] - - def setUp(self): - self._createSampleArrays() - - ary = self.sample1d - for i in range(ary.size): - x = i + 1 - ary[i]['a'] = x / 2 - ary[i]['b'] = x - ary[i]['c'] = x * 1j - ary[i]['d'] = "%d" % x - - def get_cfunc(self, pyfunc, argspec): - return cuda.jit()(pyfunc) - - def _test_set_equal(self, pyfunc, value, valuetype): - rec = numpy_support.from_dtype(recordtype) - cfunc = self.get_cfunc(pyfunc, (rec[:], types.intp, valuetype)) - - for i in range(self.sample1d.size): - got = self.sample1d.copy() - - # Force the argument to the pure Python function to be - # a recarray, as attribute access isn't supported on - # structured arrays. 
- if numpy_support.version <= (1, 9): - expect = np.recarray(got.shape, got.dtype) - expect[:] = got - else: - expect = got.copy().view(np.recarray) - - cfunc(got, i, value) - pyfunc(expect, i, value) - - # Match the entire array to ensure no memory corruption - self.assertTrue(np.all(expect == got)) - - def test_set_a(self): - self._test_set_equal(set_a, 3.1415, types.float64) - # Test again to check if coercion works - self._test_set_equal(set_a, 3., types.float32) - - def test_set_b(self): - self._test_set_equal(set_b, 123, types.int32) - # Test again to check if coercion works - self._test_set_equal(set_b, 123, types.float64) - - def test_set_c(self): - self._test_set_equal(set_c, 43j, types.complex64) - # Test again to check if coercion works - self._test_set_equal(set_c, 43j, types.complex128) - - def test_set_record(self): - pyfunc = set_record - rec = numpy_support.from_dtype(recordtype) - cfunc = self.get_cfunc(pyfunc, (rec[:], types.intp, types.intp)) - - test_indices = [(0, 1), (1, 2), (0, 2)] - for i, j in test_indices: - expect = self.sample1d.copy() - pyfunc(expect, i, j) - - got = self.sample1d.copy() - cfunc(got, i, j) - - # Match the entire array to ensure no memory corruption - self.assertEqual(expect[i], expect[j]) - self.assertEqual(got[i], got[j]) - self.assertTrue(np.all(expect == got)) - - def _test_rec_set(self, v, pyfunc, f): - rec = self.sample1d.copy()[0] - nbrecord = numpy_support.from_dtype(recordtype) - cfunc = self.get_cfunc(pyfunc, (nbrecord,)) - cfunc(rec, v) - np.testing.assert_equal(rec[f], v) - - def test_rec_set_a(self): - self._test_rec_set(np.float64(1.5), record_set_a, 'a') - - def test_rec_set_b(self): - self._test_rec_set(np.int32(2), record_set_b, 'b') - - def test_rec_set_c(self): - self._test_rec_set(np.complex64(4.0+5.0j), record_set_c, 'c') - - def _test_rec_read(self, v, pyfunc, f): - rec = self.sample1d.copy()[0] - rec[f] = v - arr = np.zeros(1, v.dtype) - nbrecord = numpy_support.from_dtype(recordtype) - cfunc = 
self.get_cfunc(pyfunc, (nbrecord,)) - cfunc(rec, arr) - np.testing.assert_equal(arr[0], v) - - def test_rec_read_a(self): - self._test_rec_read(np.float64(1.5), record_read_a, 'a') - - def test_rec_read_b(self): - self._test_rec_read(np.int32(2), record_read_b, 'b') - - def test_rec_read_c(self): - self._test_rec_read(np.complex64(4.0+5.0j), record_read_c, 'c') - - def test_record_write_1d_array(self): - ''' - Test writing to a 1D array within a structured type - ''' - rec = self.samplerec1darr.copy() - nbrecord = numpy_support.from_dtype(recordwitharray) - cfunc = self.get_cfunc(record_write_array, (nbrecord,)) - - cfunc(rec) - expected = self.samplerec1darr.copy() - expected['g'] = 2 - expected['h'][0] = 3.0 - expected['h'][1] = 4.0 - - np.testing.assert_equal(expected, rec) - - def test_record_write_2d_array(self): - ''' - Test writing to a 2D array within a structured type - ''' - rec = self.samplerec2darr.copy() - nbrecord = numpy_support.from_dtype(recordwith2darray) - cfunc = self.get_cfunc(record_write_2d_array, (nbrecord,)) - cfunc(rec) - - expected = self.samplerec2darr.copy() - expected['i'] = 3 - expected['j'][:] = np.asarray([5.0, 6.0, 7.0, 8.0, 9.0, 10.0], - np.float32).reshape(3, 2) - np.testing.assert_equal(expected, rec) - - def test_record_read_1d_array(self): - ''' - Test reading from a 1D array within a structured type - ''' - rec = self.samplerec1darr.copy() - rec['h'][0] = 4.0 - rec['h'][1] = 5.0 - - nbrecord = numpy_support.from_dtype(recordwitharray) - cfunc = self.get_cfunc(record_read_array, (nbrecord,)) - arr = np.zeros(2, dtype=rec['h'].dtype) - cfunc(rec, arr) - - np.testing.assert_equal(rec['h'], arr) - - - def test_record_read_2d_array(self): - ''' - Test reading from a 2D array within a structured type - ''' - rec = self.samplerec2darr.copy() - rec['j'][:] = np.asarray([5.0, 6.0, 7.0, 8.0, 9.0, 10.0], - np.float32).reshape(3, 2) - - nbrecord = numpy_support.from_dtype(recordwith2darray) - cfunc = self.get_cfunc(record_read_2d_array, 
(nbrecord,)) - arr = np.zeros((3,2), dtype=rec['j'].dtype) - cfunc(rec, arr) - - np.testing.assert_equal(rec['j'], arr) - - -@skip_on_cudasim('Attribute access of structured arrays not supported in simulator') -class TestRecordDtypeWithStructArrays(TestRecordDtype): - ''' - Same as TestRecordDtype, but using structured arrays instead of recarrays. - ''' - - def _createSampleArrays(self): - self.sample1d = np.zeros(3, dtype=recordtype) - self.samplerec1darr = np.zeros(1, dtype=recordwitharray)[0] - self.samplerec2darr = np.zeros(1, dtype=recordwith2darray)[0] - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_reduction.py b/numba/numba/cuda/tests/cudapy/test_reduction.py deleted file mode 100644 index 48be41628..000000000 --- a/numba/numba/cuda/tests/cudapy/test_reduction.py +++ /dev/null @@ -1,77 +0,0 @@ -from __future__ import print_function -import numpy as np -from numba import cuda -from numba import unittest_support as unittest -from numba.config import ENABLE_CUDASIM -from numba.cuda.testing import SerialMixin - -# Avoid recompilation of the sum_reduce function by keeping it at global scope -sum_reduce = cuda.Reduce(lambda a, b: a + b) - - -class TestReduction(SerialMixin, unittest.TestCase): - def _sum_reduce(self, n): - A = (np.arange(n, dtype=np.float64) + 1) - expect = A.sum() - got = sum_reduce(A) - self.assertEqual(expect, got) - - def test_sum_reduce(self): - if ENABLE_CUDASIM: - # Minimal test set for the simulator (which only wraps - # functools.reduce) - test_sizes = [ 1, 16 ] - else: - # Tests around the points where blocksize changes, and around larger - # powers of two, sums of powers of two, and some "random" sizes - test_sizes = [ 1, 15, 16, 17, 127, 128, 129, 1023, 1024, - 1025, 1536, 1048576, 1049600, 1049728, 34567 ] - # Avoid recompilation by keeping sum_reduce here - for n in test_sizes: - self._sum_reduce(n) - - def test_empty_array_host(self): - A = (np.arange(0, dtype=np.float64) + 1) - 
expect = A.sum() - got = sum_reduce(A) - self.assertEqual(expect, got) - - def test_empty_array_device(self): - A = (np.arange(0, dtype=np.float64) + 1) - dA = cuda.to_device(A) - expect = A.sum() - got = sum_reduce(dA) - self.assertEqual(expect, got) - - def test_prod_reduce(self): - prod_reduce = cuda.reduce(lambda a, b: a * b) - A = (np.arange(64, dtype=np.float64) + 1) - expect = A.prod() - got = prod_reduce(A, init=1) - self.assertTrue(np.allclose(expect, got)) - - def test_max_reduce(self): - max_reduce = cuda.Reduce(lambda a, b: max(a, b)) - A = (np.arange(3717, dtype=np.float64) + 1) - expect = A.max() - got = max_reduce(A, init=0) - self.assertEqual(expect, got) - - def test_non_identity_init(self): - init = 3 - A = (np.arange(10, dtype=np.float64) + 1) - expect = A.sum() + init - got = sum_reduce(A, init=init) - self.assertEqual(expect, got) - - def test_result_on_device(self): - A = (np.arange(10, dtype=np.float64) + 1) - got = cuda.to_device(np.zeros(1, dtype=np.float64)) - expect = A.sum() - res = sum_reduce(A, res=got) - self.assertIsNone(res) - self.assertEqual(expect, got[0]) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py b/numba/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py deleted file mode 100644 index 81a98279a..000000000 --- a/numba/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +++ /dev/null @@ -1,84 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import numpy as np - -from numba import cuda -from numba import unittest_support as unittest -from numba.cuda.args import wrap_arg -from numba.cuda.testing import SerialMixin - - -class DefaultIn(object): - def prepare_args(self, ty, val, **kwargs): - return ty, wrap_arg(val, default=cuda.In) - - -def nocopy(kernel): - kernel.extensions.append(DefaultIn()) - return kernel - - -def set_array_to_three(arr): - arr[0] = 3 - - -def set_record_to_three(rec): - 
rec[0]['b'] = 3 - - -recordtype = np.dtype( - [('b', np.int32)], - align=True -) - - -class TestRetrieveAutoconvertedArrays(SerialMixin, unittest.TestCase): - def setUp(self): - self.set_array_to_three = cuda.jit(set_array_to_three) - self.set_array_to_three_nocopy = nocopy(cuda.jit(set_array_to_three)) - self.set_record_to_three = cuda.jit(set_record_to_three) - self.set_record_to_three_nocopy = nocopy(cuda.jit(set_record_to_three)) - - def test_array_inout(self): - host_arr = np.zeros(1, dtype=np.int64) - self.set_array_to_three(cuda.InOut(host_arr)) - self.assertEqual(3, host_arr[0]) - - def test_array_in(self): - host_arr = np.zeros(1, dtype=np.int64) - self.set_array_to_three(cuda.In(host_arr)) - self.assertEqual(0, host_arr[0]) - - def test_array_in_from_config(self): - host_arr = np.zeros(1, dtype=np.int64) - self.set_array_to_three_nocopy(host_arr) - self.assertEqual(0, host_arr[0]) - - def test_array_default(self): - host_arr = np.zeros(1, dtype=np.int64) - self.set_array_to_three(host_arr) - self.assertEqual(3, host_arr[0]) - - def test_record_in(self): - host_rec = np.zeros(1, dtype=recordtype) - self.set_record_to_three(cuda.In(host_rec)) - self.assertEqual(0, host_rec[0]['b']) - - def test_record_inout(self): - host_rec = np.zeros(1, dtype=recordtype) - self.set_record_to_three(cuda.InOut(host_rec)) - self.assertEqual(3, host_rec[0]['b']) - - def test_record_default(self): - host_rec = np.zeros(1, dtype=recordtype) - self.set_record_to_three(host_rec) - self.assertEqual(3, host_rec[0]['b']) - - def test_record_in_from_config(self): - host_rec = np.zeros(1, dtype=recordtype) - self.set_record_to_three_nocopy(host_rec) - self.assertEqual(0, host_rec[0]['b']) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_serialize.py b/numba/numba/cuda/tests/cudapy/test_serialize.py deleted file mode 100644 index c2289e327..000000000 --- a/numba/numba/cuda/tests/cudapy/test_serialize.py +++ /dev/null @@ -1,84 +0,0 @@ 
-from __future__ import print_function -import pickle -import numpy as np -from numba import cuda, vectorize, numpy_support, types -from numba import unittest_support as unittest -from numba.cuda.testing import skip_on_cudasim, SerialMixin - - -@skip_on_cudasim('pickling not supported in CUDASIM') -class TestPickle(SerialMixin, unittest.TestCase): - - def check_call(self, callee): - arr = np.array([100]) - expected = callee(arr) - - # serialize and rebuild - foo1 = pickle.loads(pickle.dumps(callee)) - del callee - # call rebuild function - got1 = foo1(arr) - np.testing.assert_equal(got1, expected) - del got1 - - # test serialization of previously serialized object - foo2 = pickle.loads(pickle.dumps(foo1)) - del foo1 - # call rebuild function - got2 = foo2(arr) - np.testing.assert_equal(got2, expected) - del got2 - - # test propagation of thread, block config - foo3 = pickle.loads(pickle.dumps(foo2[5, 8])) - del foo2 - self.assertEqual(foo3.griddim, (5, 1, 1)) - self.assertEqual(foo3.blockdim, (8, 1, 1)) - - def test_pickling_jit(self): - @cuda.jit(device=True) - def inner(a): - return a + 1 - - @cuda.jit('void(intp[:])') - def foo(arr): - arr[0] = inner(arr[0]) - - self.check_call(foo) - - def test_pickling_autojit(self): - - @cuda.jit(device=True) - def inner(a): - return a + 1 - - @cuda.jit - def foo(arr): - arr[0] = inner(arr[0]) - - self.check_call(foo) - - def test_pickling_vectorize(self): - @vectorize(['intp(intp)', 'float64(float64)'], target='cuda') - def cuda_vect(x): - return x * 2 - - # accommodate int representations in np.arange - npty = numpy_support.as_dtype(types.intp) - # get expected result - ary = np.arange(10, dtype=npty) - expected = cuda_vect(ary) - # first pickle - foo1 = pickle.loads(pickle.dumps(cuda_vect)) - del cuda_vect - got1 = foo1(ary) - np.testing.assert_equal(expected, got1) - # second pickle - foo2 = pickle.loads(pickle.dumps(foo1)) - del foo1 - got2 = foo2(ary) - np.testing.assert_equal(expected, got2) - - -if __name__ == 
'__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_slicing.py b/numba/numba/cuda/tests/cudapy/test_slicing.py deleted file mode 100644 index 6ea254da9..000000000 --- a/numba/numba/cuda/tests/cudapy/test_slicing.py +++ /dev/null @@ -1,30 +0,0 @@ -from __future__ import print_function, absolute_import -import numpy as np -from numba import cuda, float32, int32 -from numba.cuda.testing import unittest, SerialMixin - - -def foo(inp, out): - for i in range(out.shape[0]): - out[i] = inp[i] - - -def copy(inp, out): - i = cuda.grid(1) - cufoo(inp[i, :], out[i, :]) - - -class TestCudaSlicing(SerialMixin, unittest.TestCase): - def test_slice_as_arg(self): - global cufoo - cufoo = cuda.jit("void(int32[:], int32[:])", device=True)(foo) - cucopy = cuda.jit("void(int32[:,:], int32[:,:])")(copy) - - inp = np.arange(100, dtype=np.int32).reshape(10, 10) - out = np.zeros_like(inp) - - cucopy[1, 10](inp, out) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_sm.py b/numba/numba/cuda/tests/cudapy/test_sm.py deleted file mode 100644 index b9f1812be..000000000 --- a/numba/numba/cuda/tests/cudapy/test_sm.py +++ /dev/null @@ -1,73 +0,0 @@ -from numba import cuda, int32, float64 - -from numba.cuda.testing import unittest, SerialMixin - -import numpy as np - - -class TestSharedMemoryIssue(SerialMixin, unittest.TestCase): - def test_issue_953_sm_linkage_conflict(self): - @cuda.jit(device=True) - def inner(): - inner_arr = cuda.shared.array(1, dtype=int32) - - @cuda.jit - def outer(): - outer_arr = cuda.shared.array(1, dtype=int32) - inner() - - outer() - - def _check_shared_array_size(self, shape, expected): - @cuda.jit - def s(a): - arr = cuda.shared.array(shape, dtype=int32) - a[0] = arr.size - - result = np.zeros(1, dtype=np.int32) - s(result) - self.assertEqual(result[0], expected) - - def test_issue_1051_shared_size_broken_1d(self): - self._check_shared_array_size(2, 2) - - def 
test_issue_1051_shared_size_broken_2d(self): - self._check_shared_array_size((2, 3), 6) - - def test_issue_1051_shared_size_broken_3d(self): - self._check_shared_array_size((2, 3, 4), 24) - - def test_issue_2393(self): - """ - Test issue of warp misalign address due to nvvm not knowing the - alignment(? but it should have taken the natural alignment of the type) - """ - num_weights = 2 - num_blocks = 48 - examples_per_block = 4 - threads_per_block = 1 - - @cuda.jit - def costs_func(d_block_costs): - s_features = cuda.shared.array((examples_per_block, num_weights), - float64) - s_initialcost = cuda.shared.array(7, float64) # Bug - - threadIdx = cuda.threadIdx.x - - prediction = 0 - for j in range(num_weights): - prediction += s_features[threadIdx, j] - - d_block_costs[0] = s_initialcost[0] + prediction - - block_costs = np.zeros(num_blocks, dtype=np.float64) - d_block_costs = cuda.to_device(block_costs) - - costs_func[num_blocks, threads_per_block](d_block_costs) - - cuda.synchronize() - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_smart_array.py b/numba/numba/cuda/tests/cudapy/test_smart_array.py deleted file mode 100644 index 035e3f991..000000000 --- a/numba/numba/cuda/tests/cudapy/test_smart_array.py +++ /dev/null @@ -1,33 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import sys - -import numpy as np - -from numba import unittest_support as unittest -from numba import types -from numba.extending import typeof_impl -from numba.cuda.kernels.transpose import transpose -from numba.tracing import event -from numba import SmartArray -from numba.cuda.testing import skip_on_cudasim, SerialMixin - -@skip_on_cudasim('Simulator does not support Device arrays') -class TestJIT(SerialMixin, unittest.TestCase): - """Test handling of numba.SmartArray""" - - def test_transpose(self): - - # To verify non-redundant data movement run this test with NUMBA_TRACE=1 - a = SmartArray(np.arange(16, 
dtype=float).reshape(4,4)) - b = SmartArray(where='gpu', shape=(4,4), dtype=float) - c = SmartArray(where='gpu', shape=(4,4), dtype=float) - event("initialization done") - transpose(a, b) - event("checkpoint") - transpose(b, c) - event("done") - self.assertTrue((c.get('host') == a.get('host')).all()) - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_sync.py b/numba/numba/cuda/tests/cudapy/test_sync.py deleted file mode 100644 index 81d1aac90..000000000 --- a/numba/numba/cuda/tests/cudapy/test_sync.py +++ /dev/null @@ -1,169 +0,0 @@ -from __future__ import print_function, absolute_import -import numpy as np -from numba import cuda, int32, float32 -from numba.cuda.testing import unittest, SerialMixin -from numba.config import ENABLE_CUDASIM - - -def useless_sync(ary): - i = cuda.grid(1) - cuda.syncthreads() - ary[i] = i - - -def simple_smem(ary): - N = 100 - sm = cuda.shared.array(N, int32) - i = cuda.grid(1) - if i == 0: - for j in range(N): - sm[j] = j - cuda.syncthreads() - ary[i] = sm[i] - - -def coop_smem2d(ary): - i, j = cuda.grid(2) - sm = cuda.shared.array((10, 20), float32) - sm[i, j] = (i + 1) / (j + 1) - cuda.syncthreads() - ary[i, j] = sm[i, j] - - -def dyn_shared_memory(ary): - i = cuda.grid(1) - sm = cuda.shared.array(0, float32) - sm[i] = i * 2 - cuda.syncthreads() - ary[i] = sm[i] - - -def use_threadfence(ary): - ary[0] += 123 - cuda.threadfence() - ary[0] += 321 - - -def use_threadfence_block(ary): - ary[0] += 123 - cuda.threadfence_block() - ary[0] += 321 - - -def use_threadfence_system(ary): - ary[0] += 123 - cuda.threadfence_system() - ary[0] += 321 - - -def use_syncthreads_count(ary_in, ary_out): - i = cuda.grid(1) - ary_out[i] = cuda.syncthreads_count(ary_in[i]) - - -def use_syncthreads_and(ary_in, ary_out): - i = cuda.grid(1) - ary_out[i] = cuda.syncthreads_and(ary_in[i]) - - -def use_syncthreads_or(ary_in, ary_out): - i = cuda.grid(1) - ary_out[i] = cuda.syncthreads_or(ary_in[i]) - - - -class 
TestCudaSync(SerialMixin, unittest.TestCase): - def test_useless_sync(self): - compiled = cuda.jit("void(int32[::1])")(useless_sync) - nelem = 10 - ary = np.empty(nelem, dtype=np.int32) - exp = np.arange(nelem, dtype=np.int32) - compiled[1, nelem](ary) - self.assertTrue(np.all(ary == exp)) - - def test_simple_smem(self): - compiled = cuda.jit("void(int32[::1])")(simple_smem) - nelem = 100 - ary = np.empty(nelem, dtype=np.int32) - compiled[1, nelem](ary) - self.assertTrue(np.all(ary == np.arange(nelem, dtype=np.int32))) - - def test_coop_smem2d(self): - compiled = cuda.jit("void(float32[:,::1])")(coop_smem2d) - shape = 10, 20 - ary = np.empty(shape, dtype=np.float32) - compiled[1, shape](ary) - exp = np.empty_like(ary) - for i in range(ary.shape[0]): - for j in range(ary.shape[1]): - exp[i, j] = (i + 1) / (j + 1) - self.assertTrue(np.allclose(ary, exp)) - - def test_dyn_shared_memory(self): - compiled = cuda.jit("void(float32[::1])")(dyn_shared_memory) - shape = 50 - ary = np.empty(shape, dtype=np.float32) - compiled[1, shape, 0, ary.size * 4](ary) - self.assertTrue(np.all(ary == 2 * np.arange(ary.size, dtype=np.int32))) - - def test_threadfence_codegen(self): - # Does not test runtime behavior, just the code generation. - compiled = cuda.jit("void(int32[:])")(use_threadfence) - ary = np.zeros(10, dtype=np.int32) - compiled[1, 1](ary) - self.assertEqual(123 + 321, ary[0]) - if not ENABLE_CUDASIM: - self.assertIn("membar.gl;", compiled.ptx) - - def test_threadfence_block_codegen(self): - # Does not test runtime behavior, just the code generation. - compiled = cuda.jit("void(int32[:])")(use_threadfence_block) - ary = np.zeros(10, dtype=np.int32) - compiled[1, 1](ary) - self.assertEqual(123 + 321, ary[0]) - if not ENABLE_CUDASIM: - self.assertIn("membar.cta;", compiled.ptx) - - def test_threadfence_system_codegen(self): - # Does not test runtime behavior, just the code generation. 
- compiled = cuda.jit("void(int32[:])")(use_threadfence_system) - ary = np.zeros(10, dtype=np.int32) - compiled[1, 1](ary) - self.assertEqual(123 + 321, ary[0]) - if not ENABLE_CUDASIM: - self.assertIn("membar.sys;", compiled.ptx) - - def test_syncthreads_count(self): - compiled = cuda.jit("void(int32[:], int32[:])")(use_syncthreads_count) - ary_in = np.ones(72, dtype=np.int32) - ary_out = np.zeros(72, dtype=np.int32) - ary_in[31] = 0 - ary_in[42] = 0 - compiled[1, 72](ary_in, ary_out) - self.assertTrue(np.all(ary_out == 70)) - - def test_syncthreads_and(self): - compiled = cuda.jit("void(int32[:], int32[:])")(use_syncthreads_and) - nelem = 100 - ary_in = np.ones(nelem, dtype=np.int32) - ary_out = np.zeros(nelem, dtype=np.int32) - compiled[1, nelem](ary_in, ary_out) - self.assertTrue(np.all(ary_out == 1)) - ary_in[31] = 0 - compiled[1, nelem](ary_in, ary_out) - self.assertTrue(np.all(ary_out == 0)) - - def test_syncthreads_or(self): - compiled = cuda.jit("void(int32[:], int32[:])")(use_syncthreads_or) - nelem = 100 - ary_in = np.zeros(nelem, dtype=np.int32) - ary_out = np.zeros(nelem, dtype=np.int32) - compiled[1, nelem](ary_in, ary_out) - self.assertTrue(np.all(ary_out == 0)) - ary_in[31] = 1 - compiled[1, nelem](ary_in, ary_out) - self.assertTrue(np.all(ary_out == 1)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_transpose.py b/numba/numba/cuda/tests/cudapy/test_transpose.py deleted file mode 100644 index b73ca5856..000000000 --- a/numba/numba/cuda/tests/cudapy/test_transpose.py +++ /dev/null @@ -1,29 +0,0 @@ -import numpy as np -from numba import cuda -from numba.cuda.kernels.transpose import transpose -from numba.cuda.testing import unittest -from numba.testing.ddt import ddt, data, unpack -from numba.cuda.testing import skip_on_cudasim, SerialMixin - - -@skip_on_cudasim('Device Array API unsupported in the simulator') -@ddt -class Test(SerialMixin, unittest.TestCase): - - @data((5, 6, np.float64), - (128, 
128, np.complex128), - (1025, 512, np.float64)) - @unpack - def test_transpose(self, rows, cols, dtype): - - x = np.arange(rows * cols, dtype=dtype).reshape(cols, rows) - y = np.zeros(rows * cols, dtype=dtype).reshape(rows, cols) - dx = cuda.to_device(x) - dy = cuda.cudadrv.devicearray.from_array_like(y) - transpose(dx, dy) - dy.copy_to_host(y) - self.assertTrue(np.all(x.transpose() == y)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_userexc.py b/numba/numba/cuda/tests/cudapy/test_userexc.py deleted file mode 100644 index 1c3e81bc0..000000000 --- a/numba/numba/cuda/tests/cudapy/test_userexc.py +++ /dev/null @@ -1,31 +0,0 @@ -from __future__ import print_function, absolute_import, division - -from numba.cuda.testing import unittest, SerialMixin -from numba import cuda - - -class MyError(Exception): - pass - - -class TestUserExc(SerialMixin, unittest.TestCase): - def test_user_exception(self): - @cuda.jit("void(int32)", debug=True) - def test_exc(x): - if x == 1: - raise MyError - elif x == 2: - raise MyError("foo") - - test_exc(0) # no raise - with self.assertRaises(MyError) as cm: - test_exc(1) - self.assertEqual("tid=[0, 0, 0] ctaid=[0, 0, 0]", str(cm.exception)) - with self.assertRaises(MyError) as cm: - test_exc(2) - self.assertEqual("tid=[0, 0, 0] ctaid=[0, 0, 0]: foo", str(cm.exception)) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/cuda/tests/cudapy/test_vectorize.py b/numba/numba/cuda/tests/cudapy/test_vectorize.py deleted file mode 100644 index d3ca8c13f..000000000 --- a/numba/numba/cuda/tests/cudapy/test_vectorize.py +++ /dev/null @@ -1,193 +0,0 @@ -from __future__ import print_function, absolute_import - -import numpy as np - -from numba import vectorize -from numba import cuda, int32, float32, float64 -from numba import unittest_support as unittest -from numba.cuda.testing import skip_on_cudasim -from numba.cuda.testing import CUDATestCase -from numba import config - -sig 
= [int32(int32, int32), - float32(float32, float32), - float64(float64, float64)] - - -target='cuda' -if config.ENABLE_CUDASIM: - target='cpu' - - -test_dtypes = np.float32, np.int32 - - -@skip_on_cudasim('ufunc API unsupported in the simulator') -class TestCUDAVectorize(CUDATestCase): - N = 1000001 - - def test_scalar(self): - - @vectorize(sig, target=target) - def vector_add(a, b): - return a + b - - a = 1.2 - b = 2.3 - c = vector_add(a, b) - self.assertEqual(c, a + b) - - def test_1d(self): - - @vectorize(sig, target=target) - def vector_add(a, b): - return a + b - - cuda_ufunc = vector_add - - # build python ufunc - np_ufunc = np.add - - # test it out - def test(ty): - data = np.array(np.random.random(self.N), dtype=ty) - - result = cuda_ufunc(data, data) - gold = np_ufunc(data, data) - self.assertTrue(np.allclose(gold, result), (gold, result)) - - test(np.double) - test(np.float32) - test(np.int32) - - def test_1d_async(self): - - @vectorize(sig, target=target) - def vector_add(a, b): - return a + b - - cuda_ufunc = vector_add - - # build python ufunc - np_ufunc = np.add - - # test it out - def test(ty): - data = np.array(np.random.random(self.N), dtype=ty) - - stream = cuda.stream() - device_data = cuda.to_device(data, stream) - dresult = cuda_ufunc(device_data, device_data, stream=stream) - result = dresult.copy_to_host() - stream.synchronize() - - gold = np_ufunc(data, data) - - self.assertTrue(np.allclose(gold, result), (gold, result)) - - test(np.double) - test(np.float32) - test(np.int32) - - def test_nd(self): - - @vectorize(sig, target=target) - def vector_add(a, b): - return a + b - - cuda_ufunc = vector_add - - def test(dtype, order, nd, size=4): - data = np.random.random((size,) * nd).astype(dtype) - data[data != data] = 2.4 - data[data == float('inf')] = 3.8 - data[data == float('-inf')] = -3.8 - data2 = np.array(data.T, order=order) # .copy(order=order) - - result = data + data2 - our_result = cuda_ufunc(data, data2) - 
self.assertTrue(np.allclose(result, our_result), - (dtype, order, result, our_result)) - - for nd in range(1, 8): - for dtype in test_dtypes: - for order in ('C', 'F'): - test(dtype, order, nd) - - def test_ufunc_attrib(self): - self.reduce_test(8) - self.reduce_test(100) - self.reduce_test(2 ** 10 + 1) - self.reduce_test2(8) - self.reduce_test2(100) - self.reduce_test2(2 ** 10 + 1) - - def test_output_arg(self): - @vectorize(sig, target=target) - def vector_add(a, b): - return a + b - - A = np.arange(10, dtype=np.float32) - B = np.arange(10, dtype=np.float32) - C = np.empty_like(A) - vector_add(A, B, out=C) - self.assertTrue(np.allclose(A + B, C)) - - def reduce_test(self, n): - @vectorize(sig, target=target) - def vector_add(a, b): - return a + b - - cuda_ufunc = vector_add - x = np.arange(n, dtype=np.int32) - gold = np.add.reduce(x) - result = cuda_ufunc.reduce(x) - self.assertEqual(result, gold) - - def reduce_test2(self, n): - - @vectorize(sig, target=target) - def vector_add(a, b): - return a + b - - cuda_ufunc = vector_add - - x = np.arange(n, dtype=np.int32) - gold = np.add.reduce(x) - stream = cuda.stream() - dx = cuda.to_device(x, stream) - result = cuda_ufunc.reduce(dx, stream=stream) - self.assertEqual(result, gold) - - def test_auto_transfer(self): - @vectorize(sig, target=target) - def vector_add(a, b): - return a + b - - cuda_ufunc = vector_add - - n = 10 - x = np.arange(n, dtype=np.int32) - dx = cuda.to_device(x) - y = cuda_ufunc(x, dx).copy_to_host() - np.testing.assert_equal(y, x + x) - - def test_ufunc_output_ravel(self): - @vectorize(sig, target=target) - def vector_add(a, b): - return a + b - - cuda_ufunc = vector_add - - n = 10 - x = np.arange(n, dtype=np.int32).reshape(2, 5) - dx = cuda.to_device(x) - cuda_ufunc(dx, dx, out=dx) - - got = dx.copy_to_host() - expect = x + x - np.testing.assert_equal(got, expect) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_vectorize_complex.py 
b/numba/numba/cuda/tests/cudapy/test_vectorize_complex.py deleted file mode 100644 index 607167b7d..000000000 --- a/numba/numba/cuda/tests/cudapy/test_vectorize_complex.py +++ /dev/null @@ -1,21 +0,0 @@ -from __future__ import absolute_import, print_function, division -import numpy as np -from numba import vectorize -from numba import unittest_support as unittest -from numba.cuda.testing import skip_on_cudasim, SerialMixin - - -@skip_on_cudasim('ufunc API unsupported in the simulator') -class TestVectorizeComplex(SerialMixin, unittest.TestCase): - def test_vectorize_complex(self): - @vectorize(['complex128(complex128)'], target='cuda') - def vcomp(a): - return a * a + 1. - - A = np.arange(5, dtype=np.complex128) - B = vcomp(A) - self.assertTrue(np.allclose(A * A + 1., B)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_vectorize_decor.py b/numba/numba/cuda/tests/cudapy/test_vectorize_decor.py deleted file mode 100644 index 3be50bf16..000000000 --- a/numba/numba/cuda/tests/cudapy/test_vectorize_decor.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import absolute_import, print_function, division - -import numpy as np - -from numba import unittest_support as unittest -from numba import vectorize, cuda -from numba.tests.npyufunc import test_vectorize_decor -from numba.cuda.testing import skip_on_cudasim, SerialMixin - - -@skip_on_cudasim('ufunc API unsupported in the simulator') -class TestVectorizeDecor(SerialMixin, test_vectorize_decor.BaseVectorizeDecor): - def test_gpu_1(self): - self._test_template_1('cuda') - - def test_gpu_2(self): - self._test_template_2('cuda') - - def test_gpu_3(self): - self._test_template_3('cuda') - - -@skip_on_cudasim('ufunc API unsupported in the simulator') -class TestGPUVectorizeBroadcast(SerialMixin, unittest.TestCase): - def test_broadcast_bug_90(self): - """ - https://github.com/ContinuumIO/numbapro/issues/90 - """ - - a = np.random.randn(100, 3, 1) - b = a.transpose(2, 1, 0) - - 
def fn(a, b): - return a - b - - @vectorize(['float64(float64,float64)'], target='cuda') - def fngpu(a, b): - return a - b - - expect = fn(a, b) - got = fngpu(a, b) - np.testing.assert_almost_equal(expect, got) - - def test_device_broadcast(self): - """ - Same test as .test_broadcast_bug_90() but with device array as inputs - """ - - a = np.random.randn(100, 3, 1) - b = a.transpose(2, 1, 0) - - def fn(a, b): - return a - b - - @vectorize(['float64(float64,float64)'], target='cuda') - def fngpu(a, b): - return a - b - - expect = fn(a, b) - got = fngpu(cuda.to_device(a), cuda.to_device(b)) - np.testing.assert_almost_equal(expect, got.copy_to_host()) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_vectorize_device.py b/numba/numba/cuda/tests/cudapy/test_vectorize_device.py deleted file mode 100644 index 1619bef1f..000000000 --- a/numba/numba/cuda/tests/cudapy/test_vectorize_device.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import absolute_import, print_function, division -from numba import vectorize -from numba import cuda, float32 -import numpy as np -from numba import unittest_support as unittest -from numba.cuda.testing import skip_on_cudasim, SerialMixin - - -@skip_on_cudasim('ufunc API unsupported in the simulator') -class TestCudaVectorizeDeviceCall(SerialMixin, unittest.TestCase): - def test_cuda_vectorize_device_call(self): - - @cuda.jit(float32(float32, float32, float32), device=True) - def cu_device_fn(x, y, z): - return x ** y / z - - def cu_ufunc(x, y, z): - return cu_device_fn(x, y, z) - - ufunc = vectorize([float32(float32, float32, float32)], target='cuda')( - cu_ufunc) - - N = 100 - - X = np.array(np.random.sample(N), dtype=np.float32) - Y = np.array(np.random.sample(N), dtype=np.float32) - Z = np.array(np.random.sample(N), dtype=np.float32) + 0.1 - - out = ufunc(X, Y, Z) - - gold = (X ** Y) / Z - - self.assertTrue(np.allclose(out, gold)) - - -if __name__ == '__main__': - unittest.main() diff 
--git a/numba/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py b/numba/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py deleted file mode 100644 index 78c81a5d6..000000000 --- a/numba/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import absolute_import, print_function, division -import numpy as np -from numba import vectorize -from numba import cuda, float64 -from numba import unittest_support as unittest -from numba.cuda.testing import skip_on_cudasim, SerialMixin -from numba import config - -sig = [float64(float64, float64)] - - -target='cuda' -if config.ENABLE_CUDASIM: - target='cpu' - - -@skip_on_cudasim('ufunc API unsupported in the simulator') -class TestCUDAVectorizeScalarArg(SerialMixin, unittest.TestCase): - - def test_vectorize_scalar_arg(self): - @vectorize(sig, target=target) - def vector_add(a, b): - return a + b - - A = np.arange(10, dtype=np.float64) - dA = cuda.to_device(A) - v = vector_add(1.0, dA) - - np.testing.assert_array_almost_equal( - v.copy_to_host(), - np.arange(1, 11, dtype=np.float64)) - - def test_vectorize_all_scalars(self): - @vectorize(sig, target=target) - def vector_add(a, b): - return a + b - - v = vector_add(1.0, 1.0) - - np.testing.assert_almost_equal(2.0, v) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudapy/test_warp_ops.py b/numba/numba/cuda/tests/cudapy/test_warp_ops.py deleted file mode 100644 index 3d4dfb8e1..000000000 --- a/numba/numba/cuda/tests/cudapy/test_warp_ops.py +++ /dev/null @@ -1,245 +0,0 @@ -from __future__ import print_function, absolute_import -import numpy as np -from numba import cuda, config, int32, int64, float32, float64 -from numba.cuda.testing import unittest, SerialMixin, skip_on_cudasim - - -def useful_syncwarp(ary): - i = cuda.grid(1) - if i == 0: - ary[0] = 42 - cuda.syncwarp(0xffffffff) - ary[i] = ary[0] - - -def use_shfl_sync_idx(ary, idx): - i = cuda.grid(1) - val = cuda.shfl_sync(0xffffffff, 
i, idx) - ary[i] = val - - -def use_shfl_sync_up(ary, delta): - i = cuda.grid(1) - val = cuda.shfl_up_sync(0xffffffff, i, delta) - ary[i] = val - - -def use_shfl_sync_down(ary, delta): - i = cuda.grid(1) - val = cuda.shfl_down_sync(0xffffffff, i, delta) - ary[i] = val - - -def use_shfl_sync_xor(ary, xor): - i = cuda.grid(1) - val = cuda.shfl_xor_sync(0xffffffff, i, xor) - ary[i] = val - - -def use_shfl_sync_with_val(ary, into): - i = cuda.grid(1) - val = cuda.shfl_sync(0xffffffff, into, 0) - ary[i] = val - - -def use_vote_sync_all(ary_in, ary_out): - i = cuda.grid(1) - pred = cuda.all_sync(0xffffffff, ary_in[i]) - ary_out[i] = pred - - -def use_vote_sync_any(ary_in, ary_out): - i = cuda.grid(1) - pred = cuda.any_sync(0xffffffff, ary_in[i]) - ary_out[i] = pred - - -def use_vote_sync_eq(ary_in, ary_out): - i = cuda.grid(1) - pred = cuda.eq_sync(0xffffffff, ary_in[i]) - ary_out[i] = pred - - -def use_vote_sync_ballot(ary): - i = cuda.threadIdx.x - ballot = cuda.ballot_sync(0xffffffff, True) - ary[i] = ballot - - -def use_match_any_sync(ary_in, ary_out): - i = cuda.grid(1) - ballot = cuda.match_any_sync(0xffffffff, ary_in[i]) - ary_out[i] = ballot - - -def use_match_all_sync(ary_in, ary_out): - i = cuda.grid(1) - ballot, pred = cuda.match_all_sync(0xffffffff, ary_in[i]) - ary_out[i] = ballot if pred else 0 - - -def use_independent_scheduling(arr): - i = cuda.threadIdx.x - if i % 4 == 0: - ballot = cuda.ballot_sync(0x11111111, True) - elif i % 4 == 1: - ballot = cuda.ballot_sync(0x22222222, True) - elif i % 4 == 2: - ballot = cuda.ballot_sync(0x44444444, True) - elif i % 4 == 3: - ballot = cuda.ballot_sync(0x88888888, True) - arr[i] = ballot - - -def _safe_skip(): - if config.ENABLE_CUDASIM: - return False - else: - return cuda.cudadrv.nvvm.NVVM_VERSION >= (1, 4) - - -def _safe_cc_check(cc): - if config.ENABLE_CUDASIM: - return True - else: - return cuda.get_current_device().compute_capability >= cc - - -@unittest.skipUnless(_safe_skip(), - "Warp Operations require at 
least CUDA 9" - "and are not yet implemented for the CudaSim") -class TestCudaWarpOperations(SerialMixin, unittest.TestCase): - def test_useful_syncwarp(self): - compiled = cuda.jit("void(int32[:])")(useful_syncwarp) - nelem = 32 - ary = np.empty(nelem, dtype=np.int32) - compiled[1, nelem](ary) - self.assertTrue(np.all(ary == 42)) - - def test_shfl_sync_idx(self): - compiled = cuda.jit("void(int32[:], int32)")(use_shfl_sync_idx) - nelem = 32 - idx = 4 - ary = np.empty(nelem, dtype=np.int32) - compiled[1, nelem](ary, idx) - self.assertTrue(np.all(ary == idx)) - - def test_shfl_sync_up(self): - compiled = cuda.jit("void(int32[:], int32)")(use_shfl_sync_up) - nelem = 32 - delta = 4 - ary = np.empty(nelem, dtype=np.int32) - exp = np.arange(nelem, dtype=np.int32) - exp[delta:] -= delta - compiled[1, nelem](ary, delta) - self.assertTrue(np.all(ary == exp)) - - def test_shfl_sync_down(self): - compiled = cuda.jit("void(int32[:], int32)")(use_shfl_sync_down) - nelem = 32 - delta = 4 - ary = np.empty(nelem, dtype=np.int32) - exp = np.arange(nelem, dtype=np.int32) - exp[:-delta] += delta - compiled[1, nelem](ary, delta) - self.assertTrue(np.all(ary == exp)) - - def test_shfl_sync_xor(self): - compiled = cuda.jit("void(int32[:], int32)")(use_shfl_sync_xor) - nelem = 32 - xor = 16 - ary = np.empty(nelem, dtype=np.int32) - exp = np.arange(nelem, dtype=np.int32) ^ xor - compiled[1, nelem](ary, xor) - self.assertTrue(np.all(ary == exp)) - - def test_shfl_sync_types(self): - types = int32, int64, float32, float64 - values = np.int32(-1), np.int64(1 << 42), np.float32(np.pi), np.float64(np.pi) - for typ, val in zip(types, values): - compiled = cuda.jit((typ[:], typ))(use_shfl_sync_with_val) - nelem = 32 - ary = np.empty(nelem, dtype=val.dtype) - compiled[1, nelem](ary, val) - self.assertTrue(np.all(ary == val)) - - def test_vote_sync_all(self): - compiled = cuda.jit("void(int32[:], int32[:])")(use_vote_sync_all) - nelem = 32 - ary_in = np.ones(nelem, dtype=np.int32) - ary_out = 
np.empty(nelem, dtype=np.int32) - compiled[1, nelem](ary_in, ary_out) - self.assertTrue(np.all(ary_out == 1)) - ary_in[-1] = 0 - compiled[1, nelem](ary_in, ary_out) - self.assertTrue(np.all(ary_out == 0)) - - def test_vote_sync_any(self): - compiled = cuda.jit("void(int32[:], int32[:])")(use_vote_sync_any) - nelem = 32 - ary_in = np.zeros(nelem, dtype=np.int32) - ary_out = np.empty(nelem, dtype=np.int32) - compiled[1, nelem](ary_in, ary_out) - self.assertTrue(np.all(ary_out == 0)) - ary_in[2] = 1 - ary_in[5] = 1 - compiled[1, nelem](ary_in, ary_out) - self.assertTrue(np.all(ary_out == 1)) - - def test_vote_sync_eq(self): - compiled = cuda.jit("void(int32[:], int32[:])")(use_vote_sync_eq) - nelem = 32 - ary_in = np.zeros(nelem, dtype=np.int32) - ary_out = np.empty(nelem, dtype=np.int32) - compiled[1, nelem](ary_in, ary_out) - self.assertTrue(np.all(ary_out == 1)) - ary_in[1] = 1 - compiled[1, nelem](ary_in, ary_out) - self.assertTrue(np.all(ary_out == 0)) - ary_in[:] = 1 - compiled[1, nelem](ary_in, ary_out) - self.assertTrue(np.all(ary_out == 1)) - - def test_vote_sync_ballot(self): - compiled = cuda.jit("void(uint32[:])")(use_vote_sync_ballot) - nelem = 32 - ary = np.empty(nelem, dtype=np.uint32) - compiled[1, nelem](ary) - self.assertTrue(np.all(ary == np.uint32(0xffffffff))) - - @unittest.skipUnless(_safe_cc_check((7, 0)), - "Matching requires at least Volta Architecture") - def test_match_any_sync(self): - compiled = cuda.jit("void(int32[:], int32[:])")(use_match_any_sync) - nelem = 10 - ary_in = np.arange(nelem, dtype=np.int32) % 2 - ary_out = np.empty(nelem, dtype=np.int32) - exp = np.tile((0b1010101010, 0b0101010101), 5) - compiled[1, nelem](ary_in, ary_out) - self.assertTrue(np.all(ary_out == exp)) - - @unittest.skipUnless(_safe_cc_check((7, 0)), - "Matching requires at least Volta Architecture") - def test_match_all_sync(self): - compiled = cuda.jit("void(int32[:], int32[:])")(use_match_all_sync) - nelem = 10 - ary_in = np.zeros(nelem, dtype=np.int32) - 
ary_out = np.empty(nelem, dtype=np.int32) - compiled[1, nelem](ary_in, ary_out) - self.assertTrue(np.all(ary_out == 0b1111111111)) - ary_in[1] = 4 - compiled[1, nelem](ary_in, ary_out) - self.assertTrue(np.all(ary_out == 0)) - - @unittest.skipUnless(_safe_cc_check((7, 0)), - "Independent scheduling requires at least Volta Architecture") - def test_independent_scheduling(self): - compiled = cuda.jit("void(int32[:])")(use_independent_scheduling) - arr = np.empty(32, dtype=np.int32) - exp = np.tile((0x11111111, 0x22222222, 0x44444444, 0x88888888), 8) - compiled[1, 32](arr) - self.assertTrue(np.all(ary_out == exp)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/cudasim/__init__.py b/numba/numba/cuda/tests/cudasim/__init__.py deleted file mode 100644 index 0465337eb..000000000 --- a/numba/numba/cuda/tests/cudasim/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from numba.testing import load_testsuite -import os - - -def load_tests(loader, tests, pattern): - return load_testsuite(loader, os.path.dirname(__file__)) diff --git a/numba/numba/cuda/tests/cudasim/support.py b/numba/numba/cuda/tests/cudasim/support.py deleted file mode 100644 index 4fca39cad..000000000 --- a/numba/numba/cuda/tests/cudasim/support.py +++ /dev/null @@ -1,6 +0,0 @@ -from numba import cuda - - -@cuda.jit(device=True) -def cuda_module_in_device_function(): - return cuda.threadIdx.x diff --git a/numba/numba/cuda/tests/cudasim/test_cudasim_issues.py b/numba/numba/cuda/tests/cudasim/test_cudasim_issues.py deleted file mode 100644 index a20b94c27..000000000 --- a/numba/numba/cuda/tests/cudasim/test_cudasim_issues.py +++ /dev/null @@ -1,71 +0,0 @@ -from __future__ import absolute_import, print_function, division - -import threading - -import numpy as np - -from numba import unittest_support as unittest -from numba import cuda -from numba.cuda.testing import SerialMixin, skip_unless_cudasim -import numba.cuda.simulator as simulator - - -class 
TestCudaSimIssues(SerialMixin, unittest.TestCase): - - def test_cuda_module_in_device_function(self): - """ - Discovered in https://github.com/numba/numba/issues/1837. - When the `cuda` module is referenced in a device function, - it does not have the kernel API (e.g. cuda.threadIdx, cuda.shared) - """ - from .support import cuda_module_in_device_function as inner - - @cuda.jit - def outer(out): - tid = inner() - if tid < out.size: - out[tid] = tid - - arr = np.zeros(10, dtype=np.int32) - outer[1, 11](arr) - expected = np.arange(arr.size, dtype=np.int32) - np.testing.assert_equal(expected, arr) - - @skip_unless_cudasim('Only works on CUDASIM') - def test_deadlock_on_exception(self): - def assert_no_blockthreads(): - blockthreads = [] - for t in threading.enumerate(): - if not isinstance(t, simulator.kernel.BlockThread): - continue - - # join blockthreads with a short timeout to allow aborted threads - # to exit - t.join(1) - if t.is_alive(): - self.fail("Blocked kernel thread: %s" % t) - - self.assertListEqual(blockthreads, []) - - @simulator.jit - def assign_with_sync(x, y): - i = cuda.grid(1) - y[i] = x[i] - - cuda.syncthreads() - cuda.syncthreads() - - x = np.arange(3) - y = np.empty(3) - assign_with_sync[1, 3](x, y) - np.testing.assert_array_equal(x, y) - assert_no_blockthreads() - - - with self.assertRaises(IndexError): - assign_with_sync[1, 6](x, y) - assert_no_blockthreads() - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/tests/nocuda/__init__.py b/numba/numba/cuda/tests/nocuda/__init__.py deleted file mode 100644 index 0465337eb..000000000 --- a/numba/numba/cuda/tests/nocuda/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from numba.testing import load_testsuite -import os - - -def load_tests(loader, tests, pattern): - return load_testsuite(loader, os.path.dirname(__file__)) diff --git a/numba/numba/cuda/tests/nocuda/test_nvvm.py b/numba/numba/cuda/tests/nocuda/test_nvvm.py deleted file mode 100644 index 96398671e..000000000 --- 
a/numba/numba/cuda/tests/nocuda/test_nvvm.py +++ /dev/null @@ -1,30 +0,0 @@ -from __future__ import absolute_import, print_function, division - -from numba.cuda.compiler import compile_kernel -from numba.cuda.cudadrv import nvvm -from numba.cuda.testing import skip_on_cudasim, SerialMixin -from numba import unittest_support as unittest -from numba import types, utils - - -@skip_on_cudasim('libNVVM not supported in simulator') -@unittest.skipIf(utils.MACHINE_BITS == 32, "CUDA not support for 32-bit") -@unittest.skipIf(not nvvm.is_available(), "No libNVVM") -class TestNvvmWithoutCuda(SerialMixin, unittest.TestCase): - def test_nvvm_llvm_to_ptx(self): - """ - A simple test to exercise nvvm.llvm_to_ptx() - to trigger issues with mismatch NVVM API. - """ - - def foo(x): - x[0] = 123 - - cukern = compile_kernel(foo, args=(types.int32[::1],), link=()) - llvmir = cukern._func.ptx.llvmir - ptx = nvvm.llvm_to_ptx(llvmir) - self.assertIn("foo", ptx.decode('ascii')) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/cuda/vectorizers.py b/numba/numba/cuda/vectorizers.py deleted file mode 100644 index d23b3469c..000000000 --- a/numba/numba/cuda/vectorizers.py +++ /dev/null @@ -1,66 +0,0 @@ -from __future__ import print_function, absolute_import - -from numba import cuda -from numba.npyufunc import deviceufunc -from . 
import dispatcher - -vectorizer_stager_source = ''' -def __vectorized_{name}({args}, __out__): - __tid__ = __cuda__.grid(1) - if __tid__ < __out__.shape[0]: - __out__[__tid__] = __core__({argitems}) -''' - - -class CUDAVectorize(deviceufunc.DeviceVectorize): - def _compile_core(self, sig): - cudevfn = cuda.jit(sig, device=True, inline=True)(self.pyfunc) - return cudevfn, cudevfn.cres.signature.return_type - - def _get_globals(self, corefn): - glbl = self.pyfunc.__globals__.copy() - glbl.update({'__cuda__': cuda, - '__core__': corefn}) - return glbl - - def _compile_kernel(self, fnobj, sig): - return cuda.jit(fnobj) - - def build_ufunc(self): - return dispatcher.CUDAUFuncDispatcher(self.kernelmap) - - @property - def _kernel_template(self): - return vectorizer_stager_source - - -# ------------------------------------------------------------------------------ -# Generalized CUDA ufuncs - -_gufunc_stager_source = ''' -def __gufunc_{name}({args}): - __tid__ = __cuda__.grid(1) - if __tid__ < {checkedarg}: - __core__({argitems}) -''' - - -class CUDAGUFuncVectorize(deviceufunc.DeviceGUFuncVectorize): - def build_ufunc(self): - engine = deviceufunc.GUFuncEngine(self.inputsig, self.outputsig) - return dispatcher.CUDAGenerializedUFunc(kernelmap=self.kernelmap, - engine=engine) - - def _compile_kernel(self, fnobj, sig): - return cuda.jit(sig)(fnobj) - - @property - def _kernel_template(self): - return _gufunc_stager_source - - def _get_globals(self, sig): - corefn = cuda.jit(sig, device=True)(self.pyfunc) - glbls = self.py_func.__globals__.copy() - glbls.update({'__cuda__': cuda, - '__core__': corefn}) - return glbls diff --git a/numba/numba/dataflow.py b/numba/numba/dataflow.py deleted file mode 100644 index 1267e5c90..000000000 --- a/numba/numba/dataflow.py +++ /dev/null @@ -1,872 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import collections -from pprint import pprint -import sys -import warnings - -from numba import utils - - -class 
DataFlowAnalysis(object): - """ - Perform stack2reg - - This is necessary to resolve blocks that propagates stack value. - This would allow the use of `and` and `or` and python2.6 jumps. - """ - - def __init__(self, cfa): - self.cfa = cfa - self.bytecode = cfa.bytecode - # { block offset -> BlockInfo } - self.infos = {} - self.edge_process = {} - - def run(self): - for blk in self.cfa.iterliveblocks(): - self.infos[blk.offset] = self.run_on_block(blk) - - def run_on_block(self, blk): - incoming_blocks = [] - info = BlockInfo(blk, blk.offset, incoming_blocks) - edge_callbacks = [] - - for ib, pops in self.cfa.incoming_blocks(blk): - # By nature of Python bytecode, there will be no incoming - # variables from subsequent blocks. This is an easy way - # of breaking the potential circularity of the problem. - if ib.offset >= blk.offset: - continue - ib = self.infos[ib.offset] - incoming_blocks.append(ib) - if (ib.offset, blk.offset) in self.edge_process: - edge_callbacks.append(self.edge_process[(ib.offset, blk.offset)]) - - # Compute stack offset at block entry - # The stack effect of our predecessors should be known - assert ib.stack_offset is not None, ib - new_offset = ib.stack_offset + ib.stack_effect - pops - if new_offset < 0: - raise RuntimeError("computed negative stack offset for %s" - % blk) - if info.stack_offset is None: - info.stack_offset = new_offset - elif info.stack_offset != new_offset: - warnings.warn("inconsistent stack offset for %s" % blk, - RuntimeWarning) - - # Compute syntax blocks at block entry - assert ib.syntax_blocks is not None, ib - if info.syntax_blocks is None: - info.syntax_blocks = ib.syntax_blocks[:] - elif info.syntax_blocks != ib.syntax_blocks: - warnings.warn("inconsistent entry syntax blocks for %s" % blk, - RuntimeWarning) - - if info.stack_offset is None: - # No incoming blocks => assume it's the entry block - info.stack_offset = 0 - info.syntax_blocks = [] - info.stack_effect = 0 - - for callback in edge_callbacks: - 
callback(info) - - for offset in blk: - inst = self.bytecode[offset] - self.dispatch(info, inst) - return info - - def dump(self): - for blk in utils.itervalues(self.infos): - blk.dump() - - def dispatch(self, info, inst): - fname = "op_%s" % inst.opname.replace('+', '_') - fn = getattr(self, fname, self.handle_unknown_opcode) - fn(info, inst) - - def handle_unknown_opcode(self, info, inst): - msg = "Use of unknown opcode {} at line {} of {}" - raise NotImplementedError(msg.format(inst.opname, inst.lineno, - self.bytecode.func_id.filename)) - - def dup_topx(self, info, inst, count): - orig = [info.pop() for _ in range(count)] - orig.reverse() - # We need to actually create new temporaries if we want the - # IR optimization pass to work correctly (see issue #580) - duped = [info.make_temp() for _ in range(count)] - info.append(inst, orig=orig, duped=duped) - for val in orig: - info.push(val) - for val in duped: - info.push(val) - - def add_syntax_block(self, info, block): - """ - Add an inner syntax block. - """ - block.stack_offset = info.stack_offset - info.syntax_blocks.append(block) - - def pop_syntax_block(self, info): - """ - Pop the innermost syntax block and revert its stack effect. 
- """ - block = info.syntax_blocks.pop() - assert info.stack_offset >= block.stack_offset - while info.stack_offset + info.stack_effect > block.stack_offset: - info.pop(discard=True) - return block - - def op_DUP_TOPX(self, info, inst): - count = inst.arg - assert 1 <= count <= 5, "Invalid DUP_TOPX count" - self.dup_topx(info, inst, count) - - def op_DUP_TOP(self, info, inst): - self.dup_topx(info, inst, count=1) - - def op_DUP_TOP_TWO(self, info, inst): - self.dup_topx(info, inst, count=2) - - def op_ROT_TWO(self, info, inst): - first = info.pop() - second = info.pop() - info.push(first) - info.push(second) - - def op_ROT_THREE(self, info, inst): - first = info.pop() - second = info.pop() - third = info.pop() - info.push(first) - info.push(third) - info.push(second) - - def op_ROT_FOUR(self, info, inst): - first = info.pop() - second = info.pop() - third = info.pop() - forth = info.pop() - info.push(first) - info.push(forth) - info.push(third) - info.push(second) - - def op_UNPACK_SEQUENCE(self, info, inst): - count = inst.arg - iterable = info.pop() - stores = [info.make_temp() for _ in range(count)] - tupleobj = info.make_temp() - info.append(inst, iterable=iterable, stores=stores, tupleobj=tupleobj) - for st in reversed(stores): - info.push(st) - - def op_BUILD_TUPLE(self, info, inst): - count = inst.arg - items = list(reversed([info.pop() for _ in range(count)])) - tup = info.make_temp() - info.append(inst, items=items, res=tup) - info.push(tup) - - def op_BUILD_LIST(self, info, inst): - count = inst.arg - items = list(reversed([info.pop() for _ in range(count)])) - lst = info.make_temp() - info.append(inst, items=items, res=lst) - info.push(lst) - - def op_LIST_APPEND(self, info, inst): - value = info.pop() - # Python 2.7+ added an argument to LIST_APPEND. 
- if sys.version_info[:2] == (2, 6): - target = info.pop() - else: - index = inst.arg - target = info.peek(index) - appendvar = info.make_temp() - res = info.make_temp() - info.append(inst, target=target, value=value, appendvar=appendvar, res=res) - - def op_BUILD_MAP(self, info, inst): - dct = info.make_temp() - count = inst.arg - items = [] - if sys.version_info >= (3, 5): - # In 3.5+, BUILD_MAP takes pairs from the stack - for i in range(count): - v, k = info.pop(), info.pop() - items.append((k, v)) - info.append(inst, items=items[::-1], size=count, res=dct) - info.push(dct) - - def op_BUILD_SET(self, info, inst): - count = inst.arg - # Note: related python bug http://bugs.python.org/issue26020 - items = list(reversed([info.pop() for _ in range(count)])) - res = info.make_temp() - info.append(inst, items=items, res=res) - info.push(res) - - def op_POP_TOP(self, info, inst): - info.pop(discard=True) - - def op_STORE_ATTR(self, info, inst): - target = info.pop() - value = info.pop() - info.append(inst, target=target, value=value) - - def op_DELETE_ATTR(self, info, inst): - target = info.pop() - info.append(inst, target=target) - - def op_STORE_FAST(self, info, inst): - value = info.pop() - info.append(inst, value=value) - - def op_STORE_MAP(self, info, inst): - key = info.pop() - value = info.pop() - dct = info.tos - info.append(inst, dct=dct, key=key, value=value) - - def op_STORE_DEREF(self, info, inst): - value = info.pop() - info.append(inst, value=value) - - def op_LOAD_FAST(self, info, inst): - name = self.bytecode.co_varnames[inst.arg] - res = info.make_temp(name) - info.append(inst, res=res) - info.push(res) - - def op_LOAD_CONST(self, info, inst): - res = info.make_temp('const') - info.append(inst, res=res) - info.push(res) - - def op_LOAD_GLOBAL(self, info, inst): - res = info.make_temp() - info.append(inst, res=res) - info.push(res) - - def op_LOAD_DEREF(self, info, inst): - res = info.make_temp() - info.append(inst, res=res) - info.push(res) - - def 
op_LOAD_ATTR(self, info, inst): - item = info.pop() - res = info.make_temp() - info.append(inst, item=item, res=res) - info.push(res) - - def op_BINARY_SUBSCR(self, info, inst): - index = info.pop() - target = info.pop() - res = info.make_temp() - info.append(inst, index=index, target=target, res=res) - info.push(res) - - def op_STORE_SUBSCR(self, info, inst): - index = info.pop() - target = info.pop() - value = info.pop() - info.append(inst, target=target, index=index, value=value) - - def op_DELETE_SUBSCR(self, info, inst): - index = info.pop() - target = info.pop() - info.append(inst, target=target, index=index) - - def op_GET_ITER(self, info, inst): - value = info.pop() - res = info.make_temp() - info.append(inst, value=value, res=res) - info.push(res) - - def op_FOR_ITER(self, info, inst): - iterator = info.tos - pair = info.make_temp() - indval = info.make_temp() - pred = info.make_temp() - info.append(inst, iterator=iterator, pair=pair, indval=indval, pred=pred) - info.push(indval) - # Setup for stack POP (twice) at loop exit (before processing instruction at jump target) - def pop_info(info): - info.pop() - info.pop() - self.edge_process[(info.block.offset, inst.get_jump_target())] = pop_info - - if utils.PYVERSION < (3, 6): - - def _op_call_function(self, info, inst, has_vararg): - narg = inst.arg & 0xff - nkws = (inst.arg >> 8) & 0xff - - def pop_kws(): - val = info.pop() - key = info.pop() - return key, val - - vararg = info.pop() if has_vararg else None - kws = list(reversed([pop_kws() for _ in range(nkws)])) - args = list(reversed([info.pop() for _ in range(narg)])) - func = info.pop() - - res = info.make_temp() - info.append(inst, func=func, args=args, kws=kws, res=res, - vararg=vararg) - info.push(res) - - def op_CALL_FUNCTION(self, info, inst): - self._op_call_function(info, inst, has_vararg=False) - - def op_CALL_FUNCTION_VAR(self, info, inst): - self._op_call_function(info, inst, has_vararg=True) - - else: - def op_CALL_FUNCTION(self, info, inst): 
- narg = inst.arg - args = list(reversed([info.pop() for _ in range(narg)])) - func = info.pop() - - res = info.make_temp() - info.append(inst, func=func, args=args, res=res) - info.push(res) - - def op_CALL_FUNCTION_KW(self, info, inst): - narg = inst.arg - names = info.pop() # tuple of names - args = list(reversed([info.pop() for _ in range(narg)])) - func = info.pop() - - res = info.make_temp() - info.append(inst, func=func, args=args, names=names, res=res) - info.push(res) - - def op_CALL_FUNCTION_EX(self, info, inst): - if inst.arg & 1: - errmsg = 'CALL_FUNCTION_EX with **kwargs not supported' - raise NotImplementedError(errmsg) - vararg = info.pop() - func = info.pop() - res = info.make_temp() - info.append(inst, func=func, vararg=vararg, res=res) - info.push(res) - - def op_BUILD_TUPLE_UNPACK_WITH_CALL(self, info, inst): - # Builds tuple from other tuples on the stack - tuples = list(reversed([info.pop() for _ in range(inst.arg)])) - temps = [info.make_temp() for _ in range(len(tuples) - 1)] - info.append(inst, tuples=tuples, temps=temps) - # The result is in the last temp var - info.push(temps[-1]) - - def op_BUILD_CONST_KEY_MAP(self, info, inst): - keys = info.pop() - vals = list(reversed([info.pop() for _ in range(inst.arg)])) - keytmps = [info.make_temp() for _ in range(inst.arg)] - res = info.make_temp() - info.append(inst, keys=keys, keytmps=keytmps, values=vals, res=res) - info.push(res) - - def op_PRINT_ITEM(self, info, inst): - warnings.warn("Python2 style print partially supported. 
Please use " - "Python3 style print.", RuntimeWarning) - item = info.pop() - printvar = info.make_temp() - res = info.make_temp() - info.append(inst, item=item, printvar=printvar, res=res) - - def op_PRINT_NEWLINE(self, info, inst): - printvar = info.make_temp() - res = info.make_temp() - info.append(inst, printvar=printvar, res=res) - - def _unaryop(self, info, inst): - val = info.pop() - res = info.make_temp() - info.append(inst, value=val, res=res) - info.push(res) - - op_UNARY_NEGATIVE = _unaryop - op_UNARY_POSITIVE = _unaryop - op_UNARY_NOT = _unaryop - op_UNARY_INVERT = _unaryop - - def _binaryop(self, info, inst): - rhs = info.pop() - lhs = info.pop() - res = info.make_temp() - info.append(inst, lhs=lhs, rhs=rhs, res=res) - info.push(res) - - op_COMPARE_OP = _binaryop - - op_INPLACE_ADD = _binaryop - op_INPLACE_SUBTRACT = _binaryop - op_INPLACE_MULTIPLY = _binaryop - op_INPLACE_DIVIDE = _binaryop - op_INPLACE_TRUE_DIVIDE = _binaryop - op_INPLACE_FLOOR_DIVIDE = _binaryop - op_INPLACE_MODULO = _binaryop - op_INPLACE_POWER = _binaryop - op_INPLACE_MATRIX_MULTIPLY = _binaryop - - op_INPLACE_LSHIFT = _binaryop - op_INPLACE_RSHIFT = _binaryop - op_INPLACE_AND = _binaryop - op_INPLACE_OR = _binaryop - op_INPLACE_XOR = _binaryop - - op_BINARY_ADD = _binaryop - op_BINARY_SUBTRACT = _binaryop - op_BINARY_MULTIPLY = _binaryop - op_BINARY_DIVIDE = _binaryop - op_BINARY_TRUE_DIVIDE = _binaryop - op_BINARY_FLOOR_DIVIDE = _binaryop - op_BINARY_MODULO = _binaryop - op_BINARY_POWER = _binaryop - op_BINARY_MATRIX_MULTIPLY = _binaryop - - op_BINARY_LSHIFT = _binaryop - op_BINARY_RSHIFT = _binaryop - op_BINARY_AND = _binaryop - op_BINARY_OR = _binaryop - op_BINARY_XOR = _binaryop - - def op_SLICE_0(self, info, inst): - """ - TOS = TOS[:] - """ - tos = info.pop() - res = info.make_temp() - slicevar = info.make_temp() - indexvar = info.make_temp() - nonevar = info.make_temp() - info.append(inst, base=tos, res=res, slicevar=slicevar, - indexvar=indexvar, nonevar=nonevar) - 
info.push(res) - - def op_SLICE_1(self, info, inst): - """ - TOS = TOS1[TOS:] - """ - tos = info.pop() - tos1 = info.pop() - res = info.make_temp() - slicevar = info.make_temp() - indexvar = info.make_temp() - nonevar = info.make_temp() - info.append(inst, base=tos1, start=tos, res=res, slicevar=slicevar, - indexvar=indexvar, nonevar=nonevar) - info.push(res) - - def op_SLICE_2(self, info, inst): - """ - TOS = TOS1[:TOS] - """ - tos = info.pop() - tos1 = info.pop() - res = info.make_temp() - slicevar = info.make_temp() - indexvar = info.make_temp() - nonevar = info.make_temp() - info.append(inst, base=tos1, stop=tos, res=res, slicevar=slicevar, - indexvar=indexvar, nonevar=nonevar) - info.push(res) - - def op_SLICE_3(self, info, inst): - """ - TOS = TOS2[TOS1:TOS] - """ - tos = info.pop() - tos1 = info.pop() - tos2 = info.pop() - res = info.make_temp() - slicevar = info.make_temp() - indexvar = info.make_temp() - info.append(inst, base=tos2, start=tos1, stop=tos, res=res, - slicevar=slicevar, indexvar=indexvar) - info.push(res) - - def op_STORE_SLICE_0(self, info, inst): - """ - TOS[:] = TOS1 - """ - tos = info.pop() - value = info.pop() - slicevar = info.make_temp() - indexvar = info.make_temp() - nonevar = info.make_temp() - info.append(inst, base=tos, value=value, slicevar=slicevar, - indexvar=indexvar, nonevar=nonevar) - - def op_STORE_SLICE_1(self, info, inst): - """ - TOS1[TOS:] = TOS2 - """ - tos = info.pop() - tos1 = info.pop() - value = info.pop() - slicevar = info.make_temp() - indexvar = info.make_temp() - nonevar = info.make_temp() - info.append(inst, base=tos1, start=tos, slicevar=slicevar, - value=value, indexvar=indexvar, nonevar=nonevar) - - def op_STORE_SLICE_2(self, info, inst): - """ - TOS1[:TOS] = TOS2 - """ - tos = info.pop() - tos1 = info.pop() - value = info.pop() - slicevar = info.make_temp() - indexvar = info.make_temp() - nonevar = info.make_temp() - info.append(inst, base=tos1, stop=tos, value=value, slicevar=slicevar, - 
indexvar=indexvar, nonevar=nonevar) - - def op_STORE_SLICE_3(self, info, inst): - """ - TOS2[TOS1:TOS] = TOS3 - """ - tos = info.pop() - tos1 = info.pop() - tos2 = info.pop() - value = info.pop() - slicevar = info.make_temp() - indexvar = info.make_temp() - info.append(inst, base=tos2, start=tos1, stop=tos, value=value, - slicevar=slicevar, indexvar=indexvar) - - def op_DELETE_SLICE_0(self, info, inst): - """ - del TOS[:] - """ - tos = info.pop() - slicevar = info.make_temp() - indexvar = info.make_temp() - nonevar = info.make_temp() - info.append(inst, base=tos, slicevar=slicevar, - indexvar=indexvar, nonevar=nonevar) - - def op_DELETE_SLICE_1(self, info, inst): - """ - del TOS1[TOS:] - """ - tos = info.pop() - tos1 = info.pop() - slicevar = info.make_temp() - indexvar = info.make_temp() - nonevar = info.make_temp() - info.append(inst, base=tos1, start=tos, slicevar=slicevar, - indexvar=indexvar, nonevar=nonevar) - - def op_DELETE_SLICE_2(self, info, inst): - """ - del TOS1[:TOS] - """ - tos = info.pop() - tos1 = info.pop() - slicevar = info.make_temp() - indexvar = info.make_temp() - nonevar = info.make_temp() - info.append(inst, base=tos1, stop=tos, slicevar=slicevar, - indexvar=indexvar, nonevar=nonevar) - - def op_DELETE_SLICE_3(self, info, inst): - """ - del TOS2[TOS1:TOS] - """ - tos = info.pop() - tos1 = info.pop() - tos2 = info.pop() - slicevar = info.make_temp() - indexvar = info.make_temp() - info.append(inst, base=tos2, start=tos1, stop=tos, - slicevar=slicevar, indexvar=indexvar) - - def op_BUILD_SLICE(self, info, inst): - """ - slice(TOS1, TOS) or slice(TOS2, TOS1, TOS) - """ - argc = inst.arg - if argc == 2: - tos = info.pop() - tos1 = info.pop() - start = tos1 - stop = tos - step = None - elif argc == 3: - tos = info.pop() - tos1 = info.pop() - tos2 = info.pop() - start = tos2 - stop = tos1 - step = tos - else: - raise Exception("unreachable") - slicevar = info.make_temp() - res = info.make_temp() - info.append(inst, start=start, stop=stop, 
step=step, res=res, - slicevar=slicevar) - info.push(res) - - def op_POP_JUMP_IF_TRUE(self, info, inst): - pred = info.pop() - info.append(inst, pred=pred) - info.terminator = inst - - def op_POP_JUMP_IF_FALSE(self, info, inst): - pred = info.pop() - info.append(inst, pred=pred) - info.terminator = inst - - def op_JUMP_IF_TRUE(self, info, inst): - pred = info.tos - info.append(inst, pred=pred) - info.terminator = inst - - def op_JUMP_IF_FALSE(self, info, inst): - pred = info.tos - info.append(inst, pred=pred) - info.terminator = inst - - op_JUMP_IF_FALSE_OR_POP = op_JUMP_IF_FALSE - op_JUMP_IF_TRUE_OR_POP = op_JUMP_IF_TRUE - - def op_JUMP_ABSOLUTE(self, info, inst): - info.append(inst) - info.terminator = inst - - def op_JUMP_FORWARD(self, info, inst): - info.append(inst) - info.terminator = inst - - def op_BREAK_LOOP(self, info, inst): - self.pop_syntax_block(info) - info.append(inst) - info.terminator = inst - - def op_RETURN_VALUE(self, info, inst): - info.append(inst, retval=info.pop(), castval=info.make_temp()) - info.terminator = inst - - def op_YIELD_VALUE(self, info, inst): - val = info.pop() - res = info.make_temp() - info.append(inst, value=val, res=res) - info.push(res) - - def op_SETUP_LOOP(self, info, inst): - self.add_syntax_block(info, LoopBlock()) - info.append(inst) - - def op_POP_BLOCK(self, info, inst): - block = self.pop_syntax_block(info) - info.append(inst) - - def op_RAISE_VARARGS(self, info, inst): - if inst.arg == 0: - exc = None - elif inst.arg == 1: - exc = info.pop() - else: - raise ValueError("Multiple argument raise is not supported.") - info.append(inst, exc=exc) - - def op_MAKE_FUNCTION(self, info, inst, MAKE_CLOSURE=False): - if utils.PYVERSION == (2, 7): - name = None - else: - name = info.pop() - code = info.pop() - closure = annotations = kwdefaults = defaults = None - if utils.PYVERSION < (3, 0): - if MAKE_CLOSURE: - closure = info.pop() - num_posdefaults = inst.arg - if num_posdefaults > 0: - defaults = [] - for i in 
range(num_posdefaults): - defaults.append(info.pop()) - defaults = tuple(defaults) - elif utils.PYVERSION >= (3, 0) and utils.PYVERSION < (3, 6): - num_posdefaults = inst.arg & 0xff - num_kwdefaults = (inst.arg >> 8) & 0xff - num_annotations = (inst.arg >> 16) & 0x7fff - if MAKE_CLOSURE: - closure = info.pop() - if num_annotations > 0: - annotations = info.pop() - if num_kwdefaults > 0: - kwdefaults = [] - for i in range(num_kwdefaults): - v = info.pop() - k = info.pop() - kwdefaults.append((k,v)) - kwdefaults = tuple(kwdefaults) - if num_posdefaults: - defaults = [] - for i in range(num_posdefaults): - defaults.append(info.pop()) - defaults = tuple(defaults) - else: - if inst.arg & 0x8: - closure = info.pop() - if inst.arg & 0x4: - annotations = info.pop() - if inst.arg & 0x2: - kwdefaults = info.pop() - if inst.arg & 0x1: - defaults = info.pop() - res = info.make_temp() - info.append(inst, name=name, code=code, closure=closure, annotations=annotations, - kwdefaults=kwdefaults, defaults=defaults, res=res) - info.push(res) - - def op_MAKE_CLOSURE(self, info, inst): - self.op_MAKE_FUNCTION(info, inst, MAKE_CLOSURE=True) - - def op_LOAD_CLOSURE(self, info, inst): - res = info.make_temp() - info.append(inst, res=res) - info.push(res) - - #NOTE: Please see notes in `interpreter.py` surrounding the implementation - # of LOAD_METHOD and CALL_METHOD. - - def op_LOAD_METHOD(self, *args, **kws): - self.op_LOAD_ATTR(*args, **kws) - - def op_CALL_METHOD(self, *args, **kws): - self.op_CALL_FUNCTION(*args, **kws) - - def _ignored(self, info, inst): - pass - - -class LoopBlock(object): - __slots__ = ('stack_offset',) - - def __init__(self): - self.stack_offset = None - - -class BlockInfo(object): - def __init__(self, block, offset, incoming_blocks): - self.block = block - self.offset = offset - # The list of incoming BlockInfo objects (obtained by control - # flow analysis). 
- self.incoming_blocks = incoming_blocks - self.stack = [] - # Outgoing variables from this block: - # { outgoing phi name -> var name } - self.outgoing_phis = {} - self.insts = [] - self.tempct = 0 - self._term = None - self.stack_offset = None - self.stack_effect = 0 - self.syntax_blocks = None - - def __repr__(self): - return "<%s at offset %d>" % (self.__class__.__name__, self.offset) - - def dump(self): - print("offset", self.offset, "{") - print(" stack: ", end='') - pprint(self.stack) - pprint(self.insts) - print("}") - - def make_temp(self, prefix=''): - self.tempct += 1 - name = '$%s%s.%s' % (prefix, self.offset, self.tempct) - return name - - def push(self, val): - self.stack_effect += 1 - self.stack.append(val) - - def pop(self, discard=False): - """ - Pop a variable from the stack, or request it from incoming blocks if - the stack is empty. - If *discard* is true, the variable isn't meant to be used anymore, - which allows reducing the number of temporaries created. - """ - if not self.stack: - self.stack_offset -= 1 - if not discard: - return self.make_incoming() - else: - self.stack_effect -= 1 - return self.stack.pop() - - def peek(self, k): - """ - Return the k'th element back from the top of the stack. - peek(1) is the top of the stack. - """ - num_pops = k - top_k = [self.pop() for _ in range(num_pops)] - r = top_k[-1] - for i in range(num_pops - 1, -1, -1): - self.push(top_k[i]) - return r - - def make_incoming(self): - """ - Create an incoming variable (due to not enough values being - available on our stack) and request its assignment from our - incoming blocks' own stacks. 
- """ - assert self.incoming_blocks - ret = self.make_temp('phi') - for ib in self.incoming_blocks: - stack_index = self.stack_offset + self.stack_effect - ib.request_outgoing(self, ret, stack_index) - return ret - - def request_outgoing(self, outgoing_block, phiname, stack_index): - """ - Request the assignment of the next available stack variable - for block *outgoing_block* with target name *phiname*. - """ - if phiname in self.outgoing_phis: - # If phiname was already requested, ignore this new request - # (can happen with a diamond-shaped block flow structure). - return - if stack_index < self.stack_offset: - assert self.incoming_blocks - for ib in self.incoming_blocks: - ib.request_outgoing(self, phiname, stack_index) - else: - varname = self.stack[stack_index - self.stack_offset] - self.outgoing_phis[phiname] = varname - - @property - def tos(self): - r = self.pop() - self.push(r) - return r - - def append(self, inst, **kws): - self.insts.append((inst.offset, kws)) - - @property - def terminator(self): - assert self._term is None - return self._term - - @terminator.setter - def terminator(self, inst): - self._term = inst diff --git a/numba/numba/datamodel/__init__.py b/numba/numba/datamodel/__init__.py deleted file mode 100644 index 2cb8b104e..000000000 --- a/numba/numba/datamodel/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .manager import DataModelManager -from .packer import ArgPacker, DataPacker -from .registry import register_default, default_manager, register -from .models import PrimitiveModel, CompositeModel, StructModel diff --git a/numba/numba/datamodel/manager.py b/numba/numba/datamodel/manager.py deleted file mode 100644 index 974c0647a..000000000 --- a/numba/numba/datamodel/manager.py +++ /dev/null @@ -1,49 +0,0 @@ -from __future__ import print_function, absolute_import - -import weakref - -from numba import types - - -class DataModelManager(object): - """Manages mapping of FE types to their corresponding data model - """ - - def 
__init__(self): - # { numba type class -> model factory } - self._handlers = {} - # { numba type instance -> model instance } - self._cache = weakref.WeakKeyDictionary() - - def register(self, fetypecls, handler): - """Register the datamodel factory corresponding to a frontend-type class - """ - assert issubclass(fetypecls, types.Type) - self._handlers[fetypecls] = handler - - def lookup(self, fetype): - """Returns the corresponding datamodel given the frontend-type instance - """ - try: - return self._cache[fetype] - except KeyError: - pass - handler = self._handlers[type(fetype)] - model = self._cache[fetype] = handler(self, fetype) - return model - - def __getitem__(self, fetype): - """Shorthand for lookup() - """ - return self.lookup(fetype) - - def copy(self): - """ - Make a copy of the manager. - Use this to inherit from the default data model and specialize it - for custom target. - """ - dmm = DataModelManager() - dmm._handlers = self._handlers.copy() - return dmm - diff --git a/numba/numba/datamodel/models.py b/numba/numba/datamodel/models.py deleted file mode 100644 index 9131dc17f..000000000 --- a/numba/numba/datamodel/models.py +++ /dev/null @@ -1,1311 +0,0 @@ -from __future__ import print_function, absolute_import - -from functools import partial - -from llvmlite import ir - -from numba import cgutils, types, numpy_support -from .registry import register_default - - -class DataModel(object): - """ - DataModel describe how a FE type is represented in the LLVM IR at - different contexts. - - Contexts are: - - - value: representation inside function body. Maybe stored in stack. - The representation here are flexible. - - - data: representation used when storing into containers (e.g. arrays). - - - argument: representation used for function argument. All composite - types are unflattened into multiple primitive types. - - - return: representation used for return argument. 
- - Throughput the compiler pipeline, a LLVM value is usually passed around - in the "value" representation. All "as_" prefix function converts from - "value" representation. All "from_" prefix function converts to the - "value" representation. - - """ - def __init__(self, dmm, fe_type): - self._dmm = dmm - self._fe_type = fe_type - - @property - def fe_type(self): - return self._fe_type - - def get_value_type(self): - raise NotImplementedError - - def get_data_type(self): - return self.get_value_type() - - def get_argument_type(self): - """Return a LLVM type or nested tuple of LLVM type - """ - return self.get_value_type() - - def get_return_type(self): - return self.get_value_type() - - def as_data(self, builder, value): - raise NotImplementedError - - def as_argument(self, builder, value): - """ - Takes one LLVM value - Return a LLVM value or nested tuple of LLVM value - """ - raise NotImplementedError(self) - - def as_return(self, builder, value): - raise NotImplementedError(self) - - def from_data(self, builder, value): - raise NotImplementedError(self) - - def from_argument(self, builder, value): - """ - Takes a LLVM value or nested tuple of LLVM value - Returns one LLVM value - """ - raise NotImplementedError(self) - - def from_return(self, builder, value): - raise NotImplementedError - - def load_from_data_pointer(self, builder, ptr, align=None): - """ - Load value from a pointer to data. - This is the default implementation, sufficient for most purposes. - """ - return self.from_data(builder, builder.load(ptr, align=align)) - - def traverse(self, builder): - """ - Traverse contained members. - Returns a iterable of contained (types, getters). - Each getter is a one-argument function accepting a LLVM value. - """ - return [] - - def traverse_models(self): - """ - Recursively list all models involved in this model. 
- """ - return [self._dmm[t] for t in self.traverse_types()] - - def traverse_types(self): - """ - Recursively list all frontend types involved in this model. - """ - return [self._fe_type] + self.inner_types() - - def inner_types(self): - """ - List all *inner* frontend types. - """ - return [] - - def get_nrt_meminfo(self, builder, value): - """ - Returns the MemInfo object or None if it is not tracked. - It is only defined for types.meminfo_pointer - """ - return None - - def has_nrt_meminfo(self): - return False - - def contains_nrt_meminfo(self): - """ - Recursively check all contained types for need for NRT meminfo. - """ - return any(model.has_nrt_meminfo() for model in self.traverse_models()) - - def _compared_fields(self): - return (type(self), self._fe_type) - - def __hash__(self): - return hash(tuple(self._compared_fields())) - - def __eq__(self, other): - if type(self) is type(other): - return self._compared_fields() == other._compared_fields() - else: - return False - - def __ne__(self, other): - return not self.__eq__(other) - - -@register_default(types.Omitted) -class OmittedArgDataModel(DataModel): - """ - A data model for omitted arguments. Only the "argument" representation - is defined, other representations raise a NotImplementedError. - """ - # Omitted arguments don't produce any LLVM function argument. 
- - def get_argument_type(self): - return () - - def as_argument(self, builder, val): - return () - - def from_argument(self, builder, val): - assert val == (), val - return None - - -@register_default(types.Boolean) -class BooleanModel(DataModel): - _bit_type = ir.IntType(1) - _byte_type = ir.IntType(8) - - def get_value_type(self): - return self._bit_type - - def get_data_type(self): - return self._byte_type - - def get_return_type(self): - return self.get_data_type() - - def get_argument_type(self): - return self.get_data_type() - - def as_data(self, builder, value): - return builder.zext(value, self.get_data_type()) - - def as_argument(self, builder, value): - return self.as_data(builder, value) - - def as_return(self, builder, value): - return self.as_data(builder, value) - - def from_data(self, builder, value): - return builder.trunc(value, self.get_value_type()) - - def from_argument(self, builder, value): - return self.from_data(builder, value) - - def from_return(self, builder, value): - return self.from_data(builder, value) - - -class PrimitiveModel(DataModel): - """A primitive type can be represented natively in the target in all - usage contexts. - """ - - def __init__(self, dmm, fe_type, be_type): - super(PrimitiveModel, self).__init__(dmm, fe_type) - self.be_type = be_type - - def get_value_type(self): - return self.be_type - - def as_data(self, builder, value): - return value - - def as_argument(self, builder, value): - return value - - def as_return(self, builder, value): - return value - - def from_data(self, builder, value): - return value - - def from_argument(self, builder, value): - return value - - def from_return(self, builder, value): - return value - - -class ProxyModel(DataModel): - """ - Helper class for models which delegate to another model. 
- """ - - def get_value_type(self): - return self._proxied_model.get_value_type() - - def get_data_type(self): - return self._proxied_model.get_data_type() - - def get_return_type(self): - return self._proxied_model.get_return_type() - - def get_argument_type(self): - return self._proxied_model.get_argument_type() - - def as_data(self, builder, value): - return self._proxied_model.as_data(builder, value) - - def as_argument(self, builder, value): - return self._proxied_model.as_argument(builder, value) - - def as_return(self, builder, value): - return self._proxied_model.as_return(builder, value) - - def from_data(self, builder, value): - return self._proxied_model.from_data(builder, value) - - def from_argument(self, builder, value): - return self._proxied_model.from_argument(builder, value) - - def from_return(self, builder, value): - return self._proxied_model.from_return(builder, value) - - -@register_default(types.EnumMember) -@register_default(types.IntEnumMember) -class EnumModel(ProxyModel): - """ - Enum members are represented exactly like their values. 
- """ - def __init__(self, dmm, fe_type): - super(EnumModel, self).__init__(dmm, fe_type) - self._proxied_model = dmm.lookup(fe_type.dtype) - - -@register_default(types.Opaque) -@register_default(types.PyObject) -@register_default(types.RawPointer) -@register_default(types.NoneType) -@register_default(types.Const) -@register_default(types.EllipsisType) -@register_default(types.Function) -@register_default(types.Type) -@register_default(types.Object) -@register_default(types.Module) -@register_default(types.Phantom) -@register_default(types.Dispatcher) -@register_default(types.ExceptionClass) -@register_default(types.Dummy) -@register_default(types.ExceptionInstance) -@register_default(types.ExternalFunction) -@register_default(types.NumbaFunction) -@register_default(types.Macro) -@register_default(types.EnumClass) -@register_default(types.IntEnumClass) -@register_default(types.NumberClass) -@register_default(types.NamedTupleClass) -@register_default(types.DType) -@register_default(types.RecursiveCall) -class OpaqueModel(PrimitiveModel): - """ - Passed as opaque pointers - """ - _ptr_type = ir.IntType(8).as_pointer() - - def __init__(self, dmm, fe_type): - be_type = self._ptr_type - super(OpaqueModel, self).__init__(dmm, fe_type, be_type) - - -@register_default(types.MemInfoPointer) -class MemInfoModel(OpaqueModel): - - def inner_types(self): - return self._dmm.lookup(self._fe_type.dtype).traverse_types() - - def has_nrt_meminfo(self): - return True - - def get_nrt_meminfo(self, builder, value): - return value - - -@register_default(types.Integer) -class IntegerModel(PrimitiveModel): - def __init__(self, dmm, fe_type): - be_type = ir.IntType(fe_type.bitwidth) - super(IntegerModel, self).__init__(dmm, fe_type, be_type) - - -@register_default(types.Float) -class FloatModel(PrimitiveModel): - def __init__(self, dmm, fe_type): - if fe_type == types.float32: - be_type = ir.FloatType() - elif fe_type == types.float64: - be_type = ir.DoubleType() - else: - raise 
NotImplementedError(fe_type) - super(FloatModel, self).__init__(dmm, fe_type, be_type) - - -@register_default(types.CPointer) -class PointerModel(PrimitiveModel): - def __init__(self, dmm, fe_type): - self._pointee_model = dmm.lookup(fe_type.dtype) - self._pointee_be_type = self._pointee_model.get_data_type() - be_type = self._pointee_be_type.as_pointer() - super(PointerModel, self).__init__(dmm, fe_type, be_type) - - -@register_default(types.EphemeralPointer) -class EphemeralPointerModel(PointerModel): - - def get_data_type(self): - return self._pointee_be_type - - def as_data(self, builder, value): - value = builder.load(value) - return self._pointee_model.as_data(builder, value) - - def from_data(self, builder, value): - raise NotImplementedError("use load_from_data_pointer() instead") - - def load_from_data_pointer(self, builder, ptr, align=None): - return builder.bitcast(ptr, self.get_value_type()) - - -@register_default(types.EphemeralArray) -class EphemeralArrayModel(PointerModel): - - def __init__(self, dmm, fe_type): - super(EphemeralArrayModel, self).__init__(dmm, fe_type) - self._data_type = ir.ArrayType(self._pointee_be_type, - self._fe_type.count) - - def get_data_type(self): - return self._data_type - - def as_data(self, builder, value): - values = [builder.load(cgutils.gep_inbounds(builder, value, i)) - for i in range(self._fe_type.count)] - return cgutils.pack_array(builder, values) - - def from_data(self, builder, value): - raise NotImplementedError("use load_from_data_pointer() instead") - - def load_from_data_pointer(self, builder, ptr, align=None): - return builder.bitcast(ptr, self.get_value_type()) - - -@register_default(types.ExternalFunctionPointer) -class ExternalFuncPointerModel(PrimitiveModel): - def __init__(self, dmm, fe_type): - sig = fe_type.sig - # Since the function is non-Numba, there is no adaptation - # of arguments and return value, hence get_value_type(). 
- retty = dmm.lookup(sig.return_type).get_value_type() - args = [dmm.lookup(t).get_value_type() for t in sig.args] - be_type = ir.PointerType(ir.FunctionType(retty, args)) - super(ExternalFuncPointerModel, self).__init__(dmm, fe_type, be_type) - - -@register_default(types.UniTuple) -@register_default(types.NamedUniTuple) -class UniTupleModel(DataModel): - def __init__(self, dmm, fe_type): - super(UniTupleModel, self).__init__(dmm, fe_type) - self._elem_model = dmm.lookup(fe_type.dtype) - self._count = len(fe_type) - self._value_type = ir.ArrayType(self._elem_model.get_value_type(), - self._count) - self._data_type = ir.ArrayType(self._elem_model.get_data_type(), - self._count) - - def get_value_type(self): - return self._value_type - - def get_data_type(self): - return self._data_type - - def get_return_type(self): - return self.get_value_type() - - def get_argument_type(self): - return (self._elem_model.get_argument_type(),) * self._count - - def as_argument(self, builder, value): - out = [] - for i in range(self._count): - v = builder.extract_value(value, [i]) - v = self._elem_model.as_argument(builder, v) - out.append(v) - return out - - def from_argument(self, builder, value): - out = ir.Constant(self.get_value_type(), ir.Undefined) - for i, v in enumerate(value): - v = self._elem_model.from_argument(builder, v) - out = builder.insert_value(out, v, [i]) - return out - - def as_data(self, builder, value): - out = ir.Constant(self.get_data_type(), ir.Undefined) - for i in range(self._count): - val = builder.extract_value(value, [i]) - dval = self._elem_model.as_data(builder, val) - out = builder.insert_value(out, dval, [i]) - return out - - def from_data(self, builder, value): - out = ir.Constant(self.get_value_type(), ir.Undefined) - for i in range(self._count): - val = builder.extract_value(value, [i]) - dval = self._elem_model.from_data(builder, val) - out = builder.insert_value(out, dval, [i]) - return out - - def as_return(self, builder, value): - return 
value - - def from_return(self, builder, value): - return value - - def traverse(self, builder): - def getter(i, value): - return builder.extract_value(value, i) - return [(self._fe_type.dtype, partial(getter, i)) - for i in range(self._count)] - - def inner_types(self): - return self._elem_model.traverse_types() - - -class CompositeModel(DataModel): - """Any model that is composed of multiple other models should subclass from - this. - """ - pass - - -class StructModel(CompositeModel): - _value_type = None - _data_type = None - - def __init__(self, dmm, fe_type, members): - super(StructModel, self).__init__(dmm, fe_type) - if members: - self._fields, self._members = zip(*members) - else: - self._fields = self._members = () - self._models = tuple([self._dmm.lookup(t) for t in self._members]) - - def get_member_fe_type(self, name): - """ - StructModel-specific: get the Numba type of the field named *name*. - """ - pos = self.get_field_position(name) - return self._members[pos] - - def get_value_type(self): - if self._value_type is None: - self._value_type = ir.LiteralStructType([t.get_value_type() - for t in self._models]) - return self._value_type - - def get_data_type(self): - if self._data_type is None: - self._data_type = ir.LiteralStructType([t.get_data_type() - for t in self._models]) - return self._data_type - - def get_argument_type(self): - return tuple([t.get_argument_type() for t in self._models]) - - def get_return_type(self): - return self.get_data_type() - - def _as(self, methname, builder, value): - extracted = [] - for i, dm in enumerate(self._models): - extracted.append(getattr(dm, methname)(builder, - self.get(builder, value, i))) - return tuple(extracted) - - def _from(self, methname, builder, value): - struct = ir.Constant(self.get_value_type(), ir.Undefined) - - for i, (dm, val) in enumerate(zip(self._models, value)): - v = getattr(dm, methname)(builder, val) - struct = self.set(builder, struct, v, i) - - return struct - - def as_data(self, 
builder, value): - """ - Converts the LLVM struct in `value` into a representation suited for - storing into arrays. - - Note - ---- - Current implementation rarely changes how types are represented for - "value" and "data". This is usually a pointless rebuild of the - immutable LLVM struct value. Luckily, LLVM optimization removes all - redundancy. - - Sample usecase: Structures nested with pointers to other structures - that can be serialized into a flat representation when storing into - array. - """ - elems = self._as("as_data", builder, value) - struct = ir.Constant(self.get_data_type(), ir.Undefined) - for i, el in enumerate(elems): - struct = builder.insert_value(struct, el, [i]) - return struct - - def from_data(self, builder, value): - """ - Convert from "data" representation back into "value" representation. - Usually invoked when loading from array. - - See notes in `as_data()` - """ - vals = [builder.extract_value(value, [i]) - for i in range(len(self._members))] - return self._from("from_data", builder, vals) - - def load_from_data_pointer(self, builder, ptr, align=None): - values = [] - for i, model in enumerate(self._models): - elem_ptr = cgutils.gep_inbounds(builder, ptr, 0, i) - val = model.load_from_data_pointer(builder, elem_ptr, align) - values.append(val) - - struct = ir.Constant(self.get_value_type(), ir.Undefined) - for i, val in enumerate(values): - struct = self.set(builder, struct, val, i) - return struct - - def as_argument(self, builder, value): - return self._as("as_argument", builder, value) - - def from_argument(self, builder, value): - return self._from("from_argument", builder, value) - - def as_return(self, builder, value): - elems = self._as("as_data", builder, value) - struct = ir.Constant(self.get_data_type(), ir.Undefined) - for i, el in enumerate(elems): - struct = builder.insert_value(struct, el, [i]) - return struct - - def from_return(self, builder, value): - vals = [builder.extract_value(value, [i]) - for i in 
range(len(self._members))] - return self._from("from_data", builder, vals) - - def get(self, builder, val, pos): - """Get a field at the given position or the fieldname - - Args - ---- - builder: - LLVM IRBuilder - val: - value to be inserted - pos: int or str - field index or field name - - Returns - ------- - Extracted value - """ - if isinstance(pos, str): - pos = self.get_field_position(pos) - return builder.extract_value(val, [pos], - name="extracted." + self._fields[pos]) - - def set(self, builder, stval, val, pos): - """Set a field at the given position or the fieldname - - Args - ---- - builder: - LLVM IRBuilder - stval: - LLVM struct value - val: - value to be inserted - pos: int or str - field index or field name - - Returns - ------- - A new LLVM struct with the value inserted - """ - if isinstance(pos, str): - pos = self.get_field_position(pos) - return builder.insert_value(stval, val, [pos], - name="inserted." + self._fields[pos]) - - def get_field_position(self, field): - try: - return self._fields.index(field) - except ValueError: - raise KeyError("%s does not have a field named %r" - % (self.__class__.__name__, field)) - - @property - def field_count(self): - return len(self._fields) - - def get_type(self, pos): - """Get the frontend type (numba type) of a field given the position - or the fieldname - - Args - ---- - pos: int or str - field index or field name - """ - if isinstance(pos, str): - pos = self.get_field_position(pos) - return self._members[pos] - - def get_model(self, pos): - """ - Get the datamodel of a field given the position or the fieldname. 
- - Args - ---- - pos: int or str - field index or field name - """ - return self._models[pos] - - def traverse(self, builder): - def getter(k, value): - if value.type != self.get_value_type(): - args = self.get_value_type(), value.type - raise TypeError("expecting {0} but got {1}".format(*args)) - return self.get(builder, value, k) - - return [(self.get_type(k), partial(getter, k)) for k in self._fields] - - def inner_types(self): - types = [] - for dm in self._models: - types += dm.traverse_types() - return types - - -@register_default(types.Complex) -class ComplexModel(StructModel): - _element_type = NotImplemented - - def __init__(self, dmm, fe_type): - members = [ - ('real', fe_type.underlying_float), - ('imag', fe_type.underlying_float), - ] - super(ComplexModel, self).__init__(dmm, fe_type, members) - - -@register_default(types.Tuple) -@register_default(types.NamedTuple) -class TupleModel(StructModel): - def __init__(self, dmm, fe_type): - members = [('f' + str(i), t) for i, t in enumerate(fe_type)] - super(TupleModel, self).__init__(dmm, fe_type, members) - - -@register_default(types.Pair) -class PairModel(StructModel): - def __init__(self, dmm, fe_type): - members = [('first', fe_type.first_type), - ('second', fe_type.second_type)] - super(PairModel, self).__init__(dmm, fe_type, members) - - -@register_default(types.ListPayload) -class ListPayloadModel(StructModel): - def __init__(self, dmm, fe_type): - # The fields are mutable but the payload is always manipulated - # by reference. This scheme allows mutations of an array to - # be seen by its iterators. 
- members = [ - ('size', types.intp), - ('allocated', types.intp), - # This member is only used only for reflected lists - ('dirty', types.boolean), - # Actually an inlined var-sized array - ('data', fe_type.container.dtype), - ] - super(ListPayloadModel, self).__init__(dmm, fe_type, members) - - -@register_default(types.List) -class ListModel(StructModel): - def __init__(self, dmm, fe_type): - payload_type = types.ListPayload(fe_type) - members = [ - # The meminfo data points to a ListPayload - ('meminfo', types.MemInfoPointer(payload_type)), - # This member is only used only for reflected lists - ('parent', types.pyobject), - ] - super(ListModel, self).__init__(dmm, fe_type, members) - - -@register_default(types.ListIter) -class ListIterModel(StructModel): - def __init__(self, dmm, fe_type): - payload_type = types.ListPayload(fe_type.container) - members = [ - # The meminfo data points to a ListPayload (shared with the - # original list object) - ('meminfo', types.MemInfoPointer(payload_type)), - ('index', types.EphemeralPointer(types.intp)), - ] - super(ListIterModel, self).__init__(dmm, fe_type, members) - - -@register_default(types.SetEntry) -class SetEntryModel(StructModel): - def __init__(self, dmm, fe_type): - dtype = fe_type.set_type.dtype - members = [ - # -1 = empty, -2 = deleted - ('hash', types.intp), - ('key', dtype), - ] - super(SetEntryModel, self).__init__(dmm, fe_type, members) - - -@register_default(types.SetPayload) -class SetPayloadModel(StructModel): - def __init__(self, dmm, fe_type): - entry_type = types.SetEntry(fe_type.container) - members = [ - # Number of active + deleted entries - ('fill', types.intp), - # Number of active entries - ('used', types.intp), - # Allocated size - 1 (size being a power of 2) - ('mask', types.intp), - # Search finger - ('finger', types.intp), - # This member is only used only for reflected sets - ('dirty', types.boolean), - # Actually an inlined var-sized array - ('entries', entry_type), - ] - 
super(SetPayloadModel, self).__init__(dmm, fe_type, members) - -@register_default(types.Set) -class SetModel(StructModel): - def __init__(self, dmm, fe_type): - payload_type = types.SetPayload(fe_type) - members = [ - # The meminfo data points to a SetPayload - ('meminfo', types.MemInfoPointer(payload_type)), - # This member is only used only for reflected sets - ('parent', types.pyobject), - ] - super(SetModel, self).__init__(dmm, fe_type, members) - -@register_default(types.SetIter) -class SetIterModel(StructModel): - def __init__(self, dmm, fe_type): - payload_type = types.SetPayload(fe_type.container) - members = [ - # The meminfo data points to a SetPayload (shared with the - # original set object) - ('meminfo', types.MemInfoPointer(payload_type)), - # The index into the entries table - ('index', types.EphemeralPointer(types.intp)), - ] - super(SetIterModel, self).__init__(dmm, fe_type, members) - - -@register_default(types.Array) -@register_default(types.Buffer) -@register_default(types.ByteArray) -@register_default(types.Bytes) -@register_default(types.MemoryView) -@register_default(types.PyArray) -class ArrayModel(StructModel): - def __init__(self, dmm, fe_type): - ndim = fe_type.ndim - members = [ - ('meminfo', types.MemInfoPointer(fe_type.dtype)), - ('parent', types.pyobject), - ('nitems', types.intp), - ('itemsize', types.intp), - ('data', types.CPointer(fe_type.dtype)), - ('shape', types.UniTuple(types.intp, ndim)), - ('strides', types.UniTuple(types.intp, ndim)), - - ] - super(ArrayModel, self).__init__(dmm, fe_type, members) - - -@register_default(types.ArrayFlags) -class ArrayFlagsModel(StructModel): - def __init__(self, dmm, fe_type): - members = [ - ('parent', fe_type.array_type), - ] - super(ArrayFlagsModel, self).__init__(dmm, fe_type, members) - - -@register_default(types.SmartArrayType) -class SmartArrayModel(StructModel): - def __init__(self, dmm, fe_type): - members = [ - ('parent', types.pyobject), - ('data', fe_type.as_array) - ] - 
super(SmartArrayModel, self).__init__(dmm, fe_type, members) - - -@register_default(types.NestedArray) -class NestedArrayModel(ArrayModel): - def __init__(self, dmm, fe_type): - self._be_type = dmm.lookup(fe_type.dtype).get_data_type() - super(NestedArrayModel, self).__init__(dmm, fe_type) - - def get_data_type(self): - ret = ir.ArrayType(self._be_type, self._fe_type.nitems) - return ret - - -@register_default(types.Optional) -class OptionalModel(StructModel): - def __init__(self, dmm, fe_type): - members = [ - ('data', fe_type.type), - ('valid', types.boolean), - ] - self._value_model = dmm.lookup(fe_type.type) - super(OptionalModel, self).__init__(dmm, fe_type, members) - - def get_return_type(self): - return self._value_model.get_return_type() - - def as_return(self, builder, value): - raise NotImplementedError - - def from_return(self, builder, value): - return self._value_model.from_return(builder, value) - - def traverse(self, builder): - def get_data(value): - valid = get_valid(value) - data = self.get(builder, value, "data") - return builder.select(valid, data, ir.Constant(data.type, None)) - def get_valid(value): - return self.get(builder, value, "valid") - - return [(self.get_type("data"), get_data), - (self.get_type("valid"), get_valid)] - - -@register_default(types.Record) -class RecordModel(CompositeModel): - def __init__(self, dmm, fe_type): - super(RecordModel, self).__init__(dmm, fe_type) - self._models = [self._dmm.lookup(t) for _, t in fe_type.members] - self._be_type = ir.ArrayType(ir.IntType(8), fe_type.size) - self._be_ptr_type = self._be_type.as_pointer() - - def get_value_type(self): - """Passed around as reference to underlying data - """ - return self._be_ptr_type - - def get_argument_type(self): - return self._be_ptr_type - - def get_return_type(self): - return self._be_ptr_type - - def get_data_type(self): - return self._be_type - - def as_data(self, builder, value): - return builder.load(value) - - def from_data(self, builder, value): - 
raise NotImplementedError("use load_from_data_pointer() instead") - - def as_argument(self, builder, value): - return value - - def from_argument(self, builder, value): - return value - - def as_return(self, builder, value): - return value - - def from_return(self, builder, value): - return value - - def load_from_data_pointer(self, builder, ptr, align=None): - return builder.bitcast(ptr, self.get_value_type()) - - -@register_default(types.UnicodeCharSeq) -class UnicodeCharSeq(DataModel): - def __init__(self, dmm, fe_type): - super(UnicodeCharSeq, self).__init__(dmm, fe_type) - charty = ir.IntType(numpy_support.sizeof_unicode_char * 8) - self._be_type = ir.ArrayType(charty, fe_type.count) - - def get_value_type(self): - return self._be_type - - def get_data_type(self): - return self._be_type - - -@register_default(types.CharSeq) -class CharSeq(DataModel): - def __init__(self, dmm, fe_type): - super(CharSeq, self).__init__(dmm, fe_type) - charty = ir.IntType(8) - self._be_type = ir.ArrayType(charty, fe_type.count) - - def get_value_type(self): - return self._be_type - - def get_data_type(self): - return self._be_type - - def as_data(self, builder, value): - return value - - def from_data(self, builder, value): - return value - - def as_return(self, builder, value): - return value - - def from_return(self, builder, value): - return value - - def as_argument(self, builder, value): - return value - - def from_argument(self, builder, value): - return value - - -class CContiguousFlatIter(StructModel): - def __init__(self, dmm, fe_type, need_indices): - assert fe_type.array_type.layout == 'C' - array_type = fe_type.array_type - dtype = array_type.dtype - ndim = array_type.ndim - members = [('array', array_type), - ('stride', types.intp), - ('index', types.EphemeralPointer(types.intp)), - ] - if need_indices: - # For ndenumerate() - members.append(('indices', types.EphemeralArray(types.intp, ndim))) - super(CContiguousFlatIter, self).__init__(dmm, fe_type, members) - - 
-class FlatIter(StructModel): - def __init__(self, dmm, fe_type): - array_type = fe_type.array_type - dtype = array_type.dtype - ndim = array_type.ndim - members = [('array', array_type), - ('pointers', types.EphemeralArray(types.CPointer(dtype), ndim)), - ('indices', types.EphemeralArray(types.intp, ndim)), - ('exhausted', types.EphemeralPointer(types.boolean)), - ] - super(FlatIter, self).__init__(dmm, fe_type, members) - - -@register_default(types.UniTupleIter) -class UniTupleIter(StructModel): - def __init__(self, dmm, fe_type): - members = [('index', types.EphemeralPointer(types.intp)), - ('tuple', fe_type.container,)] - super(UniTupleIter, self).__init__(dmm, fe_type, members) - - -@register_default(types.SliceType) -class SliceModel(StructModel): - def __init__(self, dmm, fe_type): - members = [('start', types.intp), - ('stop', types.intp), - ('step', types.intp), - ] - super(SliceModel, self).__init__(dmm, fe_type, members) - - -@register_default(types.NPDatetime) -@register_default(types.NPTimedelta) -class NPDatetimeModel(PrimitiveModel): - def __init__(self, dmm, fe_type): - be_type = ir.IntType(64) - super(NPDatetimeModel, self).__init__(dmm, fe_type, be_type) - - -@register_default(types.ArrayIterator) -class ArrayIterator(StructModel): - def __init__(self, dmm, fe_type): - # We use an unsigned index to avoid the cost of negative index tests. 
- members = [('index', types.EphemeralPointer(types.uintp)), - ('array', fe_type.array_type)] - super(ArrayIterator, self).__init__(dmm, fe_type, members) - - -@register_default(types.EnumerateType) -class EnumerateType(StructModel): - def __init__(self, dmm, fe_type): - members = [('count', types.EphemeralPointer(types.intp)), - ('iter', fe_type.source_type)] - - super(EnumerateType, self).__init__(dmm, fe_type, members) - - -@register_default(types.ZipType) -class ZipType(StructModel): - def __init__(self, dmm, fe_type): - members = [('iter%d' % i, source_type.iterator_type) - for i, source_type in enumerate(fe_type.source_types)] - super(ZipType, self).__init__(dmm, fe_type, members) - - -@register_default(types.RangeIteratorType) -class RangeIteratorType(StructModel): - def __init__(self, dmm, fe_type): - int_type = fe_type.yield_type - members = [('iter', types.EphemeralPointer(int_type)), - ('stop', int_type), - ('step', int_type), - ('count', types.EphemeralPointer(int_type))] - super(RangeIteratorType, self).__init__(dmm, fe_type, members) - - -@register_default(types.Generator) -class GeneratorModel(CompositeModel): - def __init__(self, dmm, fe_type): - super(GeneratorModel, self).__init__(dmm, fe_type) - # XXX Fold this in DataPacker? - self._arg_models = [self._dmm.lookup(t) for t in fe_type.arg_types - if not isinstance(t, types.Omitted)] - self._state_models = [self._dmm.lookup(t) for t in fe_type.state_types] - - self._args_be_type = ir.LiteralStructType( - [t.get_data_type() for t in self._arg_models]) - self._state_be_type = ir.LiteralStructType( - [t.get_data_type() for t in self._state_models]) - # The whole generator closure - self._be_type = ir.LiteralStructType( - [self._dmm.lookup(types.int32).get_value_type(), - self._args_be_type, self._state_be_type]) - self._be_ptr_type = self._be_type.as_pointer() - - def get_value_type(self): - """ - The generator closure is passed around as a reference. 
- """ - return self._be_ptr_type - - def get_argument_type(self): - return self._be_ptr_type - - def get_return_type(self): - return self._be_type - - def get_data_type(self): - return self._be_type - - def as_argument(self, builder, value): - return value - - def from_argument(self, builder, value): - return value - - def as_return(self, builder, value): - return self.as_data(builder, value) - - def from_return(self, builder, value): - return self.from_data(builder, value) - - def as_data(self, builder, value): - return builder.load(value) - - def from_data(self, builder, value): - stack = cgutils.alloca_once(builder, value.type) - builder.store(value, stack) - return stack - - -@register_default(types.ArrayCTypes) -class ArrayCTypesModel(StructModel): - def __init__(self, dmm, fe_type): - # ndim = fe_type.ndim - members = [('data', types.CPointer(fe_type.dtype)), - ('meminfo', types.MemInfoPointer(fe_type.dtype))] - super(ArrayCTypesModel, self).__init__(dmm, fe_type, members) - - -@register_default(types.RangeType) -class RangeModel(StructModel): - def __init__(self, dmm, fe_type): - int_type = fe_type.iterator_type.yield_type - members = [('start', int_type), - ('stop', int_type), - ('step', int_type)] - super(RangeModel, self).__init__(dmm, fe_type, members) - - -# ============================================================================= - -@register_default(types.NumpyNdIndexType) -class NdIndexModel(StructModel): - def __init__(self, dmm, fe_type): - ndim = fe_type.ndim - members = [('shape', types.UniTuple(types.intp, ndim)), - ('indices', types.EphemeralArray(types.intp, ndim)), - ('exhausted', types.EphemeralPointer(types.boolean)), - ] - super(NdIndexModel, self).__init__(dmm, fe_type, members) - - -@register_default(types.NumpyFlatType) -def handle_numpy_flat_type(dmm, ty): - if ty.array_type.layout == 'C': - return CContiguousFlatIter(dmm, ty, need_indices=False) - else: - return FlatIter(dmm, ty) - -@register_default(types.NumpyNdEnumerateType) 
-def handle_numpy_ndenumerate_type(dmm, ty): - if ty.array_type.layout == 'C': - return CContiguousFlatIter(dmm, ty, need_indices=True) - else: - return FlatIter(dmm, ty) - -@register_default(types.BoundFunction) -def handle_bound_function(dmm, ty): - # The same as the underlying type - return dmm[ty.this] - - -@register_default(types.NumpyNdIterType) -class NdIter(StructModel): - def __init__(self, dmm, fe_type): - array_types = fe_type.arrays - ndim = fe_type.ndim - shape_len = ndim if fe_type.need_shaped_indexing else 1 - members = [('exhausted', types.EphemeralPointer(types.boolean)), - ('arrays', types.Tuple(array_types)), - # The iterator's main shape and indices - ('shape', types.UniTuple(types.intp, shape_len)), - ('indices', types.EphemeralArray(types.intp, shape_len)), - ] - # Indexing state for the various sub-iterators - # XXX use a tuple instead? - for i, sub in enumerate(fe_type.indexers): - kind, start_dim, end_dim, _ = sub - member_name = 'index%d' % i - if kind == 'flat': - # A single index into the flattened array - members.append((member_name, types.EphemeralPointer(types.intp))) - elif kind in ('scalar', 'indexed', '0d'): - # Nothing required - pass - else: - assert 0 - # Slots holding values of the scalar args - # XXX use a tuple instead? 
- for i, ty in enumerate(fe_type.arrays): - if not isinstance(ty, types.Array): - member_name = 'scalar%d' % i - members.append((member_name, types.EphemeralPointer(ty))) - - super(NdIter, self).__init__(dmm, fe_type, members) - - -@register_default(types.DeferredType) -class DeferredStructModel(CompositeModel): - def __init__(self, dmm, fe_type): - super(DeferredStructModel, self).__init__(dmm, fe_type) - self.typename = "deferred.{0}".format(id(fe_type)) - self.actual_fe_type = fe_type.get() - - def get_value_type(self): - return ir.global_context.get_identified_type(self.typename + '.value') - - def get_data_type(self): - return ir.global_context.get_identified_type(self.typename + '.data') - - def get_argument_type(self): - return self._actual_model.get_argument_type() - - def as_argument(self, builder, value): - inner = self.get(builder, value) - return self._actual_model.as_argument(builder, inner) - - def from_argument(self, builder, value): - res = self._actual_model.from_argument(builder, value) - return self.set(builder, self.make_uninitialized(), res) - - def from_data(self, builder, value): - self._define() - elem = self.get(builder, value) - value = self._actual_model.from_data(builder, elem) - out = self.make_uninitialized() - return self.set(builder, out, value) - - def as_data(self, builder, value): - self._define() - elem = self.get(builder, value) - value = self._actual_model.as_data(builder, elem) - out = self.make_uninitialized(kind='data') - return self.set(builder, out, value) - - def from_return(self, builder, value): - return value - - def as_return(self, builder, value): - return value - - def get(self, builder, value): - return builder.extract_value(value, [0]) - - def set(self, builder, value, content): - return builder.insert_value(value, content, [0]) - - def make_uninitialized(self, kind='value'): - self._define() - if kind == 'value': - ty = self.get_value_type() - else: - ty = self.get_data_type() - return ir.Constant(ty, 
ir.Undefined) - - def _define(self): - valty = self.get_value_type() - self._define_value_type(valty) - datty = self.get_data_type() - self._define_data_type(datty) - - def _define_value_type(self, value_type): - if value_type.is_opaque: - value_type.set_body(self._actual_model.get_value_type()) - - def _define_data_type(self, data_type): - if data_type.is_opaque: - data_type.set_body(self._actual_model.get_data_type()) - - @property - def _actual_model(self): - return self._dmm.lookup(self.actual_fe_type) - - def traverse(self, builder): - return [(self.actual_fe_type, - lambda value: builder.extract_value(value, [0]))] diff --git a/numba/numba/datamodel/packer.py b/numba/numba/datamodel/packer.py deleted file mode 100644 index 672b1283d..000000000 --- a/numba/numba/datamodel/packer.py +++ /dev/null @@ -1,215 +0,0 @@ -from __future__ import print_function, absolute_import - -from collections import deque - -from numba import cgutils, types - - - -class DataPacker(object): - """ - A helper to pack a number of typed arguments into a data structure. - Omitted arguments (i.e. values with the type `Omitted`) are automatically - skipped. - """ - # XXX should DataPacker be a model for a dedicated type? - - def __init__(self, dmm, fe_types): - self._dmm = dmm - self._fe_types = fe_types - self._models = [dmm.lookup(ty) for ty in fe_types] - - self._pack_map = [] - self._be_types = [] - for i, ty in enumerate(fe_types): - if not isinstance(ty, types.Omitted): - self._pack_map.append(i) - self._be_types.append(self._models[i].get_data_type()) - - def as_data(self, builder, values): - """ - Return the given values packed as a data structure. 
- """ - elems = [self._models[i].as_data(builder, values[i]) - for i in self._pack_map] - return cgutils.make_anonymous_struct(builder, elems) - - def _do_load(self, builder, ptr, formal_list=None): - res = [] - for i, i_formal in enumerate(self._pack_map): - elem_ptr = cgutils.gep_inbounds(builder, ptr, 0, i) - val = self._models[i_formal].load_from_data_pointer(builder, elem_ptr) - if formal_list is None: - res.append((self._fe_types[i_formal], val)) - else: - formal_list[i_formal] = val - return res - - def load(self, builder, ptr): - """ - Load the packed values and return a (type, value) tuples. - """ - return self._do_load(builder, ptr) - - def load_into(self, builder, ptr, formal_list): - """ - Load the packed values into a sequence indexed by formal - argument number (skipping any Omitted position). - """ - self._do_load(builder, ptr, formal_list) - - -class ArgPacker(object): - """ - Compute the position for each high-level typed argument. - It flattens every composite argument into primitive types. - It maintains a position map for unflattening the arguments. - - Since struct (esp. nested struct) have specific ABI requirements (e.g. - alignemnt, pointer address-space, ...) in different architecture (e.g. - OpenCL, CUDA), flattening composite argument types simplifes the call - setup from the Python side. Functions are receiving simple primitive - types and there are only a handful of these. 
- """ - - def __init__(self, dmm, fe_args): - self._dmm = dmm - self._fe_args = fe_args - self._nargs = len(fe_args) - - self._dm_args = [] - argtys = [] - for ty in fe_args: - dm = self._dmm.lookup(ty) - self._dm_args.append(dm) - argtys.append(dm.get_argument_type()) - self._unflattener = _Unflattener(argtys) - self._be_args = list(_flatten(argtys)) - - def as_arguments(self, builder, values): - """Flatten all argument values - """ - if len(values) != self._nargs: - raise TypeError("invalid number of args: expected %d, got %d" - % (self._nargs, len(values))) - - if not values: - return () - - args = [dm.as_argument(builder, val) - for dm, val in zip(self._dm_args, values) - ] - - args = tuple(_flatten(args)) - return args - - def from_arguments(self, builder, args): - """Unflatten all argument values - """ - - valtree = self._unflattener.unflatten(args) - values = [dm.from_argument(builder, val) - for dm, val in zip(self._dm_args, valtree) - ] - - return values - - def assign_names(self, args, names): - """Assign names for each flattened argument values. - """ - - valtree = self._unflattener.unflatten(args) - for aval, aname in zip(valtree, names): - self._assign_names(aval, aname) - - def _assign_names(self, val_or_nested, name, depth=()): - if isinstance(val_or_nested, (tuple, list)): - for pos, aval in enumerate(val_or_nested): - self._assign_names(aval, name, depth=depth + (pos,)) - else: - postfix = '.'.join(map(str, depth)) - parts = [name, postfix] - val_or_nested.name = '.'.join(filter(bool, parts)) - - @property - def argument_types(self): - """Return a list of LLVM types that are results of flattening - composite types. - """ - return tuple(ty for ty in self._be_args if ty != ()) - - -def _flatten(iterable): - """ - Flatten nested iterable of (tuple, list). 
- """ - def rec(iterable): - for i in iterable: - if isinstance(i, (tuple, list)): - for j in rec(i): - yield j - else: - yield i - return rec(iterable) - - -_PUSH_LIST = 1 -_APPEND_NEXT_VALUE = 2 -_APPEND_EMPTY_TUPLE = 3 -_POP = 4 - -class _Unflattener(object): - """ - An object used to unflatten nested sequences after a given pattern - (an arbitrarily nested sequence). - The pattern shows the nested sequence shape desired when unflattening; - the values it contains are irrelevant. - """ - - def __init__(self, pattern): - self._code = self._build_unflatten_code(pattern) - - def _build_unflatten_code(self, iterable): - """Build the unflatten opcode sequence for the given *iterable* structure - (an iterable of nested sequences). - """ - code = [] - def rec(iterable): - for i in iterable: - if isinstance(i, (tuple, list)): - if len(i) > 0: - code.append(_PUSH_LIST) - rec(i) - code.append(_POP) - else: - code.append(_APPEND_EMPTY_TUPLE) - else: - code.append(_APPEND_NEXT_VALUE) - - rec(iterable) - return code - - def unflatten(self, flatiter): - """Rebuild a nested tuple structure. - """ - vals = deque(flatiter) - - res = [] - cur = res - stack = [] - for op in self._code: - if op is _PUSH_LIST: - stack.append(cur) - cur.append([]) - cur = cur[-1] - elif op is _APPEND_NEXT_VALUE: - cur.append(vals.popleft()) - elif op is _APPEND_EMPTY_TUPLE: - cur.append(()) - elif op is _POP: - cur = stack.pop() - - assert not stack, stack - assert not vals, vals - - return res diff --git a/numba/numba/datamodel/registry.py b/numba/numba/datamodel/registry.py deleted file mode 100644 index e68849f65..000000000 --- a/numba/numba/datamodel/registry.py +++ /dev/null @@ -1,20 +0,0 @@ -from __future__ import print_function, absolute_import - -import functools -from .manager import DataModelManager - - -def register(dmm, typecls): - """Used as decorator to simplify datamodel registration. - Returns the object being decorated so that chaining is possible. 
- """ - def wraps(fn): - dmm.register(typecls, fn) - return fn - - return wraps - - -default_manager = DataModelManager() - -register_default = functools.partial(register, default_manager) diff --git a/numba/numba/datamodel/testing.py b/numba/numba/datamodel/testing.py deleted file mode 100644 index 181d791e2..000000000 --- a/numba/numba/datamodel/testing.py +++ /dev/null @@ -1,152 +0,0 @@ -from __future__ import print_function, absolute_import - -from llvmlite import ir -from llvmlite import binding as ll - -from numba import datamodel -from numba import unittest_support as unittest - - -class DataModelTester(unittest.TestCase): - """ - Test the implementation of a DataModel for a frontend type. - """ - fe_type = NotImplemented - - def setUp(self): - self.module = ir.Module() - self.datamodel = datamodel.default_manager[self.fe_type] - - def test_as_arg(self): - """ - - Is as_arg() and from_arg() implemented? - - Are they the inverse of each other? - """ - fnty = ir.FunctionType(ir.VoidType(), []) - function = ir.Function(self.module, fnty, name="test_as_arg") - builder = ir.IRBuilder() - builder.position_at_end(function.append_basic_block()) - - undef_value = ir.Constant(self.datamodel.get_value_type(), None) - args = self.datamodel.as_argument(builder, undef_value) - self.assertIsNot(args, NotImplemented, "as_argument returned " - "NotImplementedError") - - if isinstance(args, (tuple, list)): - def recur_tuplize(args, func=None): - for arg in args: - if isinstance(arg, (tuple, list)): - yield tuple(recur_tuplize(arg, func=func)) - else: - if func is None: - yield arg - else: - yield func(arg) - - argtypes = tuple(recur_tuplize(args, func=lambda x: x.type)) - exptypes = tuple(recur_tuplize( - self.datamodel.get_argument_type())) - self.assertEqual(exptypes, argtypes) - else: - self.assertEqual(args.type, - self.datamodel.get_argument_type()) - - rev_value = self.datamodel.from_argument(builder, args) - self.assertEqual(rev_value.type, 
self.datamodel.get_value_type()) - - builder.ret_void() # end function - - # Ensure valid LLVM generation - materialized = ll.parse_assembly(str(self.module)) - str(materialized) - - def test_as_return(self): - """ - - Is as_return() and from_return() implemented? - - Are they the inverse of each other? - """ - fnty = ir.FunctionType(ir.VoidType(), []) - function = ir.Function(self.module, fnty, name="test_as_return") - builder = ir.IRBuilder() - builder.position_at_end(function.append_basic_block()) - - undef_value = ir.Constant(self.datamodel.get_value_type(), None) - ret = self.datamodel.as_return(builder, undef_value) - self.assertIsNot(ret, NotImplemented, "as_return returned " - "NotImplementedError") - - self.assertEqual(ret.type, self.datamodel.get_return_type()) - - rev_value = self.datamodel.from_return(builder, ret) - self.assertEqual(rev_value.type, self.datamodel.get_value_type()) - - builder.ret_void() # end function - - # Ensure valid LLVM generation - materialized = ll.parse_assembly(str(self.module)) - str(materialized) - - -class SupportAsDataMixin(object): - """Test as_data() and from_data() - """ - # XXX test load_from_data_pointer() as well - - def test_as_data(self): - fnty = ir.FunctionType(ir.VoidType(), []) - function = ir.Function(self.module, fnty, name="test_as_data") - builder = ir.IRBuilder() - builder.position_at_end(function.append_basic_block()) - - undef_value = ir.Constant(self.datamodel.get_value_type(), None) - data = self.datamodel.as_data(builder, undef_value) - self.assertIsNot(data, NotImplemented, - "as_data returned NotImplemented") - - self.assertEqual(data.type, self.datamodel.get_data_type()) - - rev_value = self.datamodel.from_data(builder, data) - self.assertEqual(rev_value.type, - self.datamodel.get_value_type()) - - builder.ret_void() # end function - - # Ensure valid LLVM generation - materialized = ll.parse_assembly(str(self.module)) - str(materialized) - - -class NotSupportAsDataMixin(object): - """Ensure 
as_data() and from_data() raise NotImplementedError. - """ - - def test_as_data_not_supported(self): - fnty = ir.FunctionType(ir.VoidType(), []) - function = ir.Function(self.module, fnty, name="test_as_data") - builder = ir.IRBuilder() - builder.position_at_end(function.append_basic_block()) - - undef_value = ir.Constant(self.datamodel.get_value_type(), None) - with self.assertRaises(NotImplementedError): - data = self.datamodel.as_data(builder, undef_value) - with self.assertRaises(NotImplementedError): - rev_data = self.datamodel.from_data(builder, undef_value) - - -class DataModelTester_SupportAsDataMixin(DataModelTester, - SupportAsDataMixin): - pass - - -class DataModelTester_NotSupportAsDataMixin(DataModelTester, - NotSupportAsDataMixin): - pass - - -def test_factory(support_as_data=True): - """A helper for returning a unittest TestCase for testing - """ - if support_as_data: - return DataModelTester_SupportAsDataMixin - else: - return DataModelTester_NotSupportAsDataMixin diff --git a/numba/numba/debuginfo.py b/numba/numba/debuginfo.py deleted file mode 100644 index ec040a864..000000000 --- a/numba/numba/debuginfo.py +++ /dev/null @@ -1,378 +0,0 @@ -""" -Implements helpers to build LLVM debuginfo. -""" - -from __future__ import absolute_import - -import abc -import os.path - -from llvmlite import ir - -from .six import add_metaclass - - -@add_metaclass(abc.ABCMeta) -class AbstractDIBuilder(object): - @abc.abstractmethod - def mark_variable(self, builder, allocavalue, name, lltype, size, loc): - """Emit debug info for the variable. - """ - pass - - @abc.abstractmethod - def mark_location(self, builder, loc): - """Emit source location information to the given IRBuilder. - """ - pass - - @abc.abstractmethod - def mark_subprogram(self, function, name, loc): - """Emit source location information for the given function. - """ - pass - - @abc.abstractmethod - def finalize(self): - """Finalize the debuginfo by emitting all necessary metadata. 
- """ - pass - - -class DummyDIBuilder(AbstractDIBuilder): - - def __init__(self, module, filepath): - pass - - def mark_variable(self, builder, allocavalue, name, lltype, size, loc): - pass - - def mark_location(self, builder, loc): - pass - - def mark_subprogram(self, function, name, loc): - pass - - def finalize(self): - pass - - -class DIBuilder(AbstractDIBuilder): - DWARF_VERSION = 4 - DEBUG_INFO_VERSION = 3 - DBG_CU_NAME = 'llvm.dbg.cu' - - def __init__(self, module, filepath): - self.module = module - self.filepath = os.path.abspath(filepath) - self.difile = self._di_file() - self.subprograms = [] - self.dicompileunit = self._di_compile_unit() - - def _var_type(self, lltype, size): - m = self.module - bitsize = size * 8 - - int_type = ir.IntType, - real_type = ir.FloatType, ir.DoubleType - # For simple numeric types, choose the closest encoding. - # We treat all integers as unsigned. - if isinstance(lltype, int_type + real_type): - mdtype = m.add_debug_info('DIBasicType', { - 'name': str(lltype), - 'size': bitsize, - 'encoding': (ir.DIToken('DW_ATE_unsigned') - if isinstance(lltype, int_type) - else ir.DIToken('DW_ATE_float')), - }) - # For all other types, describe it as sequence of bytes - else: - count = size - mdrange = m.add_debug_info('DISubrange', { - 'count': count, - }) - mdbase = m.add_debug_info('DIBasicType', { - 'name': 'byte', - 'size': 8, - 'encoding': ir.DIToken('DW_ATE_unsigned_char'), - }) - mdtype = m.add_debug_info('DICompositeType', { - 'tag': ir.DIToken('DW_TAG_array_type'), - 'baseType': mdbase, - 'name': str(lltype), - 'size': bitsize, - 'identifier': str(lltype), - 'elements': m.add_metadata([mdrange]), - }) - return mdtype - - def mark_variable(self, builder, allocavalue, name, lltype, size, loc): - m = self.module - fnty = ir.FunctionType(ir.VoidType(), [ir.MetaDataType()] * 3) - decl = m.get_or_insert_function(fnty, name='llvm.dbg.declare') - - mdtype = self._var_type(lltype, size) - name = name.replace('.', '$') # for gdb to work 
correctly - mdlocalvar = m.add_debug_info('DILocalVariable', { - 'name': name, - 'arg': 0, - 'scope': self.subprograms[-1], - 'file': self.difile, - 'line': loc.line, - 'type': mdtype, - }) - mdexpr = m.add_debug_info('DIExpression', {}) - - return builder.call(decl, [allocavalue, mdlocalvar, mdexpr]) - - def mark_location(self, builder, loc): - builder.debug_metadata = self._add_location(loc.line) - - def mark_subprogram(self, function, name, loc): - di_subp = self._add_subprogram(name=name, linkagename=function.name, - line=loc.line) - function.set_metadata("dbg", di_subp) - # disable inlining for this function for easier debugging - function.attributes.add('noinline') - - def finalize(self): - dbgcu = self.module.get_or_insert_named_metadata(self.DBG_CU_NAME) - dbgcu.add(self.dicompileunit) - self._set_module_flags() - - # - # Internal APIs - # - - def _set_module_flags(self): - """Set the module flags metadata - """ - module = self.module - mflags = module.get_or_insert_named_metadata('llvm.module.flags') - # Set *require* behavior to warning - # See http://llvm.org/docs/LangRef.html#module-flags-metadata - require_warning_behavior = self._const_int(2) - if self.DWARF_VERSION is not None: - dwarf_version = module.add_metadata([ - require_warning_behavior, - "Dwarf Version", - self._const_int(self.DWARF_VERSION) - ]) - if dwarf_version not in mflags.operands: - mflags.add(dwarf_version) - debuginfo_version = module.add_metadata([ - require_warning_behavior, - "Debug Info Version", - self._const_int(self.DEBUG_INFO_VERSION) - ]) - if debuginfo_version not in mflags.operands: - mflags.add(debuginfo_version) - - def _add_subprogram(self, name, linkagename, line): - """Emit subprogram metdata - """ - subp = self._di_subprogram(name, linkagename, line) - self.subprograms.append(subp) - return subp - - def _add_location(self, line): - """Emit location metatdaa - """ - loc = self._di_location(line) - return loc - - @classmethod - def _const_int(cls, num, bits=32): - 
"""Util to create constant int in metadata - """ - return ir.IntType(bits)(num) - - @classmethod - def _const_bool(cls, boolean): - """Util to create constant boolean in metadata - """ - return ir.IntType(1)(boolean) - - # - # Helpers to emit the metadata nodes - # - - def _di_file(self): - return self.module.add_debug_info('DIFile', { - 'directory': os.path.dirname(self.filepath), - 'filename': os.path.basename(self.filepath), - }) - - def _di_compile_unit(self): - return self.module.add_debug_info('DICompileUnit', { - 'language': ir.DIToken('DW_LANG_Python'), - 'file': self.difile, - 'producer': 'Numba', - 'runtimeVersion': 0, - 'isOptimized': True, - 'emissionKind': 1, # 0-NoDebug, 1-FullDebug - }, is_distinct=True) - - def _di_subroutine_type(self): - return self.module.add_debug_info('DISubroutineType', { - 'types': self.module.add_metadata([]), - }) - - def _di_subprogram(self, name, linkagename, line): - return self.module.add_debug_info('DISubprogram', { - 'name': name, - 'linkageName': linkagename, - 'scope': self.difile, - 'file': self.difile, - 'line': line, - 'type': self._di_subroutine_type(), - 'isLocal': False, - 'isDefinition': True, - 'scopeLine': line, - 'isOptimized': True, - 'variables': self.module.add_metadata([]), - 'unit': self.dicompileunit, - }, is_distinct=True) - - def _di_location(self, line): - return self.module.add_debug_info('DILocation', { - 'line': line, - 'column': 1, - 'scope': self.subprograms[-1], - }) - - -class NvvmDIBuilder(DIBuilder): - """ - Only implemented the minimal metadata to get line number information. 
- See http://llvm.org/releases/3.4/docs/LangRef.html - """ - # These constants are copied from llvm3.4 - DW_LANG_Python = 0x0014 - DI_Compile_unit = 786449 - DI_Subroutine_type = 786453 - DI_Subprogram = 786478 - DI_File = 786473 - - DWARF_VERSION = None # don't emit DWARF version - DEBUG_INFO_VERSION = 1 # as required by NVVM IR Spec - # Rename DIComputeUnit MD to hide it from llvm.parse_assembly() - # which strips invalid/outdated debug metadata - DBG_CU_NAME = 'numba.llvm.dbg.cu' - - # Default member - # Used in mark_location to remember last lineno to avoid duplication - _last_lineno = None - - def mark_variable(self, builder, allocavalue, name, lltype, size, loc): - # unsupported - pass - - def mark_location(self, builder, loc): - # Avoid duplication - if self._last_lineno == loc.line: - return - self._last_lineno = loc.line - # Add call to an inline asm to mark line location - asmty = ir.FunctionType(ir.VoidType(), []) - asm = ir.InlineAsm(asmty, "// dbg {}".format(loc.line), "", - side_effect=True) - call = builder.call(asm, []) - md = self._di_location(loc.line) - call.set_metadata('numba.dbg', md) - - def mark_subprogram(self, function, name, loc): - self._add_subprogram(name=name, linkagename=function.name, - line=loc.line) - - # - # Helper methods to create the metadata nodes. - # - - def _filepair(self): - return self.module.add_metadata([ - os.path.basename(self.filepath), - os.path.dirname(self.filepath), - ]) - - def _di_file(self): - return self.module.add_metadata([ - self._const_int(self.DI_File), - self._filepair(), - ]) - - def _di_compile_unit(self): - filepair = self._filepair() - empty = self.module.add_metadata([self._const_int(0)]) - return self.module.add_metadata([ - self._const_int(self.DI_Compile_unit), # tag - filepair, # source directory and file pair - self._const_int(self.DW_LANG_Python), # language - 'Numba', # producer - self._const_bool(True), # optimized - "", # flags?? 
- self._const_int(0), # runtime version - empty, # enums types - empty, # retained types - self.module.add_metadata(self.subprograms), # subprograms - empty, # global variables - empty, # imported entities - "", # split debug filename - ]) - - def _di_subroutine_type(self): - types = self.module.add_metadata([None]) - return self.module.add_metadata([ - self._const_int(self.DI_Subroutine_type), # tag - self._const_int(0), - None, - "", - self._const_int(0), # line of definition - self._const_int(0, 64), # size in bits - self._const_int(0, 64), # offset in bits - self._const_int(0, 64), # align in bits - self._const_int(0), # flags - None, - types, - self._const_int(0), - None, - None, - None, - ]) - - def _di_subprogram(self, name, linkagename, line): - function_ptr = self.module.get_global(linkagename) - subroutine_type = self._di_subroutine_type() - funcvars = self.module.add_metadata([self._const_int(0)]) - context = self._di_file() - return self.module.add_metadata([ - self._const_int(self.DI_Subprogram), # tag - self._filepair(), # source dir & file - context, # context descriptor - name, # name - name, # display name - linkagename, # linkage name - self._const_int(line), # line - subroutine_type, # type descriptor - self._const_bool(False), # is local - self._const_bool(True), # is definition - self._const_int(0), # virtuality - self._const_int(0), # virtual function index - None, # vtable base type - self._const_int(0), # flags - self._const_bool(True), # is optimized - function_ptr, # pointer to function - None, # function template parameters - None, # function declaration descriptor - funcvars, # function variables - self._const_int(line) # scope line - ]) - - def _di_location(self, line): - return self.module.add_metadata([ - self._const_int(line), # line - self._const_int(0), # column - self.subprograms[-1], # scope - None, # original scope - ]) - diff --git a/numba/numba/decorators.py b/numba/numba/decorators.py deleted file mode 100644 index 
038d9d704..000000000 --- a/numba/numba/decorators.py +++ /dev/null @@ -1,260 +0,0 @@ -""" -Define @jit and related decorators. -""" - -from __future__ import print_function, division, absolute_import - -import sys -import warnings - -from . import config, sigutils -from .errors import DeprecationError -from .targets import registry -from .stencil import stencil - - - -# ----------------------------------------------------------------------------- -# Decorators - -def autojit(*args, **kws): - """Deprecated. - - Use jit instead. Calls to jit internally. - """ - warnings.warn("autojit is deprecated, use jit instead which now performs " - "the same functionality", DeprecationWarning) - return jit(*args, **kws) - - -_msg_deprecated_signature_arg = ("Deprecated keyword argument `{0}`. " - "Signatures should be passed as the first " - "positional argument.") - -def jit(signature_or_function=None, locals={}, target='cpu', cache=False, - pipeline_class=None, **options): - """ - This decorator is used to compile a Python function into native code. - - Args - ----- - signature: - The (optional) signature or list of signatures to be compiled. - If not passed, required signatures will be compiled when the - decorated function is called, depending on the argument values. - As a convenience, you can directly pass the function to be compiled - instead. - - locals: dict - Mapping of local variable names to Numba types. Used to override the - types deduced by Numba's type inference engine. - - target: str - Specifies the target platform to compile for. Valid targets are cpu, - gpu, npyufunc, and cuda. Defaults to cpu. - - pipeline_class: type numba.compiler.BasePipeline - The compiler pipeline type for customizing the compilation stages. - - options: - For a cpu target, valid options are: - nopython: bool - Set to True to disable the use of PyObjects and Python API - calls. The default behavior is to allow the use of PyObjects - and Python API. Default value is False. 
- - forceobj: bool - Set to True to force the use of PyObjects for every value. - Default value is False. - - looplift: bool - Set to True to enable jitting loops in nopython mode while - leaving surrounding code in object mode. This allows functions - to allocate NumPy arrays and use Python objects, while the - tight loops in the function can still be compiled in nopython - mode. Any arrays that the tight loop uses should be created - before the loop is entered. Default value is True. - - error_model: str - The error-model affects divide-by-zero behavior. - Valid values are 'python' and 'numpy'. The 'python' model - raises exception. The 'numpy' model sets the result to - *+/-inf* or *nan*. Default value is 'python'. - - Returns - -------- - A callable usable as a compiled function. Actual compiling will be - done lazily if no explicit signatures are passed. - - Examples - -------- - The function can be used in the following ways: - - 1) jit(signatures, target='cpu', **targetoptions) -> jit(function) - - Equivalent to: - - d = dispatcher(function, targetoptions) - for signature in signatures: - d.compile(signature) - - Create a dispatcher object for a python function. Then, compile - the function with the given signature(s). - - Example: - - @jit("int32(int32, int32)") - def foo(x, y): - return x + y - - @jit(["int32(int32, int32)", "float32(float32, float32)"]) - def bar(x, y): - return x + y - - 2) jit(function, target='cpu', **targetoptions) -> dispatcher - - Create a dispatcher function object that specializes at call site. 
- - Examples: - - @jit - def foo(x, y): - return x + y - - @jit(target='cpu', nopython=True) - def bar(x, y): - return x + y - - """ - if 'argtypes' in options: - raise DeprecationError(_msg_deprecated_signature_arg.format('argtypes')) - if 'restype' in options: - raise DeprecationError(_msg_deprecated_signature_arg.format('restype')) - - if options.get('parallel'): - uns1 = sys.platform.startswith('win32') and sys.version_info[:2] == (2, 7) - uns2 = sys.maxsize <= 2 ** 32 - if uns1 or uns2: - msg = ("The 'parallel' target is not currently supported on " - "Windows operating systems when using Python 2.7, or " - "on 32 bit hardware.") - raise RuntimeError(msg) - if cache: - msg = ("Caching is not available when the 'parallel' target is in " - "use. Caching is now being disabled to allow execution to " - "continue.") - warnings.warn(msg, RuntimeWarning) - cache = False - - # Handle signature - if signature_or_function is None: - # No signature, no function - pyfunc = None - sigs = None - elif isinstance(signature_or_function, list): - # A list of signatures is passed - pyfunc = None - sigs = signature_or_function - elif sigutils.is_signature(signature_or_function): - # A single signature is passed - pyfunc = None - sigs = [signature_or_function] - else: - # A function is passed - pyfunc = signature_or_function - sigs = None - - dispatcher_args = {} - if pipeline_class is not None: - dispatcher_args['pipeline_class'] = pipeline_class - wrapper = _jit(sigs, locals=locals, target=target, cache=cache, - targetoptions=options, **dispatcher_args) - if pyfunc is not None: - return wrapper(pyfunc) - else: - return wrapper - - -def _jit(sigs, locals, target, cache, targetoptions, **dispatcher_args): - dispatcher = registry.dispatcher_registry[target] - - def wrapper(func): - if config.ENABLE_CUDASIM and target == 'cuda': - from . 
import cuda - return cuda.jit(func) - if config.DISABLE_JIT and not target == 'npyufunc': - return func - disp = dispatcher(py_func=func, locals=locals, - targetoptions=targetoptions, - **dispatcher_args) - if cache: - disp.enable_caching() - if sigs is not None: - # Register the Dispatcher to the type inference mechanism, - # even though the decorator hasn't returned yet. - from . import typeinfer - with typeinfer.register_dispatcher(disp): - for sig in sigs: - disp.compile(sig) - disp.disable_compile() - return disp - - return wrapper - - -def generated_jit(function=None, target='cpu', cache=False, - pipeline_class=None, **options): - """ - This decorator allows flexible type-based compilation - of a jitted function. It works as `@jit`, except that the decorated - function is called at compile-time with the *types* of the arguments - and should return an implementation function for those types. - """ - dispatcher_args = {} - if pipeline_class is not None: - dispatcher_args['pipeline_class'] = pipeline_class - wrapper = _jit(sigs=None, locals={}, target=target, cache=cache, - targetoptions=options, impl_kind='generated', - **dispatcher_args) - if function is not None: - return wrapper(function) - else: - return wrapper - - -def njit(*args, **kws): - """ - Equivalent to jit(nopython=True) - - See documentation for jit function/decorator for full description. - """ - if 'nopython' in kws: - warnings.warn('nopython is set for njit and is ignored', RuntimeWarning) - if 'forceobj' in kws: - warnings.warn('forceobj is set for njit and is ignored', RuntimeWarning) - kws.update({'nopython': True}) - return jit(*args, **kws) - - -def cfunc(sig, locals={}, cache=False, **options): - """ - This decorator is used to compile a Python function into a C callback - usable with foreign C libraries. 
- - Usage:: - @cfunc("float64(float64, float64)", nopython=True, cache=True) - def add(a, b): - return a + b - - """ - sig = sigutils.normalize_signature(sig) - - def wrapper(func): - from .ccallback import CFunc - res = CFunc(func, sig, locals=locals, options=options) - if cache: - res.enable_caching() - res.compile() - return res - - return wrapper diff --git a/numba/numba/dispatcher.py b/numba/numba/dispatcher.py deleted file mode 100644 index 471ea592b..000000000 --- a/numba/numba/dispatcher.py +++ /dev/null @@ -1,747 +0,0 @@ -# -*- coding: utf8 -*- - -from __future__ import print_function, division, absolute_import - -import collections -import functools -import os -import struct -import sys -import uuid -import weakref - -import numba -from numba import _dispatcher, compiler, utils, types, config, errors -from numba.typeconv.rules import default_type_manager -from numba import sigutils, serialize, typing -from numba.typing.templates import fold_arguments -from numba.typing.typeof import Purpose, typeof, typeof_impl -from numba.bytecode import get_code_object -from numba.six import create_bound_method, next, reraise -from .caching import NullCache, FunctionCache - - -class OmittedArg(object): - """ - A placeholder for omitted arguments with a default value. - """ - - def __init__(self, value): - self.value = value - - def __repr__(self): - return "omitted arg(%r)" % (self.value,) - - @property - def _numba_type_(self): - return types.Omitted(self.value) - - -class _FunctionCompiler(object): - - def __init__(self, py_func, targetdescr, targetoptions, locals, - pipeline_class): - self.py_func = py_func - self.targetdescr = targetdescr - self.targetoptions = targetoptions - self.locals = locals - self.pysig = utils.pysignature(self.py_func) - self.pipeline_class = pipeline_class - - def fold_argument_types(self, args, kws): - """ - Given positional and named argument types, fold keyword arguments - and resolve defaults by inserting types.Omitted() instances. 
- - A (pysig, argument types) tuple is returned. - """ - def normal_handler(index, param, value): - return value - def default_handler(index, param, default): - return types.Omitted(default) - def stararg_handler(index, param, values): - return types.Tuple(values) - # For now, we take argument values from the @jit function, even - # in the case of generated jit. - args = fold_arguments(self.pysig, args, kws, - normal_handler, - default_handler, - stararg_handler) - return self.pysig, args - - def compile(self, args, return_type): - flags = compiler.Flags() - self.targetdescr.options.parse_as_flags(flags, self.targetoptions) - flags = self._customize_flags(flags) - - impl = self._get_implementation(args, {}) - cres = compiler.compile_extra(self.targetdescr.typing_context, - self.targetdescr.target_context, - impl, - args=args, return_type=return_type, - flags=flags, locals=self.locals, - pipeline_class=self.pipeline_class) - # Check typing error if object mode is used - if cres.typing_error is not None and not flags.enable_pyobject: - raise cres.typing_error - return cres - - def get_globals_for_reduction(self): - return serialize._get_function_globals_for_reduction(self.py_func) - - def _get_implementation(self, args, kws): - return self.py_func - - def _customize_flags(self, flags): - return flags - - -class _GeneratedFunctionCompiler(_FunctionCompiler): - - def __init__(self, py_func, targetdescr, targetoptions, locals, - pipeline_class): - super(_GeneratedFunctionCompiler, self).__init__( - py_func, targetdescr, targetoptions, locals, pipeline_class) - self.impls = set() - - def get_globals_for_reduction(self): - # This will recursively get the globals used by any nested - # implementation function. 
- return serialize._get_function_globals_for_reduction(self.py_func) - - def _get_implementation(self, args, kws): - impl = self.py_func(*args, **kws) - # Check the generating function and implementation signatures are - # compatible, otherwise compiling would fail later. - pysig = utils.pysignature(self.py_func) - implsig = utils.pysignature(impl) - ok = len(pysig.parameters) == len(implsig.parameters) - if ok: - for pyparam, implparam in zip(pysig.parameters.values(), - implsig.parameters.values()): - # We allow the implementation to omit default values, but - # if it mentions them, they should have the same value... - if (pyparam.name != implparam.name or - pyparam.kind != implparam.kind or - (implparam.default is not implparam.empty and - implparam.default != pyparam.default)): - ok = False - if not ok: - raise TypeError("generated implementation %s should be compatible " - "with signature '%s', but has signature '%s'" - % (impl, pysig, implsig)) - self.impls.add(impl) - return impl - - -_CompileStats = collections.namedtuple( - '_CompileStats', ('cache_path', 'cache_hits', 'cache_misses')) - - -class _CompilingCounter(object): - """ - A simple counter that increment in __enter__ and decrement in __exit__. - """ - - def __init__(self): - self.counter = 0 - - def __enter__(self): - assert self.counter >= 0 - self.counter += 1 - - def __exit__(self, *args, **kwargs): - self.counter -= 1 - assert self.counter >= 0 - - def __bool__(self): - return self.counter > 0 - - __nonzero__ = __bool__ - - -class _DispatcherBase(_dispatcher.Dispatcher): - """ - Common base class for dispatcher Implementations. 
- """ - - __numba__ = "py_func" - - def __init__(self, arg_count, py_func, pysig, can_fallback): - self._tm = default_type_manager - - # A mapping of signatures to compile results - self.overloads = collections.OrderedDict() - - self.py_func = py_func - # other parts of Numba assume the old Python 2 name for code object - self.func_code = get_code_object(py_func) - # but newer python uses a different name - self.__code__ = self.func_code - - argnames = tuple(pysig.parameters) - default_values = self.py_func.__defaults__ or () - defargs = tuple(OmittedArg(val) for val in default_values) - try: - lastarg = list(pysig.parameters.values())[-1] - except IndexError: - has_stararg = False - else: - has_stararg = lastarg.kind == lastarg.VAR_POSITIONAL - _dispatcher.Dispatcher.__init__(self, self._tm.get_pointer(), - arg_count, self._fold_args, - argnames, defargs, - can_fallback, - has_stararg) - - self.doc = py_func.__doc__ - self._compiling_counter = _CompilingCounter() - utils.finalize(self, self._make_finalizer()) - - def _reset_overloads(self): - self._clear() - self.overloads.clear() - - def _make_finalizer(self): - """ - Return a finalizer function that will release references to - related compiled functions. - """ - overloads = self.overloads - targetctx = self.targetctx - - # Early-bind utils.shutting_down() into the function's local namespace - # (see issue #689) - def finalizer(shutting_down=utils.shutting_down): - # The finalizer may crash at shutdown, skip it (resources - # will be cleared by the process exiting, anyway). - if shutting_down(): - return - # This function must *not* hold any reference to self: - # we take care to bind the necessary objects in the closure. - for cres in overloads.values(): - try: - targetctx.remove_user_function(cres.entry_point) - except KeyError: - pass - - return finalizer - - @property - def signatures(self): - """ - Returns a list of compiled function signatures. 
- """ - return list(self.overloads) - - @property - def nopython_signatures(self): - return [cres.signature for cres in self.overloads.values() - if not cres.objectmode and not cres.interpmode] - - def disable_compile(self, val=True): - """Disable the compilation of new signatures at call time. - """ - # If disabling compilation then there must be at least one signature - assert (not val) or len(self.signatures) > 0 - self._can_compile = not val - - def add_overload(self, cres): - args = tuple(cres.signature.args) - sig = [a._code for a in args] - self._insert(sig, cres.entry_point, cres.objectmode, cres.interpmode) - self.overloads[args] = cres - - def fold_argument_types(self, args, kws): - return self._compiler.fold_argument_types(args, kws) - - def get_call_template(self, args, kws): - """ - Get a typing.ConcreteTemplate for this dispatcher and the given - *args* and *kws* types. This allows to resolve the return type. - - A (template, pysig, args, kws) tuple is returned. - """ - # XXX how about a dispatcher template class automating the - # following? - - # Fold keyword arguments and resolve default values - pysig, args = self._compiler.fold_argument_types(args, kws) - kws = {} - # Ensure an overload is available - if self._can_compile: - self.compile(tuple(args)) - - # Create function type for typing - func_name = self.py_func.__name__ - name = "CallTemplate({0})".format(func_name) - # The `key` isn't really used except for diagnosis here, - # so avoid keeping a reference to `cfunc`. - call_template = typing.make_concrete_template( - name, key=func_name, signatures=self.nopython_signatures) - return call_template, pysig, args, kws - - def get_overload(self, sig): - """ - Return the compiled function for the given signature. - """ - args, return_type = sigutils.normalize_signature(sig) - return self.overloads[tuple(args)].entry_point - - @property - def is_compiling(self): - """ - Whether a specialization is currently being compiled. 
- """ - return self._compiling_counter - - def _compile_for_args(self, *args, **kws): - """ - For internal use. Compile a specialized version of the function - for the given *args* and *kws*, and return the resulting callable. - """ - assert not kws - - def error_rewrite(e, issue_type): - """ - Rewrite and raise Exception `e` with help supplied based on the - specified issue_type. - """ - if config.SHOW_HELP: - help_msg = errors.error_extras[issue_type] - e.patch_message(''.join(e.args) + help_msg) - if config.FULL_TRACEBACKS: - raise e - else: - reraise(type(e), e, None) - - argtypes = [] - for a in args: - if isinstance(a, OmittedArg): - argtypes.append(types.Omitted(a.value)) - else: - argtypes.append(self.typeof_pyval(a)) - try: - return self.compile(tuple(argtypes)) - except errors.TypingError as e: - # Intercept typing error that may be due to an argument - # that failed inferencing as a Numba type - failed_args = [] - for i, arg in enumerate(args): - val = arg.value if isinstance(arg, OmittedArg) else arg - try: - tp = typeof(val, Purpose.argument) - except ValueError as typeof_exc: - failed_args.append((i, str(typeof_exc))) - else: - if tp is None: - failed_args.append( - (i, - "cannot determine Numba type of value %r" % (val,))) - if failed_args: - # Patch error message to ease debugging - msg = str(e).rstrip() + ( - "\n\nThis error may have been caused by the following argument(s):\n%s\n" - % "\n".join("- argument %d: %s" % (i, err) - for i, err in failed_args)) - e.patch_message(msg) - - error_rewrite(e, 'typing') - except errors.UnsupportedError as e: - # Something unsupported is present in the user code, add help info - error_rewrite(e, 'unsupported_error') - except (errors.NotDefinedError, errors.RedefinedError, - errors.VerificationError) as e: - # These errors are probably from an issue with either the code supplied - # being syntactically or otherwise invalid - error_rewrite(e, 'interpreter') - except errors.ConstantInferenceError as e: - # this is 
from trying to infer something as constant when it isn't - # or isn't supported as a constant - error_rewrite(e, 'constant_inference') - except Exception as e: - if config.SHOW_HELP: - if hasattr(e, 'patch_message'): - help_msg = errors.error_extras['reportable'] - e.patch_message(''.join(e.args) + help_msg) - # ignore the FULL_TRACEBACKS config, this needs reporting! - raise e - - def inspect_llvm(self, signature=None): - if signature is not None: - lib = self.overloads[signature].library - return lib.get_llvm_str() - - return dict((sig, self.inspect_llvm(sig)) for sig in self.signatures) - - def inspect_asm(self, signature=None): - if signature is not None: - lib = self.overloads[signature].library - return lib.get_asm_str() - - return dict((sig, self.inspect_asm(sig)) for sig in self.signatures) - - def inspect_types(self, file=None, **kwargs): - """ - print or return annotated source with Numba intermediate IR - - Pass `pretty=True` to attempt color highlighting, and HTML rendering in - Jupyter and IPython by returning an Annotate Object. `file` must be - None if used in conjunction with `pretty=True`. - """ - pretty = kwargs.get('pretty', False) - style = kwargs.get('style', 'default') - - if not pretty: - if file is None: - file = sys.stdout - - for ver, res in utils.iteritems(self.overloads): - print("%s %s" % (self.py_func.__name__, ver), file=file) - print('-' * 80, file=file) - print(res.type_annotation, file=file) - print('=' * 80, file=file) - else: - if file is not None: - raise ValueError("`file` must be None if `pretty=True`") - from .pretty_annotate import Annotate - return Annotate(self, style=style) - - def inspect_cfg(self, signature=None, show_wrapper=None): - """ - For inspecting the CFG of the function. - - By default the CFG of the user function is showed. The *show_wrapper* - option can be set to "python" or "cfunc" to show the python wrapper - function or the *cfunc* wrapper function, respectively. 
- """ - if signature is not None: - cres = self.overloads[signature] - lib = cres.library - if show_wrapper == 'python': - fname = cres.fndesc.llvm_cpython_wrapper_name - elif show_wrapper == 'cfunc': - fname = cres.fndesc.llvm_cfunc_wrapper_name - else: - fname = cres.fndesc.mangled_name - return lib.get_function_cfg(fname) - - return dict((sig, self.inspect_cfg(sig, show_wrapper=show_wrapper)) - for sig in self.signatures) - - def get_annotation_info(self, signature=None): - """ - Gets the annotation information for the function specified by - signature. If no signature is supplied a dictionary of signature to - annotation information is returned. - """ - if signature is not None: - cres = self.overloads[signature] - return cres.type_annotation.annotate_raw() - return dict((sig, self.annotate(sig)) for sig in self.signatures) - - def _explain_ambiguous(self, *args, **kws): - """ - Callback for the C _Dispatcher object. - """ - assert not kws, "kwargs not handled" - args = tuple([self.typeof_pyval(a) for a in args]) - # The order here must be deterministic for testing purposes, which - # is ensured by the OrderedDict. - sigs = self.nopython_signatures - # This will raise - self.typingctx.resolve_overload(self.py_func, sigs, args, kws, - allow_ambiguous=False) - - def _explain_matching_error(self, *args, **kws): - """ - Callback for the C _Dispatcher object. - """ - assert not kws, "kwargs not handled" - args = [self.typeof_pyval(a) for a in args] - msg = ("No matching definition for argument type(s) %s" - % ', '.join(map(str, args))) - raise TypeError(msg) - - def _search_new_conversions(self, *args, **kws): - """ - Callback for the C _Dispatcher object. - Search for approximately matching signatures for the given arguments, - and ensure the corresponding conversions are registered in the C++ - type manager. 
- """ - assert not kws, "kwargs not handled" - args = [self.typeof_pyval(a) for a in args] - found = False - for sig in self.nopython_signatures: - conv = self.typingctx.install_possible_conversions(args, sig.args) - if conv: - found = True - return found - - def __repr__(self): - return "%s(%s)" % (type(self).__name__, self.py_func) - - def typeof_pyval(self, val): - """ - Resolve the Numba type of Python value *val*. - This is called from numba._dispatcher as a fallback if the native code - cannot decide the type. - """ - # Not going through the resolve_argument_type() indirection - # can save a couple µs. - try: - tp = typeof(val, Purpose.argument) - except ValueError: - tp = types.pyobject - else: - if tp is None: - tp = types.pyobject - return tp - - -class Dispatcher(_DispatcherBase): - """ - Implementation of user-facing dispatcher objects (i.e. created using - the @jit decorator). - This is an abstract base class. Subclasses should define the targetdescr - class attribute. - """ - _fold_args = True - _impl_kinds = { - 'direct': _FunctionCompiler, - 'generated': _GeneratedFunctionCompiler, - } - # A {uuid -> instance} mapping, for deserialization - _memo = weakref.WeakValueDictionary() - __uuid = None - __numba__ = 'py_func' - - def __init__(self, py_func, locals={}, targetoptions={}, - impl_kind='direct', pipeline_class=compiler.Pipeline): - """ - Parameters - ---------- - py_func: function object to be compiled - locals: dict, optional - Mapping of local variable names to Numba types. Used to override - the types deduced by the type inference engine. - targetoptions: dict, optional - Target-specific config options. - impl_kind: str - Select the compiler mode for `@jit` and `@generated_jit` - pipeline_class: type numba.compiler.BasePipeline - The compiler pipeline type. 
- """ - self.typingctx = self.targetdescr.typing_context - self.targetctx = self.targetdescr.target_context - - pysig = utils.pysignature(py_func) - arg_count = len(pysig.parameters) - can_fallback = not targetoptions.get('nopython', False) - _DispatcherBase.__init__(self, arg_count, py_func, pysig, can_fallback) - - functools.update_wrapper(self, py_func) - - self.targetoptions = targetoptions - self.locals = locals - self._cache = NullCache() - compiler_class = self._impl_kinds[impl_kind] - self._impl_kind = impl_kind - self._compiler = compiler_class(py_func, self.targetdescr, - targetoptions, locals, pipeline_class) - self._cache_hits = collections.Counter() - self._cache_misses = collections.Counter() - - self._type = types.Dispatcher(self) - self.typingctx.insert_global(self, self._type) - - @property - def _numba_type_(self): - return types.Dispatcher(self) - - def enable_caching(self): - self._cache = FunctionCache(self.py_func) - - def __get__(self, obj, objtype=None): - '''Allow a JIT function to be bound as a method to an object''' - if obj is None: # Unbound method - return self - else: # Bound method - return create_bound_method(self, obj) - - def __reduce__(self): - """ - Reduce the instance for pickling. This will serialize - the original function as well the compilation options and - compiled signatures, but not the compiled code itself. - """ - if self._can_compile: - sigs = [] - else: - sigs = [cr.signature for cr in self.overloads.values()] - globs = self._compiler.get_globals_for_reduction() - return (serialize._rebuild_reduction, - (self.__class__, str(self._uuid), - serialize._reduce_function(self.py_func, globs), - self.locals, self.targetoptions, self._impl_kind, - self._can_compile, sigs)) - - @classmethod - def _rebuild(cls, uuid, func_reduced, locals, targetoptions, impl_kind, - can_compile, sigs): - """ - Rebuild an Dispatcher instance after it was __reduce__'d. 
- """ - try: - return cls._memo[uuid] - except KeyError: - pass - py_func = serialize._rebuild_function(*func_reduced) - self = cls(py_func, locals, targetoptions, impl_kind) - # Make sure this deserialization will be merged with subsequent ones - self._set_uuid(uuid) - for sig in sigs: - self.compile(sig) - self._can_compile = can_compile - return self - - @property - def _uuid(self): - """ - An instance-specific UUID, to avoid multiple deserializations of - a given instance. - - Note this is lazily-generated, for performance reasons. - """ - u = self.__uuid - if u is None: - u = str(uuid.uuid1()) - self._set_uuid(u) - return u - - def _set_uuid(self, u): - assert self.__uuid is None - self.__uuid = u - self._memo[u] = self - - def compile(self, sig): - if not self._can_compile: - raise RuntimeError("compilation disabled") - # Use cache and compiler in a critical section - with compiler.lock_compiler: - # Use counter to track recursion compilation depth - with self._compiling_counter: - args, return_type = sigutils.normalize_signature(sig) - # Don't recompile if signature already exists - existing = self.overloads.get(tuple(args)) - if existing is not None: - return existing.entry_point - - # Try to load from disk cache - cres = self._cache.load_overload(sig, self.targetctx) - if cres is not None: - self._cache_hits[sig] += 1 - # XXX fold this in add_overload()? (also see compiler.py) - if not cres.objectmode and not cres.interpmode: - self.targetctx.insert_user_function(cres.entry_point, - cres.fndesc, [cres.library]) - self.add_overload(cres) - return cres.entry_point - - self._cache_misses[sig] += 1 - cres = self._compiler.compile(args, return_type) - self.add_overload(cres) - self._cache.save_overload(sig, cres) - return cres.entry_point - - def recompile(self): - """ - Recompile all signatures afresh. - """ - sigs = list(self.overloads) - old_can_compile = self._can_compile - # Ensure the old overloads are disposed of, including compiled functions. 
- self._make_finalizer()() - self._reset_overloads() - self._cache.flush() - self._can_compile = True - try: - for sig in sigs: - self.compile(sig) - finally: - self._can_compile = old_can_compile - - @property - def stats(self): - return _CompileStats( - cache_path=self._cache.cache_path, - cache_hits=self._cache_hits, - cache_misses=self._cache_misses, - ) - - -class LiftedLoop(_DispatcherBase): - """ - Implementation of the hidden dispatcher objects used for lifted loop - (a lifted loop is really compiled as a separate function). - """ - _fold_args = False - - def __init__(self, func_ir, typingctx, targetctx, flags, locals): - self.func_ir = func_ir - self.lifted_from = None - - self.typingctx = typingctx - self.targetctx = targetctx - self.flags = flags - self.locals = locals - - _DispatcherBase.__init__(self, self.func_ir.arg_count, - self.func_ir.func_id.func, - self.func_ir.func_id.pysig, - can_fallback=True) - - def get_source_location(self): - """Return the starting line number of the loop. - """ - return self.func_ir.loc.line - - def compile(self, sig): - # Use cache and compiler in a critical section - with compiler.lock_compiler: - # Use counter to track recursion compilation depth - with self._compiling_counter: - # XXX this is mostly duplicated from Dispatcher. - flags = self.flags - args, return_type = sigutils.normalize_signature(sig) - - # Don't recompile if signature already exists - # (e.g. 
if another thread compiled it before we got the lock) - existing = self.overloads.get(tuple(args)) - if existing is not None: - return existing.entry_point - - assert not flags.enable_looplift, "Enable looplift flags is on" - # Clone IR to avoid mutation in rewrite pass - cloned_func_ir = self.func_ir.copy() - cres = compiler.compile_ir(typingctx=self.typingctx, - targetctx=self.targetctx, - func_ir=cloned_func_ir, - args=args, return_type=return_type, - flags=flags, locals=self.locals, - lifted=(), - lifted_from=self.lifted_from) - - # Check typing error if object mode is used - if cres.typing_error is not None and not flags.enable_pyobject: - raise cres.typing_error - - self.add_overload(cres) - return cres.entry_point - - -# Initialize typeof machinery -_dispatcher.typeof_init( - OmittedArg, - dict((str(t), t._code) for t in types.number_domain)) diff --git a/numba/numba/dummyarray.py b/numba/numba/dummyarray.py deleted file mode 100644 index a4ab89b43..000000000 --- a/numba/numba/dummyarray.py +++ /dev/null @@ -1,402 +0,0 @@ -from __future__ import print_function, division - -from collections import namedtuple -import itertools -import functools -import operator -import ctypes - -import numpy as np - -from . 
import _helperlib - - -Extent = namedtuple("Extent", ["begin", "end"]) - - -attempt_nocopy_reshape = ctypes.CFUNCTYPE( - ctypes.c_int, - ctypes.c_long, # nd - np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # dims - np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # strides - ctypes.c_long, # newnd - np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # newdims - np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # newstrides - ctypes.c_long, # itemsize - ctypes.c_int, # is_f_order -)(_helperlib.c_helpers['attempt_nocopy_reshape']) - -class Dim(object): - """A single dimension of the array - - Attributes - ---------- - start: - start offset - stop: - stop offset - size: - number of items - stride: - item stride - """ - __slots__ = 'start', 'stop', 'size', 'stride', 'single' - - def __init__(self, start, stop, size, stride, single): - if stop < start: - raise ValueError("end offset is before start offset") - self.start = start - self.stop = stop - self.size = size - self.stride = stride - self.single = single - assert not single or size == 1 - - def __getitem__(self, item): - if isinstance(item, slice): - start, stop, step = item.start, item.stop, item.step - single = False - else: - single = True - start = item - stop = start + 1 - step = None - - # Default values - # Start value is default to zero - if start is None: - start = 0 - # Stop value is default to self.size - if stop is None: - stop = self.size - # Step is default to 1 - if step is None: - step = 1 - - stride = step * self.stride - - # Compute start in bytes - if start >= 0: - start = self.start + start * self.stride - else: - start = self.stop + start * self.stride - start = max(start, self.start) - - # Compute stop in bytes - if stop >= 0: - stop = self.start + stop * self.stride - else: - stop = self.stop + stop * self.stride - stop = min(stop, self.stop) - - # Clip stop - if (stop - start) > self.size * self.stride: - stop = start + self.size * stride - - if stop < start: - start = stop - 
size = 0 - elif stride == 0: - size = 1 if single else ((stop - start) // step) - else: - size = (stop - start + (stride - 1)) // stride - - return Dim(start, stop, size, stride, single) - - def get_offset(self, idx): - return self.start + idx * self.stride - - def __repr__(self): - strfmt = "Dim(start=%s, stop=%s, size=%s, stride=%s)" - return strfmt % (self.start, self.stop, self.size, self.stride) - - def normalize(self, base): - return Dim(start=self.start - base, stop=self.stop - base, - size=self.size, stride=self.stride, single=self.single) - - def copy(self, start=None, stop=None, size=None, stride=None, single=None): - if start is None: - start = self.start - if stop is None: - stop = self.stop - if size is None: - size = self.size - if stride is None: - stride = self.stride - if single is None: - single = self.single - return Dim(start, stop, size, stride, single) - - def is_contiguous(self, itemsize): - return self.stride == itemsize - - -def compute_index(indices, dims): - return sum(d.get_offset(i) for i, d in zip(indices, dims)) - - -class Element(object): - is_array = False - - def __init__(self, extent): - self.extent = extent - - def iter_contiguous_extent(self): - yield self.extent - - -class Array(object): - """A dummy numpy array-like object. Consider it an array without the - actual data, but offset from the base data pointer. 
- - Attributes - ---------- - dims: tuple of Dim - describing each dimension of the array - - ndim: int - number of dimension - - shape: tuple of int - size of each dimension - - strides: tuple of int - stride of each dimension - - itemsize: int - itemsize - - extent: (start, end) - start and end offset containing the memory region - """ - is_array = True - - @classmethod - def from_desc(cls, offset, shape, strides, itemsize): - dims = [] - for ashape, astride in zip(shape, strides): - dim = Dim(offset, offset + ashape * astride, ashape, astride, - single=False) - dims.append(dim) - offset = 0 # offset only applies to first dimension - return cls(dims, itemsize) - - def __init__(self, dims, itemsize): - self.dims = tuple(dims) - self.ndim = len(self.dims) - self.shape = tuple(dim.size for dim in self.dims) - self.strides = tuple(dim.stride for dim in self.dims) - self.itemsize = itemsize - self.size = np.prod(self.shape) - self.extent = self._compute_extent() - self.flags = self._compute_layout() - - def _compute_layout(self): - flags = {} - - if not self.dims: - # Records have no dims, and we can treat them as contiguous - flags['F_CONTIGUOUS'] = True - flags['C_CONTIGUOUS'] = True - return flags - - leftmost = self.dims[0].is_contiguous(self.itemsize) - rightmost = self.dims[-1].is_contiguous(self.itemsize) - - def is_contig(traverse): - last = next(traverse) - for dim in traverse: - if last.size != 0 and last.size * last.stride != dim.stride: - return False - last = dim - return True - - flags['F_CONTIGUOUS'] = leftmost and is_contig(iter(self.dims)) - flags['C_CONTIGUOUS'] = rightmost and is_contig(reversed(self.dims)) - return flags - - def _compute_extent(self): - firstidx = [0] * self.ndim - lastidx = [s - 1 for s in self.shape] - start = compute_index(firstidx, self.dims) - stop = compute_index(lastidx, self.dims) + self.itemsize - stop = max(stop, start) # ensure postive extent - return Extent(start, stop) - - def __repr__(self): - return '' % (self.dims, 
self.itemsize) - - def __getitem__(self, item): - if not isinstance(item, tuple): - item = [item] - else: - item = list(item) - - nitem = len(item) - ndim = len(self.dims) - if nitem > ndim: - raise IndexError("%d extra indices given" % (nitem - ndim,)) - - # Add empty slices for missing indices - while len(item) < ndim: - item.append(slice(None, None)) - - dims = [dim.__getitem__(it) for dim, it in zip(self.dims, item)] - newshape = [d.size for d in dims if not d.single] - - arr = Array(dims, self.itemsize) - if newshape: - return arr.reshape(*newshape)[0] - else: - return Element(arr.extent) - - @property - def is_c_contig(self): - return self.flags['C_CONTIGUOUS'] - - @property - def is_f_contig(self): - return self.flags['F_CONTIGUOUS'] - - def iter_contiguous_extent(self): - """ Generates extents - """ - if self.is_c_contig or self.is_f_contig: - yield self.extent - else: - if self.dims[0].stride < self.dims[-1].stride: - innerdim = self.dims[0] - outerdims = self.dims[1:] - outershape = self.shape[1:] - else: - innerdim = self.dims[-1] - outerdims = self.dims[:-1] - outershape = self.shape[:-1] - - if innerdim.is_contiguous(self.itemsize): - oslen = [range(s) for s in outershape] - for indices in itertools.product(*oslen): - base = compute_index(indices, outerdims) - yield base + innerdim.start, base + innerdim.stop - else: - oslen = [range(s) for s in self.shape] - for indices in itertools.product(*oslen): - offset = compute_index(indices, self.dims) - yield offset, offset + self.itemsize - - def reshape(self, *newdims, **kws): - oldnd = self.ndim - newnd = len(newdims) - - if newdims == self.shape: - return self, None - - order = kws.pop('order', 'C') - if kws: - raise TypeError('unknown keyword arguments %s' % kws.keys()) - if order not in 'CFA': - raise ValueError('order not C|F|A') - - newsize = np.prod(newdims) - - if order == 'A': - order = 'F' if self.is_f_contig else 'C' - - if newsize != self.size: - raise ValueError("reshape changes the size of the 
array") - - if self.is_c_contig or self.is_f_contig: - if order == 'C': - newstrides = list(iter_strides_c_contig(self, newdims)) - elif order == 'F': - newstrides = list(iter_strides_f_contig(self, newdims)) - else: - raise AssertionError("unreachable") - else: - newstrides = np.empty(newnd, np.ctypeslib.c_intp) - - # need to keep these around in variables, not temporaries, so they - # don't get GC'ed before we call into the C code - olddims = np.array(self.shape, dtype=np.ctypeslib.c_intp) - oldstrides = np.array(self.strides, dtype=np.ctypeslib.c_intp) - newdims = np.array(newdims, dtype=np.ctypeslib.c_intp) - - if not attempt_nocopy_reshape( - oldnd, - olddims, - oldstrides, - newnd, - newdims, - newstrides, - self.itemsize, - order == 'F', - ): - raise NotImplementedError('reshape would require copy') - - ret = self.from_desc(self.extent.begin, shape=newdims, - strides=newstrides, itemsize=self.itemsize) - - return ret, list(self.iter_contiguous_extent()) - - def ravel(self, order='C'): - if order not in 'CFA': - raise ValueError('order not C|F|A') - - if self.ndim <= 1: - return self - - elif (order == 'C' and self.is_c_contig or - order == 'F' and self.is_f_contig): - newshape = (self.size,) - newstrides = (self.itemsize,) - arr = self.from_desc(self.extent.begin, newshape, newstrides, - self.itemsize) - return arr, list(self.iter_contiguous_extent()) - - else: - raise NotImplementedError("ravel on non-contiguous array") - - -def iter_strides_f_contig(arr, shape=None): - """yields the f-contigous strides - """ - shape = arr.shape if shape is None else shape - itemsize = arr.itemsize - yield itemsize - sum = 1 - for s in shape[:-1]: - sum *= s - yield sum * itemsize - - -def iter_strides_c_contig(arr, shape=None): - """yields the c-contigous strides - """ - shape = arr.shape if shape is None else shape - itemsize = arr.itemsize - - def gen(): - yield itemsize - sum = 1 - for s in reversed(shape[1:]): - sum *= s - yield sum * itemsize - - for i in 
reversed(list(gen())): - yield i - - -def is_element_indexing(item, ndim): - if isinstance(item, slice): - return False - - elif isinstance(item, tuple): - if len(item) == ndim: - if not any(isinstance(it, slice) for it in item): - return True - - else: - return True - - return False - diff --git a/numba/numba/errors.py b/numba/numba/errors.py deleted file mode 100644 index 4036086fe..000000000 --- a/numba/numba/errors.py +++ /dev/null @@ -1,608 +0,0 @@ -""" -Numba-specific errors and warnings. -""" - -from __future__ import print_function, division, absolute_import - -import abc -import contextlib -import os -import sys -import warnings -import numba -import numpy as np -from collections import defaultdict -from numba import six -from functools import wraps -from abc import abstractmethod - -# Filled at the end -__all__ = [] - - -class NumbaWarning(Warning): - """ - Base category for all Numba compiler warnings. - """ - - -class PerformanceWarning(NumbaWarning): - """ - Warning category for when an operation might not be - as fast as expected. 
- """ - - -# These are needed in the color formatting of errors setup - -@six.add_metaclass(abc.ABCMeta) -class _ColorScheme(object): - - @abstractmethod - def code(self, msg): - pass - - @abstractmethod - def errmsg(self, msg): - pass - - @abstractmethod - def filename(self, msg): - pass - - @abstractmethod - def indicate(self, msg): - pass - - @abstractmethod - def highlight(self, msg): - pass - - -class _DummyColorScheme(_ColorScheme): - - def __init__(self, theme=None): - pass - - def code(self, msg): - pass - - def errmsg(self, msg): - pass - - def filename(self, msg): - pass - - def indicate(self, msg): - pass - - def highlight(self, msg): - pass - - -# holds reference to the instance of the terminal color scheme in use -_termcolor_inst = None - -try: - import colorama - - # If the colorama version is < 0.3.9 it can break stdout/stderr in some - # situations, as a result if this condition is met colorama is disabled and - # the user is warned. - if tuple([int(x) for x in colorama.__version__.split('.')]) < (0, 3, 9): - msg = ("Insufficiently recent colorama version found. " - "Numba requires colorama >= 0.3.9") - # warn the user - warnings.warn(msg) - # trip the exception to disable color errors - raise ImportError - - # If Numba is running in testsuite mode then do not use error message - # coloring so CI system output is consistently readable without having - # to read between shell escape characters. 
- if os.environ.get('NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING', None): - raise ImportError # just to trigger the exception handler below - -except ImportError: - - class NOPColorScheme(_DummyColorScheme): - def __init__(self, theme=None): - if theme is not None: - raise ValueError("specifying a theme has no effect") - _DummyColorScheme.__init__(self, theme=theme) - - def code(self, msg): - return msg - - def errmsg(self, msg): - return msg - - def filename(self, msg): - return msg - - def indicate(self, msg): - return msg - - def highlight(self, msg): - return msg - - def termcolor(): - global _termcolor_inst - if _termcolor_inst is None: - _termcolor_inst = NOPColorScheme() - return _termcolor_inst - -else: - - from colorama import init, reinit, deinit, Fore, Back, Style - from contextlib import contextmanager - - class ColorShell(object): - _has_initialized = False - - def __init__(self): - init() - self._has_initialized = True - - def __enter__(self): - if self._has_initialized: - reinit() - - def __exit__(self, *exc_detail): - Style.RESET_ALL - deinit() - - class reset_terminal(object): - def __init__(self): - self._buf = bytearray(b'') - - def __enter__(self): - return self._buf - - def __exit__(self, *exc_detail): - self._buf += bytearray(Style.RESET_ALL.encode('utf-8')) - - # define some default themes, if more are added, update the envvars docs! 
- themes = {} - - # No color added, just bold weighting - themes['no_color'] = {'code': None, - 'errmsg': None, - 'filename': None, - 'indicate': None, - 'highlight': None, } - - # suitable for terminals with a dark background - themes['dark_bg'] = {'code': Fore.BLUE, - 'errmsg': Fore.YELLOW, - 'filename': Fore.WHITE, - 'indicate': Fore.GREEN, - 'highlight': Fore.RED, } - - # suitable for terminals with a light background - themes['light_bg'] = {'code': Fore.BLUE, - 'errmsg': Fore.BLACK, - 'filename': Fore.MAGENTA, - 'indicate': Fore.BLACK, - 'highlight': Fore.RED, } - - # suitable for terminals with a blue background - themes['blue_bg'] = {'code': Fore.WHITE, - 'errmsg': Fore.YELLOW, - 'filename': Fore.MAGENTA, - 'indicate': Fore.CYAN, - 'highlight': Fore.RED, } - - # suitable for use in jupyter notebooks - themes['jupyter_nb'] = {'code': Fore.BLACK, - 'errmsg': Fore.BLACK, - 'filename': Fore.GREEN, - 'indicate': Fore.CYAN, - 'highlight': Fore.RED, } - - default_theme = themes['no_color'] - - class HighlightColorScheme(_DummyColorScheme): - def __init__(self, theme=default_theme): - self._code = theme['code'] - self._errmsg = theme['errmsg'] - self._filename = theme['filename'] - self._indicate = theme['indicate'] - self._highlight = theme['highlight'] - _DummyColorScheme.__init__(self, theme=theme) - - def _markup(self, msg, color=None, style=Style.BRIGHT): - features = '' - if color: - features += color - if style: - features += style - with ColorShell(): - with reset_terminal() as mu: - mu += features.encode('utf-8') - mu += (msg).encode('utf-8') - return mu.decode('utf-8') - - def code(self, msg): - return self._markup(msg, self._code) - - def errmsg(self, msg): - return self._markup(msg, self._errmsg) - - def filename(self, msg): - return self._markup(msg, self._filename) - - def indicate(self, msg): - return self._markup(msg, self._indicate) - - def highlight(self, msg): - return self._markup(msg, self._highlight) - - def termcolor(): - global 
_termcolor_inst - if _termcolor_inst is None: - scheme = themes[numba.config.COLOR_SCHEME] - _termcolor_inst = HighlightColorScheme(scheme) - return _termcolor_inst - - -unsupported_error_info = """ -Unsupported functionality was found in the code Numba was trying to compile. - -If this functionality is important to you please file a feature request at: -https://github.com/numba/numba/issues/new -""" - -interpreter_error_info = """ -Unsupported Python functionality was found in the code Numba was trying to -compile. This error could be due to invalid code, does the code work -without Numba? (To temporarily disable Numba JIT, set the `NUMBA_DISABLE_JIT` -environment variable to non-zero, and then rerun the code). - -If the code is valid and the unsupported functionality is important to you -please file a feature request at: https://github.com/numba/numba/issues/new - -To see Python/NumPy features supported by the latest release of Numba visit: -http://numba.pydata.org/numba-doc/dev/reference/pysupported.html -and -http://numba.pydata.org/numba-doc/dev/reference/numpysupported.html -""" - -constant_inference_info = """ -Numba could not make a constant out of something that it decided should be -a constant. This could well be a current limitation in Numba's internals, -please either raise a bug report along with a minimal reproducer at: -https://github.com/numba/numba/issues/new -""" - -typing_error_info = """ -This is not usually a problem with Numba itself but instead often caused by -the use of unsupported features or an issue in resolving types. 
- -To see Python/NumPy features supported by the latest release of Numba visit: -http://numba.pydata.org/numba-doc/dev/reference/pysupported.html -and -http://numba.pydata.org/numba-doc/dev/reference/numpysupported.html - -For more information about typing errors and how to debug them visit: -http://numba.pydata.org/numba-doc/latest/user/troubleshoot.html#my-code-doesn-t-compile - -If you think your code should work with Numba, please report the error message -and traceback, along with a minimal reproducer at: -https://github.com/numba/numba/issues/new -""" - -reportable_issue_info = """ -------------------------------------------------------------------------------- -This should not have happened, a problem has occurred in Numba's internals. - -Please report the error message and traceback, along with a minimal reproducer -at: https://github.com/numba/numba/issues/new - -If more help is needed please feel free to speak to the Numba core developers -directly at: https://gitter.im/numba/numba - -Thanks in advance for your help in improving Numba! -""" - -error_extras = dict() -error_extras['unsupported_error'] = unsupported_error_info -error_extras['typing'] = typing_error_info -error_extras['reportable'] = reportable_issue_info -error_extras['interpreter'] = interpreter_error_info -error_extras['constant_inference'] = constant_inference_info - - -def deprecated(arg): - """Define a deprecation decorator. - An optional string should refer to the new API to be used instead. - - Example: - @deprecated - def old_func(): ... - - @deprecated('new_func') - def old_func(): ...""" - - subst = arg if isinstance(arg, str) else None - - def decorator(func): - def wrapper(*args, **kwargs): - msg = "Call to deprecated function \"{}\"." - if subst: - msg += "\n Use \"{}\" instead." 
- warnings.warn(msg.format(func.__name__, subst), - category=DeprecationWarning, stacklevel=2) - return func(*args, **kwargs) - - return wraps(func)(wrapper) - - if not subst: - return decorator(arg) - else: - return decorator - - -class WarningsFixer(object): - """ - An object "fixing" warnings of a given category caught during - certain phases. The warnings can have their filename and lineno fixed, - and they are deduplicated as well. - """ - - def __init__(self, category): - self._category = category - # {(filename, lineno, category) -> messages} - self._warnings = defaultdict(set) - - @contextlib.contextmanager - def catch_warnings(self, filename=None, lineno=None): - """ - Store warnings and optionally fix their filename and lineno. - """ - with warnings.catch_warnings(record=True) as wlist: - warnings.simplefilter('always', self._category) - yield - - for w in wlist: - msg = str(w.message) - if issubclass(w.category, self._category): - # Store warnings of this category for deduplication - filename = filename or w.filename - lineno = lineno or w.lineno - self._warnings[filename, lineno, w.category].add(msg) - else: - # Simply emit other warnings again - warnings.warn_explicit(msg, w.category, - w.filename, w.lineno) - - def flush(self): - """ - Emit all stored warnings. 
- """ - for (filename, lineno, category), messages in sorted(self._warnings.items()): - for msg in sorted(messages): - warnings.warn_explicit(msg, category, filename, lineno) - self._warnings.clear() - - -class NumbaError(Exception): - - def __init__(self, msg, loc=None, highlighting=True): - self.msg = msg - self.loc = loc - if highlighting: - highlight = termcolor().errmsg - else: - def highlight(x): return x - if loc: - super(NumbaError, self).__init__( - highlight("%s\n%s\n" % (msg, loc.strformat()))) - else: - super(NumbaError, self).__init__(highlight("%s" % (msg,))) - - @property - def contexts(self): - try: - return self._contexts - except AttributeError: - self._contexts = lst = [] - return lst - - def add_context(self, msg): - """ - Add contextual info. The exception message is expanded with the new - contextual information. - """ - self.contexts.append(msg) - f = termcolor().errmsg('{0}\n') + termcolor().filename( - '[{1}] During: {2}') - newmsg = f.format(self, len(self.contexts), msg) - self.args = (newmsg,) - return self - - def patch_message(self, new_message): - """ - Change the error message to the given new message. - """ - self.args = (new_message,) + self.args[1:] - - -class UnsupportedError(NumbaError): - """ - Numba does not have an implementation for this functionality. - """ - pass - - -class IRError(NumbaError): - """ - An error occurred during Numba IR generation. - """ - pass - - -class RedefinedError(IRError): - """ - An error occurred during interpretation of IR due to variable redefinition. - """ - pass - - -class NotDefinedError(IRError): - """ - An undefined variable is encountered during interpretation of IR. - """ - def __init__(self, name, loc=None): - self.name = name - msg = "Variable '%s' is not defined." % name - super(NotDefinedError, self).__init__(msg, loc=loc) - - -class VerificationError(IRError): - """ - An error occurred during IR verification. 
Once Numba's internal - representation (IR) is constructed it is then verified to ensure that - terminators are both present and in the correct places within the IR. If - it is the case that this condition is not met, a VerificationError is - raised. - """ - pass - - -class MacroError(NumbaError): - """ - An error occurred during macro expansion. - """ - pass - - -class DeprecationError(NumbaError): - """ - Functionality is deprecated. - """ - pass - - -class LoweringError(NumbaError): - """ - An error occurred during lowering. - """ - def __init__(self, msg, loc): - self.msg = msg - self.loc = loc - super(LoweringError, self).__init__("%s\n%s" % (msg, loc.strformat())) - - -class ForbiddenConstruct(LoweringError): - """ - A forbidden Python construct was encountered (e.g. use of locals()). - """ - pass - - -class TypingError(NumbaError): - """ - A type inference failure. - """ - pass - - -class UntypedAttributeError(TypingError): - def __init__(self, value, attr, loc=None): - module = getattr(value, 'pymod', None) - if module is not None and module == np: - # unsupported numpy feature. - msg = ("Use of unsupported NumPy function 'numpy.%s' " - "or unsupported use of the function.") % attr - else: - msg = "Unknown attribute '{attr}' of type {type}" - msg = msg.format(type=value, attr=attr) - super(UntypedAttributeError, self).__init__(msg, loc=loc) - - -class ByteCodeSupportError(NumbaError): - """ - Failure to extract the bytecode of the user's function. - """ - def __init__(self, msg, loc=None): - super(ByteCodeSupportError, self).__init__(msg, loc=loc) - - -class CompilerError(NumbaError): - """ - Some high-level error in the compiler. - """ - pass - - -class ConstantInferenceError(NumbaError): - """ - Failure during constant inference. 
- """ - def __init__(self, value, loc=None): - self.value = value - msg = "Cannot make a constant from: %s" % value - super(ConstantInferenceError, self).__init__(msg, loc=loc) - - -class InternalError(NumbaError): - """ - For wrapping internal error occured within the compiler - """ - - def __init__(self, exception): - super(InternalError, self).__init__(str(exception)) - self.old_exception = exception - - -class RequireConstValue(TypingError): - """For signaling a function typing require constant value for some of - its arguments. - """ - pass - - -def _format_msg(fmt, args, kwargs): - return fmt.format(*args, **kwargs) - - -import os.path -_numba_path = os.path.dirname(__file__) -loc_info = {} - - -@contextlib.contextmanager -def new_error_context(fmt_, *args, **kwargs): - """ - A contextmanager that prepend contextual information to any exception - raised within. If the exception type is not an instance of NumbaError, - it will be wrapped into a InternalError. The exception class can be - changed by providing a "errcls_" keyword argument with the exception - constructor. - - The first argument is a message that describes the context. It can be a - format string. If there are additional arguments, it will be used as - ``fmt_.format(*args, **kwargs)`` to produce the final message string. 
- """ - errcls = kwargs.pop('errcls_', InternalError) - - loc = kwargs.get('loc', None) - if loc is not None and not loc.filename.startswith(_numba_path): - loc_info.update(kwargs) - - try: - yield - except NumbaError as e: - e.add_context(_format_msg(fmt_, args, kwargs)) - raise - except Exception as e: - newerr = errcls(e).add_context(_format_msg(fmt_, args, kwargs)) - from numba import config - tb = sys.exc_info()[2] if config.FULL_TRACEBACKS else None - six.reraise(type(newerr), newerr, tb) - - -__all__ += [name for (name, value) in globals().items() - if not name.startswith('_') and isinstance(value, type) - and issubclass(value, (Exception, Warning))] diff --git a/numba/numba/extending.py b/numba/numba/extending.py deleted file mode 100644 index 623dc8138..000000000 --- a/numba/numba/extending.py +++ /dev/null @@ -1,371 +0,0 @@ - -import inspect -import uuid -import weakref - -from numba import types - -# Exported symbols -from .typing.typeof import typeof_impl -from .typing.templates import infer, infer_getattr -from .targets.imputils import ( - lower_builtin, lower_getattr, lower_getattr_generic, - lower_setattr, lower_setattr_generic, lower_cast) -from .datamodel import models, register_default as register_model -from .pythonapi import box, unbox, reflect, NativeValue -from ._helperlib import _import_cython_function - -def type_callable(func): - """ - Decorate a function as implementing typing for the callable *func*. 
- *func* can be a callable object (probably a global) or a string - denoting a built-in operation (such 'getitem' or '__array_wrap__') - """ - from .typing.templates import CallableTemplate, infer, infer_global - if not callable(func) and not isinstance(func, str): - raise TypeError("`func` should be a function or string") - try: - func_name = func.__name__ - except AttributeError: - func_name = str(func) - - def decorate(typing_func): - def generic(self): - return typing_func(self.context) - - name = "%s_CallableTemplate" % (func_name,) - bases = (CallableTemplate,) - class_dict = dict(key=func, generic=generic) - template = type(name, bases, class_dict) - infer(template) - if hasattr(func, '__module__'): - infer_global(func, types.Function(template)) - - return decorate - - -# By default, an *overload* does not have a cpython wrapper because it is not -# callable from python. -_overload_default_jit_options = {'no_cpython_wrapper': True} - - -def overload(func, jit_options={}): - """ - A decorator marking the decorated function as typing and implementing - *func* in nopython mode. - - The decorated function will have the same formal parameters as *func* - and be passed the Numba types of those parameters. It should return - a function implementing *func* for the given types. - - Here is an example implementing len() for tuple types:: - - @overload(len) - def tuple_len(seq): - if isinstance(seq, types.BaseTuple): - n = len(seq) - def len_impl(seq): - return n - return len_impl - - Compiler options can be passed as an dictionary using the **jit_options** - argument. 
- """ - from .typing.templates import make_overload_template, infer_global - - # set default options - opts = _overload_default_jit_options.copy() - opts.update(jit_options) # let user options override - - def decorate(overload_func): - template = make_overload_template(func, overload_func, opts) - infer(template) - if hasattr(func, '__module__'): - infer_global(func, types.Function(template)) - return overload_func - - return decorate - - -def register_jitable(*args, **kwargs): - """ - Register a regular python function that can be executed by the python - interpreter and can be compiled into a nopython function when referenced - by other jit'ed functions. Can be used as:: - - @register_jitable - def foo(x, y): - return x + y - - Or, with compiler options:: - - @register_jitable(_nrt=False) # disable runtime allocation - def foo(x, y): - return x + y - - """ - def wrap(fn): - # It is just a wrapper for @overload - @overload(fn, jit_options=kwargs) - def ov_wrap(*args, **kwargs): - return fn - return fn - - if kwargs: - return wrap - else: - return wrap(*args) - - -def overload_attribute(typ, attr): - """ - A decorator marking the decorated function as typing and implementing - attribute *attr* for the given Numba type in nopython mode. - - Here is an example implementing .nbytes for array types:: - - @overload_attribute(types.Array, 'nbytes') - def array_nbytes(arr): - def get(arr): - return arr.size * arr.itemsize - return get - """ - # TODO implement setters - from .typing.templates import make_overload_attribute_template - - def decorate(overload_func): - template = make_overload_attribute_template(typ, attr, overload_func) - infer_getattr(template) - return overload_func - - return decorate - - -def overload_method(typ, attr): - """ - A decorator marking the decorated function as typing and implementing - attribute *attr* for the given Numba type in nopython mode. 
- - Here is an example implementing .take() for array types:: - - @overload_method(types.Array, 'take') - def array_take(arr, indices): - if isinstance(indices, types.Array): - def take_impl(arr, indices): - n = indices.shape[0] - res = np.empty(n, arr.dtype) - for i in range(n): - res[i] = arr[indices[i]] - return res - return take_impl - """ - from .typing.templates import make_overload_method_template - - def decorate(overload_func): - template = make_overload_method_template(typ, attr, overload_func) - infer_getattr(template) - return overload_func - - return decorate - - -def make_attribute_wrapper(typeclass, struct_attr, python_attr): - """ - Make an automatic attribute wrapper exposing member named *struct_attr* - as a read-only attribute named *python_attr*. - The given *typeclass*'s model must be a StructModel subclass. - """ - from .typing.templates import AttributeTemplate - from .datamodel import default_manager - from .datamodel.models import StructModel - from .targets.imputils import impl_ret_borrowed - from . import cgutils - - if not isinstance(typeclass, type) or not issubclass(typeclass, types.Type): - raise TypeError("typeclass should be a Type subclass, got %s" - % (typeclass,)) - - def get_attr_fe_type(typ): - """ - Get the Numba type of member *struct_attr* in *typ*. 
- """ - model = default_manager.lookup(typ) - if not isinstance(model, StructModel): - raise TypeError("make_struct_attribute_wrapper() needs a type " - "with a StructModel, but got %s" % (model,)) - return model.get_member_fe_type(struct_attr) - - @infer_getattr - class StructAttribute(AttributeTemplate): - key = typeclass - - def generic_resolve(self, typ, attr): - if attr == python_attr: - return get_attr_fe_type(typ) - - @lower_getattr(typeclass, python_attr) - def struct_getattr_impl(context, builder, typ, val): - val = cgutils.create_struct_proxy(typ)(context, builder, value=val) - attrty = get_attr_fe_type(typ) - attrval = getattr(val, struct_attr) - return impl_ret_borrowed(context, builder, attrty, attrval) - - -class _Intrinsic(object): - """ - Dummy callable for intrinsic - """ - _memo = weakref.WeakValueDictionary() - __uuid = None - - def __init__(self, name, defn, support_literals=False): - self._name = name - self._defn = defn - self._support_literals = support_literals - - @property - def _uuid(self): - """ - An instance-specific UUID, to avoid multiple deserializations of - a given instance. - - Note this is lazily-generated, for performance reasons. - """ - u = self.__uuid - if u is None: - u = str(uuid.uuid1()) - self._set_uuid(u) - return u - - def _set_uuid(self, u): - assert self.__uuid is None - self.__uuid = u - self._memo[u] = self - - def _register(self): - from .typing.templates import make_intrinsic_template, infer_global - - template = make_intrinsic_template(self, self._defn, self._name) - template.support_literals = self._support_literals - infer(template) - infer_global(self, types.Function(template)) - - def __call__(self, *args, **kwargs): - """ - This is only defined to pretend to be a callable from CPython. 
- """ - msg = '{0} is not usable in pure-python'.format(self) - raise NotImplementedError(msg) - - def __repr__(self): - return "".format(self._name) - - def __reduce__(self): - from numba import serialize - - def reduce_func(fn): - gs = serialize._get_function_globals_for_reduction(fn) - return serialize._reduce_function(fn, gs) - - return (serialize._rebuild_reduction, - (self.__class__, str(self._uuid), self._name, - reduce_func(self._defn))) - - @classmethod - def _rebuild(cls, uuid, name, defn_reduced): - from numba import serialize - - try: - return cls._memo[uuid] - except KeyError: - defn = serialize._rebuild_function(*defn_reduced) - - llc = cls(name=name, defn=defn) - llc._register() - llc._set_uuid(uuid) - return llc - - -def intrinsic(*args, **kwargs): - """ - A decorator marking the decorated function as typing and implementing - *func* in nopython mode using the llvmlite IRBuilder API. This is an escape - hatch for expert users to build custom LLVM IR that will be inlined to - the caller. - - The first argument to *func* is the typing context. The rest of the - arguments corresponds to the type of arguments of the decorated function. - These arguments are also used as the formal argument of the decorated - function. If *func* has the signature ``foo(typing_context, arg0, arg1)``, - the decorated function will have the signature ``foo(arg0, arg1)``. - - The return values of *func* should be a 2-tuple of expected type signature, - and a code-generation function that will passed to ``lower_builtin``. - For unsupported operation, return None. 
- - Here is an example implementing a ``cast_int_to_byte_ptr`` that cast - any integer to a byte pointer:: - - @intrinsic - def cast_int_to_byte_ptr(typingctx, src): - # check for accepted types - if isinstance(src, types.Integer): - # create the expected type signature - result_type = types.CPointer(types.uint8) - sig = result_type(types.uintp) - # defines the custom code generation - def codegen(context, builder, signature, args): - # llvm IRBuilder code here - [src] = args - rtype = signature.return_type - llrtype = context.get_value_type(rtype) - return builder.inttoptr(src, llrtype) - return sig, codegen - - Optionally, keyword arguments can be provided to configure the intrinsic; e.g. - - @intrinsic(support_literals=True) - def example(typingctx, ...): - ... - - Supported keyword arguments are: - - - support_literals : bool - Indicates to the type inferencer that the typing logic accepts and can specialize to - `Const` type. - """ - # Make inner function for the actual work - def _intrinsic(func): - name = getattr(func, '__name__', str(func)) - llc = _Intrinsic(name, func, **kwargs) - llc._register() - return llc - - if not kwargs: - # No option is given - return _intrinsic(*args) - else: - # options are given, create a new callable to recv the - # definition function - def wrapper(func): - return _intrinsic(func) - return wrapper - - -def get_cython_function_address(module_name, function_name): - """ - Get the address of a Cython function. 
- - Args - ---- - module_name: - Name of the Cython module - function_name: - Name of the Cython function - - Returns - ------- - A Python int containing the address of the function - - """ - return _import_cython_function(module_name, function_name) diff --git a/numba/numba/findlib.py b/numba/numba/findlib.py deleted file mode 100644 index 6189fd3d1..000000000 --- a/numba/numba/findlib.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import print_function, absolute_import -import sys -import os -import re - - -def get_lib_dir(): - """ - Anaconda specific - """ - dirname = 'DLLs' if sys.platform == 'win32' else 'lib' - libdir = os.path.join(sys.prefix, dirname) - return libdir - - -DLLNAMEMAP = { - 'linux': r'lib%(name)s\.so\.%(ver)s$', - 'linux2': r'lib%(name)s\.so\.%(ver)s$', - 'darwin': r'lib%(name)s\.%(ver)s\.dylib$', - 'win32': r'%(name)s%(ver)s\.dll$', -} - -RE_VER = r'[0-9]*([_\.][0-9]+)*' - - -def find_lib(libname, libdir=None, platform=None): - platform = platform or sys.platform - pat = DLLNAMEMAP[platform] % {"name": libname, "ver": RE_VER} - regex = re.compile(pat) - return find_file(regex, libdir) - - -def find_file(pat, libdir=None): - libdir = libdir or get_lib_dir() - entries = os.listdir(libdir) - candidates = [os.path.join(libdir, ent) - for ent in entries if pat.match(ent)] - return [c for c in candidates if os.path.isfile(c)] diff --git a/numba/numba/funcdesc.py b/numba/numba/funcdesc.py deleted file mode 100644 index 9b2d79487..000000000 --- a/numba/numba/funcdesc.py +++ /dev/null @@ -1,199 +0,0 @@ -""" -Function descriptors. -""" -from __future__ import print_function, division, absolute_import - -from collections import defaultdict -import sys - -from . 
import types, itanium_mangler -from .utils import _dynamic_modname, _dynamic_module - - -def default_mangler(name, argtypes): - return itanium_mangler.mangle(name, argtypes) - - -def qualifying_prefix(modname, qualname): - """ - Returns a new string that is used for the first half of the mangled name. - """ - # XXX choose a different convention for object mode - return '{}.{}'.format(modname, qualname) if modname else qualname - - -class FunctionDescriptor(object): - """ - Base class for function descriptors: an object used to carry - useful metadata about a natively callable function. - - Note that while `FunctionIdentity` denotes a Python function - which is being concretely compiled by Numba, `FunctionDescriptor` - may be more "abstract": e.g. a function decorated with `@generated_jit`. - """ - __slots__ = ('native', 'modname', 'qualname', 'doc', 'typemap', - 'calltypes', 'args', 'kws', 'restype', 'argtypes', - 'mangled_name', 'unique_name', 'env_name', - 'inline', 'noalias') - - def __init__(self, native, modname, qualname, unique_name, doc, - typemap, restype, calltypes, args, kws, mangler=None, - argtypes=None, inline=False, noalias=False, env_name=None): - self.native = native - self.modname = modname - self.qualname = qualname - self.unique_name = unique_name - self.doc = doc - # XXX typemap and calltypes should be on the compile result, - # not the FunctionDescriptor - self.typemap = typemap - self.calltypes = calltypes - self.args = args - self.kws = kws - self.restype = restype - # Argument types - if argtypes is not None: - assert isinstance(argtypes, tuple), argtypes - self.argtypes = argtypes - else: - # Get argument types from the type inference result - # (note the "arg.FOO" convention as used in typeinfer - self.argtypes = tuple(self.typemap['arg.' + a] for a in args) - mangler = default_mangler if mangler is None else mangler - # The mangled name *must* be unique, else the wrong function can - # be chosen at link time. 
- qualprefix = qualifying_prefix(self.modname, self.unique_name) - self.mangled_name = mangler(qualprefix, self.argtypes) - if env_name is None: - env_name = mangler(".NumbaEnv.{}".format(qualprefix), - self.argtypes) - self.env_name = env_name - self.inline = inline - self.noalias = noalias - - def lookup_module(self): - """ - Return the module in which this function is supposed to exist. - This may be a dummy module if the function was dynamically - generated. - """ - if self.modname == _dynamic_modname: - return _dynamic_module - else: - return sys.modules[self.modname] - - def lookup_function(self): - """ - Return the original function object described by this object. - """ - return getattr(self.lookup_module(), self.qualname) - - @property - def llvm_func_name(self): - """ - The LLVM-registered name for the raw function. - """ - return self.mangled_name - - # XXX refactor this - - @property - def llvm_cpython_wrapper_name(self): - """ - The LLVM-registered name for a CPython-compatible wrapper of the - raw function (i.e. a PyCFunctionWithKeywords). - """ - return itanium_mangler.prepend_namespace(self.mangled_name, - ns='cpython') - - @property - def llvm_cfunc_wrapper_name(self): - """ - The LLVM-registered name for a C-compatible wrapper of the - raw function. - """ - return 'cfunc.' + self.mangled_name - - def __repr__(self): - return "" % (self.unique_name) - - @classmethod - def _get_function_info(cls, func_ir): - """ - Returns - ------- - qualname, unique_name, modname, doc, args, kws, globals - - ``unique_name`` must be a unique name. - """ - func = func_ir.func_id.func - qualname = func_ir.func_id.func_qualname - # XXX to func_id - modname = func.__module__ - doc = func.__doc__ or '' - args = tuple(func_ir.arg_names) - kws = () # TODO - - if modname is None: - # Dynamically generated function. 
- modname = _dynamic_modname - - unique_name = func_ir.func_id.unique_name - - return qualname, unique_name, modname, doc, args, kws - - @classmethod - def _from_python_function(cls, func_ir, typemap, restype, calltypes, - native, mangler=None, inline=False, noalias=False): - (qualname, unique_name, modname, doc, args, kws, - )= cls._get_function_info(func_ir) - self = cls(native, modname, qualname, unique_name, doc, - typemap, restype, calltypes, - args, kws, mangler=mangler, inline=inline, noalias=noalias) - return self - - -class PythonFunctionDescriptor(FunctionDescriptor): - """ - A FunctionDescriptor subclass for Numba-compiled functions. - """ - __slots__ = () - - @classmethod - def from_specialized_function(cls, func_ir, typemap, restype, calltypes, - mangler, inline, noalias): - """ - Build a FunctionDescriptor for a given specialization of a Python - function (in nopython mode). - """ - return cls._from_python_function(func_ir, typemap, restype, calltypes, - native=True, mangler=mangler, - inline=inline, noalias=noalias) - - @classmethod - def from_object_mode_function(cls, func_ir): - """ - Build a FunctionDescriptor for an object mode variant of a Python - function. - """ - typemap = defaultdict(lambda: types.pyobject) - calltypes = typemap.copy() - restype = types.pyobject - return cls._from_python_function(func_ir, typemap, restype, calltypes, - native=False) - - -class ExternalFunctionDescriptor(FunctionDescriptor): - """ - A FunctionDescriptor subclass for opaque external functions - (e.g. raw C functions). 
- """ - __slots__ = () - - def __init__(self, name, restype, argtypes): - args = ["arg%d" % i for i in range(len(argtypes))] - super(ExternalFunctionDescriptor, self).__init__(native=True, - modname=None, qualname=name, unique_name=name, doc='', - typemap=None, restype=restype, calltypes=None, - args=args, kws=None, mangler=lambda a, x: a, - argtypes=argtypes) diff --git a/numba/numba/generators.py b/numba/numba/generators.py deleted file mode 100644 index 74b437961..000000000 --- a/numba/numba/generators.py +++ /dev/null @@ -1,350 +0,0 @@ -""" -Support for lowering generators. -""" -from __future__ import print_function, division, absolute_import - -from llvmlite.llvmpy.core import Constant, Type, Builder - -from . import cgutils, types, config -from .funcdesc import FunctionDescriptor - - -class GeneratorDescriptor(FunctionDescriptor): - """ - The descriptor for a generator's next function. - """ - __slots__ = () - - @classmethod - def from_generator_fndesc(cls, func_ir, fndesc, gentype, mangler): - """ - Build a GeneratorDescriptor for the generator returned by the - function described by *fndesc*, with type *gentype*. - - The generator inherits the env_name from the *fndesc*. - All emitted functions for the generator shares the same Env. - """ - assert isinstance(gentype, types.Generator) - restype = gentype.yield_type - args = ['gen'] - argtypes = (gentype,) - qualname = fndesc.qualname + '.next' - unique_name = fndesc.unique_name + '.next' - self = cls(fndesc.native, fndesc.modname, qualname, unique_name, - fndesc.doc, fndesc.typemap, restype, fndesc.calltypes, - args, fndesc.kws, argtypes=argtypes, mangler=mangler, - inline=True, env_name=fndesc.env_name) - return self - - @property - def llvm_finalizer_name(self): - """ - The LLVM name of the generator's finalizer function - (if .has_finalizer is true). - """ - return 'finalize_' + self.mangled_name - - -class BaseGeneratorLower(object): - """ - Base support class for lowering generators. 
- """ - - def __init__(self, lower): - self.context = lower.context - self.fndesc = lower.fndesc - self.library = lower.library - self.call_conv = lower.call_conv - self.func_ir = lower.func_ir - - self.geninfo = lower.generator_info - self.gentype = self.get_generator_type() - self.gendesc = GeneratorDescriptor.from_generator_fndesc( - lower.func_ir, self.fndesc, self.gentype, self.context.mangler) - # Helps packing non-omitted arguments into a structure - self.arg_packer = self.context.get_data_packer(self.fndesc.argtypes) - - self.resume_blocks = {} - - def get_args_ptr(self, builder, genptr): - return cgutils.gep_inbounds(builder, genptr, 0, 1) - - def get_resume_index_ptr(self, builder, genptr): - return cgutils.gep_inbounds(builder, genptr, 0, 0, - name='gen.resume_index') - - def get_state_ptr(self, builder, genptr): - return cgutils.gep_inbounds(builder, genptr, 0, 2, - name='gen.state') - - def lower_init_func(self, lower): - """ - Lower the generator's initialization function (which will fill up - the passed-by-reference generator structure). - """ - lower.setup_function(self.fndesc) - - builder = lower.builder - - # Insert the generator into the target context in order to allow - # calling from other Numba-compiled functions. - lower.context.insert_generator(self.gentype, self.gendesc, - [self.library]) - - # Init argument values - lower.extract_function_arguments() - - lower.pre_lower() - - # Initialize the return structure (i.e. the generator structure). 
- retty = self.context.get_return_type(self.gentype) - # Structure index #0: the initial resume index (0 == start of generator) - resume_index = self.context.get_constant(types.int32, 0) - # Structure index #1: the function arguments - argsty = retty.elements[1] - statesty = retty.elements[2] - - lower.debug_print("# low_init_func incref") - # Incref all NRT arguments before storing into generator states - if self.context.enable_nrt: - for argty, argval in zip(self.fndesc.argtypes, lower.fnargs): - self.context.nrt.incref(builder, argty, argval) - - # Filter out omitted arguments - argsval = self.arg_packer.as_data(builder, lower.fnargs) - - # Zero initialize states - statesval = Constant.null(statesty) - gen_struct = cgutils.make_anonymous_struct(builder, - [resume_index, argsval, - statesval], - retty) - - retval = self.box_generator_struct(lower, gen_struct) - - lower.debug_print("# low_init_func before return") - self.call_conv.return_value(builder, retval) - lower.post_lower() - - def lower_next_func(self, lower): - """ - Lower the generator's next() function (which takes the - passed-by-reference generator structure and returns the next - yielded value). 
- """ - lower.setup_function(self.gendesc) - lower.debug_print("# lower_next_func: {0}".format(self.gendesc.unique_name)) - assert self.gendesc.argtypes[0] == self.gentype - builder = lower.builder - function = lower.function - - # Extract argument values and other information from generator struct - genptr, = self.call_conv.get_arguments(function) - self.arg_packer.load_into(builder, - self.get_args_ptr(builder, genptr), - lower.fnargs) - - self.resume_index_ptr = self.get_resume_index_ptr(builder, genptr) - self.gen_state_ptr = self.get_state_ptr(builder, genptr) - - prologue = function.append_basic_block("generator_prologue") - - # Lower the generator's Python code - entry_block_tail = lower.lower_function_body() - - # Add block for StopIteration on entry - stop_block = function.append_basic_block("stop_iteration") - builder.position_at_end(stop_block) - self.call_conv.return_stop_iteration(builder) - - # Add prologue switch to resume blocks - builder.position_at_end(prologue) - # First Python block is also the resume point on first next() call - first_block = self.resume_blocks[0] = lower.blkmap[lower.firstblk] - - # Create front switch to resume points - switch = builder.switch(builder.load(self.resume_index_ptr), - stop_block) - for index, block in self.resume_blocks.items(): - switch.add_case(index, block) - - # Close tail of entry block - builder.position_at_end(entry_block_tail) - builder.branch(prologue) - - def lower_finalize_func(self, lower): - """ - Lower the generator's finalizer. 
- """ - fnty = Type.function(Type.void(), - [self.context.get_value_type(self.gentype)]) - function = lower.module.get_or_insert_function( - fnty, name=self.gendesc.llvm_finalizer_name) - entry_block = function.append_basic_block('entry') - builder = Builder(entry_block) - - genptrty = self.context.get_value_type(self.gentype) - genptr = builder.bitcast(function.args[0], genptrty) - self.lower_finalize_func_body(builder, genptr) - - def return_from_generator(self, lower): - """ - Emit a StopIteration at generator end and mark the generator exhausted. - """ - indexval = Constant.int(self.resume_index_ptr.type.pointee, -1) - lower.builder.store(indexval, self.resume_index_ptr) - self.call_conv.return_stop_iteration(lower.builder) - - def create_resumption_block(self, lower, index): - block_name = "generator_resume%d" % (index,) - block = lower.function.append_basic_block(block_name) - lower.builder.position_at_end(block) - self.resume_blocks[index] = block - - def debug_print(self, builder, msg): - if config.DEBUG_JIT: - self.context.debug_print(builder, "DEBUGJIT: {0}".format(msg)) - -class GeneratorLower(BaseGeneratorLower): - """ - Support class for lowering nopython generators. - """ - - def get_generator_type(self): - return self.fndesc.restype - - def box_generator_struct(self, lower, gen_struct): - return gen_struct - - def lower_finalize_func_body(self, builder, genptr): - """ - Lower the body of the generator's finalizer: decref all live - state variables. 
- """ - self.debug_print(builder, "# generator: finalize") - if self.context.enable_nrt: - - # Always dereference all arguments - # self.debug_print(builder, "# generator: clear args") - args_ptr = self.get_args_ptr(builder, genptr) - for ty, val in self.arg_packer.load(builder, args_ptr): - self.context.nrt.decref(builder, ty, val) - - self.debug_print(builder, "# generator: finalize end") - builder.ret_void() - -class PyGeneratorLower(BaseGeneratorLower): - """ - Support class for lowering object mode generators. - """ - - def get_generator_type(self): - """ - Compute the actual generator type (the generator function's return - type is simply "pyobject"). - """ - return types.Generator( - gen_func=self.func_ir.func_id.func, - yield_type=types.pyobject, - arg_types=(types.pyobject,) * self.func_ir.arg_count, - state_types=(types.pyobject,) * len(self.geninfo.state_vars), - has_finalizer=True, - ) - - def box_generator_struct(self, lower, gen_struct): - """ - Box the raw *gen_struct* as a Python object. - """ - gen_ptr = cgutils.alloca_once_value(lower.builder, gen_struct) - return lower.pyapi.from_native_generator(gen_ptr, self.gentype, lower.envarg) - - def init_generator_state(self, lower): - """ - NULL-initialize all generator state variables, to avoid spurious - decref's on cleanup. - """ - lower.builder.store(Constant.null(self.gen_state_ptr.type.pointee), - self.gen_state_ptr) - - def lower_finalize_func_body(self, builder, genptr): - """ - Lower the body of the generator's finalizer: decref all live - state variables. 
- """ - pyapi = self.context.get_python_api(builder) - resume_index_ptr = self.get_resume_index_ptr(builder, genptr) - resume_index = builder.load(resume_index_ptr) - # If resume_index is 0, next() was never called - # If resume_index is -1, generator terminated cleanly - # (note function arguments are saved in state variables, - # so they don't need a separate cleanup step) - need_cleanup = builder.icmp_signed( - '>', resume_index, Constant.int(resume_index.type, 0)) - - with cgutils.if_unlikely(builder, need_cleanup): - # Decref all live vars (some may be NULL) - gen_state_ptr = self.get_state_ptr(builder, genptr) - for state_index in range(len(self.gentype.state_types)): - state_slot = cgutils.gep_inbounds(builder, gen_state_ptr, - 0, state_index) - ty = self.gentype.state_types[state_index] - val = self.context.unpack_value(builder, ty, state_slot) - pyapi.decref(val) - - builder.ret_void() - - -class LowerYield(object): - """ - Support class for lowering a particular yield point. - """ - - def __init__(self, lower, yield_point, live_vars): - self.lower = lower - self.context = lower.context - self.builder = lower.builder - self.genlower = lower.genlower - self.gentype = self.genlower.gentype - - self.gen_state_ptr = self.genlower.gen_state_ptr - self.resume_index_ptr = self.genlower.resume_index_ptr - self.yp = yield_point - self.inst = self.yp.inst - self.live_vars = live_vars - self.live_var_indices = [lower.generator_info.state_vars.index(v) - for v in live_vars] - - def lower_yield_suspend(self): - self.lower.debug_print("# generator suspend") - # Save live vars in state - for state_index, name in zip(self.live_var_indices, self.live_vars): - state_slot = cgutils.gep_inbounds(self.builder, self.gen_state_ptr, - 0, state_index) - ty = self.gentype.state_types[state_index] - val = self.lower.loadvar(name) - # IncRef newly stored value - if self.context.enable_nrt: - self.context.nrt.incref(self.builder, ty, val) - - self.context.pack_value(self.builder, ty, 
val, state_slot) - # Save resume index - indexval = Constant.int(self.resume_index_ptr.type.pointee, - self.inst.index) - self.builder.store(indexval, self.resume_index_ptr) - self.lower.debug_print("# generator suspend end") - - def lower_yield_resume(self): - # Emit resumption point - self.genlower.create_resumption_block(self.lower, self.inst.index) - self.lower.debug_print("# generator resume") - # Reload live vars from state - for state_index, name in zip(self.live_var_indices, self.live_vars): - state_slot = cgutils.gep_inbounds(self.builder, self.gen_state_ptr, - 0, state_index) - ty = self.gentype.state_types[state_index] - val = self.context.unpack_value(self.builder, ty, state_slot) - self.lower.storevar(val, name) - # Previous storevar is making an extra incref - if self.context.enable_nrt: - self.context.nrt.decref(self.builder, ty, val) - self.lower.debug_print("# generator resume end") diff --git a/numba/numba/inline_closurecall.py b/numba/numba/inline_closurecall.py deleted file mode 100644 index 51ee7f120..000000000 --- a/numba/numba/inline_closurecall.py +++ /dev/null @@ -1,1059 +0,0 @@ -import types as pytypes # avoid confusion with numba.types -import ctypes -import numba -from numba import config, ir, ir_utils, utils, prange, rewrites, types, typing -from numba.parfor import internal_prange -from numba.ir_utils import ( - mk_unique_var, - next_label, - add_offset_to_labels, - replace_vars, - remove_dels, - remove_dead, - rename_labels, - find_topo_order, - merge_adjacent_blocks, - GuardException, - require, - guard, - get_definition, - find_callname, - find_build_sequence, - get_np_ufunc_typ, - get_ir_of_code, - simplify_CFG, - canonicalize_array_math - ) - -from numba.analysis import ( - compute_cfg_from_blocks, - compute_use_defs, - compute_live_variables) - -from numba.targets.rangeobj import range_iter_len -from numba.unsafe.ndarray import empty_inferred as unsafe_empty_inferred -import numpy as np - -""" -Variable enable_inline_arraycall is 
only used for testing purpose. -""" -enable_inline_arraycall = True - -class InlineClosureCallPass(object): - """InlineClosureCallPass class looks for direct calls to locally defined - closures, and inlines the body of the closure function to the call site. - """ - - def __init__(self, func_ir, parallel_options): - self.func_ir = func_ir - self.parallel_options = parallel_options - self._processed_stencils = [] - - def run(self): - """Run inline closure call pass. - """ - modified = False - work_list = list(self.func_ir.blocks.items()) - debug_print = _make_debug_print("InlineClosureCallPass") - debug_print("START") - while work_list: - label, block = work_list.pop() - for i, instr in enumerate(block.body): - if isinstance(instr, ir.Assign): - lhs = instr.target - expr = instr.value - if isinstance(expr, ir.Expr) and expr.op == 'call': - call_name = guard(find_callname, self.func_ir, expr) - func_def = guard(get_definition, self.func_ir, expr.func) - - if guard(self._inline_reduction, - work_list, block, i, expr, call_name): - modified = True - break # because block structure changed - - if guard(self._inline_closure, - work_list, block, i, func_def): - modified = True - break # because block structure changed - - if guard(self._inline_stencil, - instr, call_name, func_def): - modified = True - - if enable_inline_arraycall: - # Identify loop structure - if modified: - # Need to do some cleanups if closure inlining kicked in - merge_adjacent_blocks(self.func_ir.blocks) - cfg = compute_cfg_from_blocks(self.func_ir.blocks) - debug_print("start inline arraycall") - _debug_dump(cfg) - loops = cfg.loops() - sized_loops = [(k, len(loops[k].body)) for k in loops.keys()] - visited = [] - # We go over all loops, bigger loops first (outer first) - for k, s in sorted(sized_loops, key=lambda tup: tup[1], reverse=True): - visited.append(k) - if guard(_inline_arraycall, self.func_ir, cfg, visited, loops[k], - self.parallel_options.comprehension): - modified = True - if modified: 
- _fix_nested_array(self.func_ir) - - if modified: - remove_dels(self.func_ir.blocks) - # repeat dead code elimintation until nothing can be further - # removed - while (remove_dead(self.func_ir.blocks, self.func_ir.arg_names, - self.func_ir)): - pass - self.func_ir.blocks = rename_labels(self.func_ir.blocks) - debug_print("END") - - def _inline_reduction(self, work_list, block, i, expr, call_name): - # only inline reduction in sequential execution, parallel handling - # is done in ParforPass. - require(not self.parallel_options.reduction) - require(call_name == ('reduce', 'builtins') or - call_name == ('reduce', '_functools')) - if len(expr.args) != 3: - raise TypeError("invalid reduce call, " - "three arguments including initial " - "value required") - check_reduce_func(self.func_ir, expr.args[0]) - def reduce_func(f, A, v): - s = v - it = iter(A) - for a in it: - s = f(s, a) - return s - inline_closure_call(self.func_ir, - self.func_ir.func_id.func.__globals__, - block, i, reduce_func, work_list=work_list) - return True - - def _inline_stencil(self, instr, call_name, func_def): - from numba.stencil import StencilFunc - lhs = instr.target - expr = instr.value - # We keep the escaping variables of the stencil kernel - # alive by adding them to the actual kernel call as extra - # keyword arguments, which is ignored anyway. 
- if (isinstance(func_def, ir.Global) and - func_def.name == 'stencil' and - isinstance(func_def.value, StencilFunc)): - if expr.kws: - expr.kws += func_def.value.kws - else: - expr.kws = func_def.value.kws - return True - # Otherwise we proceed to check if it is a call to numba.stencil - require(call_name == ('stencil', 'numba.stencil') or - call_name == ('stencil', 'numba')) - require(expr not in self._processed_stencils) - self._processed_stencils.append(expr) - if not len(expr.args) == 1: - raise ValueError("As a minimum Stencil requires" - " a kernel as an argument") - stencil_def = guard(get_definition, self.func_ir, expr.args[0]) - require(isinstance(stencil_def, ir.Expr) and - stencil_def.op == "make_function") - kernel_ir = get_ir_of_code(self.func_ir.func_id.func.__globals__, - stencil_def.code) - options = dict(expr.kws) - if 'neighborhood' in options: - fixed = guard(self._fix_stencil_neighborhood, options) - if not fixed: - raise ValueError("stencil neighborhood option should be a tuple" - " with constant structure such as ((-w, w),)") - if 'index_offsets' in options: - fixed = guard(self._fix_stencil_index_offsets, options) - if not fixed: - raise ValueError("stencil index_offsets option should be a tuple" - " with constant structure such as (offset, )") - sf = StencilFunc(kernel_ir, 'constant', options) - sf.kws = expr.kws # hack to keep variables live - sf_global = ir.Global('stencil', sf, expr.loc) - self.func_ir._definitions[lhs.name] = [sf_global] - instr.value = sf_global - return True - - def _fix_stencil_neighborhood(self, options): - """ - Extract the two-level tuple representing the stencil neighborhood - from the program IR to provide a tuple to StencilFunc. 
- """ - # build_tuple node with neighborhood for each dimension - dims_build_tuple = get_definition(self.func_ir, options['neighborhood']) - require(hasattr(dims_build_tuple, 'items')) - res = [] - for window_var in dims_build_tuple.items: - win_build_tuple = get_definition(self.func_ir, window_var) - require(hasattr(win_build_tuple, 'items')) - res.append(tuple(win_build_tuple.items)) - options['neighborhood'] = tuple(res) - return True - - def _fix_stencil_index_offsets(self, options): - """ - Extract the tuple representing the stencil index offsets - from the program IR to provide to StencilFunc. - """ - offset_tuple = get_definition(self.func_ir, options['index_offsets']) - require(hasattr(offset_tuple, 'items')) - options['index_offsets'] = tuple(offset_tuple.items) - return True - - def _inline_closure(self, work_list, block, i, func_def): - require(isinstance(func_def, ir.Expr) and - func_def.op == "make_function") - inline_closure_call(self.func_ir, - self.func_ir.func_id.func.__globals__, - block, i, func_def, work_list=work_list) - return True - -def check_reduce_func(func_ir, func_var): - reduce_func = guard(get_definition, func_ir, func_var) - if reduce_func is None: - raise ValueError("Reduce function cannot be found for njit \ - analysis") - if not (hasattr(reduce_func, 'code') - or hasattr(reduce_func, '__code__')): - raise ValueError("Invalid reduction function") - f_code = (reduce_func.code if hasattr(reduce_func, 'code') - else reduce_func.__code__) - if not f_code.co_argcount == 2: - raise TypeError("Reduction function should take 2 arguments") - return - - -def inline_closure_call(func_ir, glbls, block, i, callee, typingctx=None, - arg_typs=None, typemap=None, calltypes=None, - work_list=None): - """Inline the body of `callee` at its callsite (`i`-th instruction of `block`) - - `func_ir` is the func_ir object of the caller function and `glbls` is its - global variable environment (func_ir.func_id.func.__globals__). 
- `block` is the IR block of the callsite and `i` is the index of the - callsite's node. `callee` is either the called function or a - make_function node. `typingctx`, `typemap` and `calltypes` are typing - data structures of the caller, available if we are in a typed pass. - `arg_typs` includes the types of the arguments at the callsite. - """ - scope = block.scope - instr = block.body[i] - call_expr = instr.value - debug_print = _make_debug_print("inline_closure_call") - debug_print("Found closure call: ", instr, " with callee = ", callee) - # support both function object and make_function Expr - callee_code = callee.code if hasattr(callee, 'code') else callee.__code__ - callee_defaults = callee.defaults if hasattr(callee, 'defaults') else callee.__defaults__ - callee_closure = callee.closure if hasattr(callee, 'closure') else callee.__closure__ - # first, get the IR of the callee - callee_ir = get_ir_of_code(glbls, callee_code) - callee_blocks = callee_ir.blocks - - # 1. relabel callee_ir by adding an offset - max_label = max(func_ir.blocks.keys()) - callee_blocks = add_offset_to_labels(callee_blocks, max_label + 1) - callee_blocks = simplify_CFG(callee_blocks) - callee_ir.blocks = callee_blocks - min_label = min(callee_blocks.keys()) - max_label = max(callee_blocks.keys()) - # reset globals in ir_utils before we use it - ir_utils._max_label = max_label - debug_print("After relabel") - _debug_dump(callee_ir) - - # 2. 
rename all local variables in callee_ir with new locals created in func_ir - callee_scopes = _get_all_scopes(callee_blocks) - debug_print("callee_scopes = ", callee_scopes) - # one function should only have one local scope - assert(len(callee_scopes) == 1) - callee_scope = callee_scopes[0] - var_dict = {} - for var in callee_scope.localvars._con.values(): - if not (var.name in callee_code.co_freevars): - new_var = scope.define(mk_unique_var(var.name), loc=var.loc) - var_dict[var.name] = new_var - debug_print("var_dict = ", var_dict) - replace_vars(callee_blocks, var_dict) - debug_print("After local var rename") - _debug_dump(callee_ir) - - # 3. replace formal parameters with actual arguments - args = list(call_expr.args) - if callee_defaults: - debug_print("defaults = ", callee_defaults) - if isinstance(callee_defaults, tuple): # Python 3.5 - args = args + list(callee_defaults) - elif isinstance(callee_defaults, ir.Var) or isinstance(callee_defaults, str): - defaults = func_ir.get_definition(callee_defaults) - assert(isinstance(defaults, ir.Const)) - loc = defaults.loc - args = args + [ir.Const(value=v, loc=loc) - for v in defaults.value] - else: - raise NotImplementedError( - "Unsupported defaults to make_function: {}".format(defaults)) - debug_print("After arguments rename: ") - _debug_dump(callee_ir) - - # 4. 
replace freevar with actual closure var - if callee_closure: - closure = func_ir.get_definition(callee_closure) - debug_print("callee's closure = ", closure) - if isinstance(closure, tuple): - cellget = ctypes.pythonapi.PyCell_Get - cellget.restype = ctypes.py_object - cellget.argtypes = (ctypes.py_object,) - items = tuple(cellget(x) for x in closure) - else: - assert(isinstance(closure, ir.Expr) - and closure.op == 'build_tuple') - items = closure.items - assert(len(callee_code.co_freevars) == len(items)) - _replace_freevars(callee_blocks, items) - debug_print("After closure rename") - _debug_dump(callee_ir) - - if typingctx: - from numba import compiler - f_typemap, f_return_type, f_calltypes = compiler.type_inference_stage( - typingctx, callee_ir, arg_typs, None) - canonicalize_array_math(callee_ir, f_typemap, - f_calltypes, typingctx) - # remove argument entries like arg.a from typemap - arg_names = [vname for vname in f_typemap if vname.startswith("arg.")] - for a in arg_names: - f_typemap.pop(a) - typemap.update(f_typemap) - calltypes.update(f_calltypes) - - _replace_args_with(callee_blocks, args) - # 5. split caller blocks into two - new_blocks = [] - new_block = ir.Block(scope, block.loc) - new_block.body = block.body[i + 1:] - new_label = next_label() - func_ir.blocks[new_label] = new_block - new_blocks.append((new_label, new_block)) - block.body = block.body[:i] - block.body.append(ir.Jump(min_label, instr.loc)) - - # 6. replace Return with assignment to LHS - topo_order = find_topo_order(callee_blocks) - _replace_returns(callee_blocks, instr.target, new_label) - # remove the old definition of instr.target too - if (instr.target.name in func_ir._definitions): - func_ir._definitions[instr.target.name] = [] - - # 7. 
insert all new blocks, and add back definitions - for label in topo_order: - # block scope must point to parent's - block = callee_blocks[label] - block.scope = scope - _add_definitions(func_ir, block) - func_ir.blocks[label] = block - new_blocks.append((label, block)) - debug_print("After merge in") - _debug_dump(func_ir) - - if work_list != None: - for block in new_blocks: - work_list.append(block) - return callee_blocks - -def _make_debug_print(prefix): - def debug_print(*args): - if config.DEBUG_INLINE_CLOSURE: - print(prefix + ": " + "".join(str(x) for x in args)) - return debug_print - -def _debug_dump(func_ir): - if config.DEBUG_INLINE_CLOSURE: - func_ir.dump() - - -def _get_all_scopes(blocks): - """Get all block-local scopes from an IR. - """ - all_scopes = [] - for label, block in blocks.items(): - if not (block.scope in all_scopes): - all_scopes.append(block.scope) - return all_scopes - - -def _replace_args_with(blocks, args): - """ - Replace ir.Arg(...) with real arguments from call site - """ - for label, block in blocks.items(): - assigns = block.find_insts(ir.Assign) - for stmt in assigns: - if isinstance(stmt.value, ir.Arg): - idx = stmt.value.index - assert(idx < len(args)) - stmt.value = args[idx] - - -def _replace_freevars(blocks, args): - """ - Replace ir.FreeVar(...) with real variables from parent function - """ - for label, block in blocks.items(): - assigns = block.find_insts(ir.Assign) - for stmt in assigns: - if isinstance(stmt.value, ir.FreeVar): - idx = stmt.value.index - assert(idx < len(args)) - if isinstance(args[idx], ir.Var): - stmt.value = args[idx] - else: - stmt.value = ir.Const(args[idx], stmt.loc) - - -def _replace_returns(blocks, target, return_label): - """ - Return return statement by assigning directly to target, and a jump. 
- """ - for label, block in blocks.items(): - casts = [] - for i in range(len(block.body)): - stmt = block.body[i] - if isinstance(stmt, ir.Return): - assert(i + 1 == len(block.body)) - block.body[i] = ir.Assign(stmt.value, target, stmt.loc) - block.body.append(ir.Jump(return_label, stmt.loc)) - # remove cast of the returned value - for cast in casts: - if cast.target.name == stmt.value.name: - cast.value = cast.value.value - elif isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr) and stmt.value.op == 'cast': - casts.append(stmt) - -def _add_definitions(func_ir, block): - """ - Add variable definitions found in a block to parent func_ir. - """ - definitions = func_ir._definitions - assigns = block.find_insts(ir.Assign) - for stmt in assigns: - definitions[stmt.target.name].append(stmt.value) - -def _find_arraycall(func_ir, block): - """Look for statement like "x = numpy.array(y)" or "x[..] = y" - immediately after the closure call that creates list y (the i-th - statement in block). Return the statement index if found, or - raise GuardException. - """ - array_var = None - array_call_index = None - list_var_dead_after_array_call = False - list_var = None - - i = 0 - while i < len(block.body): - instr = block.body[i] - if isinstance(instr, ir.Del): - # Stop the process if list_var becomes dead - if list_var and array_var and instr.value == list_var.name: - list_var_dead_after_array_call = True - break - pass - elif isinstance(instr, ir.Assign): - # Found array_var = array(list_var) - lhs = instr.target - expr = instr.value - if (guard(find_callname, func_ir, expr) == ('array', 'numpy') and - isinstance(expr.args[0], ir.Var)): - list_var = expr.args[0] - array_var = lhs - array_stmt_index = i - array_kws = dict(expr.kws) - elif (isinstance(instr, ir.SetItem) and - isinstance(instr.value, ir.Var) and - not list_var): - list_var = instr.value - # Found array_var[..] 
= list_var, the case for nested array - array_var = instr.target - array_def = get_definition(func_ir, array_var) - require(guard(_find_unsafe_empty_inferred, func_ir, array_def)) - array_stmt_index = i - array_kws = {} - else: - # Bail out otherwise - break - i = i + 1 - # require array_var is found, and list_var is dead after array_call. - require(array_var and list_var_dead_after_array_call) - _make_debug_print("find_array_call")(block.body[array_stmt_index]) - return list_var, array_stmt_index, array_kws - - -def _find_iter_range(func_ir, range_iter_var): - """Find the iterator's actual range if it is either range(n), or range(m, n), - otherwise return raise GuardException. - """ - debug_print = _make_debug_print("find_iter_range") - range_iter_def = get_definition(func_ir, range_iter_var) - debug_print("range_iter_var = ", range_iter_var, " def = ", range_iter_def) - require(isinstance(range_iter_def, ir.Expr) and range_iter_def.op == 'getiter') - range_var = range_iter_def.value - range_def = get_definition(func_ir, range_var) - debug_print("range_var = ", range_var, " range_def = ", range_def) - require(isinstance(range_def, ir.Expr) and range_def.op == 'call') - func_var = range_def.func - func_def = get_definition(func_ir, func_var) - debug_print("func_var = ", func_var, " func_def = ", func_def) - require(isinstance(func_def, ir.Global) and func_def.value == range) - nargs = len(range_def.args) - if nargs == 1: - stop = get_definition(func_ir, range_def.args[0], lhs_only=True) - return (0, range_def.args[0], func_def) - elif nargs == 2: - start = get_definition(func_ir, range_def.args[0], lhs_only=True) - stop = get_definition(func_ir, range_def.args[1], lhs_only=True) - return (start, stop, func_def) - else: - raise GuardException - -def _inline_arraycall(func_ir, cfg, visited, loop, enable_prange=False): - """Look for array(list) call in the exit block of a given loop, and turn list operations into - array operations in the loop if the following 
conditions are met: - 1. The exit block contains an array call on the list; - 2. The list variable is no longer live after array call; - 3. The list is created in the loop entry block; - 4. The loop is created from an range iterator whose length is known prior to the loop; - 5. There is only one list_append operation on the list variable in the loop body; - 6. The block that contains list_append dominates the loop head, which ensures list - length is the same as loop length; - If any condition check fails, no modification will be made to the incoming IR. - """ - debug_print = _make_debug_print("inline_arraycall") - # There should only be one loop exit - require(len(loop.exits) == 1) - exit_block = next(iter(loop.exits)) - list_var, array_call_index, array_kws = _find_arraycall(func_ir, func_ir.blocks[exit_block]) - - # check if dtype is present in array call - dtype_def = None - dtype_mod_def = None - if 'dtype' in array_kws: - require(isinstance(array_kws['dtype'], ir.Var)) - # We require that dtype argument to be a constant of getattr Expr, and we'll - # remember its definition for later use. - dtype_def = get_definition(func_ir, array_kws['dtype']) - require(isinstance(dtype_def, ir.Expr) and dtype_def.op == 'getattr') - dtype_mod_def = get_definition(func_ir, dtype_def.value) - - list_var_def = get_definition(func_ir, list_var) - debug_print("list_var = ", list_var, " def = ", list_var_def) - if isinstance(list_var_def, ir.Expr) and list_var_def.op == 'cast': - list_var_def = get_definition(func_ir, list_var_def.value) - # Check if the definition is a build_list - require(isinstance(list_var_def, ir.Expr) and list_var_def.op == 'build_list') - - # Look for list_append in "last" block in loop body, which should be a block that is - # a post-dominator of the loop header. - list_append_stmts = [] - for label in loop.body: - # We have to consider blocks of this loop, but not sub-loops. 
- # To achieve this, we require the set of "in_loops" of "label" to be visited loops. - in_visited_loops = [l.header in visited for l in cfg.in_loops(label)] - if not all(in_visited_loops): - continue - block = func_ir.blocks[label] - debug_print("check loop body block ", label) - for stmt in block.find_insts(ir.Assign): - lhs = stmt.target - expr = stmt.value - if isinstance(expr, ir.Expr) and expr.op == 'call': - func_def = get_definition(func_ir, expr.func) - if isinstance(func_def, ir.Expr) and func_def.op == 'getattr' \ - and func_def.attr == 'append': - list_def = get_definition(func_ir, func_def.value) - debug_print("list_def = ", list_def, list_def == list_var_def) - if list_def == list_var_def: - # found matching append call - list_append_stmts.append((label, block, stmt)) - - # Require only one list_append, otherwise we won't know the indices - require(len(list_append_stmts) == 1) - append_block_label, append_block, append_stmt = list_append_stmts[0] - - # Check if append_block (besides loop entry) dominates loop header. - # Since CFG doesn't give us this info without loop entry, we approximate - # by checking if the predecessor set of the header block is the same - # as loop_entries plus append_block, which is certainly more restrictive - # than necessary, and can be relaxed if needed. 
- preds = set(l for l, b in cfg.predecessors(loop.header)) - debug_print("preds = ", preds, (loop.entries | set([append_block_label]))) - require(preds == (loop.entries | set([append_block_label]))) - - # Find iterator in loop header - iter_vars = [] - iter_first_vars = [] - loop_header = func_ir.blocks[loop.header] - for stmt in loop_header.find_insts(ir.Assign): - expr = stmt.value - if isinstance(expr, ir.Expr): - if expr.op == 'iternext': - iter_def = get_definition(func_ir, expr.value) - debug_print("iter_def = ", iter_def) - iter_vars.append(expr.value) - elif expr.op == 'pair_first': - iter_first_vars.append(stmt.target) - - # Require only one iterator in loop header - require(len(iter_vars) == 1 and len(iter_first_vars) == 1) - iter_var = iter_vars[0] # variable that holds the iterator object - iter_first_var = iter_first_vars[0] # variable that holds the value out of iterator - - # Final requirement: only one loop entry, and we're going to modify it by: - # 1. replacing the list definition with an array definition; - # 2. adding a counter for the array iteration. - require(len(loop.entries) == 1) - loop_entry = func_ir.blocks[next(iter(loop.entries))] - terminator = loop_entry.terminator - scope = loop_entry.scope - loc = loop_entry.loc - stmts = [] - removed = [] - def is_removed(val, removed): - if isinstance(val, ir.Var): - for x in removed: - if x.name == val.name: - return True - return False - # Skip list construction and skip terminator, add the rest to stmts - for i in range(len(loop_entry.body) - 1): - stmt = loop_entry.body[i] - if isinstance(stmt, ir.Assign) and (stmt.value == list_def or is_removed(stmt.value, removed)): - removed.append(stmt.target) - else: - stmts.append(stmt) - debug_print("removed variables: ", removed) - - # Define an index_var to index the array. 
- # If the range happens to be single step ranges like range(n), or range(m, n), - # then the index_var correlates to iterator index; otherwise we'll have to - # define a new counter. - range_def = guard(_find_iter_range, func_ir, iter_var) - index_var = ir.Var(scope, mk_unique_var("index"), loc) - if range_def and range_def[0] == 0: - # iterator starts with 0, index_var can just be iter_first_var - index_var = iter_first_var - else: - # index_var = -1 # starting the index with -1 since it will incremented in loop header - stmts.append(_new_definition(func_ir, index_var, ir.Const(value=-1, loc=loc), loc)) - - # Insert statement to get the size of the loop iterator - size_var = ir.Var(scope, mk_unique_var("size"), loc) - if range_def: - start, stop, range_func_def = range_def - if start == 0: - size_val = stop - else: - size_val = ir.Expr.binop(fn='-', lhs=stop, rhs=start, loc=loc) - # we can parallelize this loop if enable_prange = True, by changing - # range function from range, to prange. 
- if enable_prange and isinstance(range_func_def, ir.Global): - range_func_def.name = 'internal_prange' - range_func_def.value = internal_prange - - else: - len_func_var = ir.Var(scope, mk_unique_var("len_func"), loc) - stmts.append(_new_definition(func_ir, len_func_var, - ir.Global('range_iter_len', range_iter_len, loc=loc), loc)) - size_val = ir.Expr.call(len_func_var, (iter_var,), (), loc=loc) - - stmts.append(_new_definition(func_ir, size_var, size_val, loc)) - - size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc) - stmts.append(_new_definition(func_ir, size_tuple_var, - ir.Expr.build_tuple(items=[size_var], loc=loc), loc)) - - # Insert array allocation - array_var = ir.Var(scope, mk_unique_var("array"), loc) - empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc) - if dtype_def and dtype_mod_def: - # when dtype is present, we'll call emtpy with dtype - dtype_mod_var = ir.Var(scope, mk_unique_var("dtype_mod"), loc) - dtype_var = ir.Var(scope, mk_unique_var("dtype"), loc) - stmts.append(_new_definition(func_ir, dtype_mod_var, dtype_mod_def, loc)) - stmts.append(_new_definition(func_ir, dtype_var, - ir.Expr.getattr(dtype_mod_var, dtype_def.attr, loc), loc)) - stmts.append(_new_definition(func_ir, empty_func, - ir.Global('empty', np.empty, loc=loc), loc)) - array_kws = [('dtype', dtype_var)] - else: - # otherwise we'll call unsafe_empty_inferred - stmts.append(_new_definition(func_ir, empty_func, - ir.Global('unsafe_empty_inferred', - unsafe_empty_inferred, loc=loc), loc)) - array_kws = [] - # array_var = empty_func(size_tuple_var) - stmts.append(_new_definition(func_ir, array_var, - ir.Expr.call(empty_func, (size_tuple_var,), list(array_kws), loc=loc), loc)) - - # Add back removed just in case they are used by something else - for var in removed: - stmts.append(_new_definition(func_ir, var, array_var, loc)) - - # Add back terminator - stmts.append(terminator) - # Modify loop_entry - loop_entry.body = stmts - - if range_def: - if range_def[0] 
!= 0: - # when range doesn't start from 0, index_var becomes loop index - # (iter_first_var) minus an offset (range_def[0]) - terminator = loop_header.terminator - assert(isinstance(terminator, ir.Branch)) - # find the block in the loop body that header jumps to - block_id = terminator.truebr - blk = func_ir.blocks[block_id] - loc = blk.loc - blk.body.insert(0, _new_definition(func_ir, index_var, - ir.Expr.binop(fn='-', lhs=iter_first_var, - rhs=range_def[0], loc=loc), - loc)) - else: - # Insert index_var increment to the end of loop header - loc = loop_header.loc - terminator = loop_header.terminator - stmts = loop_header.body[0:-1] - next_index_var = ir.Var(scope, mk_unique_var("next_index"), loc) - one = ir.Var(scope, mk_unique_var("one"), loc) - # one = 1 - stmts.append(_new_definition(func_ir, one, - ir.Const(value=1,loc=loc), loc)) - # next_index_var = index_var + 1 - stmts.append(_new_definition(func_ir, next_index_var, - ir.Expr.binop(fn='+', lhs=index_var, rhs=one, loc=loc), loc)) - # index_var = next_index_var - stmts.append(_new_definition(func_ir, index_var, next_index_var, loc)) - stmts.append(terminator) - loop_header.body = stmts - - # In append_block, change list_append into array assign - for i in range(len(append_block.body)): - if append_block.body[i] == append_stmt: - debug_print("Replace append with SetItem") - append_block.body[i] = ir.SetItem(target=array_var, index=index_var, - value=append_stmt.value.args[0], loc=append_stmt.loc) - - # replace array call, by changing "a = array(b)" to "a = b" - stmt = func_ir.blocks[exit_block].body[array_call_index] - # stmt can be either array call or SetItem, we only replace array call - if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr): - stmt.value = array_var - func_ir._definitions[stmt.target.name] = [stmt.value] - - return True - - -def _find_unsafe_empty_inferred(func_ir, expr): - unsafe_empty_inferred - require(isinstance(expr, ir.Expr) and expr.op == 'call') - callee = expr.func 
- callee_def = get_definition(func_ir, callee) - require(isinstance(callee_def, ir.Global)) - _make_debug_print("_find_unsafe_empty_inferred")(callee_def.value) - return callee_def.value == unsafe_empty_inferred - - -def _fix_nested_array(func_ir): - """Look for assignment like: a[..] = b, where both a and b are numpy arrays, and - try to eliminate array b by expanding a with an extra dimension. - """ - blocks = func_ir.blocks - cfg = compute_cfg_from_blocks(blocks) - usedefs = compute_use_defs(blocks) - empty_deadmap = dict([(label, set()) for label in blocks.keys()]) - livemap = compute_live_variables(cfg, blocks, usedefs.defmap, empty_deadmap) - - def find_array_def(arr): - """Find numpy array definition such as - arr = numba.unsafe.ndarray.empty_inferred(...). - If it is arr = b[...], find array definition of b recursively. - """ - arr_def = func_ir.get_definition(arr) - _make_debug_print("find_array_def")(arr, arr_def) - if isinstance(arr_def, ir.Expr): - if guard(_find_unsafe_empty_inferred, func_ir, arr_def): - return arr_def - elif arr_def.op == 'getitem': - return find_array_def(arr_def.value) - raise GuardException - - def fix_dependencies(expr, varlist): - """Double check if all variables in varlist are defined before - expr is used. Try to move constant definition when the check fails. - Bails out by raising GuardException if it can't be moved. - """ - debug_print = _make_debug_print("fix_dependencies") - for label, block in blocks.items(): - scope = block.scope - body = block.body - defined = set() - for i in range(len(body)): - inst = body[i] - if isinstance(inst, ir.Assign): - defined.add(inst.target.name) - if inst.value == expr: - new_varlist = [] - for var in varlist: - # var must be defined before this inst, or live - # and not later defined. 
- if (var.name in defined or - (var.name in livemap[label] and - not (var.name in usedefs.defmap[label]))): - debug_print(var.name, " already defined") - new_varlist.append(var) - else: - debug_print(var.name, " not yet defined") - var_def = get_definition(func_ir, var.name) - if isinstance(var_def, ir.Const): - loc = var.loc - new_var = ir.Var(scope, mk_unique_var("new_var"), loc) - new_const = ir.Const(var_def.value, loc) - new_vardef = _new_definition(func_ir, - new_var, new_const, loc) - new_body = [] - new_body.extend(body[:i]) - new_body.append(new_vardef) - new_body.extend(body[i:]) - block.body = new_body - new_varlist.append(new_var) - else: - raise GuardException - return new_varlist - # when expr is not found in block - raise GuardException - - def fix_array_assign(stmt): - """For assignment like lhs[idx] = rhs, where both lhs and rhs are arrays, do the - following: - 1. find the definition of rhs, which has to be a call to numba.unsafe.ndarray.empty_inferred - 2. find the source array creation for lhs, insert an extra dimension of size of b. - 3. replace the definition of rhs = numba.unsafe.ndarray.empty_inferred(...) 
with rhs = lhs[idx] - """ - require(isinstance(stmt, ir.SetItem)) - require(isinstance(stmt.value, ir.Var)) - debug_print = _make_debug_print("fix_array_assign") - debug_print("found SetItem: ", stmt) - lhs = stmt.target - # Find the source array creation of lhs - lhs_def = find_array_def(lhs) - debug_print("found lhs_def: ", lhs_def) - rhs_def = get_definition(func_ir, stmt.value) - debug_print("found rhs_def: ", rhs_def) - require(isinstance(rhs_def, ir.Expr)) - if rhs_def.op == 'cast': - rhs_def = get_definition(func_ir, rhs_def.value) - require(isinstance(rhs_def, ir.Expr)) - require(_find_unsafe_empty_inferred(func_ir, rhs_def)) - # Find the array dimension of rhs - dim_def = get_definition(func_ir, rhs_def.args[0]) - require(isinstance(dim_def, ir.Expr) and dim_def.op == 'build_tuple') - debug_print("dim_def = ", dim_def) - extra_dims = [ get_definition(func_ir, x, lhs_only=True) for x in dim_def.items ] - debug_print("extra_dims = ", extra_dims) - # Expand size tuple when creating lhs_def with extra_dims - size_tuple_def = get_definition(func_ir, lhs_def.args[0]) - require(isinstance(size_tuple_def, ir.Expr) and size_tuple_def.op == 'build_tuple') - debug_print("size_tuple_def = ", size_tuple_def) - extra_dims = fix_dependencies(size_tuple_def, extra_dims) - size_tuple_def.items += extra_dims - # In-place modify rhs_def to be getitem - rhs_def.op = 'getitem' - rhs_def.value = get_definition(func_ir, lhs, lhs_only=True) - rhs_def.index = stmt.index - del rhs_def._kws['func'] - del rhs_def._kws['args'] - del rhs_def._kws['vararg'] - del rhs_def._kws['kws'] - # success - return True - - for label in find_topo_order(func_ir.blocks): - block = func_ir.blocks[label] - for stmt in block.body: - if guard(fix_array_assign, stmt): - block.body.remove(stmt) - -def _new_definition(func_ir, var, value, loc): - func_ir._definitions[var.name] = [value] - return ir.Assign(value=value, target=var, loc=loc) - -@rewrites.register_rewrite('after-inference') -class 
RewriteArrayOfConsts(rewrites.Rewrite): - '''The RewriteArrayOfConsts class is responsible for finding - 1D array creations from a constant list, and rewriting it into - direct initialization of array elements without creating the list. - ''' - def __init__(self, pipeline, *args, **kws): - self.typingctx = pipeline.typingctx - super(RewriteArrayOfConsts, self).__init__(pipeline, *args, **kws) - - def match(self, func_ir, block, typemap, calltypes): - if len(calltypes) == 0: - return False - self.crnt_block = block - self.new_body = guard(_inline_const_arraycall, block, func_ir, - self.typingctx, typemap, calltypes) - return self.new_body != None - - def apply(self): - self.crnt_block.body = self.new_body - return self.crnt_block - - -def _inline_const_arraycall(block, func_ir, context, typemap, calltypes): - """Look for array(list) call where list is a constant list created by build_list, - and turn them into direct array creation and initialization, if the following - conditions are met: - 1. The build_list call immediate preceeds the array call; - 2. The list variable is no longer live after array call; - If any condition check fails, no modification will be made. - """ - debug_print = _make_debug_print("inline_const_arraycall") - scope = block.scope - - def inline_array(array_var, expr, stmts, list_vars, dels): - """Check to see if the given "array_var" is created from a list - of constants, and try to inline the list definition as array - initialization. - - Extra statements produced with be appended to "stmts". 
- """ - callname = guard(find_callname, func_ir, expr) - require(callname and callname[1] == 'numpy' and callname[0] == 'array') - require(expr.args[0].name in list_vars) - ret_type = calltypes[expr].return_type - require(isinstance(ret_type, types.ArrayCompatible) and - ret_type.ndim == 1) - loc = expr.loc - list_var = expr.args[0] - array_typ = typemap[array_var.name] - debug_print("inline array_var = ", array_var, " list_var = ", list_var) - dtype = array_typ.dtype - seq, op = find_build_sequence(func_ir, list_var) - size = len(seq) - size_var = ir.Var(scope, mk_unique_var("size"), loc) - size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc) - size_typ = types.intp - size_tuple_typ = types.UniTuple(size_typ, 1) - - typemap[size_var.name] = size_typ - typemap[size_tuple_var.name] = size_tuple_typ - - stmts.append(_new_definition(func_ir, size_var, - ir.Const(size, loc=loc), loc)) - - stmts.append(_new_definition(func_ir, size_tuple_var, - ir.Expr.build_tuple(items=[size_var], loc=loc), loc)) - - empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc) - fnty = get_np_ufunc_typ(np.empty) - sig = context.resolve_function_type(fnty, (size_typ,), {}) - typemap[empty_func.name] = fnty # - - stmts.append(_new_definition(func_ir, empty_func, - ir.Global('empty', np.empty, loc=loc), loc)) - - empty_call = ir.Expr.call(empty_func, [size_var], {}, loc=loc) - calltypes[empty_call] = typing.signature(array_typ, size_typ) - stmts.append(_new_definition(func_ir, array_var, empty_call, loc)) - - for i in range(size): - index_var = ir.Var(scope, mk_unique_var("index"), loc) - index_typ = types.intp - typemap[index_var.name] = index_typ - stmts.append(_new_definition(func_ir, index_var, - ir.Const(i, loc), loc)) - setitem = ir.SetItem(array_var, index_var, seq[i], loc) - calltypes[setitem] = typing.signature(types.none, array_typ, - index_typ, dtype) - stmts.append(setitem) - - stmts.extend(dels) - return True - - # list_vars keep track of the variable created from 
the latest - # build_list instruction, as well as its synonyms. - list_vars = [] - # dead_vars keep track of those in list_vars that are considered dead. - dead_vars = [] - # list_items keep track of the elements used in build_list. - list_items = [] - stmts = [] - # dels keep track of the deletion of list_items, which will need to be - # moved after array initialization. - dels = [] - modified = False - for inst in block.body: - if isinstance(inst, ir.Assign): - if isinstance(inst.value, ir.Var): - if inst.value.name in list_vars: - list_vars.append(inst.target.name) - stmts.append(inst) - continue - elif isinstance(inst.value, ir.Expr): - expr = inst.value - if expr.op == 'build_list': - list_vars = [inst.target.name] - list_items = [x.name for x in expr.items] - stmts.append(inst) - continue - elif expr.op == 'call' and expr in calltypes: - arr_var = inst.target - if guard(inline_array, inst.target, expr, - stmts, list_vars, dels): - modified = True - continue - elif isinstance(inst, ir.Del): - removed_var = inst.value - if removed_var in list_items: - dels.append(inst) - continue - elif removed_var in list_vars: - # one of the list_vars is considered dead. - dead_vars.append(removed_var) - list_vars.remove(removed_var) - stmts.append(inst) - if list_vars == []: - # if all list_vars are considered dead, we need to filter - # them out from existing stmts to completely remove - # build_list. - # Note that if a translation didn't take place, dead_vars - # will also be empty when we reach this point. 
- body = [] - for inst in stmts: - if ((isinstance(inst, ir.Assign) and - inst.target.name in dead_vars) or - (isinstance(inst, ir.Del) and - inst.value in dead_vars)): - continue - body.append(inst) - stmts = body - dead_vars = [] - modified = True - continue - stmts.append(inst) - - # If the list is used in any capacity between build_list and array - # call, then we must call off the translation for this list because - # it could be mutated and list_items would no longer be applicable. - list_var_used = any([ x.name in list_vars for x in inst.list_vars() ]) - if list_var_used: - list_vars = [] - dead_vars = [] - list_items = [] - dels = [] - - return stmts if modified else None diff --git a/numba/numba/interpreter.py b/numba/numba/interpreter.py deleted file mode 100644 index 0f830bcea..000000000 --- a/numba/numba/interpreter.py +++ /dev/null @@ -1,1009 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import collections -import dis -import sys -from copy import copy - -from . import config, ir, controlflow, dataflow, utils, errors, six -from .utils import builtins, PYVERSION -from .errors import NotDefinedError - - -class Assigner(object): - """ - This object keeps track of potential assignment simplifications - inside a code block. - For example `$O.1 = x` followed by `y = $0.1` can be simplified - into `y = x`, but it's not possible anymore if we have `x = z` - in-between those two instructions. - - NOTE: this is not only an optimization, but is actually necessary - due to certain limitations of Numba - such as only accepting the - returning of an array passed as function argument. 
- """ - - def __init__(self): - # { destination variable name -> source Var object } - self.dest_to_src = {} - # Basically a reverse mapping of dest_to_src: - # { source variable name -> all destination names in dest_to_src } - self.src_invalidate = collections.defaultdict(list) - self.unused_dests = set() - - def assign(self, srcvar, destvar): - """ - Assign *srcvar* to *destvar*. Return either *srcvar* or a possible - simplified assignment source (earlier assigned to *srcvar*). - """ - srcname = srcvar.name - destname = destvar.name - if destname in self.src_invalidate: - # destvar will change, invalidate all previously known simplifications - for d in self.src_invalidate.pop(destname): - self.dest_to_src.pop(d) - if srcname in self.dest_to_src: - srcvar = self.dest_to_src[srcname] - if destvar.is_temp: - self.dest_to_src[destname] = srcvar - self.src_invalidate[srcname].append(destname) - self.unused_dests.add(destname) - return srcvar - - def get_assignment_source(self, destname): - """ - Get a possible assignment source (a ir.Var instance) to replace - *destname*, otherwise None. - """ - if destname in self.dest_to_src: - return self.dest_to_src[destname] - self.unused_dests.discard(destname) - return None - - -class Interpreter(object): - """A bytecode interpreter that builds up the IR. - """ - - def __init__(self, func_id): - self.func_id = func_id - self.arg_count = func_id.arg_count - self.arg_names = func_id.arg_names - self.loc = self.first_loc = ir.Loc.from_function_id(func_id) - self.is_generator = func_id.is_generator - - # { inst offset : ir.Block } - self.blocks = {} - # { name: [definitions] } of local variables - self.definitions = collections.defaultdict(list) - - def interpret(self, bytecode): - """ - Generate IR for this bytecode. 
- """ - self.bytecode = bytecode - - self.scopes = [] - global_scope = ir.Scope(parent=None, loc=self.loc) - self.scopes.append(global_scope) - - # Control flow analysis - self.cfa = controlflow.ControlFlowAnalysis(bytecode) - self.cfa.run() - if config.DUMP_CFG: - self.cfa.dump() - - # Data flow analysis - self.dfa = dataflow.DataFlowAnalysis(self.cfa) - self.dfa.run() - - # Temp states during interpretation - self.current_block = None - self.current_block_offset = None - self.syntax_blocks = [] - self.dfainfo = None - - firstblk = min(self.cfa.blocks.keys()) - self.scopes.append(ir.Scope(parent=self.current_scope, loc=self.loc)) - # Interpret loop - for inst, kws in self._iter_inst(): - self._dispatch(inst, kws) - - return ir.FunctionIR(self.blocks, self.is_generator, self.func_id, - self.first_loc, self.definitions, - self.arg_count, self.arg_names) - - def init_first_block(self): - # Define variables receiving the function arguments - for index, name in enumerate(self.arg_names): - val = ir.Arg(index=index, name=name, loc=self.loc) - self.store(val, name) - - def _iter_inst(self): - for blkct, block in enumerate(self.cfa.iterliveblocks()): - firstinst = self.bytecode[block.body[0]] - self._start_new_block(firstinst) - if blkct == 0: - # Is first block - self.loc = self.loc.with_lineno(firstinst.lineno) - self.init_first_block() - for offset, kws in self.dfainfo.insts: - inst = self.bytecode[offset] - self.loc = self.loc.with_lineno(inst.lineno) - yield inst, kws - self._end_current_block() - - def _start_new_block(self, inst): - oldblock = self.current_block - self.insert_block(inst.offset) - # Ensure the last block is terminated - if oldblock is not None and not oldblock.is_terminated: - jmp = ir.Jump(inst.offset, loc=self.loc) - oldblock.append(jmp) - # Get DFA block info - self.dfainfo = self.dfa.infos[self.current_block_offset] - self.assigner = Assigner() - - def _end_current_block(self): - self._remove_unused_temporaries() - self._insert_outgoing_phis() - 
- def _remove_unused_temporaries(self): - """ - Remove assignments to unused temporary variables from the - current block. - """ - new_body = [] - for inst in self.current_block.body: - if (isinstance(inst, ir.Assign) - and inst.target.is_temp - and inst.target.name in self.assigner.unused_dests): - continue - new_body.append(inst) - self.current_block.body = new_body - - def _insert_outgoing_phis(self): - """ - Add assignments to forward requested outgoing values - to subsequent blocks. - """ - for phiname, varname in self.dfainfo.outgoing_phis.items(): - target = self.current_scope.get_or_define(phiname, - loc=self.loc) - stmt = ir.Assign(value=self.get(varname), target=target, - loc=self.loc) - self.definitions[target.name].append(stmt.value) - if not self.current_block.is_terminated: - self.current_block.append(stmt) - else: - self.current_block.insert_before_terminator(stmt) - - def get_global_value(self, name): - """ - Get a global value from the func_global (first) or - as a builtins (second). If both failed, return a ir.UNDEFINED. - """ - try: - return utils.get_function_globals(self.func_id.func)[name] - except KeyError: - return getattr(builtins, name, ir.UNDEFINED) - - def get_closure_value(self, index): - """ - Get a value from the cell contained in this function's closure. - If not set, return a ir.UNDEFINED. 
- """ - cell = self.func_id.func.__closure__[index] - try: - return cell.cell_contents - except ValueError: - return ir.UNDEFINED - - @property - def current_scope(self): - return self.scopes[-1] - - @property - def code_consts(self): - return self.bytecode.co_consts - - @property - def code_locals(self): - return self.bytecode.co_varnames - - @property - def code_names(self): - return self.bytecode.co_names - - @property - def code_cellvars(self): - return self.bytecode.co_cellvars - - @property - def code_freevars(self): - return self.bytecode.co_freevars - - def _dispatch(self, inst, kws): - assert self.current_block is not None - fname = "op_%s" % inst.opname.replace('+', '_') - try: - fn = getattr(self, fname) - except AttributeError: - raise NotImplementedError(inst) - else: - try: - return fn(inst, **kws) - except errors.NotDefinedError as e: - if e.loc is None: - loc = self.loc - else: - loc = e.loc - - err = errors.NotDefinedError(e.name, loc=loc) - if not config.FULL_TRACEBACKS: - six.raise_from(err, None) - else: - raise err - - - # --- Scope operations --- - - def store(self, value, name, redefine=False): - """ - Store *value* (a Expr or Var instance) into the variable named *name* - (a str object). - """ - if redefine or self.current_block_offset in self.cfa.backbone: - rename = not (name in self.code_cellvars) - target = self.current_scope.redefine(name, loc=self.loc, rename=rename) - else: - target = self.current_scope.get_or_define(name, loc=self.loc) - if isinstance(value, ir.Var): - value = self.assigner.assign(value, target) - stmt = ir.Assign(value=value, target=target, loc=self.loc) - self.current_block.append(stmt) - self.definitions[target.name].append(value) - - def get(self, name): - """ - Get the variable (a Var instance) with the given *name*. - """ - # Implicit argument for comprehension starts with '.' - # See Parameter class in inspect.py (from Python source) - if name[0] == '.' 
and name[1:].isdigit(): - name = 'implicit{}'.format(name[1:]) - - # Try to simplify the variable lookup by returning an earlier - # variable assigned to *name*. - var = self.assigner.get_assignment_source(name) - if var is None: - var = self.current_scope.get(name) - return var - - # --- Block operations --- - - def insert_block(self, offset, scope=None, loc=None): - scope = scope or self.current_scope - loc = loc or self.loc - blk = ir.Block(scope=scope, loc=loc) - self.blocks[offset] = blk - self.current_block = blk - self.current_block_offset = offset - return blk - - # --- Bytecode handlers --- - - def op_PRINT_ITEM(self, inst, item, printvar, res): - item = self.get(item) - printgv = ir.Global("print", print, loc=self.loc) - self.store(value=printgv, name=printvar) - call = ir.Expr.call(self.get(printvar), (item,), (), loc=self.loc) - self.store(value=call, name=res) - - def op_PRINT_NEWLINE(self, inst, printvar, res): - printgv = ir.Global("print", print, loc=self.loc) - self.store(value=printgv, name=printvar) - call = ir.Expr.call(self.get(printvar), (), (), loc=self.loc) - self.store(value=call, name=res) - - def op_UNPACK_SEQUENCE(self, inst, iterable, stores, tupleobj): - count = len(stores) - # Exhaust the iterable into a tuple-like object - tup = ir.Expr.exhaust_iter(value=self.get(iterable), loc=self.loc, - count=count) - self.store(name=tupleobj, value=tup) - - # then index the tuple-like object to extract the values - for i, st in enumerate(stores): - expr = ir.Expr.static_getitem(self.get(tupleobj), - index=i, index_var=None, - loc=self.loc) - self.store(expr, st) - - def op_BUILD_SLICE(self, inst, start, stop, step, res, slicevar): - start = self.get(start) - stop = self.get(stop) - - slicegv = ir.Global("slice", slice, loc=self.loc) - self.store(value=slicegv, name=slicevar) - - if step is None: - sliceinst = ir.Expr.call(self.get(slicevar), (start, stop), (), - loc=self.loc) - else: - step = self.get(step) - sliceinst = 
ir.Expr.call(self.get(slicevar), (start, stop, step), - (), loc=self.loc) - self.store(value=sliceinst, name=res) - - def op_SLICE_0(self, inst, base, res, slicevar, indexvar, nonevar): - base = self.get(base) - - slicegv = ir.Global("slice", slice, loc=self.loc) - self.store(value=slicegv, name=slicevar) - - nonegv = ir.Const(None, loc=self.loc) - self.store(value=nonegv, name=nonevar) - none = self.get(nonevar) - - index = ir.Expr.call(self.get(slicevar), (none, none), (), loc=self.loc) - self.store(value=index, name=indexvar) - - expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc) - self.store(value=expr, name=res) - - def op_SLICE_1(self, inst, base, start, nonevar, res, slicevar, indexvar): - base = self.get(base) - start = self.get(start) - - nonegv = ir.Const(None, loc=self.loc) - self.store(value=nonegv, name=nonevar) - none = self.get(nonevar) - - slicegv = ir.Global("slice", slice, loc=self.loc) - self.store(value=slicegv, name=slicevar) - - index = ir.Expr.call(self.get(slicevar), (start, none), (), - loc=self.loc) - self.store(value=index, name=indexvar) - - expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc) - self.store(value=expr, name=res) - - def op_SLICE_2(self, inst, base, nonevar, stop, res, slicevar, indexvar): - base = self.get(base) - stop = self.get(stop) - - nonegv = ir.Const(None, loc=self.loc) - self.store(value=nonegv, name=nonevar) - none = self.get(nonevar) - - slicegv = ir.Global("slice", slice, loc=self.loc) - self.store(value=slicegv, name=slicevar) - - index = ir.Expr.call(self.get(slicevar), (none, stop,), (), - loc=self.loc) - self.store(value=index, name=indexvar) - - expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc) - self.store(value=expr, name=res) - - def op_SLICE_3(self, inst, base, start, stop, res, slicevar, indexvar): - base = self.get(base) - start = self.get(start) - stop = self.get(stop) - - slicegv = ir.Global("slice", slice, loc=self.loc) - self.store(value=slicegv, name=slicevar) 
- - index = ir.Expr.call(self.get(slicevar), (start, stop), (), - loc=self.loc) - self.store(value=index, name=indexvar) - - expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc) - self.store(value=expr, name=res) - - def op_STORE_SLICE_0(self, inst, base, value, slicevar, indexvar, nonevar): - base = self.get(base) - - slicegv = ir.Global("slice", slice, loc=self.loc) - self.store(value=slicegv, name=slicevar) - - nonegv = ir.Const(None, loc=self.loc) - self.store(value=nonegv, name=nonevar) - none = self.get(nonevar) - - index = ir.Expr.call(self.get(slicevar), (none, none), (), loc=self.loc) - self.store(value=index, name=indexvar) - - stmt = ir.SetItem(base, self.get(indexvar), self.get(value), - loc=self.loc) - self.current_block.append(stmt) - - def op_STORE_SLICE_1(self, inst, base, start, nonevar, value, slicevar, - indexvar): - base = self.get(base) - start = self.get(start) - - nonegv = ir.Const(None, loc=self.loc) - self.store(value=nonegv, name=nonevar) - none = self.get(nonevar) - - slicegv = ir.Global("slice", slice, loc=self.loc) - self.store(value=slicegv, name=slicevar) - - index = ir.Expr.call(self.get(slicevar), (start, none), (), - loc=self.loc) - self.store(value=index, name=indexvar) - - stmt = ir.SetItem(base, self.get(indexvar), self.get(value), - loc=self.loc) - self.current_block.append(stmt) - - def op_STORE_SLICE_2(self, inst, base, nonevar, stop, value, slicevar, - indexvar): - base = self.get(base) - stop = self.get(stop) - - nonegv = ir.Const(None, loc=self.loc) - self.store(value=nonegv, name=nonevar) - none = self.get(nonevar) - - slicegv = ir.Global("slice", slice, loc=self.loc) - self.store(value=slicegv, name=slicevar) - - index = ir.Expr.call(self.get(slicevar), (none, stop,), (), - loc=self.loc) - self.store(value=index, name=indexvar) - - stmt = ir.SetItem(base, self.get(indexvar), self.get(value), - loc=self.loc) - self.current_block.append(stmt) - - def op_STORE_SLICE_3(self, inst, base, start, stop, value, 
slicevar, - indexvar): - base = self.get(base) - start = self.get(start) - stop = self.get(stop) - - slicegv = ir.Global("slice", slice, loc=self.loc) - self.store(value=slicegv, name=slicevar) - - index = ir.Expr.call(self.get(slicevar), (start, stop), (), - loc=self.loc) - self.store(value=index, name=indexvar) - stmt = ir.SetItem(base, self.get(indexvar), self.get(value), - loc=self.loc) - self.current_block.append(stmt) - - def op_DELETE_SLICE_0(self, inst, base, slicevar, indexvar, nonevar): - base = self.get(base) - - slicegv = ir.Global("slice", slice, loc=self.loc) - self.store(value=slicegv, name=slicevar) - - nonegv = ir.Const(None, loc=self.loc) - self.store(value=nonegv, name=nonevar) - none = self.get(nonevar) - - index = ir.Expr.call(self.get(slicevar), (none, none), (), loc=self.loc) - self.store(value=index, name=indexvar) - - stmt = ir.DelItem(base, self.get(indexvar), loc=self.loc) - self.current_block.append(stmt) - - def op_DELETE_SLICE_1(self, inst, base, start, nonevar, slicevar, indexvar): - base = self.get(base) - start = self.get(start) - - nonegv = ir.Const(None, loc=self.loc) - self.store(value=nonegv, name=nonevar) - none = self.get(nonevar) - - slicegv = ir.Global("slice", slice, loc=self.loc) - self.store(value=slicegv, name=slicevar) - - index = ir.Expr.call(self.get(slicevar), (start, none), (), - loc=self.loc) - self.store(value=index, name=indexvar) - - stmt = ir.DelItem(base, self.get(indexvar), loc=self.loc) - self.current_block.append(stmt) - - def op_DELETE_SLICE_2(self, inst, base, nonevar, stop, slicevar, indexvar): - base = self.get(base) - stop = self.get(stop) - - nonegv = ir.Const(None, loc=self.loc) - self.store(value=nonegv, name=nonevar) - none = self.get(nonevar) - - slicegv = ir.Global("slice", slice, loc=self.loc) - self.store(value=slicegv, name=slicevar) - - index = ir.Expr.call(self.get(slicevar), (none, stop,), (), - loc=self.loc) - self.store(value=index, name=indexvar) - - stmt = ir.DelItem(base, 
self.get(indexvar), loc=self.loc) - self.current_block.append(stmt) - - def op_DELETE_SLICE_3(self, inst, base, start, stop, slicevar, indexvar): - base = self.get(base) - start = self.get(start) - stop = self.get(stop) - - slicegv = ir.Global("slice", slice, loc=self.loc) - self.store(value=slicegv, name=slicevar) - - index = ir.Expr.call(self.get(slicevar), (start, stop), (), - loc=self.loc) - self.store(value=index, name=indexvar) - stmt = ir.DelItem(base, self.get(indexvar), loc=self.loc) - self.current_block.append(stmt) - - def op_LOAD_FAST(self, inst, res): - srcname = self.code_locals[inst.arg] - self.store(value=self.get(srcname), name=res) - - def op_STORE_FAST(self, inst, value): - dstname = self.code_locals[inst.arg] - value = self.get(value) - self.store(value=value, name=dstname) - - def op_DUP_TOPX(self, inst, orig, duped): - for src, dst in zip(orig, duped): - self.store(value=self.get(src), name=dst) - - op_DUP_TOP = op_DUP_TOPX - op_DUP_TOP_TWO = op_DUP_TOPX - - def op_STORE_ATTR(self, inst, target, value): - attr = self.code_names[inst.arg] - sa = ir.SetAttr(target=self.get(target), value=self.get(value), - attr=attr, loc=self.loc) - self.current_block.append(sa) - - def op_DELETE_ATTR(self, inst, target): - attr = self.code_names[inst.arg] - sa = ir.DelAttr(target=self.get(target), attr=attr, loc=self.loc) - self.current_block.append(sa) - - def op_LOAD_ATTR(self, inst, item, res): - item = self.get(item) - attr = self.code_names[inst.arg] - getattr = ir.Expr.getattr(item, attr, loc=self.loc) - self.store(getattr, res) - - def op_LOAD_CONST(self, inst, res): - value = self.code_consts[inst.arg] - const = ir.Const(value, loc=self.loc) - self.store(const, res) - - def op_LOAD_GLOBAL(self, inst, res): - name = self.code_names[inst.arg] - value = self.get_global_value(name) - gl = ir.Global(name, value, loc=self.loc) - self.store(gl, res) - - def op_LOAD_DEREF(self, inst, res): - n_cellvars = len(self.code_cellvars) - if inst.arg < n_cellvars: - 
name = self.code_cellvars[inst.arg] - gl = self.get(name) - else: - idx = inst.arg - n_cellvars - name = self.code_freevars[idx] - value = self.get_closure_value(idx) - gl = ir.FreeVar(idx, name, value, loc=self.loc) - self.store(gl, res) - - def op_STORE_DEREF(self, inst, value): - n_cellvars = len(self.code_cellvars) - if inst.arg < n_cellvars: - dstname = self.code_cellvars[inst.arg] - else: - dstname = self.code_freevars[inst.arg - n_cellvars] - value = self.get(value) - self.store(value=value, name=dstname) - - def op_SETUP_LOOP(self, inst): - assert self.blocks[inst.offset] is self.current_block - loop = ir.Loop(inst.offset, exit=(inst.next + inst.arg)) - self.syntax_blocks.append(loop) - - if PYVERSION < (3, 6): - - def op_CALL_FUNCTION(self, inst, func, args, kws, res, vararg): - func = self.get(func) - args = [self.get(x) for x in args] - if vararg is not None: - vararg = self.get(vararg) - - # Process keywords - keyvalues = [] - removethese = [] - for k, v in kws: - k, v = self.get(k), self.get(v) - for inst in self.current_block.body: - if isinstance(inst, ir.Assign) and inst.target is k: - removethese.append(inst) - keyvalues.append((inst.value.value, v)) - - # Remove keyword constant statements - for inst in removethese: - self.current_block.remove(inst) - - expr = ir.Expr.call(func, args, keyvalues, loc=self.loc, - vararg=vararg) - self.store(expr, res) - - op_CALL_FUNCTION_VAR = op_CALL_FUNCTION - else: - def op_CALL_FUNCTION(self, inst, func, args, res): - func = self.get(func) - args = [self.get(x) for x in args] - expr = ir.Expr.call(func, args, (), loc=self.loc) - self.store(expr, res) - - def op_CALL_FUNCTION_KW(self, inst, func, args, names, res): - func = self.get(func) - args = [self.get(x) for x in args] - # Find names const - names = self.get(names) - for inst in self.current_block.body: - if isinstance(inst, ir.Assign) and inst.target is names: - self.current_block.remove(inst) - keys = inst.value.value - break - - nkeys = len(keys) - 
posvals = args[:-nkeys] - kwvals = args[-nkeys:] - keyvalues = list(zip(keys, kwvals)) - - expr = ir.Expr.call(func, posvals, keyvalues, loc=self.loc) - self.store(expr, res) - - def op_CALL_FUNCTION_EX(self, inst, func, vararg, res): - func = self.get(func) - vararg = self.get(vararg) - expr = ir.Expr.call(func, [], [], loc=self.loc, vararg=vararg) - self.store(expr, res) - - def op_BUILD_TUPLE_UNPACK_WITH_CALL(self, inst, tuples, temps): - first = self.get(tuples[0]) - for other, tmp in zip(map(self.get, tuples[1:]), temps): - out = ir.Expr.binop(fn='+', lhs=first, rhs=other, loc=self.loc) - self.store(out, tmp) - first = tmp - - def op_BUILD_CONST_KEY_MAP(self, inst, keys, keytmps, values, res): - # Unpack the constant key-tuple and reused build_map which takes - # a sequence of (key, value) pair. - keyvar = self.get(keys) - # TODO: refactor this pattern. occurred several times. - for inst in self.current_block.body: - if isinstance(inst, ir.Assign) and inst.target is keyvar: - self.current_block.remove(inst) - keytup = inst.value.value - break - assert len(keytup) == len(values) - keyconsts = [ir.Const(value=x, loc=self.loc) for x in keytup] - for kval, tmp in zip(keyconsts, keytmps): - self.store(kval, tmp) - items = list(zip(map(self.get, keytmps), map(self.get, values))) - expr = ir.Expr.build_map(items=items, size=2, loc=self.loc) - self.store(expr, res) - - def op_GET_ITER(self, inst, value, res): - expr = ir.Expr.getiter(value=self.get(value), loc=self.loc) - self.store(expr, res) - - def op_FOR_ITER(self, inst, iterator, pair, indval, pred): - """ - Assign new block other this instruction. 
- """ - assert inst.offset in self.blocks, "FOR_ITER must be block head" - - # Emit code - val = self.get(iterator) - - pairval = ir.Expr.iternext(value=val, loc=self.loc) - self.store(pairval, pair) - - iternext = ir.Expr.pair_first(value=self.get(pair), loc=self.loc) - self.store(iternext, indval) - - isvalid = ir.Expr.pair_second(value=self.get(pair), loc=self.loc) - self.store(isvalid, pred) - - # Conditional jump - br = ir.Branch(cond=self.get(pred), truebr=inst.next, - falsebr=inst.get_jump_target(), - loc=self.loc) - self.current_block.append(br) - - def op_BINARY_SUBSCR(self, inst, target, index, res): - index = self.get(index) - target = self.get(target) - expr = ir.Expr.getitem(target, index=index, loc=self.loc) - self.store(expr, res) - - def op_STORE_SUBSCR(self, inst, target, index, value): - index = self.get(index) - target = self.get(target) - value = self.get(value) - stmt = ir.SetItem(target=target, index=index, value=value, - loc=self.loc) - self.current_block.append(stmt) - - def op_DELETE_SUBSCR(self, inst, target, index): - index = self.get(index) - target = self.get(target) - stmt = ir.DelItem(target=target, index=index, loc=self.loc) - self.current_block.append(stmt) - - def op_BUILD_TUPLE(self, inst, items, res): - expr = ir.Expr.build_tuple(items=[self.get(x) for x in items], - loc=self.loc) - self.store(expr, res) - - def op_BUILD_LIST(self, inst, items, res): - expr = ir.Expr.build_list(items=[self.get(x) for x in items], - loc=self.loc) - self.store(expr, res) - - def op_BUILD_SET(self, inst, items, res): - expr = ir.Expr.build_set(items=[self.get(x) for x in items], - loc=self.loc) - self.store(expr, res) - - def op_BUILD_MAP(self, inst, items, size, res): - items = [(self.get(k), self.get(v)) for k, v in items] - expr = ir.Expr.build_map(items=items, size=size, loc=self.loc) - self.store(expr, res) - - def op_STORE_MAP(self, inst, dct, key, value): - stmt = ir.StoreMap(dct=self.get(dct), key=self.get(key), - value=self.get(value), 
loc=self.loc) - self.current_block.append(stmt) - - def op_UNARY_NEGATIVE(self, inst, value, res): - value = self.get(value) - expr = ir.Expr.unary('-', value=value, loc=self.loc) - return self.store(expr, res) - - def op_UNARY_POSITIVE(self, inst, value, res): - value = self.get(value) - expr = ir.Expr.unary('+', value=value, loc=self.loc) - return self.store(expr, res) - - def op_UNARY_INVERT(self, inst, value, res): - value = self.get(value) - expr = ir.Expr.unary('~', value=value, loc=self.loc) - return self.store(expr, res) - - def op_UNARY_NOT(self, inst, value, res): - value = self.get(value) - expr = ir.Expr.unary('not', value=value, loc=self.loc) - return self.store(expr, res) - - def _binop(self, op, lhs, rhs, res): - lhs = self.get(lhs) - rhs = self.get(rhs) - expr = ir.Expr.binop(op, lhs=lhs, rhs=rhs, loc=self.loc) - self.store(expr, res) - - def _inplace_binop(self, op, lhs, rhs, res): - lhs = self.get(lhs) - rhs = self.get(rhs) - expr = ir.Expr.inplace_binop(op + '=', op, lhs=lhs, rhs=rhs, loc=self.loc) - self.store(expr, res) - - def op_BINARY_ADD(self, inst, lhs, rhs, res): - self._binop('+', lhs, rhs, res) - - def op_BINARY_SUBTRACT(self, inst, lhs, rhs, res): - self._binop('-', lhs, rhs, res) - - def op_BINARY_MULTIPLY(self, inst, lhs, rhs, res): - self._binop('*', lhs, rhs, res) - - def op_BINARY_DIVIDE(self, inst, lhs, rhs, res): - self._binop('/?', lhs, rhs, res) - - def op_BINARY_TRUE_DIVIDE(self, inst, lhs, rhs, res): - self._binop('/', lhs, rhs, res) - - def op_BINARY_FLOOR_DIVIDE(self, inst, lhs, rhs, res): - self._binop('//', lhs, rhs, res) - - def op_BINARY_MODULO(self, inst, lhs, rhs, res): - self._binop('%', lhs, rhs, res) - - def op_BINARY_POWER(self, inst, lhs, rhs, res): - self._binop('**', lhs, rhs, res) - - def op_BINARY_MATRIX_MULTIPLY(self, inst, lhs, rhs, res): - self._binop('@', lhs, rhs, res) - - def op_BINARY_LSHIFT(self, inst, lhs, rhs, res): - self._binop('<<', lhs, rhs, res) - - def op_BINARY_RSHIFT(self, inst, lhs, rhs, 
res): - self._binop('>>', lhs, rhs, res) - - def op_BINARY_AND(self, inst, lhs, rhs, res): - self._binop('&', lhs, rhs, res) - - def op_BINARY_OR(self, inst, lhs, rhs, res): - self._binop('|', lhs, rhs, res) - - def op_BINARY_XOR(self, inst, lhs, rhs, res): - self._binop('^', lhs, rhs, res) - - def op_INPLACE_ADD(self, inst, lhs, rhs, res): - self._inplace_binop('+', lhs, rhs, res) - - def op_INPLACE_SUBTRACT(self, inst, lhs, rhs, res): - self._inplace_binop('-', lhs, rhs, res) - - def op_INPLACE_MULTIPLY(self, inst, lhs, rhs, res): - self._inplace_binop('*', lhs, rhs, res) - - def op_INPLACE_DIVIDE(self, inst, lhs, rhs, res): - self._inplace_binop('/?', lhs, rhs, res) - - def op_INPLACE_TRUE_DIVIDE(self, inst, lhs, rhs, res): - self._inplace_binop('/', lhs, rhs, res) - - def op_INPLACE_FLOOR_DIVIDE(self, inst, lhs, rhs, res): - self._inplace_binop('//', lhs, rhs, res) - - def op_INPLACE_MODULO(self, inst, lhs, rhs, res): - self._inplace_binop('%', lhs, rhs, res) - - def op_INPLACE_POWER(self, inst, lhs, rhs, res): - self._inplace_binop('**', lhs, rhs, res) - - def op_INPLACE_MATRIX_MULTIPLY(self, inst, lhs, rhs, res): - self._inplace_binop('@', lhs, rhs, res) - - def op_INPLACE_LSHIFT(self, inst, lhs, rhs, res): - self._inplace_binop('<<', lhs, rhs, res) - - def op_INPLACE_RSHIFT(self, inst, lhs, rhs, res): - self._inplace_binop('>>', lhs, rhs, res) - - def op_INPLACE_AND(self, inst, lhs, rhs, res): - self._inplace_binop('&', lhs, rhs, res) - - def op_INPLACE_OR(self, inst, lhs, rhs, res): - self._inplace_binop('|', lhs, rhs, res) - - def op_INPLACE_XOR(self, inst, lhs, rhs, res): - self._inplace_binop('^', lhs, rhs, res) - - def op_JUMP_ABSOLUTE(self, inst): - jmp = ir.Jump(inst.get_jump_target(), loc=self.loc) - self.current_block.append(jmp) - - def op_JUMP_FORWARD(self, inst): - jmp = ir.Jump(inst.get_jump_target(), loc=self.loc) - self.current_block.append(jmp) - - def op_POP_BLOCK(self, inst): - self.syntax_blocks.pop() - - def op_RETURN_VALUE(self, inst, 
retval, castval): - self.store(ir.Expr.cast(self.get(retval), loc=self.loc), castval) - ret = ir.Return(self.get(castval), loc=self.loc) - self.current_block.append(ret) - - def op_COMPARE_OP(self, inst, lhs, rhs, res): - op = dis.cmp_op[inst.arg] - self._binop(op, lhs, rhs, res) - - def op_BREAK_LOOP(self, inst): - loop = self.syntax_blocks[-1] - assert isinstance(loop, ir.Loop) - jmp = ir.Jump(target=loop.exit, loc=self.loc) - self.current_block.append(jmp) - - def _op_JUMP_IF(self, inst, pred, iftrue): - brs = { - True: inst.get_jump_target(), - False: inst.next, - } - truebr = brs[iftrue] - falsebr = brs[not iftrue] - bra = ir.Branch(cond=self.get(pred), truebr=truebr, falsebr=falsebr, - loc=self.loc) - self.current_block.append(bra) - - def op_JUMP_IF_FALSE(self, inst, pred): - self._op_JUMP_IF(inst, pred=pred, iftrue=False) - - def op_JUMP_IF_TRUE(self, inst, pred): - self._op_JUMP_IF(inst, pred=pred, iftrue=True) - - def op_POP_JUMP_IF_FALSE(self, inst, pred): - self._op_JUMP_IF(inst, pred=pred, iftrue=False) - - def op_POP_JUMP_IF_TRUE(self, inst, pred): - self._op_JUMP_IF(inst, pred=pred, iftrue=True) - - def op_JUMP_IF_FALSE_OR_POP(self, inst, pred): - self._op_JUMP_IF(inst, pred=pred, iftrue=False) - - def op_JUMP_IF_TRUE_OR_POP(self, inst, pred): - self._op_JUMP_IF(inst, pred=pred, iftrue=True) - - def op_RAISE_VARARGS(self, inst, exc): - if exc is not None: - exc = self.get(exc) - stmt = ir.Raise(exception=exc, loc=self.loc) - self.current_block.append(stmt) - - def op_YIELD_VALUE(self, inst, value, res): - # initialize index to None. 
it's being set later in post-processing - index = None - inst = ir.Yield(value=self.get(value), index=index, loc=self.loc) - return self.store(inst, res) - - def op_MAKE_FUNCTION(self, inst, name, code, closure, annotations, kwdefaults, defaults, res): - if annotations != None: - raise NotImplementedError("op_MAKE_FUNCTION with annotations is not implemented") - if kwdefaults != None: - raise NotImplementedError("op_MAKE_FUNCTION with kwdefaults is not implemented") - if isinstance(defaults, tuple): - defaults = tuple([self.get(name) for name in defaults]) - fcode = self.definitions[code][0].value - if name: - name = self.get(name) - if closure: - closure = self.get(closure) - expr = ir.Expr.make_function(name, fcode, closure, defaults, self.loc) - self.store(expr, res) - - def op_MAKE_CLOSURE(self, inst, name, code, closure, annotations, kwdefaults, defaults, res): - self.op_MAKE_FUNCTION(inst, name, code, closure, annotations, kwdefaults, defaults, res) - - def op_LOAD_CLOSURE(self, inst, res): - n_cellvars = len(self.code_cellvars) - if inst.arg < n_cellvars: - name = self.code_cellvars[inst.arg] - try: - gl = self.get(name) - except NotDefinedError as e: - raise NotImplementedError("Unsupported use of op_LOAD_CLOSURE encountered") - else: - idx = inst.arg - n_cellvars - name = self.code_freevars[idx] - value = self.get_closure_value(idx) - gl = ir.FreeVar(idx, name, value, loc=self.loc) - self.store(gl, res) - - def op_LIST_APPEND(self, inst, target, value, appendvar, res): - target = self.get(target) - value = self.get(value) - appendattr = ir.Expr.getattr(target, 'append', loc=self.loc) - self.store(value=appendattr, name=appendvar) - appendinst = ir.Expr.call(self.get(appendvar), (value,), (), loc=self.loc) - self.store(value=appendinst, name=res) - - - # NOTE: The LOAD_METHOD opcode is implemented as a LOAD_ATTR for ease, - # however this means a new object (the bound-method instance) could be - # created. 
Conversely, using a pure LOAD_METHOD no intermediary is present - # and it is essentially like a pointer grab and forward to CALL_METHOD. The - # net outcome is that the implementation in Numba produces the same result, - # but in object mode it may be that it runs more slowly than it would if - # run in CPython. - - def op_LOAD_METHOD(self, *args, **kws): - self.op_LOAD_ATTR(*args, **kws) - - def op_CALL_METHOD(self, *args, **kws): - self.op_CALL_FUNCTION(*args, **kws) diff --git a/numba/numba/io_support.py b/numba/numba/io_support.py deleted file mode 100644 index dff5bdb06..000000000 --- a/numba/numba/io_support.py +++ /dev/null @@ -1,7 +0,0 @@ -try: - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO -except ImportError: - from io import StringIO diff --git a/numba/numba/ir.py b/numba/numba/ir.py deleted file mode 100644 index ac3772e63..000000000 --- a/numba/numba/ir.py +++ /dev/null @@ -1,1015 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from collections import defaultdict -import copy -import itertools -import os -import linecache -import pprint -import sys -import warnings -from numba import config, errors - -from . 
import utils -from .errors import (NotDefinedError, RedefinedError, VerificationError, - ConstantInferenceError) - -# terminal color markup -_termcolor = errors.termcolor() - -class Loc(object): - """Source location - - """ - - def __init__(self, filename, line, col=None): - self.filename = filename - self.line = line - self.col = col - - @classmethod - def from_function_id(cls, func_id): - return cls(func_id.filename, func_id.firstlineno) - - def __repr__(self): - return "Loc(filename=%s, line=%s, col=%s)" % (self.filename, - self.line, self.col) - - def __str__(self): - if self.col is not None: - return "%s (%s:%s)" % (self.filename, self.line, self.col) - else: - return "%s (%s)" % (self.filename, self.line) - - def strformat(self, nlines_up=2): - try: - # Try to get a relative path - # ipython/jupyter input just returns as self.filename - path = os.path.relpath(self.filename) - except ValueError: - # Fallback to absolute path if error occurred in getting the - # relative path. - # This may happen on windows if the drive is different - path = os.path.abspath(self.filename) - - lines = linecache.getlines(path) - - ret = [] # accumulates output - if lines and self.line: - - def count_spaces(string): - spaces = 0 - for x in itertools.takewhile(str.isspace, str(string)): - spaces += 1 - return spaces - - # A few places in the code still use no `loc` or default to line 1 - # this is often in places where exceptions are used for the purposes - # of flow control. 
As a result max is in use to prevent slice from - # `[negative: positive]` - selected = lines[max(0, self.line - nlines_up):self.line] - - # see if selected contains a definition - def_found = False - for x in selected: - if 'def ' in x: - def_found = True - - # no definition found, try and find one - if not def_found: - # try and find a def, go backwards from error line - fn_name = None - for x in reversed(lines[:self.line - 1]): - if 'def ' in x: - fn_name = x - break - if fn_name: - ret.append(fn_name) - spaces = count_spaces(x) - ret.append(' '*(4 + spaces) + '\n') - - if selected: - ret.extend(selected[:-1]) - ret.append(_termcolor.highlight(selected[-1])) - - # point at the problem with a caret - spaces = count_spaces(selected[-1]) - ret.append(' '*(spaces) + _termcolor.indicate("^")) - - # if in the REPL source may not be available - if not ret: - ret = "" - - err = _termcolor.filename('\nFile "%s", line %d:')+'\n%s' - tmp = err % (path, self.line, _termcolor.code(''.join(ret))) - return tmp - - def with_lineno(self, line, col=None): - """ - Return a new Loc with this line number. - """ - return type(self)(self.filename, line, col) - - -class VarMap(object): - def __init__(self): - self._con = {} - - def define(self, name, var): - if name in self._con: - raise RedefinedError(name) - else: - self._con[name] = var - - def get(self, name): - try: - return self._con[name] - except KeyError: - raise NotDefinedError(name) - - def __contains__(self, name): - return name in self._con - - def __len__(self): - return len(self._con) - - def __repr__(self): - return pprint.pformat(self._con) - - def __hash__(self): - return hash(self.name) - - def __iter__(self): - return self._con.iterkeys() - - -class Inst(object): - """ - Base class for all IR instructions. - """ - - def list_vars(self): - """ - List the variables used (read or written) by the instruction. 
- """ - raise NotImplementedError - - def _rec_list_vars(self, val): - """ - A recursive helper used to implement list_vars() in subclasses. - """ - if isinstance(val, Var): - return [val] - elif isinstance(val, Inst): - return val.list_vars() - elif isinstance(val, (list, tuple)): - lst = [] - for v in val: - lst.extend(self._rec_list_vars(v)) - return lst - elif isinstance(val, dict): - lst = [] - for v in val.values(): - lst.extend(self._rec_list_vars(v)) - return lst - else: - return [] - - -class Stmt(Inst): - """ - Base class for IR statements (instructions which can appear on their - own in a Block). - """ - # Whether this statement ends its basic block (i.e. it will either jump - # to another block or exit the function). - is_terminator = False - # Whether this statement exits the function. - is_exit = False - - def list_vars(self): - return self._rec_list_vars(self.__dict__) - - -class Terminator(Stmt): - """ - IR statements that are terminators: the last statement in a block. - A terminator must either: - - exit the function - - jump to a block - - All subclass of Terminator must override `.get_targets()` to return a list - of jump targets. - """ - is_terminator = True - - def get_targets(self): - raise NotImplementedError(type(self)) - - -class Expr(Inst): - """ - An IR expression (an instruction which can only be part of a larger - statement). 
- """ - - def __init__(self, op, loc, **kws): - self.op = op - self.loc = loc - self._kws = kws - - def __getattr__(self, name): - if name.startswith('_'): - return Inst.__getattr__(self, name) - return self._kws[name] - - def __setattr__(self, name, value): - if name in ('op', 'loc', '_kws'): - self.__dict__[name] = value - else: - self._kws[name] = value - - @classmethod - def binop(cls, fn, lhs, rhs, loc): - op = 'binop' - return cls(op=op, loc=loc, fn=fn, lhs=lhs, rhs=rhs, - static_lhs=UNDEFINED, static_rhs=UNDEFINED) - - @classmethod - def inplace_binop(cls, fn, immutable_fn, lhs, rhs, loc): - op = 'inplace_binop' - return cls(op=op, loc=loc, fn=fn, immutable_fn=immutable_fn, - lhs=lhs, rhs=rhs, - static_lhs=UNDEFINED, static_rhs=UNDEFINED) - - @classmethod - def unary(cls, fn, value, loc): - op = 'unary' - return cls(op=op, loc=loc, fn=fn, value=value) - - @classmethod - def call(cls, func, args, kws, loc, vararg=None): - op = 'call' - return cls(op=op, loc=loc, func=func, args=args, kws=kws, - vararg=vararg) - - @classmethod - def build_tuple(cls, items, loc): - op = 'build_tuple' - return cls(op=op, loc=loc, items=items) - - @classmethod - def build_list(cls, items, loc): - op = 'build_list' - return cls(op=op, loc=loc, items=items) - - @classmethod - def build_set(cls, items, loc): - op = 'build_set' - return cls(op=op, loc=loc, items=items) - - @classmethod - def build_map(cls, items, size, loc): - op = 'build_map' - return cls(op=op, loc=loc, items=items, size=size) - - @classmethod - def pair_first(cls, value, loc): - op = 'pair_first' - return cls(op=op, loc=loc, value=value) - - @classmethod - def pair_second(cls, value, loc): - op = 'pair_second' - return cls(op=op, loc=loc, value=value) - - @classmethod - def getiter(cls, value, loc): - op = 'getiter' - return cls(op=op, loc=loc, value=value) - - @classmethod - def iternext(cls, value, loc): - op = 'iternext' - return cls(op=op, loc=loc, value=value) - - @classmethod - def exhaust_iter(cls, value, 
count, loc): - op = 'exhaust_iter' - return cls(op=op, loc=loc, value=value, count=count) - - @classmethod - def getattr(cls, value, attr, loc): - op = 'getattr' - return cls(op=op, loc=loc, value=value, attr=attr) - - @classmethod - def getitem(cls, value, index, loc): - op = 'getitem' - return cls(op=op, loc=loc, value=value, index=index) - - @classmethod - def static_getitem(cls, value, index, index_var, loc): - op = 'static_getitem' - return cls(op=op, loc=loc, value=value, index=index, - index_var=index_var) - - @classmethod - def cast(cls, value, loc): - """ - A node for implicit casting at the return statement - """ - op = 'cast' - return cls(op=op, value=value, loc=loc) - - @classmethod - def make_function(cls, name, code, closure, defaults, loc): - """ - A node for making a function object. - """ - op = 'make_function' - return cls(op=op, name=name, code=code, closure=closure, defaults=defaults, loc=loc) - - def __repr__(self): - if self.op == 'call': - args = ', '.join(str(a) for a in self.args) - pres_order = self._kws.items() if config.DIFF_IR == 0 else sorted(self._kws.items()) - kws = ', '.join('%s=%s' % (k, v) for k, v in pres_order) - vararg = '*%s' % (self.vararg,) if self.vararg is not None else '' - arglist = ', '.join(filter(None, [args, vararg, kws])) - return 'call %s(%s)' % (self.func, arglist) - elif self.op == 'binop': - return '%s %s %s' % (self.lhs, self.fn, self.rhs) - else: - pres_order = self._kws.items() if config.DIFF_IR == 0 else sorted(self._kws.items()) - args = ('%s=%s' % (k, v) for k, v in pres_order) - return '%s(%s)' % (self.op, ', '.join(args)) - - def list_vars(self): - return self._rec_list_vars(self._kws) - - def infer_constant(self): - raise ConstantInferenceError('%s' % self, loc=self.loc) - - -class SetItem(Stmt): - """ - target[index] = value - """ - - def __init__(self, target, index, value, loc): - self.target = target - self.index = index - self.value = value - self.loc = loc - - def __repr__(self): - return '%s[%s] 
= %s' % (self.target, self.index, self.value) - - -class StaticSetItem(Stmt): - """ - target[constant index] = value - """ - - def __init__(self, target, index, index_var, value, loc): - self.target = target - self.index = index - self.index_var = index_var - self.value = value - self.loc = loc - - def __repr__(self): - return '%s[%r] = %s' % (self.target, self.index, self.value) - - -class DelItem(Stmt): - """ - del target[index] - """ - - def __init__(self, target, index, loc): - self.target = target - self.index = index - self.loc = loc - - def __repr__(self): - return 'del %s[%s]' % (self.target, self.index) - - -class SetAttr(Stmt): - def __init__(self, target, attr, value, loc): - self.target = target - self.attr = attr - self.value = value - self.loc = loc - - def __repr__(self): - return '(%s).%s = %s' % (self.target, self.attr, self.value) - - -class DelAttr(Stmt): - def __init__(self, target, attr, loc): - self.target = target - self.attr = attr - self.loc = loc - - def __repr__(self): - return 'del (%s).%s' % (self.target, self.attr) - - -class StoreMap(Stmt): - def __init__(self, dct, key, value, loc): - self.dct = dct - self.key = key - self.value = value - self.loc = loc - - def __repr__(self): - return '%s[%s] = %s' % (self.dct, self.key, self.value) - - -class Del(Stmt): - def __init__(self, value, loc): - self.value = value - self.loc = loc - - def __str__(self): - return "del %s" % self.value - - -class Raise(Terminator): - is_exit = True - - def __init__(self, exception, loc): - self.exception = exception - self.loc = loc - - def __str__(self): - return "raise %s" % self.exception - - def get_targets(self): - return [] - - -class StaticRaise(Terminator): - """ - Raise an exception class and arguments known at compile-time. - Note that if *exc_class* is None, a bare "raise" statement is implied - (i.e. re-raise the current exception). 
- """ - is_exit = True - - def __init__(self, exc_class, exc_args, loc): - self.exc_class = exc_class - self.exc_args = exc_args - self.loc = loc - - def __str__(self): - if self.exc_class is None: - return "raise" - elif self.exc_args is None: - return "raise %s" % (self.exc_class,) - else: - return "raise %s(%s)" % (self.exc_class, - ", ".join(map(repr, self.exc_args))) - - def get_targets(self): - return [] - - -class Return(Terminator): - """ - Return to caller. - """ - is_exit = True - - def __init__(self, value, loc): - self.value = value - self.loc = loc - - def __str__(self): - return 'return %s' % self.value - - def get_targets(self): - return [] - - -class Jump(Terminator): - """ - Unconditional branch. - """ - - def __init__(self, target, loc): - self.target = target - self.loc = loc - - def __str__(self): - return 'jump %s' % self.target - - def get_targets(self): - return [self.target] - - -class Branch(Terminator): - """ - Conditional branch. - """ - - def __init__(self, cond, truebr, falsebr, loc): - self.cond = cond - self.truebr = truebr - self.falsebr = falsebr - self.loc = loc - - def __str__(self): - return 'branch %s, %s, %s' % (self.cond, self.truebr, self.falsebr) - - def get_targets(self): - return [self.truebr, self.falsebr] - - -class Assign(Stmt): - """ - Assign to a variable. - """ - def __init__(self, value, target, loc): - self.value = value - self.target = target - self.loc = loc - - def __str__(self): - return '%s = %s' % (self.target, self.value) - - -class Print(Stmt): - """ - Print some values. 
- """ - def __init__(self, args, vararg, loc): - self.args = args - self.vararg = vararg - # Constant-inferred arguments - self.consts = {} - self.loc = loc - - def __str__(self): - return 'print(%s)' % ', '.join(str(v) for v in self.args) - - -class Yield(Inst): - def __init__(self, value, loc, index): - self.value = value - self.loc = loc - self.index = index - - def __str__(self): - return 'yield %s' % (self.value,) - - def list_vars(self): - return [self.value] - - -class Arg(object): - def __init__(self, name, index, loc): - self.name = name - self.index = index - self.loc = loc - - def __repr__(self): - return 'arg(%d, name=%s)' % (self.index, self.name) - - def infer_constant(self): - raise ConstantInferenceError('%s' % self, loc=self.loc) - - -class Const(object): - def __init__(self, value, loc): - self.value = value - self.loc = loc - - def __repr__(self): - return 'const(%s, %s)' % (type(self.value).__name__, self.value) - - def infer_constant(self): - return self.value - -class Global(object): - def __init__(self, name, value, loc): - self.name = name - self.value = value - self.loc = loc - - def __str__(self): - return 'global(%s: %s)' % (self.name, self.value) - - def infer_constant(self): - return self.value - - def __deepcopy__(self, memo): - # don't copy value since it can fail (e.g. modules) - # value is readonly and doesn't need copying - return Global(self.name, self.value, copy.deepcopy(self.loc)) - - -class FreeVar(object): - """ - A freevar, as loaded by LOAD_DECREF. - (i.e. 
a variable defined in an enclosing non-global scope) - """ - - def __init__(self, index, name, value, loc): - # index inside __code__.co_freevars - self.index = index - # variable name - self.name = name - # frozen value - self.value = value - self.loc = loc - - def __str__(self): - return 'freevar(%s: %s)' % (self.name, self.value) - - def infer_constant(self): - return self.value - - -class Var(object): - """ - Attributes - ----------- - - scope: Scope - - - name: str - - - loc: Loc - Definition location - """ - - def __init__(self, scope, name, loc): - self.scope = scope - self.name = name - self.loc = loc - - def __repr__(self): - return 'Var(%s, %s)' % (self.name, self.loc) - - def __str__(self): - return self.name - - @property - def is_temp(self): - return self.name.startswith("$") - - -class Intrinsic(object): - """ - A low-level "intrinsic" function. Suitable as the callable of a "call" - expression. - - The given *name* is backend-defined and will be inserted as-is - in the generated low-level IR. - The *type* is the equivalent Numba signature of calling the intrinsic. - """ - - def __init__(self, name, type, args): - self.name = name - self.type = type - self.loc = None - self.args = args - - def __repr__(self): - return 'Intrinsic(%s, %s, %s)' % (self.name, self.type, self.loc) - - def __str__(self): - return self.name - - -class Scope(object): - """ - Attributes - ----------- - - parent: Scope - Parent scope - - - localvars: VarMap - Scope-local variable map - - - loc: Loc - Start of scope location - - """ - - def __init__(self, parent, loc): - self.parent = parent - self.localvars = VarMap() - self.loc = loc - self.redefined = defaultdict(int) - - def define(self, name, loc): - """ - Define a variable - """ - v = Var(scope=self, name=name, loc=loc) - self.localvars.define(v.name, v) - return v - - def get(self, name): - """ - Refer to a variable. Returns the latest version. 
- """ - if name in self.redefined: - name = "%s.%d" % (name, self.redefined[name]) - return self.get_exact(name) - - def get_exact(self, name): - """ - Refer to a variable. The returned variable has the exact - name (exact variable version). - """ - try: - return self.localvars.get(name) - except NotDefinedError: - if self.has_parent: - return self.parent.get(name) - else: - raise - - def get_or_define(self, name, loc): - if name in self.redefined: - name = "%s.%d" % (name, self.redefined[name]) - - v = Var(scope=self, name=name, loc=loc) - if name not in self.localvars: - return self.define(name, loc) - else: - return self.localvars.get(name) - - def redefine(self, name, loc, rename=True): - """ - Redefine if the name is already defined - """ - if name not in self.localvars: - return self.define(name, loc) - elif not rename: - # Must use the same name if the variable is a cellvar, which - # means it could be captured in a closure. - return self.localvars.get(name) - else: - ct = self.redefined[name] - self.redefined[name] = ct + 1 - newname = "%s.%d" % (name, ct + 1) - return self.define(newname, loc) - - def make_temp(self, loc): - n = len(self.localvars) - v = Var(scope=self, name='$%d' % n, loc=loc) - self.localvars.define(v.name, v) - return v - - @property - def has_parent(self): - return self.parent is not None - - def __repr__(self): - return "Scope(has_parent=%r, num_vars=%d, %s)" % (self.has_parent, - len(self.localvars), - self.loc) - - -class Block(object): - """A code block - - """ - - def __init__(self, scope, loc): - self.scope = scope - self.body = [] - self.loc = loc - - def copy(self): - block = Block(self.scope, self.loc) - block.body = self.body[:] - return block - - def find_exprs(self, op=None): - """ - Iterate over exprs of the given *op* in this block. 
- """ - for inst in self.body: - if isinstance(inst, Assign): - expr = inst.value - if isinstance(expr, Expr): - if op is None or expr.op == op: - yield expr - - def find_insts(self, cls=None): - """ - Iterate over insts of the given class in this block. - """ - for inst in self.body: - if isinstance(inst, cls): - yield inst - - def find_variable_assignment(self, name): - """ - Returns the assignment inst associated with variable "name", None if - it cannot be found. - """ - for x in self.find_insts(cls=Assign): - if x.target.name == name: - return x - return None - - def prepend(self, inst): - assert isinstance(inst, Stmt) - self.body.insert(0, inst) - - def append(self, inst): - assert isinstance(inst, Stmt) - self.body.append(inst) - - def remove(self, inst): - assert isinstance(inst, Stmt) - del self.body[self.body.index(inst)] - - def clear(self): - del self.body[:] - - def dump(self, file=None): - # Avoid early bind of sys.stdout as default value - file = file or sys.stdout - for inst in self.body: - if hasattr(inst, 'dump'): - inst.dump(file) - else: - inst_vars = sorted(str(v) for v in inst.list_vars()) - print(' %-40s %s' % (inst, inst_vars), file=file) - - @property - def terminator(self): - return self.body[-1] - - @property - def is_terminated(self): - return self.body and self.body[-1].is_terminator - - def verify(self): - if not self.is_terminated: - raise VerificationError("Missing block terminator") - # Only the last instruction can be a terminator - for inst in self.body[:-1]: - if inst.is_terminator: - raise VerificationError("Terminator before the last " - "instruction") - - def insert_after(self, stmt, other): - """ - Insert *stmt* after *other*. 
- """ - index = self.body.index(other) - self.body.insert(index + 1, stmt) - - def insert_before_terminator(self, stmt): - assert isinstance(stmt, Stmt) - assert self.is_terminated - self.body.insert(-1, stmt) - - def __repr__(self): - return "" % (self.loc,) - - -class Loop(object): - __slots__ = "entry", "exit" - - def __init__(self, entry, exit): - self.entry = entry - self.exit = exit - - def __repr__(self): - args = self.entry, self.exit - return "Loop(entry=%s, exit=%s)" % args - - -class FunctionIR(object): - - def __init__(self, blocks, is_generator, func_id, loc, - definitions, arg_count, arg_names): - self.blocks = blocks - self.is_generator = is_generator - self.func_id = func_id - self.loc = loc - self.arg_count = arg_count - self.arg_names = arg_names - - self._definitions = definitions - - self._reset_analysis_variables() - - def _reset_analysis_variables(self): - from . import consts - - self._consts = consts.ConstantInference(self) - - # Will be computed by PostProcessor - self.generator_info = None - self.variable_lifetime = None - # { ir.Block: { variable names (potentially) alive at start of block } } - self.block_entry_vars = {} - - def derive(self, blocks, arg_count=None, arg_names=None, - force_non_generator=False): - """ - Derive a new function IR from this one, using the given blocks, - and possibly modifying the argument count and generator flag. - - Post-processing will have to be run again on the new IR. 
- """ - firstblock = blocks[min(blocks)] - - new_ir = copy.copy(self) - new_ir.blocks = blocks - new_ir.loc = firstblock.loc - if force_non_generator: - new_ir.is_generator = False - if arg_count is not None: - new_ir.arg_count = arg_count - if arg_names is not None: - new_ir.arg_names = arg_names - new_ir._reset_analysis_variables() - # Make fresh func_id - new_ir.func_id = new_ir.func_id.derive() - return new_ir - - def copy(self): - new_ir = copy.copy(self) - blocks = {} - block_entry_vars = {} - for label, block in self.blocks.items(): - new_block = block.copy() - blocks[label] = new_block - if block in self.block_entry_vars: - block_entry_vars[new_block] = self.block_entry_vars[block] - new_ir.blocks = blocks - new_ir.block_entry_vars = block_entry_vars - return new_ir - - def get_block_entry_vars(self, block): - """ - Return a set of variable names possibly alive at the beginning of - the block. - """ - return self.block_entry_vars[block] - - def infer_constant(self, name): - """ - Try to infer the constant value of a given variable. - """ - if isinstance(name, Var): - name = name.name - return self._consts.infer_constant(name) - - def get_definition(self, value, lhs_only=False): - """ - Get the definition site for the given variable name or instance. - A Expr instance is returned by default, but if lhs_only is set - to True, the left-hand-side variable is returned instead. 
- """ - lhs = value - while True: - if isinstance(value, Var): - lhs = value - name = value.name - elif isinstance(value, str): - lhs = value - name = value - else: - return lhs if lhs_only else value - defs = self._definitions[name] - if len(defs) == 0: - raise KeyError("no definition for %r" - % (name,)) - if len(defs) > 1: - raise KeyError("more than one definition for %r" - % (name,)) - value = defs[0] - - def dump(self, file=None): - # Avoid early bind of sys.stdout as default value - file = file or sys.stdout - for offset, block in sorted(self.blocks.items()): - print('label %s:' % (offset,), file=file) - block.dump(file=file) - - def dump_generator_info(self, file=None): - file = file or sys.stdout - gi = self.generator_info - print("generator state variables:", sorted(gi.state_vars), file=file) - for index, yp in sorted(gi.yield_points.items()): - print("yield point #%d: live variables = %s, weak live variables = %s" - % (index, sorted(yp.live_vars), sorted(yp.weak_live_vars)), - file=file) - - -# A stub for undefined global reference -UNDEFINED = object() diff --git a/numba/numba/ir_utils.py b/numba/numba/ir_utils.py deleted file mode 100644 index f0c4b6f6c..000000000 --- a/numba/numba/ir_utils.py +++ /dev/null @@ -1,1724 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# -import numpy - -import types as pytypes -import collections - -from llvmlite import ir as lir - -import numba -from numba.six import exec_ -from numba import ir, types, typing, config, analysis, utils, cgutils, rewrites -from numba.typing.templates import signature, infer_global, AbstractTemplate -from numba.targets.imputils import impl_ret_untracked -from numba.analysis import (compute_live_map, compute_use_defs, - compute_cfg_from_blocks) -from numba.errors import TypingError, UnsupportedError -import copy - -_unique_var_count = 0 - - -def mk_unique_var(prefix): - global _unique_var_count - var = prefix + "." 
+ str(_unique_var_count) - _unique_var_count = _unique_var_count + 1 - return var - - -_max_label = 0 - - -def get_unused_var_name(prefix, var_table): - """ Get a new var name with a given prefix and - make sure it is unused in the given variable table. - """ - cur = 0 - while True: - var = prefix + str(cur) - if var not in var_table: - return var - cur += 1 - - -def next_label(): - global _max_label - _max_label += 1 - return _max_label - - -def mk_alloc(typemap, calltypes, lhs, size_var, dtype, scope, loc): - """generate an array allocation with np.empty() and return list of nodes. - size_var can be an int variable or tuple of int variables. - """ - out = [] - ndims = 1 - size_typ = types.intp - if isinstance(size_var, tuple): - if len(size_var) == 1: - size_var = size_var[0] - size_var = convert_size_to_var(size_var, typemap, scope, loc, out) - else: - # tuple_var = build_tuple([size_var...]) - ndims = len(size_var) - tuple_var = ir.Var(scope, mk_unique_var("$tuple_var"), loc) - if typemap: - typemap[tuple_var.name] = types.containers.UniTuple( - types.intp, ndims) - # constant sizes need to be assigned to vars - new_sizes = [convert_size_to_var(s, typemap, scope, loc, out) - for s in size_var] - tuple_call = ir.Expr.build_tuple(new_sizes, loc) - tuple_assign = ir.Assign(tuple_call, tuple_var, loc) - out.append(tuple_assign) - size_var = tuple_var - size_typ = types.containers.UniTuple(types.intp, ndims) - # g_np_var = Global(numpy) - g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) - if typemap: - typemap[g_np_var.name] = types.misc.Module(numpy) - g_np = ir.Global('np', numpy, loc) - g_np_assign = ir.Assign(g_np, g_np_var, loc) - # attr call: empty_attr = getattr(g_np_var, empty) - empty_attr_call = ir.Expr.getattr(g_np_var, "empty", loc) - attr_var = ir.Var(scope, mk_unique_var("$empty_attr_attr"), loc) - if typemap: - typemap[attr_var.name] = get_np_ufunc_typ(numpy.empty) - attr_assign = ir.Assign(empty_attr_call, attr_var, loc) - # alloc call: lhs 
= empty_attr(size_var, typ_var) - typ_var = ir.Var(scope, mk_unique_var("$np_typ_var"), loc) - if typemap: - typemap[typ_var.name] = types.functions.NumberClass(dtype) - # assuming str(dtype) returns valid np dtype string - dtype_str = str(dtype) - if dtype_str=='bool': - # empty doesn't like 'bool' sometimes (e.g. kmeans example) - dtype_str = 'bool_' - np_typ_getattr = ir.Expr.getattr(g_np_var, dtype_str, loc) - typ_var_assign = ir.Assign(np_typ_getattr, typ_var, loc) - alloc_call = ir.Expr.call(attr_var, [size_var, typ_var], (), loc) - if calltypes: - calltypes[alloc_call] = typemap[attr_var.name].get_call_type( - typing.Context(), [size_typ, types.functions.NumberClass(dtype)], {}) - # signature( - # types.npytypes.Array(dtype, ndims, 'C'), size_typ, - # types.functions.NumberClass(dtype)) - alloc_assign = ir.Assign(alloc_call, lhs, loc) - - out.extend([g_np_assign, attr_assign, typ_var_assign, alloc_assign]) - return out - - -def convert_size_to_var(size_var, typemap, scope, loc, nodes): - if isinstance(size_var, int): - new_size = ir.Var(scope, mk_unique_var("$alloc_size"), loc) - if typemap: - typemap[new_size.name] = types.intp - size_assign = ir.Assign(ir.Const(size_var, loc), new_size, loc) - nodes.append(size_assign) - return new_size - assert isinstance(size_var, ir.Var) - return size_var - - -def get_np_ufunc_typ(func): - """get type of the incoming function from builtin registry""" - for (k, v) in typing.npydecl.registry.globals: - if k == func: - return v - raise RuntimeError("type for func ", func, " not found") - - -def mk_range_block(typemap, start, stop, step, calltypes, scope, loc): - """make a block that initializes loop range and iteration variables. - target label in jump needs to be set. 
- """ - # g_range_var = Global(range) - g_range_var = ir.Var(scope, mk_unique_var("$range_g_var"), loc) - typemap[g_range_var.name] = get_global_func_typ(range) - g_range = ir.Global('range', range, loc) - g_range_assign = ir.Assign(g_range, g_range_var, loc) - arg_nodes, args = _mk_range_args(typemap, start, stop, step, scope, loc) - # range_call_var = call g_range_var(start, stop, step) - range_call = ir.Expr.call(g_range_var, args, (), loc) - calltypes[range_call] = typemap[g_range_var.name].get_call_type( - typing.Context(), [types.intp] * len(args), {}) - #signature(types.range_state64_type, types.intp) - range_call_var = ir.Var(scope, mk_unique_var("$range_c_var"), loc) - typemap[range_call_var.name] = types.iterators.RangeType(types.intp) - range_call_assign = ir.Assign(range_call, range_call_var, loc) - # iter_var = getiter(range_call_var) - iter_call = ir.Expr.getiter(range_call_var, loc) - calltypes[iter_call] = signature(types.range_iter64_type, - types.range_state64_type) - iter_var = ir.Var(scope, mk_unique_var("$iter_var"), loc) - typemap[iter_var.name] = types.iterators.RangeIteratorType(types.intp) - iter_call_assign = ir.Assign(iter_call, iter_var, loc) - # $phi = iter_var - phi_var = ir.Var(scope, mk_unique_var("$phi"), loc) - typemap[phi_var.name] = types.iterators.RangeIteratorType(types.intp) - phi_assign = ir.Assign(iter_var, phi_var, loc) - # jump to header - jump_header = ir.Jump(-1, loc) - range_block = ir.Block(scope, loc) - range_block.body = arg_nodes + [g_range_assign, range_call_assign, - iter_call_assign, phi_assign, jump_header] - return range_block - - -def _mk_range_args(typemap, start, stop, step, scope, loc): - nodes = [] - if isinstance(stop, ir.Var): - g_stop_var = stop - else: - assert isinstance(stop, int) - g_stop_var = ir.Var(scope, mk_unique_var("$range_stop"), loc) - if typemap: - typemap[g_stop_var.name] = types.intp - stop_assign = ir.Assign(ir.Const(stop, loc), g_stop_var, loc) - nodes.append(stop_assign) - if start == 
0 and step == 1: - return nodes, [g_stop_var] - - if isinstance(start, ir.Var): - g_start_var = start - else: - assert isinstance(start, int) - g_start_var = ir.Var(scope, mk_unique_var("$range_start"), loc) - if typemap: - typemap[g_start_var.name] = types.intp - start_assign = ir.Assign(ir.Const(start, loc), g_start_var, loc) - nodes.append(start_assign) - if step == 1: - return nodes, [g_start_var, g_stop_var] - - if isinstance(step, ir.Var): - g_step_var = step - else: - assert isinstance(step, int) - g_step_var = ir.Var(scope, mk_unique_var("$range_step"), loc) - if typemap: - typemap[g_step_var.name] = types.intp - step_assign = ir.Assign(ir.Const(step, loc), g_step_var, loc) - nodes.append(step_assign) - - return nodes, [g_start_var, g_stop_var, g_step_var] - - -def get_global_func_typ(func): - """get type variable for func() from builtin registry""" - for (k, v) in typing.templates.builtin_registry.globals: - if k == func: - return v - raise RuntimeError("func type not found {}".format(func)) - - -def mk_loop_header(typemap, phi_var, calltypes, scope, loc): - """make a block that is a loop header updating iteration variables. - target labels in branch need to be set. 
- """ - # iternext_var = iternext(phi_var) - iternext_var = ir.Var(scope, mk_unique_var("$iternext_var"), loc) - typemap[iternext_var.name] = types.containers.Pair( - types.intp, types.boolean) - iternext_call = ir.Expr.iternext(phi_var, loc) - calltypes[iternext_call] = signature( - types.containers.Pair( - types.intp, - types.boolean), - types.range_iter64_type) - iternext_assign = ir.Assign(iternext_call, iternext_var, loc) - # pair_first_var = pair_first(iternext_var) - pair_first_var = ir.Var(scope, mk_unique_var("$pair_first_var"), loc) - typemap[pair_first_var.name] = types.intp - pair_first_call = ir.Expr.pair_first(iternext_var, loc) - pair_first_assign = ir.Assign(pair_first_call, pair_first_var, loc) - # pair_second_var = pair_second(iternext_var) - pair_second_var = ir.Var(scope, mk_unique_var("$pair_second_var"), loc) - typemap[pair_second_var.name] = types.boolean - pair_second_call = ir.Expr.pair_second(iternext_var, loc) - pair_second_assign = ir.Assign(pair_second_call, pair_second_var, loc) - # phi_b_var = pair_first_var - phi_b_var = ir.Var(scope, mk_unique_var("$phi"), loc) - typemap[phi_b_var.name] = types.intp - phi_b_assign = ir.Assign(pair_first_var, phi_b_var, loc) - # branch pair_second_var body_block out_block - branch = ir.Branch(pair_second_var, -1, -1, loc) - header_block = ir.Block(scope, loc) - header_block.body = [iternext_assign, pair_first_assign, - pair_second_assign, phi_b_assign, branch] - return header_block - - -def find_op_typ(op, arg_typs): - for ft in typing.templates.builtin_registry.functions: - if ft.key == op: - try: - func_typ = types.Function(ft).get_call_type(typing.Context(), - arg_typs, {}) - except TypingError: - func_typ = None - if func_typ is not None: - return func_typ - raise RuntimeError("unknown array operation") - - -def legalize_names(varnames): - """returns a dictionary for conversion of variable names to legal - parameter names. 
- """ - var_map = {} - for var in varnames: - new_name = var.replace("_", "__").replace("$", "_").replace(".", "_") - assert new_name not in var_map - var_map[var] = new_name - return var_map - - -def get_name_var_table(blocks): - """create a mapping from variable names to their ir.Var objects""" - def get_name_var_visit(var, namevar): - namevar[var.name] = var - return var - namevar = {} - visit_vars(blocks, get_name_var_visit, namevar) - return namevar - - -def replace_var_names(blocks, namedict): - """replace variables (ir.Var to ir.Var) from dictionary (name -> name)""" - # remove identity values to avoid infinite loop - new_namedict = {} - for l, r in namedict.items(): - if l != r: - new_namedict[l] = r - - def replace_name(var, namedict): - assert isinstance(var, ir.Var) - while var.name in namedict: - var = ir.Var(var.scope, namedict[var.name], var.loc) - return var - visit_vars(blocks, replace_name, new_namedict) - - -def replace_var_callback(var, vardict): - assert isinstance(var, ir.Var) - while var.name in vardict.keys(): - new_var = vardict[var.name] - var = ir.Var(new_var.scope, new_var.name, new_var.loc) - return var - - -def replace_vars(blocks, vardict): - """replace variables (ir.Var to ir.Var) from dictionary (name -> ir.Var)""" - # remove identity values to avoid infinite loop - new_vardict = {} - for l, r in vardict.items(): - if l != r.name: - new_vardict[l] = r - visit_vars(blocks, replace_var_callback, new_vardict) - - -def replace_vars_stmt(stmt, vardict): - visit_vars_stmt(stmt, replace_var_callback, vardict) - - -def replace_vars_inner(node, vardict): - return visit_vars_inner(node, replace_var_callback, vardict) - - -# other packages that define new nodes add calls to visit variables in them -# format: {type:function} -visit_vars_extensions = {} - - -def visit_vars(blocks, callback, cbdata): - """go over statements of block bodies and replace variable names with - dictionary. 
- """ - for block in blocks.values(): - for stmt in block.body: - visit_vars_stmt(stmt, callback, cbdata) - return - - -def visit_vars_stmt(stmt, callback, cbdata): - # let external calls handle stmt if type matches - for t, f in visit_vars_extensions.items(): - if isinstance(stmt, t): - f(stmt, callback, cbdata) - return - if isinstance(stmt, ir.Assign): - stmt.target = visit_vars_inner(stmt.target, callback, cbdata) - stmt.value = visit_vars_inner(stmt.value, callback, cbdata) - elif isinstance(stmt, ir.Arg): - stmt.name = visit_vars_inner(stmt.name, callback, cbdata) - elif isinstance(stmt, ir.Return): - stmt.value = visit_vars_inner(stmt.value, callback, cbdata) - elif isinstance(stmt, ir.Raise): - stmt.exception = visit_vars_inner(stmt.exception, callback, cbdata) - elif isinstance(stmt, ir.Branch): - stmt.cond = visit_vars_inner(stmt.cond, callback, cbdata) - elif isinstance(stmt, ir.Jump): - stmt.target = visit_vars_inner(stmt.target, callback, cbdata) - elif isinstance(stmt, ir.Del): - # Because Del takes only a var name, we make up by - # constructing a temporary variable. 
- var = ir.Var(None, stmt.value, stmt.loc) - var = visit_vars_inner(var, callback, cbdata) - stmt.value = var.name - elif isinstance(stmt, ir.DelAttr): - stmt.target = visit_vars_inner(stmt.target, callback, cbdata) - stmt.attr = visit_vars_inner(stmt.attr, callback, cbdata) - elif isinstance(stmt, ir.SetAttr): - stmt.target = visit_vars_inner(stmt.target, callback, cbdata) - stmt.attr = visit_vars_inner(stmt.attr, callback, cbdata) - stmt.value = visit_vars_inner(stmt.value, callback, cbdata) - elif isinstance(stmt, ir.DelItem): - stmt.target = visit_vars_inner(stmt.target, callback, cbdata) - stmt.index = visit_vars_inner(stmt.index, callback, cbdata) - elif isinstance(stmt, ir.StaticSetItem): - stmt.target = visit_vars_inner(stmt.target, callback, cbdata) - stmt.index_var = visit_vars_inner(stmt.index_var, callback, cbdata) - stmt.value = visit_vars_inner(stmt.value, callback, cbdata) - elif isinstance(stmt, ir.SetItem): - stmt.target = visit_vars_inner(stmt.target, callback, cbdata) - stmt.index = visit_vars_inner(stmt.index, callback, cbdata) - stmt.value = visit_vars_inner(stmt.value, callback, cbdata) - elif isinstance(stmt, ir.Print): - stmt.args = [visit_vars_inner(x, callback, cbdata) for x in stmt.args] - else: - # TODO: raise NotImplementedError("no replacement for IR node: ", stmt) - pass - return - - -def visit_vars_inner(node, callback, cbdata): - if isinstance(node, ir.Var): - return callback(node, cbdata) - elif isinstance(node, list): - return [visit_vars_inner(n, callback, cbdata) for n in node] - elif isinstance(node, tuple): - return tuple([visit_vars_inner(n, callback, cbdata) for n in node]) - elif isinstance(node, ir.Expr): - # if node.op in ['binop', 'inplace_binop']: - # lhs = node.lhs.name - # rhs = node.rhs.name - # node.lhs.name = callback, cbdata.get(lhs, lhs) - # node.rhs.name = callback, cbdata.get(rhs, rhs) - for arg in node._kws.keys(): - node._kws[arg] = visit_vars_inner(node._kws[arg], callback, cbdata) - return node - - 
-add_offset_to_labels_extensions = {} - - -def add_offset_to_labels(blocks, offset): - """add an offset to all block labels and jump/branch targets - """ - new_blocks = {} - for l, b in blocks.items(): - # some parfor last blocks might be empty - term = None - if b.body: - term = b.body[-1] - for inst in b.body: - for T, f in add_offset_to_labels_extensions.items(): - if isinstance(inst, T): - f_max = f(inst, offset) - if isinstance(term, ir.Jump): - b.body[-1] = ir.Jump(term.target + offset, term.loc) - if isinstance(term, ir.Branch): - b.body[-1] = ir.Branch(term.cond, term.truebr + offset, - term.falsebr + offset, term.loc) - new_blocks[l + offset] = b - return new_blocks - - -find_max_label_extensions = {} - - -def find_max_label(blocks): - max_label = 0 - for l, b in blocks.items(): - term = None - if b.body: - term = b.body[-1] - for inst in b.body: - for T, f in find_max_label_extensions.items(): - if isinstance(inst, T): - f_max = f(inst) - if f_max > max_label: - max_label = f_max - if l > max_label: - max_label = l - return max_label - - -def remove_dels(blocks): - """remove ir.Del nodes""" - for block in blocks.values(): - new_body = [] - for stmt in block.body: - if not isinstance(stmt, ir.Del): - new_body.append(stmt) - block.body = new_body - return - - -def remove_args(blocks): - """remove ir.Arg nodes""" - for block in blocks.values(): - new_body = [] - for stmt in block.body: - if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Arg): - continue - new_body.append(stmt) - block.body = new_body - return - - -def remove_dead(blocks, args, func_ir, typemap=None, alias_map=None, arg_aliases=None): - """dead code elimination using liveness and CFG info. - Returns True if something has been removed, or False if nothing is removed. 
- """ - cfg = compute_cfg_from_blocks(blocks) - usedefs = compute_use_defs(blocks) - live_map = compute_live_map(cfg, blocks, usedefs.usemap, usedefs.defmap) - call_table, _ = get_call_table(blocks) - if alias_map is None or arg_aliases is None: - alias_map, arg_aliases = find_potential_aliases(blocks, args, typemap, - func_ir) - if config.DEBUG_ARRAY_OPT == 1: - print("alias map:", alias_map) - # keep set for easier search - alias_set = set(alias_map.keys()) - - removed = False - for label, block in blocks.items(): - # find live variables at each statement to delete dead assignment - lives = {v.name for v in block.terminator.list_vars()} - # find live variables at the end of block - for out_blk, _data in cfg.successors(label): - lives |= live_map[out_blk] - removed |= remove_dead_block(block, lives, call_table, arg_aliases, - alias_map, alias_set, func_ir, typemap) - return removed - - -# other packages that define new nodes add calls to remove dead code in them -# format: {type:function} -remove_dead_extensions = {} - - -def remove_dead_block(block, lives, call_table, arg_aliases, alias_map, - alias_set, func_ir, typemap): - """remove dead code using liveness info. - Mutable arguments (e.g. arrays) that are not definitely assigned are live - after return of function. 
- """ - # TODO: find mutable args that are not definitely assigned instead of - # assuming all args are live after return - removed = False - - # add statements in reverse order - new_body = [block.terminator] - # for each statement in reverse order, excluding terminator - for stmt in reversed(block.body[:-1]): - # aliases of lives are also live - alias_lives = set() - init_alias_lives = lives & alias_set - for v in init_alias_lives: - alias_lives |= alias_map[v] - lives_n_aliases = lives | alias_lives | arg_aliases - # let external calls handle stmt if type matches - if type(stmt) in remove_dead_extensions: - f = remove_dead_extensions[type(stmt)] - stmt = f(stmt, lives, arg_aliases, alias_map, func_ir, typemap) - if stmt is None: - removed = True - continue - # ignore assignments that their lhs is not live or lhs==rhs - if isinstance(stmt, ir.Assign): - lhs = stmt.target - rhs = stmt.value - if lhs.name not in lives and has_no_side_effect( - rhs, lives_n_aliases, call_table): - removed = True - continue - if isinstance(rhs, ir.Var) and lhs.name == rhs.name: - removed = True - continue - # TODO: remove other nodes like SetItem etc. 
- if isinstance(stmt, ir.SetItem): - name = stmt.target.name - if name not in lives_n_aliases: - continue - - if type(stmt) in analysis.ir_extension_usedefs: - def_func = analysis.ir_extension_usedefs[type(stmt)] - uses, defs = def_func(stmt) - lives -= defs - lives |= uses - else: - lives |= {v.name for v in stmt.list_vars()} - if isinstance(stmt, ir.Assign): - lives.remove(lhs.name) - - new_body.append(stmt) - new_body.reverse() - block.body = new_body - return removed - -# list of functions -remove_call_handlers = [] - -def remove_dead_random_call(rhs, lives, call_list): - if len(call_list) == 3 and call_list[1:] == ['random', numpy]: - return call_list[0] != 'seed' - return False - -remove_call_handlers.append(remove_dead_random_call) - -def has_no_side_effect(rhs, lives, call_table): - """ Returns True if this expression has no side effects that - would prevent re-ordering. - """ - if isinstance(rhs, ir.Expr) and rhs.op == 'call': - func_name = rhs.func.name - if func_name not in call_table or call_table[func_name] == []: - return False - call_list = call_table[func_name] - if (call_list == ['empty', numpy] or - call_list == [slice] or - call_list == ['stencil', numba] or - call_list == ['log', numpy] or - call_list == [numba.array_analysis.wrap_index]): - return True - elif (isinstance(call_list[0], numba.extending._Intrinsic) and - (call_list[0]._name == 'empty_inferred' or - call_list[0]._name == 'unsafe_empty_inferred')): - return True - from numba.targets.registry import CPUDispatcher - from numba.targets.linalg import dot_3_mv_check_args - if isinstance(call_list[0], CPUDispatcher): - py_func = call_list[0].py_func - if py_func == dot_3_mv_check_args: - return True - for f in remove_call_handlers: - if f(rhs, lives, call_list): - return True - return False - if isinstance(rhs, ir.Expr) and rhs.op == 'inplace_binop': - return rhs.lhs.name not in lives - if isinstance(rhs, ir.Yield): - return False - if isinstance(rhs, ir.Expr) and rhs.op == 'pair_first': 
- # don't remove pair_first since prange looks for it - return False - return True - -is_pure_extensions = [] - -def is_pure(rhs, lives, call_table): - """ Returns True if every time this expression is evaluated it - returns the same result. This is not the case for things - like calls to numpy.random. - """ - if isinstance(rhs, ir.Expr) and rhs.op == 'call': - func_name = rhs.func.name - if func_name not in call_table or call_table[func_name] == []: - return False - call_list = call_table[func_name] - if (call_list == [slice] or - call_list == ['log', numpy] or - call_list == ['empty', numpy]): - return True - for f in is_pure_extensions: - if f(rhs, lives, call_list): - return True - return False - if isinstance(rhs, ir.Yield): - return False - return True - -alias_analysis_extensions = {} - -def find_potential_aliases(blocks, args, typemap, func_ir, alias_map=None, - arg_aliases=None): - "find all array aliases and argument aliases to avoid remove as dead" - if alias_map is None: - alias_map = {} - if arg_aliases is None: - arg_aliases = set(a for a in args if not is_immutable_type(a, typemap)) - - # update definitions since they are not guaranteed to be up-to-date - # FIXME keep definitions up-to-date to avoid the need for rebuilding - func_ir._definitions = build_definitions(func_ir.blocks) - np_alias_funcs = ['ravel', 'transpose', 'reshape'] - - for bl in blocks.values(): - for instr in bl.body: - if type(instr) in alias_analysis_extensions: - f = alias_analysis_extensions[type(instr)] - f(instr, args, typemap, func_ir, alias_map, arg_aliases) - if isinstance(instr, ir.Assign): - expr = instr.value - lhs = instr.target.name - # only mutable types can alias - if is_immutable_type(lhs, typemap): - continue - if isinstance(expr, ir.Var) and lhs!=expr.name: - _add_alias(lhs, expr.name, alias_map, arg_aliases) - # subarrays like A = B[0] for 2D B - if (isinstance(expr, ir.Expr) and (expr.op == 'cast' or - expr.op in ['getitem', 'static_getitem'])): - 
_add_alias(lhs, expr.value.name, alias_map, arg_aliases) - # array attributes like A.T - if (isinstance(expr, ir.Expr) and expr.op == 'getattr' - and expr.attr in ['T', 'ctypes', 'flat']): - _add_alias(lhs, expr.value.name, alias_map, arg_aliases) - # calls that can create aliases such as B = A.ravel() - if isinstance(expr, ir.Expr) and expr.op == 'call': - fdef = guard(find_callname, func_ir, expr, typemap) - # TODO: sometimes gufunc backend creates duplicate code - # causing find_callname to fail. Example: test_argmax - # ignored here since those cases don't create aliases - # but should be fixed in general - if fdef is None: - continue - fname, fmod = fdef - if fmod == 'numpy' and fname in np_alias_funcs: - _add_alias(lhs, expr.args[0].name, alias_map, arg_aliases) - if isinstance(fmod, ir.Var) and fname in np_alias_funcs: - _add_alias(lhs, fmod.name, alias_map, arg_aliases) - - # copy to avoid changing size during iteration - old_alias_map = copy.deepcopy(alias_map) - # combine all aliases transitively - for v in old_alias_map: - for w in old_alias_map[v]: - alias_map[v] |= alias_map[w] - for w in old_alias_map[v]: - alias_map[w] = alias_map[v] - - return alias_map, arg_aliases - -def _add_alias(lhs, rhs, alias_map, arg_aliases): - if rhs in arg_aliases: - arg_aliases.add(lhs) - else: - if rhs not in alias_map: - alias_map[rhs] = set() - if lhs not in alias_map: - alias_map[lhs] = set() - alias_map[rhs].add(lhs) - alias_map[lhs].add(rhs) - return - -def is_immutable_type(var, typemap): - # Conservatively, assume mutable if type not available - if typemap is None or var not in typemap: - return False - typ = typemap[var] - # TODO: add more immutable types - if isinstance(typ, (types.Number, types.scalars._NPDatetimeBase, - types.containers.BaseTuple, - types.iterators.RangeType)): - return True - if typ==types.string: - return True - # consevatively, assume mutable - return False - -def copy_propagate(blocks, typemap): - """compute copy propagation information 
for each block using fixed-point - iteration on data flow equations: - in_b = intersect(predec(B)) - out_b = gen_b | (in_b - kill_b) - """ - cfg = compute_cfg_from_blocks(blocks) - entry = cfg.entry_point() - - # format: dict of block labels to copies as tuples - # label -> (l,r) - c_data = init_copy_propagate_data(blocks, entry, typemap) - (gen_copies, all_copies, kill_copies, in_copies, out_copies) = c_data - - old_point = None - new_point = copy.deepcopy(out_copies) - # comparison works since dictionary of built-in types - while old_point != new_point: - for label in blocks.keys(): - if label == entry: - continue - predecs = [i for i, _d in cfg.predecessors(label)] - # in_b = intersect(predec(B)) - in_copies[label] = out_copies[predecs[0]].copy() - for p in predecs: - in_copies[label] &= out_copies[p] - - # out_b = gen_b | (in_b - kill_b) - out_copies[label] = (gen_copies[label] - | (in_copies[label] - kill_copies[label])) - old_point = new_point - new_point = copy.deepcopy(out_copies) - if config.DEBUG_ARRAY_OPT == 1: - print("copy propagate out_copies:", out_copies) - return in_copies, out_copies - - -def init_copy_propagate_data(blocks, entry, typemap): - """get initial condition of copy propagation data flow for each block. 
- """ - # gen is all definite copies, extra_kill is additional ones that may hit - # for example, parfors can have control flow so they may hit extra copies - gen_copies, extra_kill = get_block_copies(blocks, typemap) - # set of all program copies - all_copies = set() - for l, s in gen_copies.items(): - all_copies |= gen_copies[l] - kill_copies = {} - for label, gen_set in gen_copies.items(): - kill_copies[label] = set() - for lhs, rhs in all_copies: - if lhs in extra_kill[label] or rhs in extra_kill[label]: - kill_copies[label].add((lhs, rhs)) - # a copy is killed if it is not in this block and lhs or rhs are - # assigned in this block - assigned = {lhs for lhs, rhs in gen_set} - if ((lhs, rhs) not in gen_set - and (lhs in assigned or rhs in assigned)): - kill_copies[label].add((lhs, rhs)) - # set initial values - # all copies are in for all blocks except entry - in_copies = {l: all_copies.copy() for l in blocks.keys()} - in_copies[entry] = set() - out_copies = {} - for label in blocks.keys(): - # out_b = gen_b | (in_b - kill_b) - out_copies[label] = (gen_copies[label] - | (in_copies[label] - kill_copies[label])) - out_copies[entry] = gen_copies[entry] - return (gen_copies, all_copies, kill_copies, in_copies, out_copies) - - -# other packages that define new nodes add calls to get copies in them -# format: {type:function} -copy_propagate_extensions = {} - - -def get_block_copies(blocks, typemap): - """get copies generated and killed by each block - """ - block_copies = {} - extra_kill = {} - for label, block in blocks.items(): - assign_dict = {} - extra_kill[label] = set() - # assignments as dict to replace with latest value - for stmt in block.body: - for T, f in copy_propagate_extensions.items(): - if isinstance(stmt, T): - gen_set, kill_set = f(stmt, typemap) - for lhs, rhs in gen_set: - assign_dict[lhs] = rhs - # if a=b is in dict and b is killed, a is also killed - new_assign_dict = {} - for l, r in assign_dict.items(): - if l not in kill_set and r not in 
kill_set: - new_assign_dict[l] = r - if r in kill_set: - extra_kill[label].add(l) - assign_dict = new_assign_dict - extra_kill[label] |= kill_set - if isinstance(stmt, ir.Assign): - lhs = stmt.target.name - if isinstance(stmt.value, ir.Var): - rhs = stmt.value.name - # copy is valid only if same type (see - # TestCFunc.test_locals) - if typemap[lhs] == typemap[rhs]: - assign_dict[lhs] = rhs - continue - if isinstance(stmt.value, - ir.Expr) and stmt.value.op == 'inplace_binop': - in1_var = stmt.value.lhs.name - in1_typ = typemap[in1_var] - # inplace_binop assigns first operand if mutable - if not (isinstance(in1_typ, types.Number) - or in1_typ == types.string): - extra_kill[label].add(in1_var) - # if a=b is in dict and b is killed, a is also killed - new_assign_dict = {} - for l, r in assign_dict.items(): - if l != in1_var and r != in1_var: - new_assign_dict[l] = r - if r == in1_var: - extra_kill[label].add(l) - assign_dict = new_assign_dict - extra_kill[label].add(lhs) - block_cps = set(assign_dict.items()) - block_copies[label] = block_cps - return block_copies, extra_kill - - -# other packages that define new nodes add calls to apply copy propagate in them -# format: {type:function} -apply_copy_propagate_extensions = {} - - -def apply_copy_propagate(blocks, in_copies, name_var_table, typemap, calltypes, - save_copies=None): - """apply copy propagation to IR: replace variables when copies available""" - # save_copies keeps an approximation of the copies that were applied, so - # that the variable names of removed user variables can be recovered to some - # extent. 
- if save_copies is None: - save_copies = [] - - for label, block in blocks.items(): - var_dict = {l: name_var_table[r] for l, r in in_copies[label]} - # assignments as dict to replace with latest value - for stmt in block.body: - if type(stmt) in apply_copy_propagate_extensions: - f = apply_copy_propagate_extensions[type(stmt)] - f(stmt, var_dict, name_var_table, - typemap, calltypes, save_copies) - # only rhs of assignments should be replaced - # e.g. if x=y is available, x in x=z shouldn't be replaced - elif isinstance(stmt, ir.Assign): - stmt.value = replace_vars_inner(stmt.value, var_dict) - else: - replace_vars_stmt(stmt, var_dict) - fix_setitem_type(stmt, typemap, calltypes) - for T, f in copy_propagate_extensions.items(): - if isinstance(stmt, T): - gen_set, kill_set = f(stmt, typemap) - for lhs, rhs in gen_set: - if rhs in name_var_table: - var_dict[lhs] = name_var_table[rhs] - for l, r in var_dict.copy().items(): - if l in kill_set or r.name in kill_set: - var_dict.pop(l) - if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Var): - lhs = stmt.target.name - rhs = stmt.value.name - # rhs could be replaced with lhs from previous copies - if lhs != rhs: - # copy is valid only if same type (see - # TestCFunc.test_locals) - if typemap[lhs] == typemap[rhs] and rhs in name_var_table: - var_dict[lhs] = name_var_table[rhs] - else: - var_dict.pop(lhs, None) - # a=b kills previous t=a - lhs_kill = [] - for k, v in var_dict.items(): - if v.name == lhs: - lhs_kill.append(k) - for k in lhs_kill: - var_dict.pop(k, None) - if (isinstance(stmt, ir.Assign) - and not isinstance(stmt.value, ir.Var)): - lhs = stmt.target.name - var_dict.pop(lhs, None) - # previous t=a is killed if a is killed - lhs_kill = [] - for k, v in var_dict.items(): - if v.name == lhs: - lhs_kill.append(k) - for k in lhs_kill: - var_dict.pop(k, None) - save_copies.extend(var_dict.items()) - - return save_copies - -def fix_setitem_type(stmt, typemap, calltypes): - """Copy propagation can 
replace setitem target variable, which can be array - with 'A' layout. The replaced variable can be 'C' or 'F', so we update - setitem call type reflect this (from matrix power test) - """ - if not isinstance(stmt, (ir.SetItem, ir.StaticSetItem)): - return - t_typ = typemap[stmt.target.name] - s_typ = calltypes[stmt].args[0] - # test_optional t_typ can be Optional with array - if not isinstance( - s_typ, - types.npytypes.Array) or not isinstance( - t_typ, - types.npytypes.Array): - return - if s_typ.layout == 'A' and t_typ.layout != 'A': - new_s_typ = s_typ.copy(layout=t_typ.layout) - calltypes[stmt].args = ( - new_s_typ, - calltypes[stmt].args[1], - calltypes[stmt].args[2]) - return - - -def dprint_func_ir(func_ir, title, blocks=None): - """Debug print function IR, with an optional blocks argument - that may differ from the IR's original blocks. - """ - if config.DEBUG_ARRAY_OPT == 1: - ir_blocks = func_ir.blocks - func_ir.blocks = ir_blocks if blocks == None else blocks - name = func_ir.func_id.func_qualname - print(("IR %s: %s" % (title, name)).center(80, "-")) - func_ir.dump() - print("-" * 40) - func_ir.blocks = ir_blocks - - -def find_topo_order(blocks, cfg = None): - """find topological order of blocks such that true branches are visited - first (e.g. for_break test in test_dataflow). 
- """ - if cfg == None: - cfg = compute_cfg_from_blocks(blocks) - post_order = [] - seen = set() - - def _dfs_rec(node): - if node not in seen: - seen.add(node) - succs = cfg._succs[node] - last_inst = blocks[node].body[-1] - if isinstance(last_inst, ir.Branch): - succs = [last_inst.falsebr, last_inst.truebr] - for dest in succs: - if (node, dest) not in cfg._back_edges: - _dfs_rec(dest) - post_order.append(node) - - _dfs_rec(cfg.entry_point()) - post_order.reverse() - return post_order - - -# other packages that define new nodes add calls to get call table -# format: {type:function} -call_table_extensions = {} - - -def get_call_table(blocks, call_table=None, reverse_call_table=None): - """returns a dictionary of call variables and their references. - """ - # call_table example: c = np.zeros becomes c:["zeroes", np] - # reverse_call_table example: c = np.zeros becomes np_var:c - if call_table is None: - call_table = {} - if reverse_call_table is None: - reverse_call_table = {} - - topo_order = find_topo_order(blocks) - for label in reversed(topo_order): - for inst in reversed(blocks[label].body): - if isinstance(inst, ir.Assign): - lhs = inst.target.name - rhs = inst.value - if isinstance(rhs, ir.Expr) and rhs.op == 'call': - call_table[rhs.func.name] = [] - if isinstance(rhs, ir.Expr) and rhs.op == 'getattr': - if lhs in call_table: - call_table[lhs].append(rhs.attr) - reverse_call_table[rhs.value.name] = lhs - if lhs in reverse_call_table: - call_var = reverse_call_table[lhs] - call_table[call_var].append(rhs.attr) - reverse_call_table[rhs.value.name] = call_var - if isinstance(rhs, ir.Global): - if lhs in call_table: - call_table[lhs].append(rhs.value) - if lhs in reverse_call_table: - call_var = reverse_call_table[lhs] - call_table[call_var].append(rhs.value) - if isinstance(rhs, ir.FreeVar): - if lhs in call_table: - call_table[lhs].append(rhs.value) - if lhs in reverse_call_table: - call_var = reverse_call_table[lhs] - call_table[call_var].append(rhs.value) - 
for T, f in call_table_extensions.items(): - if isinstance(inst, T): - f(inst, call_table, reverse_call_table) - return call_table, reverse_call_table - - -# other packages that define new nodes add calls to get tuple table -# format: {type:function} -tuple_table_extensions = {} - - -def get_tuple_table(blocks, tuple_table=None): - """returns a dictionary of tuple variables and their values. - """ - if tuple_table is None: - tuple_table = {} - - for block in blocks.values(): - for inst in block.body: - if isinstance(inst, ir.Assign): - lhs = inst.target.name - rhs = inst.value - if isinstance(rhs, ir.Expr) and rhs.op == 'build_tuple': - tuple_table[lhs] = rhs.items - if isinstance(rhs, ir.Const) and isinstance(rhs.value, tuple): - tuple_table[lhs] = rhs.value - for T, f in tuple_table_extensions.items(): - if isinstance(inst, T): - f(inst, tuple_table) - return tuple_table - - -def get_stmt_writes(stmt): - writes = set() - if isinstance(stmt, (ir.Assign, ir.SetItem, ir.StaticSetItem)): - writes.add(stmt.target.name) - return writes - - -def rename_labels(blocks): - """rename labels of function body blocks according to topological sort. - The set of labels of these blocks will remain unchanged. 
- """ - topo_order = find_topo_order(blocks) - - # make a block with return last if available (just for readability) - return_label = -1 - for l, b in blocks.items(): - if isinstance(b.body[-1], ir.Return): - return_label = l - # some cases like generators can have no return blocks - if return_label != -1: - topo_order.remove(return_label) - topo_order.append(return_label) - - label_map = {} - all_labels = sorted(topo_order, reverse=True) - for label in topo_order: - label_map[label] = all_labels.pop() - # update target labels in jumps/branches - for b in blocks.values(): - term = b.terminator - if isinstance(term, ir.Jump): - term.target = label_map[term.target] - if isinstance(term, ir.Branch): - term.truebr = label_map[term.truebr] - term.falsebr = label_map[term.falsebr] - # update blocks dictionary keys - new_blocks = {} - for k, b in blocks.items(): - new_label = label_map[k] - new_blocks[new_label] = b - - return new_blocks - - -def simplify_CFG(blocks): - """transform chains of blocks that have no loop into a single block""" - # first, inline single-branch-block to its predecessors - cfg = compute_cfg_from_blocks(blocks) - def find_single_branch(label): - block = blocks[label] - return len(block.body) == 1 and isinstance(block.body[0], ir.Branch) - single_branch_blocks = list(filter(find_single_branch, blocks.keys())) - marked_for_del = set() - for label in single_branch_blocks: - inst = blocks[label].body[0] - predecessors = cfg.predecessors(label) - delete_block = True - for (p, q) in predecessors: - block = blocks[p] - if isinstance(block.body[-1], ir.Jump): - block.body[-1] = copy.copy(inst) - else: - delete_block = False - if delete_block: - marked_for_del.add(label) - # Delete marked labels - for label in marked_for_del: - del blocks[label] - merge_adjacent_blocks(blocks) - return rename_labels(blocks) - - -arr_math = ['min', 'max', 'sum', 'prod', 'mean', 'var', 'std', - 'cumsum', 'cumprod', 'argmin', 'argmax', 'argsort', - 'nonzero', 'ravel'] - - 
-def canonicalize_array_math(func_ir, typemap, calltypes, typingctx): - # save array arg to call - # call_varname -> array - blocks = func_ir.blocks - saved_arr_arg = {} - topo_order = find_topo_order(blocks) - for label in topo_order: - block = blocks[label] - new_body = [] - for stmt in block.body: - if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr): - lhs = stmt.target.name - rhs = stmt.value - # replace A.func with np.func, and save A in saved_arr_arg - if (rhs.op == 'getattr' and rhs.attr in arr_math - and isinstance( - typemap[rhs.value.name], types.npytypes.Array)): - rhs = stmt.value - arr = rhs.value - saved_arr_arg[lhs] = arr - scope = arr.scope - loc = arr.loc - # g_np_var = Global(numpy) - g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) - typemap[g_np_var.name] = types.misc.Module(numpy) - g_np = ir.Global('np', numpy, loc) - g_np_assign = ir.Assign(g_np, g_np_var, loc) - rhs.value = g_np_var - new_body.append(g_np_assign) - func_ir._definitions[g_np_var.name] = [g_np] - # update func var type - func = getattr(numpy, rhs.attr) - func_typ = get_np_ufunc_typ(func) - typemap.pop(lhs) - typemap[lhs] = func_typ - if rhs.op == 'call' and rhs.func.name in saved_arr_arg: - # add array as first arg - arr = saved_arr_arg[rhs.func.name] - rhs.args = [arr] + rhs.args - # update call type signature to include array arg - old_sig = calltypes.pop(rhs) - calltypes[rhs] = typemap[rhs.func.name].get_call_type( - typingctx, [typemap[arr.name]] + list(old_sig.args), {}) - - new_body.append(stmt) - block.body = new_body - return - - -# format: {type:function} -array_accesses_extensions = {} - - -def get_array_accesses(blocks, accesses=None): - """returns a set of arrays accessed and their indices. 
- """ - if accesses is None: - accesses = set() - - for block in blocks.values(): - for inst in block.body: - if isinstance(inst, ir.SetItem): - accesses.add((inst.target.name, inst.index.name)) - if isinstance(inst, ir.StaticSetItem): - accesses.add((inst.target.name, inst.index_var.name)) - if isinstance(inst, ir.Assign): - lhs = inst.target.name - rhs = inst.value - if isinstance(rhs, ir.Expr) and rhs.op == 'getitem': - accesses.add((rhs.value.name, rhs.index.name)) - if isinstance(rhs, ir.Expr) and rhs.op == 'static_getitem': - index = rhs.index - # slice is unhashable, so just keep the variable - if index is None or is_slice_index(index): - index = rhs.index_var.name - accesses.add((rhs.value.name, index)) - for T, f in array_accesses_extensions.items(): - if isinstance(inst, T): - f(inst, accesses) - return accesses - -def is_slice_index(index): - """see if index is a slice index or has slice in it""" - if isinstance(index, slice): - return True - if isinstance(index, tuple): - for i in index: - if isinstance(i, slice): - return True - return False - -def merge_adjacent_blocks(blocks): - cfg = compute_cfg_from_blocks(blocks) - # merge adjacent blocks - removed = set() - for label in list(blocks.keys()): - if label in removed: - continue - block = blocks[label] - succs = list(cfg.successors(label)) - while True: - if len(succs) != 1: - break - next_label = succs[0][0] - if next_label in removed: - break - preds = list(cfg.predecessors(next_label)) - succs = list(cfg.successors(next_label)) - if len(preds) != 1 or preds[0][0] != label: - break - next_block = blocks[next_label] - # XXX: commented out since scope objects are not consistent - # thoughout the compiler. for example, pieces of code are compiled - # and inlined on the fly without proper scope merge. 
- # if block.scope != next_block.scope: - # break - # merge - block.body.pop() # remove Jump - block.body += next_block.body - del blocks[next_label] - removed.add(next_label) - label = next_label - -def restore_copy_var_names(blocks, save_copies, typemap): - """ - restores variable names of user variables after applying copy propagation - """ - rename_dict = {} - for (a, b) in save_copies: - # a is string name, b is variable - # if a is user variable and b is generated temporary and b is not - # already renamed - if (not a.startswith('$') and b.name.startswith('$') - and b.name not in rename_dict): - new_name = mk_unique_var('${}'.format(a)); - rename_dict[b.name] = new_name - typ = typemap.pop(b.name) - typemap[new_name] = typ - - replace_var_names(blocks, rename_dict) - -def simplify(func_ir, typemap, calltypes): - remove_dels(func_ir.blocks) - # get copies in to blocks and out from blocks - in_cps, out_cps = copy_propagate(func_ir.blocks, typemap) - # table mapping variable names to ir.Var objects to help replacement - name_var_table = get_name_var_table(func_ir.blocks) - save_copies = apply_copy_propagate( - func_ir.blocks, - in_cps, - name_var_table, - typemap, - calltypes) - restore_copy_var_names(func_ir.blocks, save_copies, typemap) - # remove dead code to enable fusion - remove_dead(func_ir.blocks, func_ir.arg_names, func_ir, typemap) - func_ir.blocks = simplify_CFG(func_ir.blocks) - if config.DEBUG_ARRAY_OPT == 1: - dprint_func_ir(func_ir, "after simplify") - -class GuardException(Exception): - pass - -def require(cond): - """ - Raise GuardException if the given condition is False. - """ - if not cond: - raise GuardException - -def guard(func, *args, **kwargs): - """ - Run a function with given set of arguments, and guard against - any GuardException raised by the function by returning None, - or the expected return results if no such exception was raised. 
- """ - try: - return func(*args, **kwargs) - except GuardException: - return None - -def get_definition(func_ir, name, **kwargs): - """ - Same as func_ir.get_definition(name), but raise GuardException if - exception KeyError is caught. - """ - try: - return func_ir.get_definition(name, **kwargs) - except KeyError: - raise GuardException - -def build_definitions(blocks, definitions=None): - """Build the definitions table of the given blocks by scanning - through all blocks and instructions, useful when the definitions - table is out-of-sync. - Will return a new definition table if one is not passed. - """ - if definitions is None: - definitions = collections.defaultdict(list) - - for block in blocks.values(): - for inst in block.body: - if isinstance(inst, ir.Assign): - name = inst.target.name - definition = definitions.get(name, []) - if definition == []: - definitions[name] = definition - definition.append(inst.value) - if type(inst) in build_defs_extensions: - f = build_defs_extensions[type(inst)] - f(inst, definitions) - - return definitions - -build_defs_extensions = {} - -def find_callname(func_ir, expr, typemap=None, definition_finder=get_definition): - """Check if a call expression is calling a numpy function, and - return the callee's function name and module name (both are strings), - or raise GuardException. For array attribute calls such as 'a.f(x)' - when 'a' is a numpy array, the array variable 'a' is returned - in place of the module name. 
- """ - require(isinstance(expr, ir.Expr) and expr.op == 'call') - callee = expr.func - callee_def = definition_finder(func_ir, callee) - attrs = [] - obj = None - while True: - if isinstance(callee_def, (ir.Global, ir.FreeVar)): - # require(callee_def.value == numpy) - # these checks support modules like numpy, numpy.random as well as - # calls like len() and intrinsitcs like assertEquiv - keys = ['name', '_name', '__name__'] - value = None - for key in keys: - if hasattr(callee_def.value, key): - value = getattr(callee_def.value, key) - break - if not value: - raise GuardException - attrs.append(value) - def_val = callee_def.value - # get the underlying definition of Intrinsic object to be able to - # find the module effectively. - # Otherwise, it will return numba.extending - if isinstance(def_val, numba.extending._Intrinsic): - def_val = def_val._defn - if hasattr(def_val, '__module__'): - mod_name = def_val.__module__ - # it might be a numpy function imported directly - if (hasattr(numpy, value) - and def_val == getattr(numpy, value)): - attrs += ['numpy'] - # it might be a np.random function imported directly - elif (hasattr(numpy.random, value) - and def_val == getattr(numpy.random, value)): - attrs += ['random', 'numpy'] - elif mod_name is not None: - attrs.append(mod_name) - else: - class_name = def_val.__class__.__name__ - if class_name == 'builtin_function_or_method': - class_name = 'builtin' - if class_name != 'module': - attrs.append(class_name) - break - elif isinstance(callee_def, ir.Expr) and callee_def.op == 'getattr': - obj = callee_def.value - attrs.append(callee_def.attr) - if typemap and obj.name in typemap: - typ = typemap[obj.name] - if isinstance(typ, types.npytypes.Array): - return attrs[0], obj - callee_def = definition_finder(func_ir, obj) - else: - # obj.func calls where obj is not np array - if obj is not None: - return '.'.join(reversed(attrs)), obj - raise GuardException - return attrs[0], '.'.join(reversed(attrs[1:])) - -def 
find_build_sequence(func_ir, var): - """Check if a variable is constructed via build_tuple or - build_list or build_set, and return the sequence and the - operator, or raise GuardException otherwise. - Note: only build_tuple is immutable, so use with care. - """ - require(isinstance(var, ir.Var)) - var_def = get_definition(func_ir, var) - require(isinstance(var_def, ir.Expr)) - build_ops = ['build_tuple', 'build_list', 'build_set'] - require(var_def.op in build_ops) - return var_def.items, var_def.op - -def find_const(func_ir, var): - """Check if a variable is defined as constant, and return - the constant value, or raise GuardException otherwise. - """ - require(isinstance(var, ir.Var)) - var_def = get_definition(func_ir, var) - require(isinstance(var_def, ir.Const)) - return var_def.value - -def compile_to_numba_ir(mk_func, glbls, typingctx=None, arg_typs=None, - typemap=None, calltypes=None): - """ - Compile a function or a make_function node to Numba IR. - - Rename variables and - labels to avoid conflict if inlined somewhere else. Perform type inference - if typingctx and other typing inputs are available and update typemap and - calltypes. 
- """ - from numba import compiler - # mk_func can be actual function or make_function node - if hasattr(mk_func, 'code'): - code = mk_func.code - elif hasattr(mk_func, '__code__'): - code = mk_func.__code__ - else: - raise NotImplementedError("function type not recognized {}".format(mk_func)) - f_ir = get_ir_of_code(glbls, code) - remove_dels(f_ir.blocks) - - # relabel by adding an offset - global _max_label - f_ir.blocks = add_offset_to_labels(f_ir.blocks, _max_label + 1) - max_label = max(f_ir.blocks.keys()) - _max_label = max_label - - # rename all variables to avoid conflict - var_table = get_name_var_table(f_ir.blocks) - new_var_dict = {} - for name, var in var_table.items(): - new_var_dict[name] = mk_unique_var(name) - replace_var_names(f_ir.blocks, new_var_dict) - - # perform type inference if typingctx is available and update type - # data structures typemap and calltypes - if typingctx: - f_typemap, f_return_type, f_calltypes = compiler.type_inference_stage( - typingctx, f_ir, arg_typs, None) - # remove argument entries like arg.a from typemap - arg_names = [vname for vname in f_typemap if vname.startswith("arg.")] - for a in arg_names: - f_typemap.pop(a) - typemap.update(f_typemap) - calltypes.update(f_calltypes) - return f_ir - -def get_ir_of_code(glbls, fcode): - """ - Compile a code object to get its IR. 
- """ - nfree = len(fcode.co_freevars) - func_env = "\n".join([" c_%d = None" % i for i in range(nfree)]) - func_clo = ",".join(["c_%d" % i for i in range(nfree)]) - func_arg = ",".join(["x_%d" % i for i in range(fcode.co_argcount)]) - func_text = "def g():\n%s\n def f(%s):\n return (%s)\n return f" % ( - func_env, func_arg, func_clo) - loc = {} - exec_(func_text, glbls, loc) - - # hack parameter name .0 for Python 3 versions < 3.6 - if utils.PYVERSION >= (3,) and utils.PYVERSION < (3, 6): - co_varnames = list(fcode.co_varnames) - if co_varnames[0] == ".0": - co_varnames[0] = "implicit0" - fcode = pytypes.CodeType( - fcode.co_argcount, - fcode.co_kwonlyargcount, - fcode.co_nlocals, - fcode.co_stacksize, - fcode.co_flags, - fcode.co_code, - fcode.co_consts, - fcode.co_names, - tuple(co_varnames), - fcode.co_filename, - fcode.co_name, - fcode.co_firstlineno, - fcode.co_lnotab, - fcode.co_freevars, - fcode.co_cellvars) - - f = loc['g']() - f.__code__ = fcode - f.__name__ = fcode.co_name - from numba import compiler - ir = compiler.run_frontend(f) - # we need to run the before inference rewrite pass to normalize the IR - # XXX: check rewrite pass flag? - # for example, Raise nodes need to become StaticRaise before type inference - class DummyPipeline(object): - def __init__(self, f_ir): - self.typingctx = None - self.targetctx = None - self.args = None - self.func_ir = f_ir - self.typemap = None - self.return_type = None - self.calltypes = None - rewrites.rewrite_registry.apply('before-inference', - DummyPipeline(ir), ir) - return ir - -def replace_arg_nodes(block, args): - """ - Replace ir.Arg(...) with variables - """ - for stmt in block.body: - if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Arg): - idx = stmt.value.index - assert(idx < len(args)) - stmt.value = args[idx] - return - -def replace_returns(blocks, target, return_label): - """ - Return return statement by assigning directly to target, and a jump. 
- """ - for block in blocks.values(): - casts = [] - for i, stmt in enumerate(block.body): - if isinstance(stmt, ir.Return): - assert(i + 1 == len(block.body)) - block.body[i] = ir.Assign(stmt.value, target, stmt.loc) - block.body.append(ir.Jump(return_label, stmt.loc)) - # remove cast of the returned value - for cast in casts: - if cast.target.name == stmt.value.name: - cast.value = cast.value.value - elif isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr) and stmt.value.op == 'cast': - casts.append(stmt) - -def gen_np_call(func_as_str, func, lhs, args, typingctx, typemap, calltypes): - scope = args[0].scope - loc = args[0].loc - - # g_np_var = Global(numpy) - g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) - typemap[g_np_var.name] = types.misc.Module(numpy) - g_np = ir.Global('np', numpy, loc) - g_np_assign = ir.Assign(g_np, g_np_var, loc) - # attr call: _attr = getattr(g_np_var, func_as_str) - np_attr_call = ir.Expr.getattr(g_np_var, func_as_str, loc) - attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc) - func_var_typ = get_np_ufunc_typ(func) - typemap[attr_var.name] = func_var_typ - attr_assign = ir.Assign(np_attr_call, attr_var, loc) - # np call: lhs = np_attr(*args) - np_call = ir.Expr.call(attr_var, args, (), loc) - arg_types = [typemap[x.name] for x in args] - func_typ = func_var_typ.get_call_type(typingctx, arg_types, {}) - calltypes[np_call] = func_typ - np_assign = ir.Assign(np_call, lhs, loc) - return [g_np_assign, attr_assign, np_assign] - -def dump_blocks(blocks): - for label, block in blocks.items(): - print(label, ":") - for stmt in block.body: - print(" ", stmt) - -def is_get_setitem(stmt): - """stmt is getitem assignment or setitem (and static cases)""" - return is_getitem(stmt) or is_setitem(stmt) - - -def is_getitem(stmt): - """true if stmt is a getitem or static_getitem assignment""" - return (isinstance(stmt, ir.Assign) - and isinstance(stmt.value, ir.Expr) - and stmt.value.op in ['getitem', 'static_getitem']) 
- -def is_setitem(stmt): - """true if stmt is a SetItem or StaticSetItem node""" - return isinstance(stmt, (ir.SetItem, ir.StaticSetItem)) - -def index_var_of_get_setitem(stmt): - """get index variable for getitem/setitem nodes (and static cases)""" - if is_getitem(stmt): - if stmt.value.op == 'getitem': - return stmt.value.index - else: - return stmt.value.index_var - - if is_setitem(stmt): - if isinstance(stmt, ir.SetItem): - return stmt.index - else: - return stmt.index_var - - return None - -def set_index_var_of_get_setitem(stmt, new_index): - if is_getitem(stmt): - if stmt.value.op == 'getitem': - stmt.value.index = new_index - else: - stmt.value.index_var = new_index - elif is_setitem(stmt): - if isinstance(stmt, ir.SetItem): - stmt.index = new_index - else: - stmt.index_var = new_index - else: - raise ValueError("getitem or setitem node expected but received {}".format( - stmt)) - -def is_namedtuple_class(c): - """check if c is a namedtuple class""" - if not isinstance(c, type): - return False - # should have only tuple as superclass - bases = c.__bases__ - if len(bases) != 1 or bases[0] != tuple: - return False - # should have _make method - if not hasattr(c, '_make'): - return False - # should have _fields that is all string - fields = getattr(c, '_fields', None) - if not isinstance(fields, tuple): - return False - return all(isinstance(f, str) for f in fields) - -def raise_on_unsupported_feature(func_ir): - """ - Helper function to walk IR and raise if it finds op codes - that are unsupported. Could be extended to cover IR sequences - as well as op codes. Intended use is to call it as a pipeline - stage just prior to lowering to prevent LoweringErrors for known - unsupported features. 
- """ - for blk in func_ir.blocks.values(): - for stmt in blk.find_insts(ir.Assign): - # This raises on finding `make_function` - if isinstance(stmt.value, ir.Expr): - if stmt.value.op == 'make_function': - val = stmt.value - - # See if the construct name can be refined - code = getattr(val, 'code', None) - if code is not None: - # check if this is a closure, the co_name will - # be the captured function name which is not - # useful so be explicit - if getattr(val, 'closure', None) is not None: - use = '' - expr = '' - else: - use = code.co_name - expr = '(%s) ' % use - else: - use = '' - expr = '' - - msg = ("Numba encountered the use of a language " - "feature it does not support in this context: " - "%s (op code: make_function not supported). If " - "the feature is explicitly supported it is " - "likely that the result of the expression %s" - "is being used in an unsupported manner.") % \ - (use, expr) - raise UnsupportedError(msg, stmt.value.loc) diff --git a/numba/numba/itanium_mangler.py b/numba/numba/itanium_mangler.py deleted file mode 100644 index 7d0915c05..000000000 --- a/numba/numba/itanium_mangler.py +++ /dev/null @@ -1,248 +0,0 @@ -""" -Itanium CXX ABI Mangler - -Reference: http://mentorembedded.github.io/cxx-abi/abi.html - -The basics of the mangling scheme. - -We are hijacking the CXX mangling scheme for our use. We map Python modules -into CXX namespace. A `module1.submodule2.foo` is mapped to -`module1::submodule2::foo`. For parameterized numba types, we treat them as -templated types; for example, `array(int64, 1d, C)` becomes an -`array`. - -All mangled names are prefixed with "_Z". It is followed by the name of the -entity. A name contains one or more identifiers. Each identifier is encoded -as "". If the name is namespaced and, therefore, -has multiple identifiers, the entire name is encoded as "NE". - -For functions, arguments types follow. There are condensed encodings for basic -built-in types; e.g. "i" for int, "f" for float. 
For other types, the -previously mentioned name encoding should be used. - -For templated types, the template parameters are encoded immediately after the -name. If it is namespaced, it should be within the 'N' 'E' marker. Template -parameters are encoded in "IE", where each parameter is encoded using -the mentioned name encoding scheme. Template parameters can contain literal -values like the '1' in the array type shown earlier. There is special encoding -scheme for them to avoid leading digits. -""" - -from __future__ import print_function, absolute_import - -import re - -from numba import types, utils - - -# According the scheme, valid characters for mangled names are [a-zA-Z0-9_$]. -# We borrow the '$' as the escape character to encode invalid char into -# '$xx' where 'xx' is the hex codepoint. -_re_invalid_char = re.compile(r'[^a-z0-9_]', re.I) - -PREFIX = "_Z" - -# C names to mangled type code -C2CODE = { - 'void': 'v', - 'wchar_t': 'w', - 'bool': 'b', - 'char': 'c', - 'signed char': 'a', - 'unsigned char': 'h', - 'short': 's', - 'unsigned short': 't', - 'int': 'i', - 'unsigned int': 'j', - 'long': 'l', - 'unsigned long': 'm', - 'long long': 'x', # __int64 - 'unsigned long long': 'y', # unsigned __int64 - '__int128': 'n', - 'unsigned __int128': 'o', - 'float': 'f', - 'double': 'd', - 'long double': 'e', # __float80 - '__float128': 'g', - 'ellipsis': 'z', -} - -# Numba types to C names -N2C = { - types.void: 'void', - types.boolean: 'bool', - types.uint8: 'unsigned char', - types.int8: 'signed char', - types.uint16: 'unsigned short', - types.int16: 'short', - types.uint32: 'unsigned int', - types.int32: 'int', - types.uint64: 'unsigned long long', - types.int64: 'long long', - types.float32: 'float', - types.float64: 'double', -} - - -def _escape_string(text): - """Escape the given string so that it only contains ASCII characters - of [a-zA-Z0-9_$]. 
- - The dollar symbol ($) and other invalid characters are escaped into - the string sequence of "$xx" where "xx" is the hex codepoint of the char. - - Multibyte characters are encoded into utf8 and converted into the above - hex format. - """ - def repl(m): - return ''.join(('$%02x' % utils.asbyteint(ch)) - for ch in m.group(0).encode('utf8')) - ret = re.sub(_re_invalid_char, repl, text) - # Return str if we got a unicode (for py2) - if not isinstance(ret, str): - return ret.encode('ascii') - return ret - - -def _fix_lead_digit(text): - """ - Fix text with leading digit - """ - if text and text[0].isdigit(): - return '_' + text - else: - return text - - -def _len_encoded(string): - """ - Prefix string with digit indicating the length. - Add underscore if string is prefixed with digits. - """ - string = _fix_lead_digit(string) - return '%u%s' % (len(string), string) - - -def mangle_identifier(ident, template_params=''): - """ - Mangle the identifier with optional template parameters. - - Note: - - This treats '.' as '::' in C++. - """ - parts = [_len_encoded(_escape_string(x)) for x in ident.split('.')] - if len(parts) > 1: - return 'N%s%sE' % (''.join(parts), template_params) - else: - return '%s%s' % (parts[0], template_params) - - -def mangle_type_c(typ): - """ - Mangle C type name - - Args - ---- - typ: str - C type name - """ - if typ in C2CODE: - return C2CODE[typ] - else: - return mangle_identifier(typ) - - -def mangle_type_or_value(typ): - """ - Mangle type parameter and arbitrary value. 
- """ - # Handle numba types - if isinstance(typ, types.Type): - if typ in N2C: - return mangle_type_c(N2C[typ]) - else: - return mangle_templated_ident(*typ.mangling_args) - # Handle integer literal - elif isinstance(typ, int): - return 'Li%dE' % typ - # Handle str as identifier - elif isinstance(typ, str): - return mangle_identifier(typ) - # Otherwise - else: - enc = _escape_string(str(typ)) - return _len_encoded(enc) - - -# Alias -mangle_type = mangle_type_or_value -mangle_value = mangle_type_or_value - - -def mangle_templated_ident(identifier, parameters): - """ - Mangle templated identifier. - """ - template_params = ('I%sE' % ''.join(map(mangle_type_or_value, parameters)) - if parameters else '') - return mangle_identifier(identifier, template_params) - - -def mangle_args_c(argtys): - """ - Mangle sequence of C type names - """ - return ''.join([mangle_type_c(t) for t in argtys]) - - -def mangle_args(argtys): - """ - Mangle sequence of Numba type objects and arbitrary values. - """ - return ''.join([mangle_type_or_value(t) for t in argtys]) - - -def mangle_c(ident, argtys): - """ - Mangle identifier with C type names - """ - return PREFIX + mangle_identifier(ident) + mangle_args_c(argtys) - - -def mangle(ident, argtys): - """ - Mangle identifier with Numba type objects and arbitrary values. - """ - return PREFIX + mangle_identifier(ident) + mangle_args(argtys) - - -def prepend_namespace(mangled, ns): - """ - Prepend namespace to mangled name. 
- """ - if not mangled.startswith(PREFIX): - raise ValueError('input is not a mangled name') - elif mangled.startswith(PREFIX + 'N'): - # nested - remaining = mangled[3:] - ret = PREFIX + 'N' + mangle_identifier(ns) + remaining - else: - # non-nested - remaining = mangled[2:] - head, tail = _split_mangled_ident(remaining) - ret = PREFIX + 'N' + mangle_identifier(ns) + head + 'E' + tail - return ret - - -def _split_mangled_ident(mangled): - """ - Returns `(head, tail)` where `head` is the ` + ` encoded - identifier and `tail` is the remaining. - """ - ct = int(mangled) - ctlen = len(str(ct)) - at = ctlen + ct - return mangled[:at], mangled[at:] - - diff --git a/numba/numba/jitclass/__init__.py b/numba/numba/jitclass/__init__.py deleted file mode 100644 index e92e657fd..000000000 --- a/numba/numba/jitclass/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .decorators import jitclass -from . import boxing # Has import-time side effect diff --git a/numba/numba/jitclass/_box.c b/numba/numba/jitclass/_box.c deleted file mode 100644 index 09aa7a7cd..000000000 --- a/numba/numba/jitclass/_box.c +++ /dev/null @@ -1,184 +0,0 @@ -/* -Implements jitclass Box type in python c-api level. -*/ -#include "../_pymodule.h" - -typedef struct { - PyObject_HEAD - void *meminfoptr, *dataptr; -} BoxObject; - - -/* Store function defined in numba.runtime._nrt_python for use in box_dealloc. - * It points to a function is code segment that does not need user deallocation - * and does not disappear while the process is stil running. - */ -static void (*MemInfo_release)(void*) = NULL; - - -/* - * Box.__init__() - * Takes no arguments. - * meminfoptr and dataptr are set to NULL. 
- */ -static -int Box_init(BoxObject *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = {NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "", keywords)) - { - return -1; - } - /* Initialize attributes to NULL */ - self->meminfoptr = NULL; - self->dataptr = NULL; - return 0; -} - -/* - * Box destructor - * Release MemInfo pointed by meminfoptr. - * Free the instance. - */ -static -void box_dealloc(BoxObject *box) -{ - if (box->meminfoptr) MemInfo_release((void*)box->meminfoptr); - Py_TYPE(box)->tp_free((PyObject *) box); -} - - -static const char Box_doc[] = "A box for numba created jit-class instance"; - - -static PyTypeObject BoxType = { -#if (PY_MAJOR_VERSION < 3) - PyObject_HEAD_INIT(NULL) - 0, /*ob_size*/ -#else - PyVarObject_HEAD_INIT(NULL, 0) -#endif - "_box.Box", /*tp_name*/ - sizeof(BoxObject), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)box_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - Box_doc, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)Box_init, /* tp_init */ - 0, /* tp_alloc */ - PyType_GenericNew, /* tp_new */ -}; - - -/* Import MemInfo_Release from numba.runtime._nrt_python once for use in - * Box_dealloc. 
- */ -static void * -import_meminfo_release(void) { - PyObject *nrtmod = NULL; - PyObject *helperdct = NULL; - PyObject *mi_rel_fn = NULL; - void *fnptr = NULL; - /* from numba.runtime import _nrt_python */ - nrtmod = PyImport_ImportModule("numba.runtime._nrt_python"); - if (!nrtmod) goto cleanup; - /* helperdct = _nrt_python.c_helpers */ - helperdct = PyObject_GetAttrString(nrtmod, "c_helpers"); - if (!helperdct) goto cleanup; - /* helperdct['MemInfo_release'] */ - mi_rel_fn = PyDict_GetItemString(helperdct, "MemInfo_release"); - if (!mi_rel_fn) goto cleanup; - fnptr = PyLong_AsVoidPtr(mi_rel_fn); - -cleanup: - Py_XDECREF(nrtmod); - Py_XDECREF(helperdct); - return fnptr; -} - -/* Debug utils. - * Get internal dataptr field from Box. - */ -static -PyObject* box_get_dataptr(PyObject *self, PyObject *args) { - BoxObject *box; - if (!PyArg_ParseTuple(args, "O!", &BoxType, (PyObject*)&box)) - return NULL; - return PyLong_FromVoidPtr(box->dataptr); -} - -/* Debug utils. - * Get internal meminfoptr field from Box. 
- */ -static -PyObject* box_get_meminfoptr(PyObject *self, PyObject *args) { - BoxObject *box; - if (!PyArg_ParseTuple(args, "O!", &BoxType, (PyObject*)&box)) - return NULL; - return PyLong_FromVoidPtr(box->meminfoptr); -} - - -static PyMethodDef ext_methods[] = { -#define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL } - declmethod(box_get_dataptr), - declmethod(box_get_meminfoptr), - { NULL }, -#undef declmethod -}; - - -MOD_INIT(_box) { - PyObject *m; - - MOD_DEF(m, "_box", "No docs", ext_methods) - if (m == NULL) - return MOD_ERROR_VAL; - - /* init BoxType */ - if (PyType_Ready(&BoxType)) - return MOD_ERROR_VAL; - - /* import and cache NRT_MemInfo_release function pointer */ - MemInfo_release = import_meminfo_release(); - if (!MemInfo_release) return MOD_ERROR_VAL; - - /* bind BoxType */ - Py_INCREF(&BoxType); - PyModule_AddObject(m, "Box", (PyObject *) (&BoxType)); - - /* bind address to direct access utils */; - PyModule_AddObject(m, "box_meminfoptr_offset", - PyLong_FromSsize_t(offsetof(BoxObject, meminfoptr))); - PyModule_AddObject(m, "box_dataptr_offset", - PyLong_FromSsize_t(offsetof(BoxObject, dataptr))); - - return MOD_SUCCESS_VAL(m); -} diff --git a/numba/numba/jitclass/base.py b/numba/numba/jitclass/base.py deleted file mode 100644 index 6c0d03473..000000000 --- a/numba/numba/jitclass/base.py +++ /dev/null @@ -1,468 +0,0 @@ -from __future__ import absolute_import, print_function - -from collections import OrderedDict, Sequence -import types as pytypes -import inspect - -from llvmlite import ir as llvmir - -from numba import types -from numba.targets.registry import cpu_target -from numba import njit -from numba.typing import templates -from numba.datamodel import default_manager, models -from numba.targets import imputils -from numba import cgutils, utils -from numba.six import exec_ -from . 
import _box - - -############################################################################## -# Data model - -class InstanceModel(models.StructModel): - def __init__(self, dmm, fe_typ): - cls_data_ty = types.ClassDataType(fe_typ) - # MemInfoPointer uses the `dtype` attribute to traverse for nested - # NRT MemInfo. Since we handle nested NRT MemInfo ourselves, - # we will replace provide MemInfoPointer with an opaque type - # so that it does not raise exception for nested meminfo. - dtype = types.Opaque('Opaque.' + str(cls_data_ty)) - members = [ - ('meminfo', types.MemInfoPointer(dtype)), - ('data', types.CPointer(cls_data_ty)), - ] - super(InstanceModel, self).__init__(dmm, fe_typ, members) - - -class InstanceDataModel(models.StructModel): - def __init__(self, dmm, fe_typ): - clsty = fe_typ.class_type - members = [(_mangle_attr(k), v) for k, v in clsty.struct.items()] - super(InstanceDataModel, self).__init__(dmm, fe_typ, members) - - -default_manager.register(types.ClassInstanceType, InstanceModel) -default_manager.register(types.ClassDataType, InstanceDataModel) -default_manager.register(types.ClassType, models.OpaqueModel) - - -def _mangle_attr(name): - """ - Mangle attributes. - The resulting name does not startswith an underscore '_'. - """ - return 'm_' + name - - -############################################################################## -# Class object - -_ctor_template = """ -def ctor({args}): - return __numba_cls_({args}) -""" - - -def _getargs(fn): - """ - Returns list of positional and keyword argument names in order. - """ - sig = utils.pysignature(fn) - params = sig.parameters - args = [k for k, v in params.items() - if (v.kind & v.POSITIONAL_OR_KEYWORD) == v.POSITIONAL_OR_KEYWORD] - return args - - -class JitClassType(type): - """ - The type of any jitclass. 
- """ - def __new__(cls, name, bases, dct): - if len(bases) != 1: - raise TypeError("must have exactly one base class") - [base] = bases - if isinstance(base, JitClassType): - raise TypeError("cannot subclass from a jitclass") - assert 'class_type' in dct, 'missing "class_type" attr' - outcls = type.__new__(cls, name, bases, dct) - outcls._set_init() - return outcls - - def _set_init(cls): - """ - Generate a wrapper for calling the constructor from pure Python. - Note the wrapper will only accept positional arguments. - """ - init = cls.class_type.instance_type.methods['__init__'] - # get postitional and keyword arguments - # offset by one to exclude the `self` arg - args = _getargs(init)[1:] - ctor_source = _ctor_template.format(args=', '.join(args)) - glbls = {"__numba_cls_": cls} - exec_(ctor_source, glbls) - ctor = glbls['ctor'] - cls._ctor = njit(ctor) - - def __instancecheck__(cls, instance): - if isinstance(instance, _box.Box): - return instance._numba_type_.class_type is cls.class_type - return False - - def __call__(cls, *args, **kwargs): - return cls._ctor(*args, **kwargs) - - -############################################################################## -# Registration utils - -def _validate_spec(spec): - for k, v in spec.items(): - if not isinstance(k, str): - raise TypeError("spec keys should be strings, got %r" % (k,)) - if not isinstance(v, types.Type): - raise TypeError("spec values should be Numba type instances, got %r" - % (v,)) - - -def _fix_up_private_attr(clsname, spec): - """ - Apply the same changes to dunder names as CPython would. - """ - out = OrderedDict() - for k, v in spec.items(): - if k.startswith('__') and not k.endswith('__'): - k = '_' + clsname + k - out[k] = v - return out - - -def register_class_type(cls, spec, class_ctor, builder): - """ - Internal function to create a jitclass. - - Args - ---- - cls: the original class object (used as the prototype) - spec: the structural specification contains the field types. 
- class_ctor: the numba type to represent the jitclass - builder: the internal jitclass builder - """ - # Normalize spec - if isinstance(spec, Sequence): - spec = OrderedDict(spec) - _validate_spec(spec) - - # Fix up private attribute names - spec = _fix_up_private_attr(cls.__name__, spec) - - # Copy methods from base classes - clsdct = {} - for basecls in reversed(inspect.getmro(cls)): - clsdct.update(basecls.__dict__) - - methods = dict((k, v) for k, v in clsdct.items() - if isinstance(v, pytypes.FunctionType)) - props = dict((k, v) for k, v in clsdct.items() - if isinstance(v, property)) - - others = dict((k, v) for k, v in clsdct.items() - if k not in methods and k not in props) - - # Check for name shadowing - shadowed = (set(methods) | set(props)) & set(spec) - if shadowed: - raise NameError("name shadowing: {0}".format(', '.join(shadowed))) - - docstring = others.pop('__doc__', "") - _drop_ignored_attrs(others) - if others: - msg = "class members are not yet supported: {0}" - members = ', '.join(others.keys()) - raise TypeError(msg.format(members)) - - for k, v in props.items(): - if v.fdel is not None: - raise TypeError("deleter is not supported: {0}".format(k)) - - jitmethods = {} - for k, v in methods.items(): - jitmethods[k] = njit(v) - - jitprops = {} - for k, v in props.items(): - dct = {} - if v.fget: - dct['get'] = njit(v.fget) - if v.fset: - dct['set'] = njit(v.fset) - jitprops[k] = dct - - # Instantiate class type - class_type = class_ctor(cls, ConstructorTemplate, spec, jitmethods, - jitprops) - - cls = JitClassType(cls.__name__, (cls,), dict(class_type=class_type, - __doc__=docstring)) - - # Register resolution of the class object - typingctx = cpu_target.typing_context - typingctx.insert_global(cls, class_type) - - # Register class - targetctx = cpu_target.target_context - builder(class_type, methods, typingctx, targetctx).register() - - return cls - - -class ConstructorTemplate(templates.AbstractTemplate): - """ - Base class for jitclass 
constructor templates. - """ - - def generic(self, args, kws): - # Redirect resolution to __init__ - instance_type = self.key.instance_type - ctor = instance_type.jitmethods['__init__'] - boundargs = (instance_type.get_reference_type(),) + args - disp_type = types.Dispatcher(ctor) - sig = disp_type.get_call_type(self.context, boundargs, kws) - # Actual constructor returns an instance value (not None) - out = templates.signature(instance_type, *sig.args[1:]) - return out - - -def _drop_ignored_attrs(dct): - # ignore anything defined by object - drop = set(['__weakref__', - '__module__', - '__dict__']) - for k, v in dct.items(): - if isinstance(v, (pytypes.BuiltinFunctionType, - pytypes.BuiltinMethodType)): - drop.add(k) - elif getattr(v, '__objclass__', None) is object: - drop.add(k) - - for k in drop: - del dct[k] - - -class ClassBuilder(object): - """ - A jitclass builder for a mutable jitclass. This will register - typing and implementation hooks to the given typing and target contexts. - """ - class_impl_registry = imputils.Registry() - implemented_methods = set() - - def __init__(self, class_type, methods, typingctx, targetctx): - self.class_type = class_type - self.methods = methods - self.typingctx = typingctx - self.targetctx = targetctx - - def register(self): - """ - Register to the frontend and backend. - """ - # Register generic implementations for all jitclasses - self._register_methods(self.class_impl_registry, - self.class_type.instance_type) - # NOTE other registrations are done at the top-level - # (see ctor_impl and attr_impl below) - self.targetctx.install_registry(self.class_impl_registry) - - def _register_methods(self, registry, instance_type): - """ - Register method implementations for the given instance type. 
- """ - for meth in instance_type.jitmethods: - # There's no way to retrive the particular method name - # inside the implementation function, so we have to register a - # specific closure for each different name - if meth not in self.implemented_methods: - self._implement_method(registry, meth) - self.implemented_methods.add(meth) - - def _implement_method(self, registry, attr): - @registry.lower((types.ClassInstanceType, attr), - types.ClassInstanceType, types.VarArg(types.Any)) - def imp(context, builder, sig, args): - instance_type = sig.args[0] - method = instance_type.jitmethods[attr] - disp_type = types.Dispatcher(method) - call = context.get_function(disp_type, sig) - out = call(builder, args) - return imputils.impl_ret_new_ref(context, builder, - sig.return_type, out) - - -@templates.infer_getattr -class ClassAttribute(templates.AttributeTemplate): - key = types.ClassInstanceType - - def generic_resolve(self, instance, attr): - if attr in instance.struct: - # It's a struct field => the type is well-known - return instance.struct[attr] - - elif attr in instance.jitmethods: - # It's a jitted method => typeinfer it - meth = instance.jitmethods[attr] - disp_type = types.Dispatcher(meth) - - class MethodTemplate(templates.AbstractTemplate): - key = (self.key, attr) - - def generic(self, args, kws): - args = (instance,) + tuple(args) - sig = disp_type.get_call_type(self.context, args, kws) - return sig.as_method() - - return types.BoundFunction(MethodTemplate, instance) - - elif attr in instance.jitprops: - # It's a jitted property => typeinfer its getter - impdct = instance.jitprops[attr] - getter = impdct['get'] - disp_type = types.Dispatcher(getter) - sig = disp_type.get_call_type(self.context, (instance,), {}) - return sig.return_type - - -@ClassBuilder.class_impl_registry.lower_getattr_generic(types.ClassInstanceType) -def attr_impl(context, builder, typ, value, attr): - """ - Generic getattr() for @jitclass instances. 
- """ - if attr in typ.struct: - # It's a struct field - inst = context.make_helper(builder, typ, value=value) - data_pointer = inst.data - data = context.make_data_helper(builder, typ.get_data_type(), - ref=data_pointer) - return imputils.impl_ret_borrowed(context, builder, - typ.struct[attr], - getattr(data, _mangle_attr(attr))) - elif attr in typ.jitprops: - # It's a jitted property - getter = typ.jitprops[attr]['get'] - sig = templates.signature(None, typ) - dispatcher = types.Dispatcher(getter) - sig = dispatcher.get_call_type(context.typing_context, [typ], {}) - call = context.get_function(dispatcher, sig) - out = call(builder, [value]) - return imputils.impl_ret_new_ref(context, builder, sig.return_type, out) - - raise NotImplementedError('attribute {0!r} not implemented'.format(attr)) - - -@ClassBuilder.class_impl_registry.lower_setattr_generic(types.ClassInstanceType) -def attr_impl(context, builder, sig, args, attr): - """ - Generic setattr() for @jitclass instances. - """ - typ, valty = sig.args - target, val = args - - if attr in typ.struct: - # It's a struct member - inst = context.make_helper(builder, typ, value=target) - data_ptr = inst.data - data = context.make_data_helper(builder, typ.get_data_type(), - ref=data_ptr) - - # Get old value - attr_type = typ.struct[attr] - oldvalue = getattr(data, _mangle_attr(attr)) - - # Store n - setattr(data, _mangle_attr(attr), val) - context.nrt.incref(builder, attr_type, val) - - # Delete old value - context.nrt.decref(builder, attr_type, oldvalue) - - elif attr in typ.jitprops: - # It's a jitted property - setter = typ.jitprops[attr]['set'] - disp_type = types.Dispatcher(setter) - sig = disp_type.get_call_type(context.typing_context, - (typ, valty), {}) - call = context.get_function(disp_type, sig) - call(builder, (target, val)) - - else: - raise NotImplementedError('attribute {0!r} not implemented'.format(attr)) - - -def imp_dtor(context, module, instance_type): - llvoidptr = 
context.get_value_type(types.voidptr) - llsize = context.get_value_type(types.uintp) - dtor_ftype = llvmir.FunctionType(llvmir.VoidType(), - [llvoidptr, llsize, llvoidptr]) - - fname = "_Dtor.{0}".format(instance_type.name) - dtor_fn = module.get_or_insert_function(dtor_ftype, - name=fname) - if dtor_fn.is_declaration: - # Define - builder = llvmir.IRBuilder(dtor_fn.append_basic_block()) - - alloc_fe_type = instance_type.get_data_type() - alloc_type = context.get_value_type(alloc_fe_type) - - ptr = builder.bitcast(dtor_fn.args[0], alloc_type.as_pointer()) - data = context.make_helper(builder, alloc_fe_type, ref=ptr) - - context.nrt.decref(builder, alloc_fe_type, data._getvalue()) - - builder.ret_void() - - return dtor_fn - - -@ClassBuilder.class_impl_registry.lower(types.ClassType, types.VarArg(types.Any)) -def ctor_impl(context, builder, sig, args): - """ - Generic constructor (__new__) for jitclasses. - """ - # Allocate the instance - inst_typ = sig.return_type - alloc_type = context.get_data_type(inst_typ.get_data_type()) - alloc_size = context.get_abi_sizeof(alloc_type) - - meminfo = context.nrt.meminfo_alloc_dtor( - builder, - context.get_constant(types.uintp, alloc_size), - imp_dtor(context, builder.module, inst_typ), - ) - data_pointer = context.nrt.meminfo_data(builder, meminfo) - data_pointer = builder.bitcast(data_pointer, - alloc_type.as_pointer()) - - # Nullify all data - builder.store(cgutils.get_null_value(alloc_type), - data_pointer) - - inst_struct = context.make_helper(builder, inst_typ) - inst_struct.meminfo = meminfo - inst_struct.data = data_pointer - - # Call the jitted __init__ - # TODO: extract the following into a common util - init_sig = (sig.return_type,) + sig.args - - init = inst_typ.jitmethods['__init__'] - disp_type = types.Dispatcher(init) - call = context.get_function(disp_type, types.void(*init_sig)) - realargs = [inst_struct._getvalue()] + list(args) - call(builder, realargs) - - # Prepare return value - ret = 
inst_struct._getvalue() - - return imputils.impl_ret_new_ref(context, builder, inst_typ, ret) diff --git a/numba/numba/jitclass/boxing.py b/numba/numba/jitclass/boxing.py deleted file mode 100644 index 60e9e9a5d..000000000 --- a/numba/numba/jitclass/boxing.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -Implement logic relating to wrapping (box) and unwrapping (unbox) instances -of jitclasses for use inside the python interpreter. -""" -from __future__ import print_function, absolute_import - -from functools import wraps, partial - -from llvmlite import ir - -from numba import types, cgutils -from numba.pythonapi import box, unbox, NativeValue -from numba import njit -from numba.six import exec_ -from . import _box - - -_getter_code_template = """ -def accessor(__numba_self_): - return __numba_self_.{0} -""" - -_setter_code_template = """ -def mutator(__numba_self_, __numba_val): - __numba_self_.{0} = __numba_val -""" - -_method_code_template = """ -def method(__numba_self_, *args): - return __numba_self_.{method}(*args) -""" - - -def _generate_property(field, template, fname): - """ - Generate simple function that get/set a field of the instance - """ - source = template.format(field) - glbls = {} - exec_(source, glbls) - return njit(glbls[fname]) - - -_generate_getter = partial(_generate_property, template=_getter_code_template, - fname='accessor') -_generate_setter = partial(_generate_property, template=_setter_code_template, - fname='mutator') - - -def _generate_method(name, func): - """ - Generate a wrapper for calling a method. Note the wrapper will only - accept positional arguments. - """ - source = _method_code_template.format(method=name) - glbls = {} - exec_(source, glbls) - method = njit(glbls['method']) - - @wraps(func) - def wrapper(*args, **kwargs): - return method(*args, **kwargs) - - return wrapper - - -_cache_specialized_box = {} - - -def _specialize_box(typ): - """ - Create a subclass of Box that is specialized to the jitclass. 
- - This function caches the result to avoid code bloat. - """ - # Check cache - if typ in _cache_specialized_box: - return _cache_specialized_box[typ] - dct = {'__slots__': (), - '_numba_type_': typ, - '__doc__': typ.class_type.class_def.__doc__, - } - # Inject attributes as class properties - for field in typ.struct: - getter = _generate_getter(field) - setter = _generate_setter(field) - dct[field] = property(getter, setter) - # Inject properties as class properties - for field, impdct in typ.jitprops.items(): - getter = None - setter = None - if 'get' in impdct: - getter = _generate_getter(field) - if 'set' in impdct: - setter = _generate_setter(field) - # get docstring from either the fget or fset - imp = impdct.get('get') or impdct.get('set') or None - doc = getattr(imp, '__doc__', None) - dct[field] = property(getter, setter, doc=doc) - # Inject methods as class members - for name, func in typ.methods.items(): - if not (name.startswith('__') and name.endswith('__')): - dct[name] = _generate_method(name, func) - # Create subclass - subcls = type(typ.classname, (_box.Box,), dct) - # Store to cache - _cache_specialized_box[typ] = subcls - - # Pre-compile attribute getter. - # Note: This must be done after the "box" class is created because - # compiling the getter requires the "box" class to be defined. 
- for k, v in dct.items(): - if isinstance(v, property): - prop = getattr(subcls, k) - if prop.fget is not None: - fget = prop.fget - fast_fget = fget.compile((typ,)) - fget.disable_compile() - setattr(subcls, k, - property(fast_fget, prop.fset, prop.fdel, - doc=prop.__doc__)) - - return subcls - - -############################################################################### -# Implement box/unbox for call wrapper - -@box(types.ClassInstanceType) -def _box_class_instance(typ, val, c): - meminfo, dataptr = cgutils.unpack_tuple(c.builder, val) - - # Create Box instance - box_subclassed = _specialize_box(typ) - # Note: the ``box_subclassed`` is kept alive by the cache - int_addr_boxcls = c.context.get_constant(types.uintp, id(box_subclassed)) - - box_cls = c.builder.inttoptr(int_addr_boxcls, c.pyapi.pyobj) - box = c.pyapi.call_function_objargs(box_cls, ()) - - # Initialize Box instance - llvoidptr = ir.IntType(8).as_pointer() - addr_meminfo = c.builder.bitcast(meminfo, llvoidptr) - addr_data = c.builder.bitcast(dataptr, llvoidptr) - - def set_member(member_offset, value): - # Access member by byte offset - offset = c.context.get_constant(types.uintp, member_offset) - ptr = cgutils.pointer_add(c.builder, box, offset) - casted = c.builder.bitcast(ptr, llvoidptr.as_pointer()) - c.builder.store(value, casted) - - set_member(_box.box_meminfoptr_offset, addr_meminfo) - set_member(_box.box_dataptr_offset, addr_data) - return box - - -@unbox(types.ClassInstanceType) -def _unbox_class_instance(typ, val, c): - def access_member(member_offset): - # Access member by byte offset - offset = c.context.get_constant(types.uintp, member_offset) - llvoidptr = ir.IntType(8).as_pointer() - ptr = cgutils.pointer_add(c.builder, val, offset) - casted = c.builder.bitcast(ptr, llvoidptr.as_pointer()) - return c.builder.load(casted) - - struct_cls = cgutils.create_struct_proxy(typ) - inst = struct_cls(c.context, c.builder) - - # load from Python object - ptr_meminfo = 
access_member(_box.box_meminfoptr_offset) - ptr_dataptr = access_member(_box.box_dataptr_offset) - - # store to native structure - inst.meminfo = c.builder.bitcast(ptr_meminfo, inst.meminfo.type) - inst.data = c.builder.bitcast(ptr_dataptr, inst.data.type) - - ret = inst._getvalue() - - c.context.nrt.incref(c.builder, typ, ret) - - return NativeValue(ret, is_error=c.pyapi.c_api_error()) diff --git a/numba/numba/jitclass/decorators.py b/numba/numba/jitclass/decorators.py deleted file mode 100644 index 764c69844..000000000 --- a/numba/numba/jitclass/decorators.py +++ /dev/null @@ -1,29 +0,0 @@ -from __future__ import absolute_import, print_function - -from numba import config, types -from .base import register_class_type, ClassBuilder - - -def jitclass(spec): - """ - A decorator for creating a jitclass. - - **arguments**: - - - spec: - Specifies the types of each field on this class. - Must be a dictionary or a sequence. - With a dictionary, use collections.OrderedDict for stable ordering. - With a sequence, it must contain 2-tuples of (fieldname, fieldtype). - - **returns**: - - A callable that takes a class object, which will be compiled. - """ - def wrap(cls): - if config.DISABLE_JIT: - return cls - else: - return register_class_type(cls, spec, types.ClassType, ClassBuilder) - - return wrap diff --git a/numba/numba/lowering.py b/numba/numba/lowering.py deleted file mode 100644 index f080f3f37..000000000 --- a/numba/numba/lowering.py +++ /dev/null @@ -1,1034 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import weakref -import time -from collections import namedtuple, deque -from functools import partial - -from llvmlite.llvmpy.core import Constant, Type, Builder -from llvmlite import ir as llvmir - -from . import (_dynfunc, cgutils, config, funcdesc, generators, ir, types, - typing, utils) -from .errors import LoweringError, new_error_context -from .targets import removerefctpass -from .funcdesc import default_mangler -from . 
import debuginfo, utils - - -class Environment(_dynfunc.Environment): - """Stores globals and constant pyobjects for runtime. - - It is often needed to convert b/w nopython objects and pyobjects. - """ - __slots__ = ('env_name', '__weakref__') - # A weak-value dictionary to store live environment with env_name as the - # key. - _memo = weakref.WeakValueDictionary() - - @classmethod - def from_fndesc(cls, fndesc): - mod = fndesc.lookup_module() - try: - # Avoid creating new Env - return cls._memo[fndesc.env_name] - except KeyError: - inst = cls(mod.__dict__) - inst.env_name = fndesc.env_name - cls._memo[fndesc.env_name] = inst - return inst - - def __reduce__(self): - return _rebuild_env, ( - self.globals['__name__'], - self.consts, - self.env_name - ) - - def __del__(self): - if utils is None or utils.IS_PY3: - return - if _keepalive is None: - return - _keepalive.append((time.time(), self)) - if len(_keepalive) > 10: - cur = time.time() - while _keepalive and cur - _keepalive[0][0] > 1: - _keepalive.popleft() - - -_keepalive = deque() - - -def _rebuild_env(modname, consts, env_name): - if env_name in Environment._memo: - return Environment._memo[env_name] - from . import serialize - mod = serialize._rebuild_module(modname) - env = Environment(mod.__dict__) - env.consts[:] = consts - return env - - -_VarArgItem = namedtuple("_VarArgItem", ("vararg", "index")) - - -class BaseLower(object): - """ - Lower IR to LLVM - """ - def __init__(self, context, library, fndesc, func_ir): - self.library = library - self.fndesc = fndesc - self.blocks = utils.SortedMap(utils.iteritems(func_ir.blocks)) - self.func_ir = func_ir - self.call_conv = context.call_conv - self.generator_info = func_ir.generator_info - - # Initialize LLVM - self.module = self.library.create_ir_module(self.fndesc.unique_name) - - # Python execution environment (will be available to the compiled - # function). 
- self.env = Environment.from_fndesc(self.fndesc) - - # Internal states - self.blkmap = {} - self.varmap = {} - self.firstblk = min(self.blocks.keys()) - self.loc = -1 - - # Specializes the target context as seen inside the Lowerer - # This adds: - # - environment: the python exceution environment - self.context = context.subtarget(environment=self.env, - fndesc=self.fndesc) - - # Debuginfo - dibuildercls = (self.context.DIBuilder - if self.context.enable_debuginfo - else debuginfo.DummyDIBuilder) - - self.debuginfo = dibuildercls(module=self.module, - filepath=func_ir.loc.filename) - - # Subclass initialization - self.init() - - def init(self): - pass - - def init_pyapi(self): - """ - Init the Python API and Environment Manager for the function being - lowered. - """ - if self.pyapi is not None: - return - self.pyapi = self.context.get_python_api(self.builder) - - # Store environment argument for later use - self.env_manager = self.context.get_env_manager(self.builder) - self.env_body = self.env_manager.env_body - self.envarg = self.env_manager.env_ptr - - def pre_lower(self): - """ - Called before lowering all blocks. - """ - # A given Lower object can be used for several LL functions - # (for generators) and it's important to use a new API and - # EnvironmentManager. - self.pyapi = None - self.debuginfo.mark_subprogram(function=self.builder.function, - name=self.fndesc.qualname, - loc=self.func_ir.loc) - - def post_lower(self): - """ - Called after all blocks are lowered - """ - self.debuginfo.finalize() - - def pre_block(self, block): - """ - Called before lowering a block. - """ - - def return_exception(self, exc_class, exc_args=None): - self.call_conv.return_user_exc(self.builder, exc_class, exc_args) - - def emit_environment_object(self): - """Emit a pointer to hold the Environment object. 
- """ - # Define global for the environment and initialize it to NULL - envname = self.context.get_env_name(self.fndesc) - gvenv = self.context.declare_env_global(self.module, envname) - - def lower(self): - # Emit the Env into the module - self.emit_environment_object() - if self.generator_info is None: - self.genlower = None - self.lower_normal_function(self.fndesc) - else: - self.genlower = self.GeneratorLower(self) - self.gentype = self.genlower.gentype - - self.genlower.lower_init_func(self) - self.genlower.lower_next_func(self) - if self.gentype.has_finalizer: - self.genlower.lower_finalize_func(self) - - if config.DUMP_LLVM: - print(("LLVM DUMP %s" % self.fndesc).center(80, '-')) - print(self.module) - print('=' * 80) - - # Special optimization to remove NRT on functions that do not need it. - if self.context.enable_nrt and self.generator_info is None: - removerefctpass.remove_unnecessary_nrt_usage(self.function, - context=self.context, - fndesc=self.fndesc) - - # Run target specific post lowering transformation - self.context.post_lowering(self.module, self.library) - - # Materialize LLVM Module - self.library.add_ir_module(self.module) - - def extract_function_arguments(self): - self.fnargs = self.call_conv.decode_arguments(self.builder, - self.fndesc.argtypes, - self.function) - return self.fnargs - - def lower_normal_function(self, fndesc): - """ - Lower non-generator *fndesc*. - """ - self.setup_function(fndesc) - - # Init argument values - self.extract_function_arguments() - entry_block_tail = self.lower_function_body() - - # Close tail of entry block - self.builder.position_at_end(entry_block_tail) - self.builder.branch(self.blkmap[self.firstblk]) - - def lower_function_body(self): - """ - Lower the current function's body, and return the entry block. 
- """ - # Init Python blocks - for offset in self.blocks: - bname = "B%s" % offset - self.blkmap[offset] = self.function.append_basic_block(bname) - - self.pre_lower() - # pre_lower() may have changed the current basic block - entry_block_tail = self.builder.basic_block - - self.debug_print("# function begin: {0}".format( - self.fndesc.unique_name)) - # Lower all blocks - for offset, block in sorted(self.blocks.items()): - bb = self.blkmap[offset] - self.builder.position_at_end(bb) - self.lower_block(block) - - self.post_lower() - return entry_block_tail - - def lower_block(self, block): - """ - Lower the given block. - """ - self.pre_block(block) - for inst in block.body: - self.loc = inst.loc - defaulterrcls = partial(LoweringError, loc=self.loc) - with new_error_context('lowering "{inst}" at {loc}', inst=inst, - loc=self.loc, errcls_=defaulterrcls): - self.lower_inst(inst) - - def create_cpython_wrapper(self, release_gil=False): - """ - Create CPython wrapper(s) around this function (or generator). - """ - if self.genlower: - self.context.create_cpython_wrapper(self.library, - self.genlower.gendesc, - self.env, self.call_helper, - release_gil=release_gil) - self.context.create_cpython_wrapper(self.library, self.fndesc, - self.env, self.call_helper, - release_gil=release_gil) - - def setup_function(self, fndesc): - # Setup function - self.function = self.context.declare_function(self.module, fndesc) - self.entry_block = self.function.append_basic_block('entry') - self.builder = Builder(self.entry_block) - self.call_helper = self.call_conv.init_call_helper(self.builder) - - def typeof(self, varname): - return self.fndesc.typemap[varname] - - def debug_print(self, msg): - if config.DEBUG_JIT: - self.context.debug_print(self.builder, "DEBUGJIT: {0}".format(msg)) - - @property - def has_dynamic_globals(self): - """ - If true, then can't cache LLVM module accross process calls. 
- """ - return self.library.has_dynamic_globals - - -# Dictionary mapping instruction class to its lowering function. -lower_extensions = {} - -class Lower(BaseLower): - GeneratorLower = generators.GeneratorLower - - def lower_inst(self, inst): - # Set debug location for all subsequent LL instructions - self.debuginfo.mark_location(self.builder, self.loc) - self.debug_print(str(inst)) - if isinstance(inst, ir.Assign): - ty = self.typeof(inst.target.name) - val = self.lower_assign(ty, inst) - self.storevar(val, inst.target.name) - - elif isinstance(inst, ir.Branch): - cond = self.loadvar(inst.cond.name) - tr = self.blkmap[inst.truebr] - fl = self.blkmap[inst.falsebr] - - condty = self.typeof(inst.cond.name) - pred = self.context.cast(self.builder, cond, condty, types.boolean) - assert pred.type == Type.int(1), ("cond is not i1: %s" % pred.type) - self.builder.cbranch(pred, tr, fl) - - elif isinstance(inst, ir.Jump): - target = self.blkmap[inst.target] - self.builder.branch(target) - - elif isinstance(inst, ir.Return): - if self.generator_info: - # StopIteration - self.genlower.return_from_generator(self) - return - val = self.loadvar(inst.value.name) - oty = self.typeof(inst.value.name) - ty = self.fndesc.restype - if isinstance(ty, types.Optional): - # If returning an optional type - self.call_conv.return_optional_value(self.builder, ty, oty, val) - return - if ty != oty: - val = self.context.cast(self.builder, val, oty, ty) - retval = self.context.get_return_value(self.builder, ty, val) - self.call_conv.return_value(self.builder, retval) - - elif isinstance(inst, ir.StaticSetItem): - signature = self.fndesc.calltypes[inst] - assert signature is not None - try: - impl = self.context.get_function('static_setitem', signature) - except NotImplementedError: - return self.lower_setitem(inst.target, inst.index_var, inst.value, signature) - else: - target = self.loadvar(inst.target.name) - value = self.loadvar(inst.value.name) - valuety = self.typeof(inst.value.name) - 
value = self.context.cast(self.builder, value, valuety, - signature.args[2]) - return impl(self.builder, (target, inst.index, value)) - - elif isinstance(inst, ir.Print): - self.lower_print(inst) - - elif isinstance(inst, ir.SetItem): - signature = self.fndesc.calltypes[inst] - assert signature is not None - return self.lower_setitem(inst.target, inst.index, inst.value, signature) - - elif isinstance(inst, ir.DelItem): - target = self.loadvar(inst.target.name) - index = self.loadvar(inst.index.name) - - targetty = self.typeof(inst.target.name) - indexty = self.typeof(inst.index.name) - - signature = self.fndesc.calltypes[inst] - assert signature is not None - impl = self.context.get_function('delitem', signature) - - assert targetty == signature.args[0] - index = self.context.cast(self.builder, index, indexty, - signature.args[1]) - - return impl(self.builder, (target, index)) - - elif isinstance(inst, ir.Del): - self.delvar(inst.value) - - elif isinstance(inst, ir.SetAttr): - target = self.loadvar(inst.target.name) - value = self.loadvar(inst.value.name) - signature = self.fndesc.calltypes[inst] - - targetty = self.typeof(inst.target.name) - valuety = self.typeof(inst.value.name) - assert signature is not None - assert signature.args[0] == targetty - impl = self.context.get_setattr(inst.attr, signature) - - # Convert argument to match - value = self.context.cast(self.builder, value, valuety, - signature.args[1]) - - return impl(self.builder, (target, value)) - - elif isinstance(inst, ir.StaticRaise): - self.lower_static_raise(inst) - - else: - for _class, func in lower_extensions.items(): - if isinstance(inst, _class): - func(self, inst) - return - raise NotImplementedError(type(inst)) - - def lower_setitem(self, target_var, index_var, value_var, signature): - target = self.loadvar(target_var.name) - value = self.loadvar(value_var.name) - index = self.loadvar(index_var.name) - - targetty = self.typeof(target_var.name) - valuety = self.typeof(value_var.name) - 
indexty = self.typeof(index_var.name) - - impl = self.context.get_function('setitem', signature) - - # Convert argument to match - if isinstance(targetty, types.Optional): - target = self.context.cast(self.builder, target, targetty, - targetty.type) - else: - assert targetty == signature.args[0] - - index = self.context.cast(self.builder, index, indexty, - signature.args[1]) - value = self.context.cast(self.builder, value, valuety, - signature.args[2]) - - return impl(self.builder, (target, index, value)) - - def lower_static_raise(self, inst): - if inst.exc_class is None: - # Reraise - self.return_exception(None) - else: - self.return_exception(inst.exc_class, inst.exc_args) - - def lower_assign(self, ty, inst): - value = inst.value - # In nopython mode, closure vars are frozen like globals - if isinstance(value, (ir.Const, ir.Global, ir.FreeVar)): - res = self.context.get_constant_generic(self.builder, ty, - value.value) - self.incref(ty, res) - return res - - elif isinstance(value, ir.Expr): - return self.lower_expr(ty, value) - - elif isinstance(value, ir.Var): - val = self.loadvar(value.name) - oty = self.typeof(value.name) - res = self.context.cast(self.builder, val, oty, ty) - self.incref(ty, res) - return res - - elif isinstance(value, ir.Arg): - # Cast from the argument type to the local variable type - # (note the "arg.FOO" convention as used in typeinfer) - argty = self.typeof("arg." 
+ value.name) - if isinstance(argty, types.Omitted): - pyval = argty.value - # use the type of the constant value - valty = self.context.typing_context.resolve_value_type(pyval) - const = self.context.get_constant_generic(self.builder, valty, - pyval) - # cast it to the variable type - res = self.context.cast(self.builder, const, valty, ty) - else: - val = self.fnargs[value.index] - res = self.context.cast(self.builder, val, argty, ty) - self.incref(ty, res) - return res - - elif isinstance(value, ir.Yield): - res = self.lower_yield(ty, value) - self.incref(ty, res) - return res - - raise NotImplementedError(type(value), value) - - def lower_yield(self, retty, inst): - yp = self.generator_info.yield_points[inst.index] - assert yp.inst is inst - y = generators.LowerYield(self, yp, yp.live_vars) - y.lower_yield_suspend() - # Yield to caller - val = self.loadvar(inst.value.name) - typ = self.typeof(inst.value.name) - - # cast the local val to the type yielded - yret = self.context.cast(self.builder, val, typ, - self.gentype.yield_type) - - # get the return repr of yielded value - retval = self.context.get_return_value(self.builder, typ, yret) - - # return - self.call_conv.return_value(self.builder, retval) - - # Resumption point - y.lower_yield_resume() - # None is returned by the yield expression - return self.context.get_constant_generic(self.builder, retty, None) - - def lower_binop(self, resty, expr, op): - lhs = expr.lhs - rhs = expr.rhs - static_lhs = expr.static_lhs - static_rhs = expr.static_rhs - lty = self.typeof(lhs.name) - rty = self.typeof(rhs.name) - lhs = self.loadvar(lhs.name) - rhs = self.loadvar(rhs.name) - - # Convert argument to match - signature = self.fndesc.calltypes[expr] - lhs = self.context.cast(self.builder, lhs, lty, signature.args[0]) - rhs = self.context.cast(self.builder, rhs, rty, signature.args[1]) - - def cast_result(res): - return self.context.cast(self.builder, res, - signature.return_type, resty) - - # First try with static 
operands, if known - def try_static_impl(tys, args): - if any(a is ir.UNDEFINED for a in args): - return None - static_sig = typing.signature(signature.return_type, *tys) - try: - static_impl = self.context.get_function(op, static_sig) - return static_impl(self.builder, args) - except NotImplementedError: - return None - - res = try_static_impl((types.Const(static_lhs), types.Const(static_rhs)), - (static_lhs, static_rhs)) - if res is not None: - return cast_result(res) - - res = try_static_impl((types.Const(static_lhs), rty), - (static_lhs, rhs)) - if res is not None: - return cast_result(res) - - res = try_static_impl((lty, types.Const(static_rhs)), - (lhs, static_rhs)) - if res is not None: - return cast_result(res) - - # Normal implementation for generic arguments - impl = self.context.get_function(op, signature) - res = impl(self.builder, (lhs, rhs)) - return cast_result(res) - - def lower_getitem(self, resty, expr, value, index, signature): - baseval = self.loadvar(value.name) - indexval = self.loadvar(index.name) - impl = self.context.get_function("getitem", signature) - argvals = (baseval, indexval) - argtyps = (self.typeof(value.name), - self.typeof(index.name)) - castvals = [self.context.cast(self.builder, av, at, ft) - for av, at, ft in zip(argvals, argtyps, - signature.args)] - res = impl(self.builder, castvals) - return self.context.cast(self.builder, res, - signature.return_type, - resty) - - def _cast_var(self, var, ty): - """ - Cast a Numba IR variable to the given Numba type, returning a - low-level value. 
- """ - if isinstance(var, _VarArgItem): - varty = self.typeof(var.vararg.name)[var.index] - val = self.builder.extract_value(self.loadvar(var.vararg.name), - var.index) - else: - varty = self.typeof(var.name) - val = self.loadvar(var.name) - return self.context.cast(self.builder, val, varty, ty) - - def fold_call_args(self, fnty, signature, pos_args, vararg, kw_args): - if vararg: - # Inject *args from function call - # The lowering will be done in _cast_var() above. - tp_vararg = self.typeof(vararg.name) - assert isinstance(tp_vararg, types.BaseTuple) - pos_args = pos_args + [_VarArgItem(vararg, i) - for i in range(len(tp_vararg))] - - # Fold keyword arguments and resolve default argument values - pysig = signature.pysig - if pysig is None: - if kw_args: - raise NotImplementedError("unsupported keyword arguments " - "when calling %s" % (fnty,)) - argvals = [self._cast_var(var, sigty) - for var, sigty in zip(pos_args, signature.args)] - else: - def normal_handler(index, param, var): - return self._cast_var(var, signature.args[index]) - - def default_handler(index, param, default): - return self.context.get_constant_generic( - self.builder, signature.args[index], default) - - def stararg_handler(index, param, vars): - stararg_ty = signature.args[index] - assert isinstance(stararg_ty, types.BaseTuple), stararg_ty - values = [self._cast_var(var, sigty) - for var, sigty in zip(vars, stararg_ty)] - return cgutils.make_anonymous_struct(self.builder, values) - - argvals = typing.fold_arguments(pysig, - pos_args, dict(kw_args), - normal_handler, - default_handler, - stararg_handler) - return argvals - - def lower_print(self, inst): - """ - Lower a ir.Print() - """ - # We handle this, as far as possible, as a normal call to built-in - # print(). This will make it easy to undo the special ir.Print - # rewrite when it becomes unnecessary (e.g. when we have native - # strings). 
- sig = self.fndesc.calltypes[inst] - assert sig.return_type == types.none - fnty = self.context.typing_context.resolve_value_type(print) - - # Fix the call signature to inject any constant-inferred - # string argument - pos_tys = list(sig.args) - pos_args = list(inst.args) - for i in range(len(pos_args)): - if i in inst.consts: - pyval = inst.consts[i] - if isinstance(pyval, str): - pos_tys[i] = types.Const(pyval) - - fixed_sig = typing.signature(sig.return_type, *pos_tys) - fixed_sig.pysig = sig.pysig - - argvals = self.fold_call_args(fnty, sig, pos_args, inst.vararg, {}) - impl = self.context.get_function(print, fixed_sig) - impl(self.builder, argvals) - - def lower_call(self, resty, expr): - signature = self.fndesc.calltypes[expr] - self.debug_print("# lower_call: expr = {0}".format(expr)) - if isinstance(signature.return_type, types.Phantom): - return self.context.get_dummy_value() - - if isinstance(expr.func, ir.Intrinsic): - fnty = expr.func.name - argvals = expr.func.args - else: - fnty = self.typeof(expr.func.name) - argvals = self.fold_call_args(fnty, signature, - expr.args, expr.vararg, expr.kws) - - if isinstance(fnty, types.ExternalFunction): - # Handle a named external function - self.debug_print("# external function") - fndesc = funcdesc.ExternalFunctionDescriptor( - fnty.symbol, fnty.sig.return_type, fnty.sig.args) - func = self.context.declare_external_function(self.builder.module, - fndesc) - res = self.context.call_external_function( - self.builder, func, fndesc.argtypes, argvals) - - elif isinstance(fnty, types.NumbaFunction): - # Handle a compiled Numba function - self.debug_print("# calling numba function") - res = self.context.call_internal(self.builder, fnty.fndesc, - fnty.sig, argvals) - - elif isinstance(fnty, types.ExternalFunctionPointer): - self.debug_print("# calling external function pointer") - # Handle a C function pointer - pointer = self.loadvar(expr.func.name) - # If the external function pointer uses libpython - if 
fnty.requires_gil: - self.init_pyapi() - # Acquire the GIL - gil_state = self.pyapi.gil_ensure() - # Make PyObjects - newargvals = [] - pyvals = [] - for exptyp, gottyp, aval in zip(fnty.sig.args, signature.args, - argvals): - # Adjust argument values to pyobjects - if exptyp == types.ffi_forced_object: - self.incref(gottyp, aval) - obj = self.pyapi.from_native_value(gottyp, aval, - self.env_manager) - newargvals.append(obj) - pyvals.append(obj) - else: - newargvals.append(aval) - - # Call external function - res = self.context.call_function_pointer(self.builder, pointer, - newargvals, fnty.cconv) - # Release PyObjects - for obj in pyvals: - self.pyapi.decref(obj) - - # Release the GIL - self.pyapi.gil_release(gil_state) - # If the external function pointer does NOT use libpython - else: - res = self.context.call_function_pointer(self.builder, pointer, - argvals, fnty.cconv) - - elif isinstance(fnty, types.RecursiveCall): - # Recursive call - qualprefix = fnty.overloads[signature.args] - mangler = self.context.mangler or default_mangler - mangled_name = mangler(qualprefix, signature.args) - # special case self recursion - if self.builder.function.name.startswith(mangled_name): - res = self.context.call_internal(self.builder, self.fndesc, - signature, argvals) - else: - res = self.context.call_unresolved(self.builder, mangled_name, - signature, argvals) - - else: - # Normal function resolution - self.debug_print("# calling normal function: {0}".format(fnty)) - self.debug_print("# signature: {0}".format(signature)) - impl = self.context.get_function(fnty, signature) - if signature.recvr: - # The "self" object is passed as the function object - # for bounded function - the_self = self.loadvar(expr.func.name) - # Prepend the self reference - argvals = [the_self] + list(argvals) - - res = impl(self.builder, argvals) - - libs = getattr(impl, "libs", ()) - for lib in libs: - self.library.add_linking_library(lib) - - return self.context.cast(self.builder, res, 
signature.return_type, - resty) - - def lower_expr(self, resty, expr): - if expr.op == 'binop': - return self.lower_binop(resty, expr, expr.fn) - elif expr.op == 'inplace_binop': - lty = self.typeof(expr.lhs.name) - if lty.mutable: - return self.lower_binop(resty, expr, expr.fn) - else: - # inplace operators on non-mutable types reuse the same - # definition as the corresponding copying operators. - return self.lower_binop(resty, expr, expr.immutable_fn) - elif expr.op == 'unary': - val = self.loadvar(expr.value.name) - typ = self.typeof(expr.value.name) - # Get function - signature = self.fndesc.calltypes[expr] - impl = self.context.get_function(expr.fn, signature) - # Convert argument to match - val = self.context.cast(self.builder, val, typ, signature.args[0]) - res = impl(self.builder, [val]) - res = self.context.cast(self.builder, res, - signature.return_type, resty) - return res - - elif expr.op == 'call': - res = self.lower_call(resty, expr) - return res - - elif expr.op == 'pair_first': - val = self.loadvar(expr.value.name) - ty = self.typeof(expr.value.name) - res = self.context.pair_first(self.builder, val, ty) - self.incref(resty, res) - return res - - elif expr.op == 'pair_second': - val = self.loadvar(expr.value.name) - ty = self.typeof(expr.value.name) - res = self.context.pair_second(self.builder, val, ty) - self.incref(resty, res) - return res - - elif expr.op in ('getiter', 'iternext'): - val = self.loadvar(expr.value.name) - ty = self.typeof(expr.value.name) - signature = self.fndesc.calltypes[expr] - impl = self.context.get_function(expr.op, signature) - [fty] = signature.args - castval = self.context.cast(self.builder, val, ty, fty) - res = impl(self.builder, (castval,)) - res = self.context.cast(self.builder, res, signature.return_type, - resty) - return res - - elif expr.op == 'exhaust_iter': - val = self.loadvar(expr.value.name) - ty = self.typeof(expr.value.name) - # Unpack optional - if isinstance(ty, types.Optional): - val = 
self.context.cast(self.builder, val, ty, ty.type) - ty = ty.type - - # If we have a tuple, we needn't do anything - # (and we can't iterate over the heterogeneous ones). - if isinstance(ty, types.BaseTuple): - assert ty == resty - self.incref(ty, val) - return val - - itemty = ty.iterator_type.yield_type - tup = self.context.get_constant_undef(resty) - pairty = types.Pair(itemty, types.boolean) - getiter_sig = typing.signature(ty.iterator_type, ty) - getiter_impl = self.context.get_function('getiter', - getiter_sig) - iternext_sig = typing.signature(pairty, ty.iterator_type) - iternext_impl = self.context.get_function('iternext', - iternext_sig) - iterobj = getiter_impl(self.builder, (val,)) - # We call iternext() as many times as desired (`expr.count`). - for i in range(expr.count): - pair = iternext_impl(self.builder, (iterobj,)) - is_valid = self.context.pair_second(self.builder, - pair, pairty) - with cgutils.if_unlikely(self.builder, - self.builder.not_(is_valid)): - self.return_exception(ValueError) - item = self.context.pair_first(self.builder, - pair, pairty) - tup = self.builder.insert_value(tup, item, i) - - # Call iternext() once more to check that the iterator - # is exhausted. 
- pair = iternext_impl(self.builder, (iterobj,)) - is_valid = self.context.pair_second(self.builder, - pair, pairty) - with cgutils.if_unlikely(self.builder, is_valid): - self.return_exception(ValueError) - - self.decref(ty.iterator_type, iterobj) - return tup - - elif expr.op == "getattr": - val = self.loadvar(expr.value.name) - ty = self.typeof(expr.value.name) - - if isinstance(resty, types.BoundFunction): - # if we are getting out a method, assume we have typed this - # properly and just build a bound function object - casted = self.context.cast(self.builder, val, ty, resty.this) - res = self.context.get_bound_function(self.builder, casted, - resty.this) - self.incref(resty, res) - return res - else: - impl = self.context.get_getattr(ty, expr.attr) - attrty = self.context.typing_context.resolve_getattr(ty, - expr.attr) - - if impl is None: - # ignore the attribute - return self.context.get_dummy_value() - else: - res = impl(self.context, self.builder, ty, val, expr.attr) - - # Cast the attribute type to the expected output type - res = self.context.cast(self.builder, res, attrty, resty) - return res - - elif expr.op == "static_getitem": - signature = typing.signature(resty, self.typeof(expr.value.name), - types.Const(expr.index)) - try: - # Both get_function() and the returned implementation can - # raise NotImplementedError if the types aren't supported - impl = self.context.get_function("static_getitem", signature) - return impl(self.builder, (self.loadvar(expr.value.name), expr.index)) - except NotImplementedError: - if expr.index_var is None: - raise - # Fall back on the generic getitem() implementation - # for this type. 
- signature = self.fndesc.calltypes[expr] - return self.lower_getitem(resty, expr, expr.value, - expr.index_var, signature) - - elif expr.op == "getitem": - signature = self.fndesc.calltypes[expr] - return self.lower_getitem(resty, expr, expr.value, expr.index, - signature) - - elif expr.op == "build_tuple": - itemvals = [self.loadvar(i.name) for i in expr.items] - itemtys = [self.typeof(i.name) for i in expr.items] - castvals = [self.context.cast(self.builder, val, fromty, toty) - for val, toty, fromty in zip(itemvals, resty, itemtys)] - tup = self.context.make_tuple(self.builder, resty, castvals) - self.incref(resty, tup) - return tup - - elif expr.op == "build_list": - itemvals = [self.loadvar(i.name) for i in expr.items] - itemtys = [self.typeof(i.name) for i in expr.items] - castvals = [self.context.cast(self.builder, val, fromty, resty.dtype) - for val, fromty in zip(itemvals, itemtys)] - return self.context.build_list(self.builder, resty, castvals) - - elif expr.op == "build_set": - # Insert in reverse order, as Python does - items = expr.items[::-1] - itemvals = [self.loadvar(i.name) for i in items] - itemtys = [self.typeof(i.name) for i in items] - castvals = [self.context.cast(self.builder, val, fromty, resty.dtype) - for val, fromty in zip(itemvals, itemtys)] - return self.context.build_set(self.builder, resty, castvals) - - elif expr.op == "cast": - val = self.loadvar(expr.value.name) - ty = self.typeof(expr.value.name) - castval = self.context.cast(self.builder, val, ty, resty) - self.incref(resty, castval) - return castval - - elif expr.op in self.context.special_ops: - res = self.context.special_ops[expr.op](self, expr) - return res - - raise NotImplementedError(expr) - - def _alloca_var(self, name, fetype): - """ - Ensure the given variable has an allocated stack slot. 
- """ - if name not in self.varmap: - # If not already defined, allocate it - llty = self.context.get_value_type(fetype) - ptr = self.alloca_lltype(name, llty) - # Remember the pointer - self.varmap[name] = ptr - - def getvar(self, name): - """ - Get a pointer to the given variable's slot. - """ - return self.varmap[name] - - def loadvar(self, name): - """ - Load the given variable's value. - """ - ptr = self.getvar(name) - return self.builder.load(ptr) - - def storevar(self, value, name): - """ - Store the value into the given variable. - """ - fetype = self.typeof(name) - - # Define if not already - self._alloca_var(name, fetype) - - # Clean up existing value stored in the variable - old = self.loadvar(name) - self.decref(fetype, old) - - # Store variable - ptr = self.getvar(name) - if value.type != ptr.type.pointee: - msg = ("Storing {value.type} to ptr of {ptr.type.pointee} ('{name}'). " - "FE type {fetype}").format(value=value, ptr=ptr, - fetype=fetype, name=name) - raise AssertionError(msg) - - self.builder.store(value, ptr) - - def delvar(self, name): - """ - Delete the given variable. - """ - fetype = self.typeof(name) - - # Define if not already (may happen if the variable is deleted - # at the beginning of a loop, but only set later in the loop) - self._alloca_var(name, fetype) - - ptr = self.getvar(name) - self.decref(fetype, self.builder.load(ptr)) - # Zero-fill variable to avoid double frees on subsequent dels - self.builder.store(Constant.null(ptr.type.pointee), ptr) - - def alloca(self, name, type): - lltype = self.context.get_value_type(type) - return self.alloca_lltype(name, lltype) - - def alloca_lltype(self, name, lltype): - # Is user variable? 
- is_uservar = not name.startswith('$') - # Allocate space for variable - aptr = cgutils.alloca_once(self.builder, lltype, name=name, zfill=True) - if is_uservar: - # Emit debug info for user variable - sizeof = self.context.get_abi_sizeof(lltype) - self.debuginfo.mark_variable(self.builder, aptr, name=name, - lltype=lltype, size=sizeof, - loc=self.loc) - return aptr - - def incref(self, typ, val): - if not self.context.enable_nrt: - return - - self.context.nrt.incref(self.builder, typ, val) - - def decref(self, typ, val): - if not self.context.enable_nrt: - return - - self.context.nrt.decref(self.builder, typ, val) diff --git a/numba/numba/macro.py b/numba/numba/macro.py deleted file mode 100644 index 25640a936..000000000 --- a/numba/numba/macro.py +++ /dev/null @@ -1,9 +0,0 @@ -""" -Macro handling. - -Macros are expanded on block-by-block -""" -from __future__ import absolute_import, print_function, division - -# Expose the Macro object from the corresponding IR rewrite pass -from .rewrites.macros import Macro diff --git a/numba/numba/mathnames.h b/numba/numba/mathnames.h deleted file mode 100644 index b06f7013b..000000000 --- a/numba/numba/mathnames.h +++ /dev/null @@ -1,78 +0,0 @@ -MATH_UNARY(sin, double, double) -MATH_UNARY(sinf, float, float) - -MATH_UNARY(cos, double, double) -MATH_UNARY(cosf, float, float) - -MATH_UNARY(tan, double, double) -MATH_UNARY(tanf, float, float) - -MATH_UNARY(sinh, double, double) -MATH_UNARY(sinhf, float, float) - -MATH_UNARY(cosh, double, double) -MATH_UNARY(coshf, float, float) - -MATH_UNARY(tanh, double, double) -MATH_UNARY(tanhf, float, float) - -MATH_UNARY(asin, double, double) -MATH_UNARY(asinf, float, float) - -MATH_UNARY(acos, double, double) -MATH_UNARY(acosf, float, float) - -MATH_UNARY(atan, double, double) -MATH_UNARY(atanf, float, float) - -MATH_UNARY(asinh, double, double) -MATH_UNARY(asinhf, float, float) - -MATH_UNARY(acosh, double, double) -MATH_UNARY(acoshf, float, float) - -MATH_UNARY(atanh, double, double) 
-MATH_UNARY(atanhf, float, float) - -MATH_UNARY(exp, double, double) -MATH_UNARY(expf, float, float) - -MATH_UNARY(expm1, double, double) -MATH_UNARY(expm1f, float, float) - -MATH_UNARY(sqrt, double, double) -MATH_UNARY(sqrtf, float, float) - -MATH_UNARY(fabs, double, double) -MATH_UNARY(fabsf, float, float) - -MATH_UNARY(floor, double, double) -MATH_UNARY(floorf, float, float) - -MATH_UNARY(ceil, double, double) -MATH_UNARY(ceilf, float, float) - -MATH_UNARY(log, double, double) -MATH_UNARY(logf, float, float) - -MATH_UNARY(log10, double, double) -MATH_UNARY(log10f, float, float) - -MATH_UNARY(log1p, double, double) -MATH_UNARY(log1pf, float, float) - -MATH_UNARY(round, double, double) -MATH_UNARY(roundf, float, float) - -MATH_UNARY(trunc, double, double) -MATH_UNARY(truncf, float, float) - -MATH_BINARY(pow, double, double, double) -MATH_BINARY(powf, float, float, float) - -MATH_BINARY(fmod, double, double, double) -MATH_BINARY(fmodf, float, float, float) - -MATH_BINARY(atan2_fixed, double, double, double) -MATH_BINARY(atan2, double, double, double) -MATH_BINARY(atan2f, float, float, float) diff --git a/numba/numba/mviewbuf.c b/numba/numba/mviewbuf.c deleted file mode 100644 index 53bdb453b..000000000 --- a/numba/numba/mviewbuf.c +++ /dev/null @@ -1,461 +0,0 @@ -#include "_pymodule.h" - -static int get_buffer(PyObject* obj, Py_buffer *buf, int force) -{ - Py_buffer read_buf; - int flags = PyBUF_ND|PyBUF_STRIDES|PyBUF_FORMAT; - int ret; - - /* Attempt to get a writable buffer */ - if (!PyObject_GetBuffer(obj, buf, flags|PyBUF_WRITABLE)) - return 0; - if (!force) - return -1; - - /* Make a writable buffer from a read-only buffer */ - PyErr_Clear(); - if(-1 == PyObject_GetBuffer(obj, &read_buf, flags)) - return -1; - ret = PyBuffer_FillInfo(buf, NULL, read_buf.buf, read_buf.len, 0, - flags|PyBUF_WRITABLE); - PyBuffer_Release(&read_buf); - return ret; -} - -static void free_buffer(Py_buffer * buf) -{ - PyBuffer_Release(buf); -} - -/** - * Return a pointer to the data 
of a writable buffer from obj. If only a - * read-only buffer is available and force is True, a read-write buffer based on - * the read-only buffer is obtained. Note that this may have some surprising - * effects on buffers which expect the data from their read-only buffer not to - * be modified. - */ -static PyObject* -memoryview_get_buffer(PyObject *self, PyObject *args){ - PyObject *obj = NULL; - int force = 0; - PyObject *ret = NULL; - void * ptr = NULL; - const void* roptr = NULL; - Py_ssize_t buflen; - Py_buffer buf; - - if (!PyArg_ParseTuple(args, "O|i", &obj, &force)) - return NULL; - - if (!get_buffer(obj, &buf, force)) { /* new buffer api */ - ret = PyLong_FromVoidPtr(buf.buf); - free_buffer(&buf); - } else { /* old buffer api */ - PyErr_Clear(); - if (-1 == PyObject_AsWriteBuffer(obj, &ptr, &buflen)) { - if (!force) - return NULL; - PyErr_Clear(); - if(-1 == PyObject_AsReadBuffer(obj, &roptr, &buflen)) - return NULL; - ptr = (void*) roptr; - } - ret = PyLong_FromVoidPtr(ptr); - } - return ret; -} - -/** - * Gets a half-open range [start, end) which contains the array data - * Modified from numpy/core/src/multiarray/array_assign.c - */ -static PyObject* -get_extents(Py_ssize_t *shape, Py_ssize_t *strides, int ndim, - Py_ssize_t itemsize, Py_ssize_t ptr) -{ - Py_ssize_t start, end; - int idim; - Py_ssize_t *dimensions = shape; - PyObject *ret = NULL; - - if (ndim < 0 ){ - PyErr_SetString(PyExc_ValueError, "buffer ndim < 0"); - return NULL; - } - - if (!dimensions) { - if (ndim == 0) { - start = end = ptr; - end += itemsize; - return Py_BuildValue("nn", start, end); - } - PyErr_SetString(PyExc_ValueError, "buffer shape is not defined"); - return NULL; - } - - if (!strides) { - PyErr_SetString(PyExc_ValueError, "buffer strides is not defined"); - return NULL; - } - - /* Calculate with a closed range [start, end] */ - start = end = ptr; - for (idim = 0; idim < ndim; ++idim) { - Py_ssize_t stride = strides[idim], dim = dimensions[idim]; - /* If the array size 
is zero, return an empty range */ - if (dim == 0) { - start = end = ptr; - ret = Py_BuildValue("nn", start, end); - break; - } - /* Expand either upwards or downwards depending on stride */ - else { - if (stride > 0) { - end += stride * (dim - 1); - } - else if (stride < 0) { - start += stride * (dim - 1); - } - } - } - - if (!ret) { - /* Return a half-open range */ - Py_ssize_t out_start = start; - Py_ssize_t out_end = end + itemsize; - - ret = Py_BuildValue("nn", out_start, out_end); - } - - return ret; -} - -static PyObject* -memoryview_get_extents(PyObject *self, PyObject *args) -{ - PyObject *obj = NULL; - PyObject *ret = NULL; - Py_buffer b; - const void * ptr = NULL; - Py_ssize_t bufptr, buflen; - if (!PyArg_ParseTuple(args, "O", &obj)) - return NULL; - - if (!get_buffer(obj, &b, 0)) { /* new buffer api */ - ret = get_extents(b.shape, b.strides, b.ndim, b.itemsize, - (Py_ssize_t)b.buf); - free_buffer(&b); - } else { /* old buffer api */ - PyErr_Clear(); - if (-1 == PyObject_AsReadBuffer(obj, &ptr, &buflen)) return NULL; - bufptr = (Py_ssize_t)ptr; - ret = Py_BuildValue("nn", bufptr, bufptr + buflen); - } - return ret; -} - -static PyObject* -memoryview_get_extents_info(PyObject *self, PyObject *args) -{ - int i; - Py_ssize_t *shape_ary = NULL; - Py_ssize_t *strides_ary = NULL; - PyObject *shape_tuple = NULL; - PyObject *strides_tuple = NULL; - PyObject *shape = NULL, *strides = NULL; - Py_ssize_t itemsize = 0; - int ndim = 0; - PyObject* res = NULL; - - if (!PyArg_ParseTuple(args, "OOin", &shape, &strides, &ndim, &itemsize)) - goto cleanup; - - if (ndim < 0) { - PyErr_SetString(PyExc_ValueError, "ndim is negative"); - goto cleanup; - } - - if (itemsize <= 0) { - PyErr_SetString(PyExc_ValueError, "ndim <= 0"); - goto cleanup; - } - - shape_ary = malloc(sizeof(Py_ssize_t) * ndim + 1); - strides_ary = malloc(sizeof(Py_ssize_t) * ndim + 1); - - shape_tuple = PySequence_Fast(shape, "shape is not a sequence"); - if (!shape_tuple) goto cleanup; - - for (i = 0; i < 
ndim; ++i) { - shape_ary[i] = PyNumber_AsSsize_t( - PySequence_Fast_GET_ITEM(shape_tuple, i), - PyExc_OverflowError); - } - - strides_tuple = PySequence_Fast(strides, "strides is not a sequence"); - if (!strides_tuple) goto cleanup; - - for (i = 0; i < ndim; ++i) { - strides_ary[i] = PyNumber_AsSsize_t( - PySequence_Fast_GET_ITEM(strides_tuple, i), - PyExc_OverflowError); - } - - res = get_extents(shape_ary, strides_ary, ndim, itemsize, 0); -cleanup: - free(shape_ary); - free(strides_ary); - Py_XDECREF(shape_tuple); - Py_XDECREF(strides_tuple); - return res; -} - - -/* new type to expose buffer interface */ -typedef struct { - PyObject_HEAD - /* Type-specific fields go here. */ -} MemAllocObject; - - -static int -get_bufinfo(PyObject *self, Py_ssize_t *psize, void **pptr) -{ - PyObject *buflen = NULL; - PyObject *bufptr = NULL; - Py_ssize_t size = 0; - void* ptr = NULL; - int ret = -1; - - buflen = PyObject_GetAttrString(self, "_buflen_"); - if (!buflen) goto cleanup; - - bufptr = PyObject_GetAttrString(self, "_bufptr_"); - if (!bufptr) goto cleanup; - - size = PyNumber_AsSsize_t(buflen, PyExc_OverflowError); - if (size == -1 && PyErr_Occurred()) goto cleanup; - else if (size < 0) { - PyErr_SetString(PyExc_ValueError, "negative buffer size"); - goto cleanup; - } - - ptr = PyLong_AsVoidPtr(PyNumber_Long(bufptr)); - if (PyErr_Occurred()) - goto cleanup; - else if (!ptr) { - PyErr_SetString(PyExc_ValueError, "null buffer pointer"); - goto cleanup; - } - - *psize = size; - *pptr = ptr; - ret = 0; -cleanup: - Py_XDECREF(buflen); - Py_XDECREF(bufptr); - return ret; -} - - -#if PY_MAJOR_VERSION >= 3 - - - static int - MemAllocObject_getbuffer(PyObject *self, Py_buffer *view, int flags) - { - Py_ssize_t size = 0; - void *ptr = 0; - int readonly; - - if(-1 == get_bufinfo(self, &size, &ptr)) - return -1; - - readonly = (PyBUF_WRITABLE & flags) != PyBUF_WRITABLE; - - /* fill buffer */ - if (-1 == PyBuffer_FillInfo(view, self, (void*)ptr, size, readonly, flags)) - return -1; - 
- return 0; - } - - static void - MemAllocObject_releasebuffer(PyObject *self, Py_buffer *view) - { - /* Do nothing */ - } - - static PyBufferProcs MemAlloc_as_buffer = { - MemAllocObject_getbuffer, - MemAllocObject_releasebuffer, - }; -#else - static int - MemAllocObject_getbufferproc(PyObject *self, Py_buffer *view, int flags) - { - Py_ssize_t size = 0; - void *ptr = 0; - int readonly; - - if(-1 == get_bufinfo(self, &size, &ptr)) - return -1; - - readonly = (PyBUF_WRITABLE & flags) != PyBUF_WRITABLE; - - /* fill buffer */ - if (-1 == PyBuffer_FillInfo(view, self, (void*)ptr, size, readonly, flags)) - return -1; - - return 0; - } - - static Py_ssize_t - MemAllocObject_writebufferproc(PyObject *self, Py_ssize_t segment, - void **ptrptr) - { - Py_ssize_t size = 0; - if (segment != 0) { - PyErr_SetString(PyExc_ValueError, "invalid segment"); - return -1; - } - - if(-1 == get_bufinfo(self, &size, ptrptr)) - return -1; - return size; - } - - static Py_ssize_t - MemAllocObject_readbufferproc(PyObject *self, Py_ssize_t segment, - void **ptrptr) - { - return MemAllocObject_writebufferproc(self, segment, ptrptr); - } - - - static Py_ssize_t - MemAllocObject_segcountproc(PyObject *self, Py_ssize_t *lenp) - { - void *ptr = 0; - if (lenp){ - if(-1 == get_bufinfo(self, lenp, &ptr)) return 0; - } - return 1; - } - - static Py_ssize_t - MemAllocObject_charbufferproc(PyObject *self, Py_ssize_t segment, - char **ptrptr) - { - return MemAllocObject_writebufferproc(self, segment, (void*)ptrptr); - } - - static PyBufferProcs MemAlloc_as_buffer = { - MemAllocObject_readbufferproc, /*bf_getreadbuffer*/ - MemAllocObject_writebufferproc, /*bf_getwritebuffer*/ - MemAllocObject_segcountproc, /*bf_getsegcount*/ - MemAllocObject_charbufferproc, /*bf_getcharbuffer*/ - /* new buffer protocol */ - MemAllocObject_getbufferproc, /*bf_getbuffer*/ - NULL, /*bf_releasebuffer*/ - }; -#endif - -static PyTypeObject MemAllocType = { -#if PY_MAJOR_VERSION >= 3 - PyVarObject_HEAD_INIT(NULL, 0) -#else - 
PyObject_HEAD_INIT(NULL) - 0, /* ob_size */ -#endif - "mviewbuf.MemAlloc", /* tp_name */ - sizeof(MemAllocObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - 0, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ -#if PY_MAJOR_VERSION >= 3 - 0, /* tp_reserved */ -#else - 0, /* tp_compare */ -#endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - &MemAlloc_as_buffer, /*tp_as_buffer*/ - (Py_TPFLAGS_DEFAULT -#if PY_MAJOR_VERSION < 3 - | Py_TPFLAGS_CHECKTYPES -#endif -#if (PY_VERSION_HEX >= 0x02060000) && (PY_VERSION_HEX < 0x03000000) - | Py_TPFLAGS_HAVE_NEWBUFFER -#endif - | Py_TPFLAGS_BASETYPE), /* tp_flags */ - 0, /* tp_doc */ - - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ -#if PY_VERSION_HEX >= 0x02060000 - 0, /* tp_version_tag */ -#endif -}; - - -static PyMethodDef core_methods[] = { -#define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL } - declmethod(memoryview_get_buffer), - declmethod(memoryview_get_extents), - declmethod(memoryview_get_extents_info), - { NULL }, -#undef declmethod -}; - - -MOD_INIT(mviewbuf) { - PyObject *module; - MOD_DEF(module, "mviewbuf", "No docs", core_methods) - if (module == NULL) - return MOD_ERROR_VAL; - - MemAllocType.tp_new = PyType_GenericNew; - if (PyType_Ready(&MemAllocType) < 0){ - return MOD_ERROR_VAL; - } - - Py_INCREF(&MemAllocType); - 
PyModule_AddObject(module, "MemAlloc", (PyObject*)&MemAllocType); - - return MOD_SUCCESS_VAL(module); -} - diff --git a/numba/numba/npdatetime.py b/numba/numba/npdatetime.py deleted file mode 100644 index 8fecb3d00..000000000 --- a/numba/numba/npdatetime.py +++ /dev/null @@ -1,200 +0,0 @@ -""" -Helper functions for np.timedelta64 and np.datetime64. -For now, multiples-of-units (for example timedeltas expressed in tens -of seconds) are not supported. -""" - - -import numpy as np - - -DATETIME_UNITS = { - 'Y': 0, # Years - 'M': 1, # Months - 'W': 2, # Weeks - # Yes, there's a gap here - 'D': 4, # Days - 'h': 5, # Hours - 'm': 6, # Minutes - 's': 7, # Seconds - 'ms': 8, # Milliseconds - 'us': 9, # Microseconds - 'ns': 10, # Nanoseconds - 'ps': 11, # Picoseconds - 'fs': 12, # Femtoseconds - 'as': 13, # Attoseconds - '': 14, # "generic", i.e. unit-less -} - -NAT = np.timedelta64('nat').astype(np.int64) - -# NOTE: numpy has several inconsistent functions for timedelta casting: -# - can_cast_timedelta64_{metadata,units}() disallows "safe" casting -# to and from generic units -# - cast_timedelta_to_timedelta() allows casting from (but not to) -# generic units -# - compute_datetime_metadata_greatest_common_divisor() allows casting from -# generic units (used for promotion) - - -def same_kind(src, dest): - """ - Whether the *src* and *dest* units are of the same kind. - """ - return (DATETIME_UNITS[src] < 5) == (DATETIME_UNITS[dest] < 5) - - -def can_cast_timedelta_units(src, dest): - # Mimick numpy's "safe" casting and promotion - # `dest` must be more precise than `src` and they must be compatible - # for conversion. - # XXX should we switch to enforcing "same-kind" for Numpy 1.10+ ? 
- src = DATETIME_UNITS[src] - dest = DATETIME_UNITS[dest] - if src == dest: - return True - if src == 14: - return True - if src > dest: - return False - if dest == 14: - # unit-less timedelta64 is not compatible with anything else - return False - if src <= 1 and dest > 1: - # Cannot convert between months or years and other units - return False - return True - - -# Exact conversion factors from one unit to the immediately more precise one -_factors = { - 0: (1, 12), # Years -> Months - 2: (4, 7), # Weeks -> Days - 4: (5, 24), # Days -> Hours - 5: (6, 60), # Hours -> Minutes - 6: (7, 60), # Minutes -> Seconds - 7: (8, 1000), - 8: (9, 1000), - 9: (10, 1000), - 10: (11, 1000), - 11: (12, 1000), - 12: (13, 1000), -} - -def _get_conversion_multiplier(big_unit_code, small_unit_code): - """ - Return an integer multiplier allowing to convert from *big_unit_code* - to *small_unit_code*. - None is returned if the conversion is not possible through a - simple integer multiplication. - """ - # Mimicks get_datetime_units_factor() in numpy's datetime.c, - # with a twist to allow no-op conversion from generic units. - if big_unit_code == 14: - return 1 - c = big_unit_code - factor = 1 - while c < small_unit_code: - try: - c, mult = _factors[c] - except KeyError: - # No possible conversion - return None - factor *= mult - if c == small_unit_code: - return factor - else: - return None - -def get_timedelta_conversion_factor(src_unit, dest_unit): - """ - Return an integer multiplier allowing to convert from timedeltas - of *src_unit* to *dest_unit*. - """ - return _get_conversion_multiplier(DATETIME_UNITS[src_unit], - DATETIME_UNITS[dest_unit]) - -def get_datetime_timedelta_conversion(datetime_unit, timedelta_unit): - """ - Compute a possible conversion for combining *datetime_unit* and - *timedelta_unit* (presumably for adding or subtracting). - Return (result unit, integer datetime multiplier, integer timedelta multiplier). 
- RuntimeError is raised if the combination is impossible. - """ - # XXX now unused (I don't know where / how Numpy uses this) - dt_unit_code = DATETIME_UNITS[datetime_unit] - td_unit_code = DATETIME_UNITS[timedelta_unit] - if td_unit_code == 14 or dt_unit_code == 14: - return datetime_unit, 1, 1 - if td_unit_code < 2 and dt_unit_code >= 2: - # Cannot combine Y or M timedelta64 with a finer-grained datetime64 - raise RuntimeError("cannot combine datetime64(%r) and timedelta64(%r)" - % (datetime_unit, timedelta_unit)) - dt_factor, td_factor = 1, 1 - - # If years or months, the datetime unit is first scaled to weeks or days, - # then conversion continues below. This is the same algorithm as used - # in Numpy's get_datetime_conversion_factor() (src/multiarray/datetime.c): - # """Conversions between years/months and other units use - # the factor averaged over the 400 year leap year cycle.""" - if dt_unit_code == 0: - if td_unit_code >= 4: - dt_factor = 97 + 400 * 365 - td_factor = 400 - dt_unit_code = 4 - elif td_unit_code == 2: - dt_factor = 97 + 400 * 365 - td_factor = 400 * 7 - dt_unit_code = 2 - elif dt_unit_code == 1: - if td_unit_code >= 4: - dt_factor = 97 + 400 * 365 - td_factor = 400 * 12 - dt_unit_code = 4 - elif td_unit_code == 2: - dt_factor = 97 + 400 * 365 - td_factor = 400 * 12 * 7 - dt_unit_code = 2 - - if td_unit_code >= dt_unit_code: - factor = _get_conversion_multiplier(dt_unit_code, td_unit_code) - assert factor is not None, (dt_unit_code, td_unit_code) - return timedelta_unit, dt_factor * factor, td_factor - else: - factor = _get_conversion_multiplier(td_unit_code, dt_unit_code) - assert factor is not None, (dt_unit_code, td_unit_code) - return datetime_unit, dt_factor, td_factor * factor - - -def combine_datetime_timedelta_units(datetime_unit, timedelta_unit): - """ - Return the unit result of combining *datetime_unit* with *timedelta_unit* - (e.g. by adding or subtracting). None is returned if combining - those units is forbidden. 
- """ - dt_unit_code = DATETIME_UNITS[datetime_unit] - td_unit_code = DATETIME_UNITS[timedelta_unit] - if dt_unit_code == 14: - return timedelta_unit - elif td_unit_code == 14: - return datetime_unit - if td_unit_code < 2 and dt_unit_code >= 2: - return None - if dt_unit_code > td_unit_code: - return datetime_unit - else: - return timedelta_unit - -def get_best_unit(unit_a, unit_b): - """ - Get the best (i.e. finer-grained) of two units. - """ - a = DATETIME_UNITS[unit_a] - b = DATETIME_UNITS[unit_b] - if a == 14: - return unit_b - if b == 14: - return unit_a - if b > a: - return unit_b - return unit_a diff --git a/numba/numba/npyufunc/__init__.py b/numba/numba/npyufunc/__init__.py deleted file mode 100644 index e1ca9d325..000000000 --- a/numba/numba/npyufunc/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function, division, absolute_import - -from .decorators import Vectorize, GUVectorize, vectorize, guvectorize -from ._internal import PyUFunc_None, PyUFunc_Zero, PyUFunc_One -from . 
import _internal, array_exprs, parfor -if hasattr(_internal, 'PyUFunc_ReorderableNone'): - PyUFunc_ReorderableNone = _internal.PyUFunc_ReorderableNone -del _internal, array_exprs - - -def _init(): - - def init_vectorize(): - from numba.cuda.vectorizers import CUDAVectorize - return CUDAVectorize - - def init_guvectorize(): - from numba.cuda.vectorizers import CUDAGUFuncVectorize - return CUDAGUFuncVectorize - - Vectorize.target_registry.ondemand['cuda'] = init_vectorize - GUVectorize.target_registry.ondemand['cuda'] = init_guvectorize - -_init() -del _init diff --git a/numba/numba/npyufunc/_internal.c b/numba/numba/npyufunc/_internal.c deleted file mode 100644 index b8bd55316..000000000 --- a/numba/numba/npyufunc/_internal.c +++ /dev/null @@ -1,715 +0,0 @@ -#include "_internal.h" -#include "Python.h" - -/* A small object that handles deallocation of some of a PyUFunc's fields */ -typedef struct { - PyObject_HEAD - /* Borrowed reference */ - PyUFuncObject *ufunc; - /* Owned reference to ancilliary object */ - PyObject *object; -} PyUFuncCleaner; - -PyTypeObject PyUFuncCleaner_Type; - - -static PyObject * -cleaner_new(PyUFuncObject *ufunc, PyObject *object) -{ - PyUFuncCleaner *obj = PyObject_New(PyUFuncCleaner, &PyUFuncCleaner_Type); - if (obj != NULL) { - obj->ufunc = ufunc; - Py_XINCREF(object); - obj->object = object; - } - return (PyObject *) obj; -} - -/* Deallocate the PyArray_malloc calls */ -static void -cleaner_dealloc(PyUFuncCleaner *self) -{ - PyUFuncObject *ufunc = self->ufunc; - Py_XDECREF(self->object); - if (ufunc->functions) - PyArray_free(ufunc->functions); - if (ufunc->types) - PyArray_free(ufunc->types); - if (ufunc->data) - PyArray_free(ufunc->data); - PyObject_Del(self); -} - -PyTypeObject PyUFuncCleaner_Type = { -#if PY_MAJOR_VERSION >= 3 - PyVarObject_HEAD_INIT(NULL, 0) -#else - PyObject_HEAD_INIT(NULL) - 0, /* ob_size */ -#endif - "numba._UFuncCleaner", /* tp_name*/ - sizeof(PyUFuncCleaner), /* tp_basicsize*/ - 0, /* tp_itemsize */ - /* 
methods */ - (destructor) cleaner_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ -#if defined(NPY_PY3K) - 0, /* tp_reserved */ -#else - 0, /* tp_compare */ -#endif - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - 0, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ -}; - -/* ______________________________________________________________________ - * DUFunc: A call-time (hence dynamic) specializable ufunc. - */ - -typedef struct { - PyObject_HEAD - PyObject * dispatcher; - PyUFuncObject * ufunc; - PyObject * keepalive; - int frozen; -} PyDUFuncObject; - -static void -dufunc_dealloc(PyDUFuncObject *self) -{ - /* Note: There is no need to call PyArray_free() on - self->ufunc->ptr, since ufunc_dealloc() will do it for us. 
*/ - Py_XDECREF(self->ufunc); - Py_XDECREF(self->dispatcher); - Py_XDECREF(self->keepalive); - Py_TYPE(self)->tp_free((PyObject *)self); -} - -static PyObject * -dufunc_repr(PyDUFuncObject *dufunc) -{ - return PyString_FromFormat("", dufunc->ufunc->name); -} - -static PyObject * -dufunc_call(PyDUFuncObject *self, PyObject *args, PyObject *kws) -{ - PyObject *result=NULL, *method=NULL; - - result = PyUFunc_Type.tp_call((PyObject *)self->ufunc, args, kws); - if ((!self->frozen) && - (result == NULL) && - (PyErr_Occurred()) && - (PyErr_ExceptionMatches(PyExc_TypeError))) { - - /* Break back into Python when we fail at dispatch. */ - PyErr_Clear(); - method = PyObject_GetAttrString((PyObject*)self, "_compile_for_args"); - - if (method) { - result = PyObject_Call(method, args, kws); - if (result) { - Py_DECREF(result); - result = PyUFunc_Type.tp_call((PyObject *)self->ufunc, args, - kws); - } - } - Py_XDECREF(method); - } - return result; -} - -static Py_ssize_t -_get_nin(PyObject * py_func_obj) -{ - int result = -1; - PyObject *inspect=NULL, *getargspec=NULL, *argspec=NULL, *args=NULL; - - inspect = PyImport_ImportModule("inspect"); - if (!inspect) goto _get_nin_cleanup; - getargspec = PyObject_GetAttrString(inspect, "getargspec"); - if (!getargspec) goto _get_nin_cleanup; - argspec = PyObject_CallFunctionObjArgs(getargspec, py_func_obj, NULL); - if (!argspec) goto _get_nin_cleanup; - args = PyObject_GetAttrString(argspec, "args"); - if (!args) goto _get_nin_cleanup; - result = PyList_Size(args); - - _get_nin_cleanup: - Py_XDECREF(args); - Py_XDECREF(argspec); - Py_XDECREF(getargspec); - Py_XDECREF(inspect); - return result; -} - -static int -dufunc_init(PyDUFuncObject *self, PyObject *args, PyObject *kws) -{ - PyObject *dispatcher=NULL, *keepalive=NULL, *py_func_obj=NULL, *tmp; - PyUFuncObject *ufunc=NULL; - int identity=PyUFunc_None; - int nin=-1, nout=1; - char *name=NULL, *doc=NULL; - - static char * kwlist[] = {"dispatcher", "identity", "_keepalive", "nin", - 
"nout", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kws, "O|iO!nn", kwlist, - &dispatcher, &identity, - &PyList_Type, &keepalive, &nin, &nout)) { - return -1; - } - - py_func_obj = PyObject_GetAttrString(dispatcher, "py_func"); - if (!py_func_obj) { - return -1; - } - - if (nin < 0) { - nin = (int)_get_nin(py_func_obj); - if ((nin < 0) || (PyErr_Occurred())) { - Py_XDECREF(py_func_obj); - return -1; - } - } - - /* Construct the UFunc. */ - tmp = PyObject_GetAttrString(py_func_obj, "__name__"); - if (tmp) { - name = PyString_AsString(tmp); - } - Py_XDECREF(tmp); - tmp = PyObject_GetAttrString(py_func_obj, "__doc__"); - if (tmp && (tmp != Py_None)) { - doc = PyString_AsString(tmp); - } - Py_XDECREF(tmp); - tmp = NULL; - Py_XDECREF(py_func_obj); - py_func_obj = NULL; - if (!name) { - return -1; - } - ufunc = (PyUFuncObject *)PyUFunc_FromFuncAndData(NULL, NULL, NULL, 0, - nin, nout, identity, - name, doc, 0); - if (!ufunc) { - return -1; - } - - /* Construct a keepalive list if none was given. */ - if (!keepalive) { - keepalive = PyList_New(0); - if (!keepalive) { - Py_XDECREF(ufunc); - return -1; - } - } else { - Py_INCREF(keepalive); - } - - tmp = self->dispatcher; - Py_INCREF(dispatcher); - self->dispatcher = dispatcher; - Py_XDECREF(tmp); - - tmp = (PyObject*)self->ufunc; - self->ufunc = ufunc; - Py_XDECREF(tmp); - - tmp = self->keepalive; - /* Already incref'ed, either by PyList_New(), or else clause, both above. 
*/ - self->keepalive = keepalive; - Py_XDECREF(tmp); - - self->frozen = 0; - - return 0; -} - -static PyMemberDef dufunc_members[] = { - {"_dispatcher", T_OBJECT_EX, offsetof(PyDUFuncObject, dispatcher), 0, - "Dispatcher object for the core Python function."}, - {"ufunc", T_OBJECT_EX, offsetof(PyDUFuncObject, ufunc), 0, - "Numpy Ufunc for the dynamic ufunc."}, - {"_keepalive", T_OBJECT_EX, offsetof(PyDUFuncObject, keepalive), 0, - "List of objects to keep alive during life of dufunc."}, - {NULL} -}; - -/* ____________________________________________________________ - * Shims to expose ufunc methods. - */ - -static struct _ufunc_dispatch { - PyCFunctionWithKeywords ufunc_reduce; - PyCFunctionWithKeywords ufunc_accumulate; - PyCFunctionWithKeywords ufunc_reduceat; - PyCFunctionWithKeywords ufunc_outer; -#if NPY_API_VERSION >= 0x00000008 - PyCFunction ufunc_at; -#endif -} ufunc_dispatch; - -static int -init_ufunc_dispatch(void) -{ - int result = 0; - PyMethodDef * crnt = PyUFunc_Type.tp_methods; - const char * crnt_name = NULL; - for (; crnt->ml_name != NULL; crnt++) { - crnt_name = crnt->ml_name; - switch(crnt_name[0]) { - case 'a': - if (strncmp(crnt_name, "accumulate", 11) == 0) { - ufunc_dispatch.ufunc_accumulate = - (PyCFunctionWithKeywords)crnt->ml_meth; -#if NPY_API_VERSION >= 0x00000008 - } else if (strncmp(crnt_name, "at", 3) == 0) { - ufunc_dispatch.ufunc_at = crnt->ml_meth; -#endif - } else { - result = -1; - } - break; - case 'o': - if (strncmp(crnt_name, "outer", 6) == 0) { - ufunc_dispatch.ufunc_outer = - (PyCFunctionWithKeywords)crnt->ml_meth; - } else { - result = -1; - } - break; - case 'r': - if (strncmp(crnt_name, "reduce", 7) == 0) { - ufunc_dispatch.ufunc_reduce = - (PyCFunctionWithKeywords)crnt->ml_meth; - } else if (strncmp(crnt_name, "reduceat", 9) == 0) { - ufunc_dispatch.ufunc_reduceat = - (PyCFunctionWithKeywords)crnt->ml_meth; - } else { - result = -1; - } - break; - default: - result = -1; /* Unknown method */ - } - if (result < 0) break; 
- } - if (result == 0) { - /* Sanity check. */ - result = ((ufunc_dispatch.ufunc_reduce != NULL) - && (ufunc_dispatch.ufunc_accumulate != NULL) - && (ufunc_dispatch.ufunc_reduceat != NULL) - && (ufunc_dispatch.ufunc_outer != NULL) -#if NPY_API_VERSION >= 0x00000008 - && (ufunc_dispatch.ufunc_at != NULL) -#endif - ); - } - return result; -} - -static PyObject * -dufunc_reduce(PyDUFuncObject * self, PyObject * args, PyObject *kws) -{ - return ufunc_dispatch.ufunc_reduce((PyObject*)self->ufunc, args, kws); -} - -static PyObject * -dufunc_accumulate(PyDUFuncObject * self, PyObject * args, PyObject *kws) -{ - return ufunc_dispatch.ufunc_accumulate((PyObject*)self->ufunc, args, kws); -} - -static PyObject * -dufunc_reduceat(PyDUFuncObject * self, PyObject * args, PyObject *kws) -{ - return ufunc_dispatch.ufunc_reduceat((PyObject*)self->ufunc, args, kws); -} - -static PyObject * -dufunc_outer(PyDUFuncObject * self, PyObject * args, PyObject *kws) -{ - return ufunc_dispatch.ufunc_outer((PyObject*)self->ufunc, args, kws); -} - -#if NPY_API_VERSION >= 0x00000008 -static PyObject * -dufunc_at(PyDUFuncObject * self, PyObject * args) -{ - return ufunc_dispatch.ufunc_at((PyObject*)self->ufunc, args); -} -#endif - -static PyObject * -dufunc__compile_for_args(PyDUFuncObject * self, PyObject * args, - PyObject * kws) -{ - PyErr_SetString(PyExc_NotImplementedError, - "Abstract method _DUFunc._compile_for_args() called!"); - return NULL; -} - -static int * -_build_arg_types_array(PyObject * type_list, Py_ssize_t nargs) -{ - int *arg_types_array=NULL; - Py_ssize_t idx, arg_types_size = PyList_Size(type_list); - - if (arg_types_size != nargs) { - PyErr_SetString( - PyExc_ValueError, - "argument type list size does not equal ufunc argument count"); - return NULL; - } - arg_types_array = PyArray_malloc(sizeof(int) * nargs); - if (!arg_types_array) { - PyErr_NoMemory(); - return NULL; - } - for (idx = 0; idx < nargs; idx++) { - arg_types_array[idx] = 
(int)PyLong_AsLong(PyList_GET_ITEM(type_list, - idx)); - } - if (PyErr_Occurred()) { - PyArray_free(arg_types_array); - arg_types_array = NULL; - } - return arg_types_array; -} - -static PyObject * -dufunc__add_loop(PyDUFuncObject * self, PyObject * args) -{ - PyUFuncObject * ufunc=self->ufunc; - void *loop_ptr=NULL, *data_ptr=NULL; - int idx=-1, usertype=NPY_VOID; - int *arg_types_arr=NULL; - PyObject *arg_types=NULL, *loop_obj=NULL, *data_obj=NULL; - PyUFuncGenericFunction old_func=NULL; - - if (self->frozen) { - PyErr_SetString(PyExc_ValueError, - "_DUFunc._add_loop() called for frozen dufunc"); - return NULL; - } - - if (!PyArg_ParseTuple(args, "O!O!|O!", - &PyLong_Type, &loop_obj, &PyList_Type, &arg_types, - &PyLong_Type, &data_obj)) { - return NULL; - } - - loop_ptr = PyLong_AsVoidPtr(loop_obj); - if (PyErr_Occurred()) { - return NULL; - } - if (data_obj) { - data_ptr = PyLong_AsVoidPtr(data_obj); - if (PyErr_Occurred()) { - return NULL; - } - } - - arg_types_arr = _build_arg_types_array(arg_types, (Py_ssize_t)ufunc->nargs); - if (!arg_types_arr) goto _dufunc__add_loop_fail; - - /* Check to see if any of the input types are user defined dtypes. - If they are, we should use PyUFunc_RegisterLoopForType() since - dispatch on a user defined dtype uses a Python dictionary - keyed by usertype (and not the functions array). 
- - For more information, see how the usertype argument is used in - PyUFunc_RegisterLoopForType(), defined by Numpy at - .../numpy/core/src/umath/ufunc_object.c - */ - for (idx = 0; idx < ufunc->nargs; idx++) { - if (arg_types_arr[idx] >= NPY_USERDEF) { - usertype = arg_types_arr[idx]; - } - } - - if (usertype != NPY_VOID) { - if (PyUFunc_RegisterLoopForType(ufunc, usertype, - (PyUFuncGenericFunction)loop_ptr, - arg_types_arr, data_ptr) < 0) { - goto _dufunc__add_loop_fail; - } - } else if (PyUFunc_ReplaceLoopBySignature(ufunc, - (PyUFuncGenericFunction)loop_ptr, - arg_types_arr, &old_func) == 0) { - /* TODO: Consider freeing any memory held by the old loop (somehow) */ - for (idx = 0; idx < ufunc->ntypes; idx++) { - if (ufunc->functions[idx] == (PyUFuncGenericFunction)loop_ptr) { - ufunc->data[idx] = data_ptr; - break; - } - } - } else { - /* The following is an attempt to loosely follow the allocation - code in Numpy. See ufunc_frompyfunc() in - .../numpy/core/src/umath/umathmodule.c. 
- - The primary goal is to allocate a single chunk of memory to - hold the functions, data, and types loop arrays: - - ptr == |<- functions ->|<- data ->|<- types ->| - - */ - int ntypes=ufunc->ntypes + 1; - PyUFuncGenericFunction *functions=NULL; - void **data=NULL; - char *types=NULL; - void *newptr=NULL, *oldptr=NULL; - size_t functions_size=sizeof(PyUFuncGenericFunction) * ntypes; - size_t data_size=sizeof(void *) * ntypes; - size_t type_ofs=sizeof(char) * ufunc->ntypes * ufunc->nargs; - size_t newsize=(functions_size + data_size + - (sizeof(char) * ntypes * ufunc->nargs)); - - oldptr = ufunc->ptr; - newptr = PyArray_malloc(newsize); - if (!newptr) { - PyErr_NoMemory(); - goto _dufunc__add_loop_fail; - } - functions = (PyUFuncGenericFunction*)newptr; - memcpy(functions, ufunc->functions, - sizeof(PyUFuncGenericFunction) * ufunc->ntypes); - functions[ntypes - 1] = (PyUFuncGenericFunction)loop_ptr; - data = (void **)((char *)functions + functions_size); - memcpy(data, ufunc->data, sizeof(void *) * ufunc->ntypes); - data[ntypes - 1] = data_ptr; - types = (char *)data + data_size; - memcpy(types, ufunc->types, sizeof(char) * ufunc->ntypes * - ufunc->nargs); - for (idx = 0; idx < ufunc->nargs; idx++) { - types[idx + type_ofs] = (char)arg_types_arr[idx]; - } - - ufunc->ntypes = ntypes; - ufunc->functions = functions; - ufunc->types = types; - ufunc->data = data; - ufunc->ptr = newptr; - PyArray_free(oldptr); - } - - PyArray_free(arg_types_arr); - Py_INCREF(Py_None); - return Py_None; - - _dufunc__add_loop_fail: - PyArray_free(arg_types_arr); - return NULL; -} - -static struct PyMethodDef dufunc_methods[] = { - {"reduce", - (PyCFunction)dufunc_reduce, - METH_VARARGS | METH_KEYWORDS, NULL }, - {"accumulate", - (PyCFunction)dufunc_accumulate, - METH_VARARGS | METH_KEYWORDS, NULL }, - {"reduceat", - (PyCFunction)dufunc_reduceat, - METH_VARARGS | METH_KEYWORDS, NULL }, - {"outer", - (PyCFunction)dufunc_outer, - METH_VARARGS | METH_KEYWORDS, NULL}, -#if NPY_API_VERSION >= 
0x00000008 - {"at", - (PyCFunction)dufunc_at, - METH_VARARGS, NULL}, -#endif - {"_compile_for_args", - (PyCFunction)dufunc__compile_for_args, - METH_VARARGS | METH_KEYWORDS, - "Abstract method: subclasses should overload _compile_for_args() to compile the ufunc at the given arguments' types."}, - {"_add_loop", - (PyCFunction)dufunc__add_loop, - METH_VARARGS, - NULL}, - {NULL, NULL, 0, NULL} /* sentinel */ -}; - -static PyObject * -dufunc_getfrozen(PyDUFuncObject * self, void * closure) -{ - PyObject *result=(self->frozen) ? Py_True : Py_False; - Py_INCREF(result); - return result; -} - -static int -dufunc_setfrozen(PyDUFuncObject * self, PyObject * value, void * closure) -{ - int result=0; - if (PyObject_IsTrue(value)) { - self->frozen = 1; - } else { - PyErr_SetString(PyExc_ValueError, - "cannot clear the _DUFunc.frozen flag"); - result = -1; - } - return result; -} - -static PyGetSetDef dufunc_getsets[] = { - {"_frozen", - (getter)dufunc_getfrozen, (setter)dufunc_setfrozen, - "flag indicating call-time compilation has been disabled", - NULL}, - {NULL} /* Sentinel */ -}; - -PyTypeObject PyDUFunc_Type = { -#if PY_MAJOR_VERSION >= 3 - PyVarObject_HEAD_INIT(NULL, 0) -#else - PyObject_HEAD_INIT(NULL) - 0, /* ob_size */ -#endif - "numba._DUFunc", /* tp_name*/ - sizeof(PyDUFuncObject), /* tp_basicsize*/ - 0, /* tp_itemsize */ - /* methods */ - (destructor) dufunc_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare/tp_reserved */ - (reprfunc) dufunc_repr, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - (ternaryfunc) dufunc_call, /* tp_call */ - (reprfunc) dufunc_repr, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ - 0, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* 
tp_iternext */ - dufunc_methods, /* tp_methods */ - dufunc_members, /* tp_members */ - dufunc_getsets, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc) dufunc_init, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ -}; - -/* ______________________________________________________________________ - * Module initialization boilerplate follows. - */ - -static PyMethodDef ext_methods[] = { - {"fromfunc", (PyCFunction) ufunc_fromfunc, METH_VARARGS, NULL}, - { NULL } -}; - -/* Don't remove this marker, it is used for inserting licensing code */ -/*MARK1*/ - -MOD_INIT(_internal) -{ - PyObject *m; - - /* Don't remove this marker, it is used for inserting licensing code */ - /*MARK2*/ - - import_array(); - import_umath(); - - MOD_DEF(m, "_internal", "No docs", - ext_methods) - - if (m == NULL) - return MOD_ERROR_VAL; - - if (PyType_Ready(&PyUFuncCleaner_Type) < 0) - return MOD_ERROR_VAL; - - PyDUFunc_Type.tp_new = PyType_GenericNew; - if (init_ufunc_dispatch() <= 0) - return MOD_ERROR_VAL; - if (PyType_Ready(&PyDUFunc_Type) < 0) - return MOD_ERROR_VAL; - Py_INCREF(&PyDUFunc_Type); - if (PyModule_AddObject(m, "_DUFunc", (PyObject *)&PyDUFunc_Type) < 0) - return MOD_ERROR_VAL; - - if (PyModule_AddIntMacro(m, PyUFunc_One) - || PyModule_AddIntMacro(m, PyUFunc_Zero) - || PyModule_AddIntMacro(m, PyUFunc_None) -#if NPY_API_VERSION >= 0x00000007 - || PyModule_AddIntMacro(m, PyUFunc_ReorderableNone) -#endif - ) - return MOD_ERROR_VAL; - - return MOD_SUCCESS_VAL(m); -} - - -#include "_ufunc.c" diff --git a/numba/numba/npyufunc/_internal.h b/numba/numba/npyufunc/_internal.h deleted file mode 100644 index 3527d1554..000000000 --- a/numba/numba/npyufunc/_internal.h +++ /dev/null @@ -1,27 +0,0 @@ -/* Python 
include */ - -#ifndef NUMBA_UFUNC_INTERNAL_H_ -#define NUMBA_UFUNC_INTERNAL_H_ - -#include "../_pymodule.h" -#include - -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#include "numpy/ndarrayobject.h" -#include "numpy/ufuncobject.h" - -extern PyObject *ufunc_fromfunc(PyObject *NPY_UNUSED(dummy), PyObject *args); - -int PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, - PyObject *args, PyObject *kwds, - PyArrayObject **op); - -#define APPEND_(X, Y) X #Y -#define APPEND(X, Y) APPEND_(X, Y) -#define SENTRY_VALID_LONG(X) if( (X) == -1 ){ \ - PyErr_SetString(PyExc_RuntimeError, \ - APPEND("PyLong_AsLong overflow at ", __LINE__)); \ - return NULL; \ -} - -#endif /* NUMBA_UFUNC_INTERNAL_H_ */ \ No newline at end of file diff --git a/numba/numba/npyufunc/_ufunc.c b/numba/numba/npyufunc/_ufunc.c deleted file mode 100644 index fac8b58cb..000000000 --- a/numba/numba/npyufunc/_ufunc.c +++ /dev/null @@ -1,218 +0,0 @@ - -/* Included by _internal.c */ -#include "_internal.h" - -static int -get_string(PyObject *obj, char **s, const char *type_error_message) -{ - *s = NULL; - if (!PyString_Check(obj) && obj != Py_None) { - PyErr_SetString(PyExc_TypeError, type_error_message); - return -1; - } - if (obj != Py_None) { - *s = PyString_AsString(obj); - if (!*s) - return -1; - } - return 0; -} - - -PyObject * -ufunc_fromfunc(PyObject *NPY_UNUSED(dummy), PyObject *args) -{ - int nin, nout; - int nfuncs, ntypes, ndata; - PyObject *func_list; - PyObject *type_list; - PyObject *data_list; - PyObject *func_obj; - PyObject *type_obj; - PyObject *data_obj; - PyObject *object; /* object to hold on to while ufunc is alive */ - PyObject *pyname, *pydoc; - char *name = NULL, *doc = NULL; - char *signature = NULL; - int identity; - - int i, j; - int custom_dtype = 0; - PyUFuncGenericFunction *funcs; - int *types; - void **data; - PyUFuncObject *ufunc; - - if (!PyArg_ParseTuple(args, "OOO!O!iiOOi|s", - &pyname, &pydoc, - &PyList_Type, &func_list, - &PyList_Type, &type_list, - &nin, &nout, 
&data_list, - &object, &identity, &signature)) { - return NULL; - } - if (get_string(pyname, &name, "name should be str or None")) - return NULL; - if (get_string(pydoc, &doc, "doc should be str or None")) - return NULL; - /* Ensure the pointers to C strings stay alive until the ufunc dies. */ - object = PyTuple_Pack(3, object, pyname, pydoc); - if (!object) - return NULL; - - nfuncs = PyList_Size(func_list); - - ntypes = PyList_Size(type_list); - if (ntypes != nfuncs) { - PyErr_SetString(PyExc_TypeError, "length of types list must be same as length of function pointer list"); - return NULL; - } - - ndata = PyList_Size(data_list); - if (ndata != nfuncs) { - PyErr_SetString(PyExc_TypeError, "length of data pointer list must be same as length of function pointer list"); - return NULL; - } - - funcs = PyArray_malloc(nfuncs * sizeof(PyUFuncGenericFunction)); - if (funcs == NULL) { - return NULL; - } - - /* build function pointer array */ - for (i = 0; i < nfuncs; i++) { - func_obj = PyList_GetItem(func_list, i); - /* Function pointers are passed in as long objects. - Is there a better way to do this? 
*/ - if (PyLong_Check(func_obj)) { - funcs[i] = (PyUFuncGenericFunction)PyLong_AsVoidPtr(func_obj); - } - else { - PyErr_SetString(PyExc_TypeError, "function pointer must be long object, or None"); - return NULL; - } - } - - types = PyArray_malloc(nfuncs * (nin+nout) * sizeof(int)); - if (types == NULL) { - return NULL; - } - - /* build function signatures array */ - for (i = 0; i < nfuncs; i++) { - type_obj = PyList_GetItem(type_list, i); - if (!type_obj) - return NULL; - - for (j = 0; j < (nin+nout); j++) { - int dtype_num; - PyObject *dtype_num_obj = PyList_GetItem(type_obj, j); - if (!dtype_num_obj) - return NULL; - - SENTRY_VALID_LONG( - types[i*(nin+nout) + j] = PyLong_AsLong(dtype_num_obj) - ); - - dtype_num = PyLong_AsLong(PyList_GetItem(type_obj, j)); - - SENTRY_VALID_LONG(dtype_num); - - if (dtype_num >= NPY_USERDEF) { - custom_dtype = dtype_num; - } - } - } - - data = PyArray_malloc(nfuncs * sizeof(void *)); - if (data == NULL) { - return NULL; - } - - /* build function data pointers array */ - for (i = 0; i < nfuncs; i++) { - if (PyList_Check(data_list)) { - data_obj = PyList_GetItem(data_list, i); - if (PyLong_Check(data_obj)) { - data[i] = PyLong_AsVoidPtr(data_obj); - } - else if (data_obj == Py_None) { - data[i] = NULL; - } - else { - PyErr_SetString(PyExc_TypeError, "data pointer must be long object, or None"); - return NULL; - } - } - else if (data_list == Py_None) { - data[i] = NULL; - } - else { - PyErr_SetString(PyExc_TypeError, "data pointers argument must be a list of void pointers, or None"); - return NULL; - } - } - - if (!custom_dtype) { - char *char_types = PyArray_malloc(nfuncs * (nin+nout) * sizeof(char)); - for (i = 0; i < nfuncs; i++) { - for (j = 0; j < (nin+nout); j++) { - char_types[i*(nin+nout) + j] = (char)types[i*(nin+nout) + j]; - } - } - PyArray_free(types); - ufunc = (PyUFuncObject *) PyUFunc_FromFuncAndDataAndSignature( - (PyUFuncGenericFunction*) funcs, data, (char*) char_types, - nfuncs, nin, nout, - identity, - name, doc, 
- 0 /* check_return */, signature); - if (!ufunc) { - PyArray_free(funcs); - PyArray_free(data); - Py_DECREF(object); - return NULL; - } - /* XXX funcs, char_types and data won't be free'ed when the ufunc dies */ - } - else { - ufunc = (PyUFuncObject *) PyUFunc_FromFuncAndDataAndSignature( - 0, 0, 0, 0, - nin, - nout, - identity, - name, doc, - 0 /* check_return */, signature); - if (!ufunc) { - PyArray_free(funcs); - PyArray_free(data); - PyArray_free(types); - Py_DECREF(object); - return NULL; - } - - PyUFunc_RegisterLoopForType(ufunc, - custom_dtype, - funcs[0], - types, - 0); - PyArray_free(funcs); - PyArray_free(types); - PyArray_free(data); - funcs = NULL; - data = NULL; - } - - /* Create the sentinel object to clean up dynamically-allocated fields - when the ufunc is destroyed. */ - ufunc->obj = cleaner_new(ufunc, object); - Py_DECREF(object); - if (ufunc->obj == NULL) { - PyArray_free(funcs); - PyArray_free(data); - Py_DECREF(ufunc); - return NULL; - } - - return (PyObject *) ufunc; -} diff --git a/numba/numba/npyufunc/array_exprs.py b/numba/numba/npyufunc/array_exprs.py deleted file mode 100644 index 1ee975841..000000000 --- a/numba/numba/npyufunc/array_exprs.py +++ /dev/null @@ -1,405 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import ast -from collections import defaultdict, OrderedDict -import contextlib -import sys - -import numpy as np - -from .. import compiler, ir, types, rewrites, six -from ..typing import npydecl -from .dufunc import DUFunc - - -def _is_ufunc(func): - return isinstance(func, (np.ufunc, DUFunc)) - - -@rewrites.register_rewrite('after-inference') -class RewriteArrayExprs(rewrites.Rewrite): - '''The RewriteArrayExprs class is responsible for finding array - expressions in Numba intermediate representation code, and - rewriting those expressions to a single operation that will expand - into something similar to a ufunc call. 
- ''' - def __init__(self, pipeline, *args, **kws): - super(RewriteArrayExprs, self).__init__(pipeline, *args, **kws) - # Install a lowering hook if we are using this rewrite. - special_ops = self.pipeline.targetctx.special_ops - if 'arrayexpr' not in special_ops: - special_ops['arrayexpr'] = _lower_array_expr - - def match(self, func_ir, block, typemap, calltypes): - """ - Using typing and a basic block, search the basic block for array - expressions. - Return True when one or more matches were found, False otherwise. - """ - # We can trivially reject everything if there are no - # calls in the type results. - if len(calltypes) == 0: - return False - - self.crnt_block = block - self.typemap = typemap - # { variable name: IR assignment (of a function call or operator) } - self.array_assigns = OrderedDict() - # { variable name: IR assignment (of a constant) } - self.const_assigns = {} - - assignments = block.find_insts(ir.Assign) - for instr in assignments: - target_name = instr.target.name - expr = instr.value - # Does it assign an expression to an array variable? - if (isinstance(expr, ir.Expr) and - isinstance(typemap.get(target_name, None), types.Array)): - self._match_array_expr(instr, expr, target_name) - elif isinstance(expr, ir.Const): - # Track constants since we might need them for an - # array expression. - self.const_assigns[target_name] = expr - - return len(self.array_assigns) > 0 - - def _match_array_expr(self, instr, expr, target_name): - """ - Find whether the given assignment (*instr*) of an expression (*expr*) - to variable *target_name* is an array expression. - """ - # We've matched a subexpression assignment to an - # array variable. Now see if the expression is an - # array expression. - expr_op = expr.op - array_assigns = self.array_assigns - - if ((expr_op in ('unary', 'binop')) and ( - expr.fn in npydecl.supported_array_operators)): - # It is an array operator that maps to a ufunc. 
- array_assigns[target_name] = instr - - elif ((expr_op == 'call') and (expr.func.name in self.typemap)): - # It could be a match for a known ufunc call. - func_type = self.typemap[expr.func.name] - if isinstance(func_type, types.Function): - func_key = func_type.typing_key - if _is_ufunc(func_key): - # If so, check whether an explicit output is passed. - if not self._has_explicit_output(expr, func_key): - # If not, match it as a (sub)expression. - array_assigns[target_name] = instr - - def _has_explicit_output(self, expr, func): - """ - Return whether the *expr* call to *func* (a ufunc) features an - explicit output argument. - """ - nargs = len(expr.args) + len(expr.kws) - if expr.vararg is not None: - # XXX *args unsupported here, assume there may be an explicit - # output - return True - return nargs > func.nin - - def _get_array_operator(self, ir_expr): - ir_op = ir_expr.op - if ir_op in ('unary', 'binop'): - return ir_expr.fn - elif ir_op == 'call': - return self.typemap[ir_expr.func.name].typing_key - raise NotImplementedError( - "Don't know how to find the operator for '{0}' expressions.".format( - ir_op)) - - def _get_operands(self, ir_expr): - '''Given a Numba IR expression, return the operands to the expression - in order they appear in the expression. - ''' - ir_op = ir_expr.op - if ir_op == 'binop': - return ir_expr.lhs, ir_expr.rhs - elif ir_op == 'unary': - return ir_expr.list_vars() - elif ir_op == 'call': - return ir_expr.args - raise NotImplementedError( - "Don't know how to find the operands for '{0}' expressions.".format( - ir_op)) - - def _translate_expr(self, ir_expr): - '''Translate the given expression from Numba IR to an array expression - tree. 
- ''' - ir_op = ir_expr.op - if ir_op == 'arrayexpr': - return ir_expr.expr - operands_or_args = [self.const_assigns.get(op_var.name, op_var) - for op_var in self._get_operands(ir_expr)] - return self._get_array_operator(ir_expr), operands_or_args - - def _handle_matches(self): - '''Iterate over the matches, trying to find which instructions should - be rewritten, deleted, or moved. - ''' - replace_map = {} - dead_vars = set() - used_vars = defaultdict(int) - for instr in self.array_assigns.values(): - expr = instr.value - arr_inps = [] - arr_expr = self._get_array_operator(expr), arr_inps - new_expr = ir.Expr(op='arrayexpr', - loc=expr.loc, - expr=arr_expr, - ty=self.typemap[instr.target.name]) - new_instr = ir.Assign(new_expr, instr.target, instr.loc) - replace_map[instr] = new_instr - self.array_assigns[instr.target.name] = new_instr - for operand in self._get_operands(expr): - operand_name = operand.name - if operand_name in self.array_assigns: - child_assign = self.array_assigns[operand_name] - child_expr = child_assign.value - child_operands = child_expr.list_vars() - for operand in child_operands: - used_vars[operand.name] += 1 - arr_inps.append(self._translate_expr(child_expr)) - if child_assign.target.is_temp: - dead_vars.add(child_assign.target.name) - replace_map[child_assign] = None - elif operand_name in self.const_assigns: - arr_inps.append(self.const_assigns[operand_name]) - else: - used_vars[operand.name] += 1 - arr_inps.append(operand) - return replace_map, dead_vars, used_vars - - def _get_final_replacement(self, replacement_map, instr): - '''Find the final replacement instruction for a given initial - instruction by chasing instructions in a map from instructions - to replacement instructions. 
- ''' - replacement = replacement_map[instr] - while replacement in replacement_map: - replacement = replacement_map[replacement] - return replacement - - def apply(self): - '''When we've found array expressions in a basic block, rewrite that - block, returning a new, transformed block. - ''' - # Part 1: Figure out what instructions should be rewritten - # based on the matches found. - replace_map, dead_vars, used_vars = self._handle_matches() - # Part 2: Using the information above, rewrite the target - # basic block. - result = self.crnt_block.copy() - result.clear() - delete_map = {} - for instr in self.crnt_block.body: - if isinstance(instr, ir.Assign): - if instr in replace_map: - replacement = self._get_final_replacement( - replace_map, instr) - if replacement: - result.append(replacement) - for var in replacement.value.list_vars(): - var_name = var.name - if var_name in delete_map: - result.append(delete_map.pop(var_name)) - if used_vars[var_name] > 0: - used_vars[var_name] -= 1 - - else: - result.append(instr) - elif isinstance(instr, ir.Del): - instr_value = instr.value - if used_vars[instr_value] > 0: - used_vars[instr_value] -= 1 - delete_map[instr_value] = instr - elif instr_value not in dead_vars: - result.append(instr) - else: - result.append(instr) - if delete_map: - for instr in delete_map.values(): - result.insert_before_terminator(instr) - return result - - -_unaryops = { - '+' : ast.UAdd, - '-' : ast.USub, - '~' : ast.Invert, -} - -_binops = { - '+' : ast.Add, - '-' : ast.Sub, - '*' : ast.Mult, - '/' : ast.Div, - '/?' : ast.Div, - '%' : ast.Mod, - '|' : ast.BitOr, - '>>' : ast.RShift, - '^' : ast.BitXor, - '<<' : ast.LShift, - '&' : ast.BitAnd, - '**' : ast.Pow, - '//' : ast.FloorDiv, -} - -_cmpops = { - '==' : ast.Eq, - '!=' : ast.NotEq, - '<' : ast.Lt, - '<=' : ast.LtE, - '>' : ast.Gt, - '>=' : ast.GtE, -} - - -def _arr_expr_to_ast(expr): - '''Build a Python expression AST from an array expression built by - RewriteArrayExprs. 
- ''' - if isinstance(expr, tuple): - op, arr_expr_args = expr - ast_args = [] - env = {} - for arg in arr_expr_args: - ast_arg, child_env = _arr_expr_to_ast(arg) - ast_args.append(ast_arg) - env.update(child_env) - if op in npydecl.supported_array_operators: - if len(ast_args) == 2: - if op in _binops: - return ast.BinOp( - ast_args[0], _binops[op](), ast_args[1]), env - if op in _cmpops: - return ast.Compare( - ast_args[0], [_cmpops[op]()], [ast_args[1]]), env - else: - assert op in _unaryops - return ast.UnaryOp(_unaryops[op](), ast_args[0]), env - elif _is_ufunc(op): - fn_name = "__ufunc_or_dufunc_{0}".format( - hex(hash(op)).replace("-", "_")) - fn_ast_name = ast.Name(fn_name, ast.Load()) - env[fn_name] = op # Stash the ufunc or DUFunc in the environment - if sys.version_info >= (3, 5): - ast_call = ast.Call(fn_ast_name, ast_args, []) - else: - ast_call = ast.Call(fn_ast_name, ast_args, [], None, None) - return ast_call, env - elif isinstance(expr, ir.Var): - return ast.Name(expr.name, ast.Load(), - lineno=expr.loc.line, - col_offset=expr.loc.col if expr.loc.col else 0), {} - elif isinstance(expr, ir.Const): - return ast.Num(expr.value), {} - raise NotImplementedError( - "Don't know how to translate array expression '%r'" % (expr,)) - - -@contextlib.contextmanager -def _legalize_parameter_names(var_list): - """ - Legalize names in the variable list for use as a Python function's - parameter names. 
- """ - var_map = OrderedDict() - for var in var_list: - old_name = var.name - new_name = old_name.replace("$", "_").replace(".", "_") - # Caller should ensure the names are unique - assert new_name not in var_map - var_map[new_name] = var, old_name - var.name = new_name - param_names = list(var_map) - try: - yield param_names - finally: - # Make sure the old names are restored, to avoid confusing - # other parts of Numba (see issue #1466) - for var, old_name in var_map.values(): - var.name = old_name - - -def _lower_array_expr(lowerer, expr): - '''Lower an array expression built by RewriteArrayExprs. - ''' - expr_name = "__numba_array_expr_%s" % (hex(hash(expr)).replace("-", "_")) - expr_filename = expr.loc.filename - expr_var_list = expr.list_vars() - # The expression may use a given variable several times, but we - # should only create one parameter for it. - expr_var_unique = sorted(set(expr_var_list), key=lambda var: var.name) - - # Arguments are the names external to the new closure - expr_args = [var.name for var in expr_var_unique] - - # 1. Create an AST tree from the array expression. - - with _legalize_parameter_names(expr_var_unique) as expr_params: - - if hasattr(ast, "arg"): - # Should be Python 3.x - ast_args = [ast.arg(param_name, None) - for param_name in expr_params] - else: - # Should be Python 2.x - ast_args = [ast.Name(param_name, ast.Param()) - for param_name in expr_params] - # Parse a stub function to ensure the AST is populated with - # reasonable defaults for the Python version. - ast_module = ast.parse('def {0}(): return'.format(expr_name), - expr_filename, 'exec') - assert hasattr(ast_module, 'body') and len(ast_module.body) == 1 - ast_fn = ast_module.body[0] - ast_fn.args.args = ast_args - ast_fn.body[0].value, namespace = _arr_expr_to_ast(expr.expr) - ast.fix_missing_locations(ast_module) - - # 2. Compile the AST module and extract the Python function. 
- - code_obj = compile(ast_module, expr_filename, 'exec') - six.exec_(code_obj, namespace) - impl = namespace[expr_name] - - # 3. Now compile a ufunc using the Python function as kernel. - - context = lowerer.context - builder = lowerer.builder - outer_sig = expr.ty(*(lowerer.typeof(name) for name in expr_args)) - inner_sig_args = [] - for argty in outer_sig.args: - if isinstance(argty, types.Array): - inner_sig_args.append(argty.dtype) - else: - inner_sig_args.append(argty) - inner_sig = outer_sig.return_type.dtype(*inner_sig_args) - - # Follow the Numpy error model. Note this also allows e.g. vectorizing - # division (issue #1223). - flags = compiler.Flags() - flags.set('error_model', 'numpy') - cres = context.compile_subroutine_no_cache(builder, impl, inner_sig, flags=flags) - - # Create kernel subclass calling our native function - from ..targets import npyimpl - - class ExprKernel(npyimpl._Kernel): - def generate(self, *args): - arg_zip = zip(args, self.outer_sig.args, inner_sig.args) - cast_args = [self.cast(val, inty, outty) - for val, inty, outty in arg_zip] - result = self.context.call_internal( - builder, cres.fndesc, inner_sig, cast_args) - return self.cast(result, inner_sig.return_type, - self.outer_sig.return_type) - - args = [lowerer.loadvar(name) for name in expr_args] - return npyimpl.numpy_ufunc_kernel( - context, builder, outer_sig, args, ExprKernel, explicit_output=False) diff --git a/numba/numba/npyufunc/decorators.py b/numba/numba/npyufunc/decorators.py deleted file mode 100644 index 0d2e45e3b..000000000 --- a/numba/numba/npyufunc/decorators.py +++ /dev/null @@ -1,182 +0,0 @@ -from __future__ import print_function, division, absolute_import -import inspect - -from . 
import _internal, dufunc -from .ufuncbuilder import GUFuncBuilder -from .parallel import ParallelUFuncBuilder, ParallelGUFuncBuilder - -from numba.targets.registry import TargetRegistry - - -class _BaseVectorize(object): - - @classmethod - def get_identity(cls, kwargs): - return kwargs.pop('identity', None) - - @classmethod - def get_cache(cls, kwargs): - return kwargs.pop('cache', False) - - @classmethod - def get_target_implementation(cls, kwargs): - target = kwargs.pop('target', 'cpu') - try: - return cls.target_registry[target] - except KeyError: - raise ValueError("Unsupported target: %s" % target) - - -class Vectorize(_BaseVectorize): - target_registry = TargetRegistry({'cpu': dufunc.DUFunc, - 'parallel': ParallelUFuncBuilder,}) - - def __new__(cls, func, **kws): - identity = cls.get_identity(kws) - cache = cls.get_cache(kws) - imp = cls.get_target_implementation(kws) - return imp(func, identity=identity, cache=cache, targetoptions=kws) - - -class GUVectorize(_BaseVectorize): - target_registry = TargetRegistry({'cpu': GUFuncBuilder, - 'parallel': ParallelGUFuncBuilder,}) - - def __new__(cls, func, signature, **kws): - identity = cls.get_identity(kws) - cache = cls.get_cache(kws) - imp = cls.get_target_implementation(kws) - return imp(func, signature, identity=identity, cache=cache, - targetoptions=kws) - - -def vectorize(ftylist_or_function=(), **kws): - """vectorize(ftylist_or_function=(), target='cpu', identity=None, **kws) - - A decorator that creates a Numpy ufunc object using Numba compiled - code. When no arguments or only keyword arguments are given, - vectorize will return a Numba dynamic ufunc (DUFunc) object, where - compilation/specialization may occur at call-time. - - Args - ----- - ftylist_or_function: function or iterable - - When the first argument is a function, signatures are dealt - with at call-time. 
- - When the first argument is an iterable of type signatures, - which are either function type object or a string describing - the function type, signatures are finalized at decoration - time. - - Keyword Args - ------------ - - target: str - A string for code generation target. Default to "cpu". - - identity: int, str, or None - The identity (or unit) value for the element-wise function - being implemented. Allowed values are None (the default), 0, 1, - and "reorderable". - - cache: bool - Turns on caching. - - - Returns - -------- - - A NumPy universal function - - Examples - ------- - @vectorize(['float32(float32, float32)', - 'float64(float64, float64)'], identity=1) - def sum(a, b): - return a + b - - @vectorize - def sum(a, b): - return a + b - - @vectorize(identity=1) - def mul(a, b): - return a * b - - """ - if isinstance(ftylist_or_function, str): - # Common user mistake - ftylist = [ftylist_or_function] - elif inspect.isfunction(ftylist_or_function): - return dufunc.DUFunc(ftylist_or_function, **kws) - elif ftylist_or_function is not None: - ftylist = ftylist_or_function - - def wrap(func): - vec = Vectorize(func, **kws) - for sig in ftylist: - vec.add(sig) - if len(ftylist) > 0: - vec.disable_compile() - return vec.build_ufunc() - - return wrap - - -def guvectorize(ftylist, signature, **kws): - """guvectorize(ftylist, signature, target='cpu', identity=None, **kws) - - A decorator to create numpy generialized-ufunc object from Numba compiled - code. - - Args - ----- - ftylist: iterable - An iterable of type signatures, which are either - function type object or a string describing the - function type. - - signature: str - A NumPy generialized-ufunc signature. - e.g. "(m, n), (n, p)->(m, p)" - - identity: int, str, or None - The identity (or unit) value for the element-wise function - being implemented. Allowed values are None (the default), 0, 1, - and "reorderable". - - cache: bool - Turns on caching. 
- - target: str - A string for code generation target. Defaults to "cpu". - - Returns - -------- - - A NumPy generialized universal-function - - Example - ------- - @guvectorize(['void(int32[:,:], int32[:,:], int32[:,:])', - 'void(float32[:,:], float32[:,:], float32[:,:])'], - '(x, y),(x, y)->(x, y)') - def add_2d_array(a, b): - for i in range(c.shape[0]): - for j in range(c.shape[1]): - c[i, j] = a[i, j] + b[i, j] - - """ - if isinstance(ftylist, str): - # Common user mistake - ftylist = [ftylist] - - def wrap(func): - guvec = GUVectorize(func, signature, **kws) - for fty in ftylist: - guvec.add(fty) - return guvec.build_ufunc() - - return wrap diff --git a/numba/numba/npyufunc/deviceufunc.py b/numba/numba/npyufunc/deviceufunc.py deleted file mode 100644 index e9a5d2943..000000000 --- a/numba/numba/npyufunc/deviceufunc.py +++ /dev/null @@ -1,839 +0,0 @@ -""" -Implements custom ufunc dispatch mechanism for non-CPU devices. -""" -from __future__ import print_function, absolute_import - -from collections import OrderedDict -import operator -import warnings -from functools import reduce - -import numpy as np - -from numba.six import exec_ -from numba.utils import longint -from numba.utils import IS_PY3 -from numba.npyufunc.ufuncbuilder import _BaseUFuncBuilder, parse_identity -from numba import sigutils, types -from numba.typing import signature -from numba.npyufunc.sigparse import parse_signature - - -def _broadcast_axis(a, b): - """ - Raises - ------ - ValueError if broadcast fails - """ - if a == b: - return a - elif a == 1: - return b - elif b == 1: - return a - else: - raise ValueError("failed to broadcast {0} and {1}".format(a, b)) - - -def _pairwise_broadcast(shape1, shape2): - """ - Raises - ------ - ValueError if broadcast fails - """ - shape1, shape2 = map(tuple, [shape1, shape2]) - - while len(shape1) < len(shape2): - shape1 = (1,) + shape1 - - while len(shape1) > len(shape2): - shape2 = (1,) + shape2 - - return tuple(_broadcast_axis(a, b) for a, b in 
zip(shape1, shape2)) - - -def _multi_broadcast(*shapelist): - """ - Raises - ------ - ValueError if broadcast fails - """ - assert shapelist - - result = shapelist[0] - others = shapelist[1:] - try: - for i, each in enumerate(others, start=1): - result = _pairwise_broadcast(result, each) - except ValueError: - raise ValueError("failed to broadcast argument #{0}".format(i)) - else: - return result - - -class UFuncMechanism(object): - """ - Prepare ufunc arguments for vectorize. - """ - DEFAULT_STREAM = None - SUPPORT_DEVICE_SLICING = False - - def __init__(self, typemap, args): - """Never used directly by user. Invoke by UFuncMechanism.call(). - """ - self.typemap = typemap - self.args = args - nargs = len(self.args) - self.argtypes = [None] * nargs - self.scalarpos = [] - self.signature = None - self.arrays = [None] * nargs - - def _fill_arrays(self): - """ - Get all arguments in array form - """ - for i, arg in enumerate(self.args): - if isinstance(arg, np.ndarray): - self.arrays[i] = arg - elif self.is_device_array(arg): - self.arrays[i] = self.as_device_array(arg) - elif isinstance(arg, (int, longint, float, complex, np.number)): - # Is scalar - self.scalarpos.append(i) - else: - raise TypeError("argument #%d has invalid type of %s" \ - % (i + 1, type(arg) )) - - def _fill_argtypes(self): - """ - Get dtypes - """ - for i, ary in enumerate(self.arrays): - if ary is not None: - self.argtypes[i] = ary.dtype - - def _resolve_signature(self): - """Resolve signature. - May have ambiguous case. 
- """ - matches = [] - # Resolve scalar args exact match first - if self.scalarpos: - # Try resolve scalar arguments - for formaltys in self.typemap: - match_map = [] - for i, (formal, actual) in enumerate(zip(formaltys, - self.argtypes)): - if actual is None: - actual = np.asarray(self.args[i]).dtype - - match_map.append(actual == formal) - - if all(match_map): - matches.append(formaltys) - - # No matching with exact match; try coercing the scalar arguments - if not matches: - matches = [] - for formaltys in self.typemap: - all_matches = all(actual is None or formal == actual - for formal, actual in - zip(formaltys, self.argtypes)) - if all_matches: - matches.append(formaltys) - - if not matches: - raise TypeError("No matching version. GPU ufunc requires array " - "arguments to have the exact types. This behaves " - "like regular ufunc with casting='no'.") - - if len(matches) > 1: - raise TypeError("Failed to resolve ufunc due to ambiguous " - "signature. Too many untyped scalars. " - "Use numpy dtype object to type tag.") - - # Try scalar arguments - self.argtypes = matches[0] - - def _get_actual_args(self): - """Return the actual arguments - Casts scalar arguments to np.array. 
- """ - for i in self.scalarpos: - self.arrays[i] = np.array([self.args[i]], dtype=self.argtypes[i]) - - return self.arrays - - def _broadcast(self, arys): - """Perform numpy ufunc broadcasting - """ - shapelist = [a.shape for a in arys] - shape = _multi_broadcast(*shapelist) - - for i, ary in enumerate(arys): - if ary.shape == shape: - pass - - else: - if self.is_device_array(ary): - arys[i] = self.broadcast_device(ary, shape) - - else: - ax_differs = [ax for ax in range(len(shape)) - if ax >= ary.ndim - or ary.shape[ax] != shape[ax]] - - missingdim = len(shape) - len(ary.shape) - strides = [0] * missingdim + list(ary.strides) - - for ax in ax_differs: - strides[ax] = 0 - - strided = np.lib.stride_tricks.as_strided(ary, - shape=shape, - strides=strides) - - arys[i] = self.force_array_layout(strided) - - return arys - - def get_arguments(self): - """Prepare and return the arguments for the ufunc. - Does not call to_device(). - """ - self._fill_arrays() - self._fill_argtypes() - self._resolve_signature() - arys = self._get_actual_args() - return self._broadcast(arys) - - def get_function(self): - """Returns (result_dtype, function) - """ - return self.typemap[self.argtypes] - - def is_device_array(self, obj): - """Is the `obj` a device array? - Override in subclass - """ - return False - - def as_device_array(self, obj): - """Convert the `obj` to a device array - Override in subclass - - Default implementation is an identity function - """ - return obj - - def broadcast_device(self, ary, shape): - """Handles ondevice broadcasting - - Override in subclass to add support. - """ - raise NotImplementedError("broadcasting on device is not supported") - - def force_array_layout(self, ary): - """Ensures array layout met device requirement. - - Override in sublcass - """ - return ary - - @classmethod - def call(cls, typemap, args, kws): - """Perform the entire ufunc call mechanism. 
- """ - # Handle keywords - stream = kws.pop('stream', cls.DEFAULT_STREAM) - out = kws.pop('out', None) - - if kws: - warnings.warn("unrecognized keywords: %s" % ', '.join(kws)) - - # Begin call resolution - cr = cls(typemap, args) - args = cr.get_arguments() - resty, func = cr.get_function() - - outshape = args[0].shape - - # Adjust output value - if out is not None and cr.is_device_array(out): - out = cr.as_device_array(out) - - def attempt_ravel(a): - if cr.SUPPORT_DEVICE_SLICING: - raise NotImplementedError - - try: - # Call the `.ravel()` method - return a.ravel() - except NotImplementedError: - # If it is not a device array - if not cr.is_device_array(a): - raise - # For device array, retry ravel on the host by first - # copying it back. - else: - hostary = cr.to_host(a, stream).ravel() - return cr.to_device(hostary, stream) - - if args[0].ndim > 1: - args = [attempt_ravel(a) for a in args] - - # Prepare argument on the device - devarys = [] - any_device = False - for a in args: - if cr.is_device_array(a): - devarys.append(a) - any_device = True - else: - dev_a = cr.to_device(a, stream=stream) - devarys.append(dev_a) - - # Launch - shape = args[0].shape - if out is None: - # No output is provided - devout = cr.device_array(shape, resty, stream=stream) - - devarys.extend([devout]) - cr.launch(func, shape[0], stream, devarys) - - if any_device: - # If any of the arguments are on device, - # Keep output on the device - return devout.reshape(outshape) - else: - # Otherwise, transfer output back to host - return devout.copy_to_host().reshape(outshape) - - elif cr.is_device_array(out): - # If output is provided and it is a device array, - # Return device array - if out.ndim > 1: - out = attempt_ravel(out) - devout = out - devarys.extend([devout]) - cr.launch(func, shape[0], stream, devarys) - return devout.reshape(outshape) - - else: - # If output is provided and it is a host array, - # Return host array - assert out.shape == shape - assert out.dtype == resty - 
devout = cr.device_array(shape, resty, stream=stream) - devarys.extend([devout]) - cr.launch(func, shape[0], stream, devarys) - return devout.copy_to_host(out, stream=stream).reshape(outshape) - - def to_device(self, hostary, stream): - """Implement to device transfer - Override in subclass - """ - raise NotImplementedError - - def to_host(self, devary, stream): - """Implement to host transfer - Override in subclass - """ - raise NotImplementedError - - def device_array(self, shape, dtype, stream): - """Implements device allocation - Override in subclass - """ - raise NotImplementedError - - def launch(self, func, count, stream, args): - """Implements device function invocation - Override in subclass - """ - raise NotImplementedError - - -def to_dtype(ty): - return np.dtype(str(ty)) - - -class DeviceVectorize(_BaseUFuncBuilder): - def __init__(self, func, identity=None, cache=False, targetoptions={}): - if cache: - raise TypeError("caching is not supported") - assert not targetoptions - self.py_func = func - self.identity = parse_identity(identity) - # { arg_dtype: (return_dtype), cudakernel } - self.kernelmap = OrderedDict() - - @property - def pyfunc(self): - return self.py_func - - def add(self, sig=None, argtypes=None, restype=None): - # Handle argtypes - if argtypes is not None: - warnings.warn("Keyword argument argtypes is deprecated", - DeprecationWarning) - assert sig is None - if restype is None: - sig = tuple(argtypes) - else: - sig = restype(*argtypes) - del argtypes - del restype - - # compile core as device function - args, return_type = sigutils.normalize_signature(sig) - devfnsig = signature(return_type, *args) - - funcname = self.pyfunc.__name__ - kernelsource = self._get_kernel_source(self._kernel_template, - devfnsig, funcname) - corefn, return_type = self._compile_core(devfnsig) - glbl = self._get_globals(corefn) - sig = signature(types.void, *([a[:] for a in args] + [return_type[:]])) - exec_(kernelsource, glbl) - - stager = 
glbl['__vectorized_%s' % funcname] - kernel = self._compile_kernel(stager, sig) - - argdtypes = tuple(to_dtype(t) for t in devfnsig.args) - resdtype = to_dtype(return_type) - self.kernelmap[tuple(argdtypes)] = resdtype, kernel - - def build_ufunc(self): - raise NotImplementedError - - def _get_kernel_source(self, template, sig, funcname): - args = ['a%d' % i for i in range(len(sig.args))] - fmts = dict(name=funcname, - args=', '.join(args), - argitems=', '.join('%s[__tid__]' % i for i in args)) - return template.format(**fmts) - - def _compile_core(self, sig): - raise NotImplementedError - - def _get_globals(self, corefn): - raise NotImplementedError - - def _compile_kernel(self, fnobj, sig): - raise NotImplementedError - - -class DeviceGUFuncVectorize(_BaseUFuncBuilder): - def __init__(self, func, sig, identity=None, cache=False, targetoptions={}): - if cache: - raise TypeError("caching is not supported") - # Allow nopython flag to be set. - if not targetoptions.pop('nopython', True): - raise TypeError("nopython flag must be True") - # Are there any more target options? 
- if targetoptions: - opts = ', '.join([repr(k) for k in targetoptions.keys()]) - fmt = "The following target options are not supported: {0}" - raise TypeError(fmt.format(opts)) - - self.py_func = func - self.identity = parse_identity(identity) - self.signature = sig - self.inputsig, self.outputsig = parse_signature(self.signature) - assert len(self.outputsig) == 1, "only support 1 output" - # { arg_dtype: (return_dtype), cudakernel } - self.kernelmap = OrderedDict() - - @property - def pyfunc(self): - return self.py_func - - def add(self, sig=None, argtypes=None, restype=None): - # Handle argtypes - if argtypes is not None: - warnings.warn("Keyword argument argtypes is deprecated", - DeprecationWarning) - assert sig is None - if restype is None: - sig = tuple(argtypes) - else: - sig = restype(*argtypes) - del argtypes - del restype - - indims = [len(x) for x in self.inputsig] - outdims = [len(x) for x in self.outputsig] - args, return_type = sigutils.normalize_signature(sig) - - funcname = self.py_func.__name__ - src = expand_gufunc_template(self._kernel_template, indims, - outdims, funcname, args) - - glbls = self._get_globals(sig) - - exec_(src, glbls) - fnobj = glbls['__gufunc_{name}'.format(name=funcname)] - - outertys = list(_determine_gufunc_outer_types(args, indims + outdims)) - kernel = self._compile_kernel(fnobj, sig=tuple(outertys)) - - dtypes = tuple(np.dtype(str(t.dtype)) for t in outertys) - self.kernelmap[tuple(dtypes[:-1])] = dtypes[-1], kernel - - def _compile_kernel(self, fnobj, sig): - raise NotImplementedError - - def _get_globals(self, sig): - raise NotImplementedError - - -def _determine_gufunc_outer_types(argtys, dims): - for at, nd in zip(argtys, dims): - if isinstance(at, types.Array): - yield at.copy(ndim=nd + 1) - else: - if nd > 0: - raise ValueError("gufunc signature mismatch: ndim>0 for scalar") - yield types.Array(dtype=at, ndim=1, layout='A') - - -def expand_gufunc_template(template, indims, outdims, funcname, argtypes): - """Expand 
gufunc source template - """ - argdims = indims + outdims - argnames = ["arg{0}".format(i) for i in range(len(argdims))] - checkedarg = "min({0})".format(', '.join(["{0}.shape[0]".format(a) - for a in argnames])) - inputs = [_gen_src_for_indexing(aref, adims, atype) - for aref, adims, atype in zip(argnames, indims, argtypes)] - outputs = [_gen_src_for_indexing(aref, adims, atype) - for aref, adims, atype in zip(argnames[len(indims):], outdims, - argtypes[len(indims):])] - argitems = inputs + outputs - src = template.format(name=funcname, args=', '.join(argnames), - checkedarg=checkedarg, - argitems=', '.join(argitems)) - return src - - -def _gen_src_for_indexing(aref, adims, atype): - return "{aref}[{sliced}]".format(aref=aref, - sliced=_gen_src_index(adims, atype)) - - -def _gen_src_index(adims, atype): - if adims > 0: - return ','.join(['__tid__'] + [':'] * adims) - elif isinstance(atype, types.Array) and atype.ndim - 1 == adims: - # Special case for 0-nd in shape-signature but - # 1d array in type signature. - # Slice it so that the result has the same dimension. 
- return '__tid__:(__tid__ + 1)' - else: - return '__tid__' - - -class GUFuncEngine(object): - '''Determine how to broadcast and execute a gufunc - base on input shape and signature - ''' - - @classmethod - def from_signature(cls, signature): - return cls(*parse_signature(signature)) - - def __init__(self, inputsig, outputsig): - # signatures - self.sin = inputsig - self.sout = outputsig - # argument count - self.nin = len(self.sin) - self.nout = len(self.sout) - - def schedule(self, ishapes): - if len(ishapes) != self.nin: - raise TypeError('invalid number of input argument') - - # associate symbol values for input signature - symbolmap = {} - outer_shapes = [] - inner_shapes = [] - - for argn, (shape, symbols) in enumerate(zip(ishapes, self.sin)): - argn += 1 # start from 1 for human - inner_ndim = len(symbols) - if len(shape) < inner_ndim: - fmt = "arg #%d: insufficient inner dimension" - raise ValueError(fmt % (argn,)) - if inner_ndim: - inner_shape = shape[-inner_ndim:] - outer_shape = shape[:-inner_ndim] - else: - inner_shape = () - outer_shape = shape - - for axis, (dim, sym) in enumerate(zip(inner_shape, symbols)): - axis += len(outer_shape) - if sym in symbolmap: - if symbolmap[sym] != dim: - fmt = "arg #%d: shape[%d] mismatch argument" - raise ValueError(fmt % (argn, axis)) - symbolmap[sym] = dim - - outer_shapes.append(outer_shape) - inner_shapes.append(inner_shape) - - # solve output shape - oshapes = [] - for outsig in self.sout: - oshape = [] - for sym in outsig: - oshape.append(symbolmap[sym]) - oshapes.append(tuple(oshape)) - - # find the biggest outershape as looping dimension - sizes = [reduce(operator.mul, s, 1) for s in outer_shapes] - largest_i = np.argmax(sizes) - loopdims = outer_shapes[largest_i] - - pinned = [False] * self.nin # same argument for each iteration - for i, d in enumerate(outer_shapes): - if d != loopdims: - if d == (1,) or d == (): - pinned[i] = True - else: - fmt = "arg #%d: outer dimension mismatch" - raise ValueError(fmt % 
(i + 1,)) - - return GUFuncSchedule(self, inner_shapes, oshapes, loopdims, pinned) - - -class GUFuncSchedule(object): - def __init__(self, parent, ishapes, oshapes, loopdims, pinned): - self.parent = parent - # core shapes - self.ishapes = ishapes - self.oshapes = oshapes - # looping dimension - self.loopdims = loopdims - self.loopn = reduce(operator.mul, loopdims, 1) - # flags - self.pinned = pinned - - self.output_shapes = [loopdims + s for s in oshapes] - - def __str__(self): - import pprint - - attrs = 'ishapes', 'oshapes', 'loopdims', 'loopn', 'pinned' - values = [(k, getattr(self, k)) for k in attrs] - return pprint.pformat(dict(values)) - - -class GenerializedUFunc(object): - def __init__(self, kernelmap, engine): - self.kernelmap = kernelmap - self.engine = engine - self.max_blocksize = 2 ** 30 - assert self.engine.nout == 1, "only support single output" - - def __call__(self, *args, **kws): - callsteps = self._call_steps(self.engine.nin, self.engine.nout, - args, kws) - callsteps.prepare_inputs() - indtypes, schedule, outdtype, kernel = self._schedule( - callsteps.norm_inputs, callsteps.output) - callsteps.adjust_input_types(indtypes) - callsteps.allocate_outputs(schedule, outdtype) - callsteps.prepare_kernel_parameters() - newparams, newretval = self._broadcast(schedule, - callsteps.kernel_parameters, - callsteps.kernel_returnvalue) - callsteps.launch_kernel(kernel, schedule.loopn, newparams + [newretval]) - return callsteps.post_process_result() - - def _schedule(self, inputs, out): - input_shapes = [a.shape for a in inputs] - schedule = self.engine.schedule(input_shapes) - - # find kernel - idtypes = tuple(i.dtype for i in inputs) - try: - outdtype, kernel = self.kernelmap[idtypes] - except KeyError: - # No exact match, then use the first compatbile. - # This does not match the numpy dispatching exactly. - # Later, we may just jit a new version for the missing signature. 
- idtypes = self._search_matching_signature(idtypes) - # Select kernel - outdtype, kernel = self.kernelmap[idtypes] - - # check output - if out is not None and schedule.output_shapes[0] != out.shape: - raise ValueError('output shape mismatch') - - return idtypes, schedule, outdtype, kernel - - def _search_matching_signature(self, idtypes): - """ - Given the input types in `idtypes`, return a compatible sequence of - types that is defined in `kernelmap`. - - Note: Ordering is guaranteed by `kernelmap` being a OrderedDict - """ - for sig in self.kernelmap.keys(): - if all(np.can_cast(actual, desired) - for actual, desired in zip(sig, idtypes)): - return sig - else: - raise TypeError("no matching signature") - - def _broadcast(self, schedule, params, retval): - assert schedule.loopn > 0, "zero looping dimension" - - odim = 1 if not schedule.loopdims else schedule.loopn - newparams = [] - for p, cs in zip(params, schedule.ishapes): - if not cs and p.size == 1: - # Broadcast scalar input - devary = self._broadcast_scalar_input(p, odim) - newparams.append(devary) - else: - # Broadcast vector input - newparams.append(self._broadcast_array(p, odim, cs)) - newretval = retval.reshape(odim, *schedule.oshapes[0]) - return newparams, newretval - - def _broadcast_array(self, ary, newdim, innerdim): - newshape = (newdim,) + innerdim - # No change in shape - if ary.shape == newshape: - return ary - - # Creating new dimension - elif len(ary.shape) < len(newshape): - assert newshape[-len(ary.shape):] == ary.shape, \ - "cannot add dim and reshape at the same time" - return self._broadcast_add_axis(ary, newshape) - - # Collapsing dimension - else: - return ary.reshape(*newshape) - - def _broadcast_add_axis(self, ary, newshape): - raise NotImplementedError("cannot add new axis") - - def _broadcast_scalar_input(self, ary, shape): - raise NotImplementedError - - -class GUFuncCallSteps(object): - __slots__ = [ - 'args', - 'kwargs', - 'output', - 'norm_inputs', - 'kernel_returnvalue', - 
'kernel_parameters', - '_is_device_array', - '_need_device_conversion', - ] - - def __init__(self, nin, nout, args, kwargs): - if nout > 1: - raise ValueError('multiple output is not supported') - self.args = args - self.kwargs = kwargs - - user_output_is_device = False - self.output = self.kwargs.get('out') - if self.output is not None: - user_output_is_device = self.is_device_array(self.output) - if user_output_is_device: - self.output = self.as_device_array(self.output) - self._is_device_array = [self.is_device_array(a) for a in self.args] - self._need_device_conversion = (not any(self._is_device_array) and - not user_output_is_device) - - # Normalize inputs - inputs = [] - for a, isdev in zip(self.args, self._is_device_array): - if isdev: - inputs.append(self.as_device_array(a)) - else: - inputs.append(np.asarray(a)) - self.norm_inputs = inputs[:nin] - # Check if there are extra arguments for outputs. - unused_inputs = inputs[nin:] - if unused_inputs: - if self.output is not None: - raise ValueError("cannot specify 'out' as both a positional " - "and keyword argument") - else: - [self.output] = unused_inputs - - def adjust_input_types(self, indtypes): - """ - Attempt to cast the inputs to the required types if necessary - and if they are not device array. - - Side effect: Only affects the element of `norm_inputs` that requires - a type cast. 
- """ - for i, (ity, val) in enumerate(zip(indtypes, self.norm_inputs)): - if ity != val.dtype: - if not hasattr(val, 'astype'): - msg = ("compatible signature is possible by casting but " - "{0} does not support .astype()").format(type(val)) - raise TypeError(msg) - # Cast types - self.norm_inputs[i] = val.astype(ity) - - def allocate_outputs(self, schedule, outdtype): - # allocate output - if self._need_device_conversion or self.output is None: - retval = self.device_array(shape=schedule.output_shapes[0], - dtype=outdtype) - else: - retval = self.output - self.kernel_returnvalue = retval - - def prepare_kernel_parameters(self): - params = [] - for inp, isdev in zip(self.norm_inputs, self._is_device_array): - if isdev: - params.append(inp) - else: - params.append(self.to_device(inp)) - assert all(self.is_device_array(a) for a in params) - self.kernel_parameters = params - - def post_process_result(self): - if self._need_device_conversion: - out = self.to_host(self.kernel_returnvalue, self.output) - elif self.output is None: - out = self.kernel_returnvalue - else: - out = self.output - return out - - def prepare_inputs(self): - pass - - def launch_kernel(self, kernel, nelem, args): - raise NotImplementedError - - def is_device_array(self, obj): - raise NotImplementedError - - def as_device_array(self, obj): - return obj - - def to_device(self, hostary): - raise NotImplementedError - - def device_array(self, shape, dtype): - raise NotImplementedError diff --git a/numba/numba/npyufunc/dufunc.py b/numba/numba/npyufunc/dufunc.py deleted file mode 100644 index 23d1c417b..000000000 --- a/numba/numba/npyufunc/dufunc.py +++ /dev/null @@ -1,286 +0,0 @@ -from __future__ import absolute_import, print_function, division - -import numpy as np - -from .. import jit, typeof, utils, types, numpy_support, sigutils -from ..typing import npydecl -from ..typing.templates import AbstractTemplate, signature -from . 
import _internal, ufuncbuilder -from ..dispatcher import Dispatcher -from .. import array_analysis - -def make_dufunc_kernel(_dufunc): - from ..targets import npyimpl - - class DUFuncKernel(npyimpl._Kernel): - """ - npyimpl._Kernel subclass responsible for lowering a DUFunc kernel - (element-wise function) inside a broadcast loop (which is - generated by npyimpl.numpy_ufunc_kernel()). - """ - dufunc = _dufunc - - def __init__(self, context, builder, outer_sig): - super(DUFuncKernel, self).__init__(context, builder, outer_sig) - self.inner_sig, self.cres = self.dufunc.find_ewise_function( - outer_sig.args) - - def generate(self, *args): - isig = self.inner_sig - osig = self.outer_sig - cast_args = [self.cast(val, inty, outty) - for val, inty, outty in zip(args, osig.args, isig.args)] - if self.cres.objectmode: - func_type = self.context.call_conv.get_function_type( - types.pyobject, [types.pyobject] * len(isig.args)) - else: - func_type = self.context.call_conv.get_function_type( - isig.return_type, isig.args) - module = self.builder.block.function.module - entry_point = module.get_or_insert_function( - func_type, name=self.cres.fndesc.llvm_func_name) - entry_point.attributes.add("alwaysinline") - - _, res = self.context.call_conv.call_function( - self.builder, entry_point, isig.return_type, isig.args, - cast_args) - return self.cast(res, isig.return_type, osig.return_type) - - DUFuncKernel.__name__ += _dufunc.ufunc.__name__ - return DUFuncKernel - - -class DUFuncLowerer(object): - '''Callable class responsible for lowering calls to a specific DUFunc. 
- ''' - def __init__(self, dufunc): - self.kernel = make_dufunc_kernel(dufunc) - self.libs = [] - - def __call__(self, context, builder, sig, args): - from ..targets import npyimpl - explicit_output = len(args) > self.kernel.dufunc.ufunc.nin - return npyimpl.numpy_ufunc_kernel(context, builder, sig, args, - self.kernel, - explicit_output=explicit_output) - - -class DUFunc(_internal._DUFunc): - """ - Dynamic universal function (DUFunc) intended to act like a normal - Numpy ufunc, but capable of call-time (just-in-time) compilation - of fast loops specialized to inputs. - """ - # NOTE: __base_kwargs must be kept in synch with the kwlist in - # _internal.c:dufunc_init() - __base_kwargs = set(('identity', '_keepalive', 'nin', 'nout')) - - def __init__(self, py_func, identity=None, cache=False, targetoptions={}): - if isinstance(py_func, Dispatcher): - py_func = py_func.py_func - self.targetoptions = targetoptions.copy() - kws = {} - kws['identity'] = ufuncbuilder.parse_identity(identity) - - dispatcher = jit(target='npyufunc', cache=cache)(py_func) - super(DUFunc, self).__init__(dispatcher, **kws) - # Loop over a copy of the keys instead of the keys themselves, - # since we're changing the dictionary while looping. - self._install_type() - self._lower_me = DUFuncLowerer(self) - self._install_cg() - self.__name__ = py_func.__name__ - self.__doc__ = py_func.__doc__ - - def build_ufunc(self): - """ - For compatibility with the various *UFuncBuilder classes. - """ - return self - - @property - def nin(self): - return self.ufunc.nin - - @property - def nout(self): - return self.ufunc.nout - - @property - def nargs(self): - return self.ufunc.nargs - - @property - def ntypes(self): - return self.ufunc.ntypes - - @property - def types(self): - return self.ufunc.types - - @property - def identity(self): - return self.ufunc.identity - - def disable_compile(self): - """ - Disable the compilation of new signatures at call time. 
- """ - # If disabling compilation then there must be at least one signature - assert len(self._dispatcher.overloads) > 0 - self._frozen = True - - def add(self, sig): - """ - Compile the DUFunc for the given signature. - """ - args, return_type = sigutils.normalize_signature(sig) - return self._compile_for_argtys(args, return_type) - - def _compile_for_args(self, *args, **kws): - nin = self.ufunc.nin - if kws: - if 'out' in kws: - out = kws.pop('out') - args += (out,) - if kws: - raise TypeError("unexpected keyword arguments to ufunc: %s" - % ", ".join(repr(k) for k in sorted(kws))) - - args_len = len(args) - assert (args_len == nin) or (args_len == nin + self.ufunc.nout) - assert not kws - argtys = [] - # To avoid a mismatch in how Numba types values as opposed to - # Numpy, we need to first check for scalars. For example, on - # 64-bit systems, numba.typeof(3) => int32, but - # np.array(3).dtype => int64. - for arg in args[:nin]: - if numpy_support.is_arrayscalar(arg): - argtys.append(numpy_support.map_arrayscalar_type(arg)) - else: - argty = typeof(arg) - if isinstance(argty, types.Array): - argty = argty.dtype - argtys.append(argty) - return self._compile_for_argtys(tuple(argtys)) - - def _compile_for_argtys(self, argtys, return_type=None): - """ - Given a tuple of argument types (these should be the array - dtypes, and not the array types themselves), compile the - element-wise function for those inputs, generate a UFunc loop - wrapper, and register the loop with the Numpy ufunc object for - this DUFunc. 
- """ - if self._frozen: - raise RuntimeError("compilation disabled for %s" % (self,)) - assert isinstance(argtys, tuple) - if return_type is None: - sig = argtys - else: - sig = return_type(*argtys) - cres, argtys, return_type = ufuncbuilder._compile_element_wise_function( - self._dispatcher, self.targetoptions, sig) - actual_sig = ufuncbuilder._finalize_ufunc_signature( - cres, argtys, return_type) - dtypenums, ptr, env = ufuncbuilder._build_element_wise_ufunc_wrapper( - cres, actual_sig) - self._add_loop(utils.longint(ptr), dtypenums) - self._keepalive.append((ptr, cres.library, env)) - self._lower_me.libs.append(cres.library) - return cres - - def _install_type(self, typingctx=None): - """Constructs and installs a typing class for a DUFunc object in the - input typing context. If no typing context is given, then - _install_type() installs into the typing context of the - dispatcher object (should be same default context used by - jit() and njit()). - """ - if typingctx is None: - typingctx = self._dispatcher.targetdescr.typing_context - _ty_cls = type('DUFuncTyping_' + self.ufunc.__name__, - (AbstractTemplate,), - dict(key=self, generic=self._type_me)) - typingctx.insert_user_function(self, _ty_cls) - - def find_ewise_function(self, ewise_types): - """ - Given a tuple of element-wise argument types, find a matching - signature in the dispatcher. - - Return a 2-tuple containing the matching signature, and - compilation result. Will return two None's if no matching - signature was found. 
- """ - if self._frozen: - # If we cannot compile, coerce to the best matching loop - loop = numpy_support.ufunc_find_matching_loop(self, ewise_types) - if loop is None: - return None, None - ewise_types = tuple(loop.inputs + loop.outputs)[:len(ewise_types)] - for sig, cres in self._dispatcher.overloads.items(): - if sig.args == ewise_types: - return sig, cres - return None, None - - def _type_me(self, argtys, kwtys): - """ - Implement AbstractTemplate.generic() for the typing class - built by DUFunc._install_type(). - - Return the call-site signature after either validating the - element-wise signature or compiling for it. - """ - assert not kwtys - ufunc = self.ufunc - _handle_inputs_result = npydecl.Numpy_rules_ufunc._handle_inputs( - ufunc, argtys, kwtys) - base_types, explicit_outputs, ndims, layout = _handle_inputs_result - explicit_output_count = len(explicit_outputs) - if explicit_output_count > 0: - ewise_types = tuple(base_types[:-len(explicit_outputs)]) - else: - ewise_types = tuple(base_types) - sig, cres = self.find_ewise_function(ewise_types) - if sig is None: - # Matching element-wise signature was not found; must - # compile. - if self._frozen: - raise TypeError("cannot call %s with types %s" - % (self, argtys)) - self._compile_for_argtys(ewise_types) - sig, cres = self.find_ewise_function(ewise_types) - assert sig is not None - if explicit_output_count > 0: - outtys = list(explicit_outputs) - elif ufunc.nout == 1: - if ndims > 0: - outtys = [types.Array(sig.return_type, ndims, layout)] - else: - outtys = [sig.return_type] - else: - raise NotImplementedError("typing gufuncs (nout > 1)") - outtys.extend(argtys) - return signature(*outtys) - - def _install_cg(self, targetctx=None): - """ - Install an implementation function for a DUFunc object in the - given target context. If no target context is given, then - _install_cg() installs into the target context of the - dispatcher object (should be same default context used by - jit() and njit()). 
- """ - if targetctx is None: - targetctx = self._dispatcher.targetdescr.target_context - _any = types.Any - _arr = types.Array - # Either all outputs are explicit or none of them are - sig0 = (_any,) * self.ufunc.nin + (_arr,) * self.ufunc.nout - sig1 = (_any,) * self.ufunc.nin - targetctx.insert_func_defn( - [(self._lower_me, self, sig) for sig in (sig0, sig1)]) - -array_analysis.MAP_TYPES.append(DUFunc) diff --git a/numba/numba/npyufunc/gufunc_scheduler.cpp b/numba/numba/npyufunc/gufunc_scheduler.cpp deleted file mode 100644 index 655804faa..000000000 --- a/numba/numba/npyufunc/gufunc_scheduler.cpp +++ /dev/null @@ -1,362 +0,0 @@ -/* - * Copyright (c) 2017 Intel Corporation - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include -#include -#include -#include -#include -#include -#include "gufunc_scheduler.h" - -// round not available on VS2010. -double guround (double number) { - return number < 0.0 ? ceil(number - 0.5) : floor(number + 0.5); -} - -class RangeActual { -public: - std::vector start, end; - - RangeActual() {} - - RangeActual(intp s, intp e) { - start.push_back(s); - end.push_back(e); - } - - RangeActual(const std::vector &s, const std::vector &e) { - assert(s.size() == e.size()); - start = s; - end = e; - } - - RangeActual(const std::vector &lens) { - for(uintp i = 0; i < lens.size(); ++i) { - start.push_back(0); - end.push_back(lens[i] - 1); - } - } - - RangeActual(uintp num_dims, intp *lens) { - for(uintp i = 0; i < num_dims; ++i) { - start.push_back(0); - end.push_back(lens[i] - 1); - } - } - - RangeActual(uintp num_dims, intp *starts, intp *ends) { - for(uintp i = 0; i < num_dims; ++i) { - start.push_back(starts[i]); - end.push_back(ends[i]); - } - } - - uintp ndim() const { - return start.size(); - } - - std::vector iters_per_dim() const { - std::vector ret; - for(uintp i = 0; i < start.size(); ++i) { - intp ret_val = end[i] - start[i] + 1; - if(end[i] < start[i]) - ret_val = 0; - ret.push_back(ret_val); - } - return ret; - } -}; - -class 
dimlength { -public: - uintp dim; - intp length; - dimlength(uintp d, intp l) : dim(d), length(l) {} -}; - -struct dimlength_by_dim { - bool operator()(const dimlength &a, const dimlength &b) const { - return a.dim < b.dim; - } -}; - -struct dimlength_by_length_reverse { - bool operator()(const dimlength &a, const dimlength &b) const { - return a.length > b.length; - } -}; - -class isf_range { -public: - uintp dim; - intp lower_bound, upper_bound; - isf_range(uintp d, intp l, intp u) : dim(d), lower_bound(l), upper_bound(u) {} -}; - -struct isf_range_by_dim { - bool operator()(const isf_range &a, const isf_range &b) const { - return a.dim < b.dim; - } -}; - -/* - * m_a is the current start of the partition. - * m_b is the current end of the partition. - * m_c is the start of the next partition. - */ -class chunk_info { -public: - intp m_a, m_b, m_c; - chunk_info(intp a, intp b, intp c) : m_a(a), m_b(b), m_c(c) {} -}; - -/* - * Split a space starting at rs and ending at re into "divisions" parts. - */ -chunk_info chunk(intp rs, intp re, intp divisions) { - assert(divisions >= 1); - intp total = (re - rs) + 1; - // If only one division then everything goes into that division. - if( divisions == 1) { - return chunk_info(rs, re, re + 1); - } else { - intp len = total / divisions; - intp res_end = rs + len - 1; - // Return the first division by starting at the beginning (rs) and going to - // the remaining length divided by the number of divisions. 
- return chunk_info(rs, res_end, res_end + 1); - } -} - -chunk_info equalizing_chunk(intp rs, intp re, intp divisions, float thread_percent) { - assert(divisions >= 1); - intp total = (re - rs) + 1; - if (divisions == 1) { - return chunk_info(rs, re, re + 1); - } - else { - intp len = total * thread_percent; - intp res_end = rs + len - 1; - return chunk_info(rs, res_end, res_end + 1); - } -} - -RangeActual isfRangeToActual(const std::vector &build) { - std::vector bunsort(build); - std::sort(bunsort.begin(), bunsort.end(), isf_range_by_dim()); - std::vector lower_bounds(bunsort.size()), upper_bounds(bunsort.size()); - for(uintp i = 0; i < bunsort.size(); ++i) { - lower_bounds[i] = bunsort[i].lower_bound; - upper_bounds[i] = bunsort[i].upper_bound; - } - return RangeActual(lower_bounds, upper_bounds); -} - -/* - * Does the main work of splitting the iteration space between threads. - * In general, we start by allocating a number of threads to handle the largest dimension - * then call the routine recursively to allocate threads to the next largest dimension - * and so one. - */ -void divide_work(const RangeActual &full_iteration_space, - std::vector &assignments, - std::vector &build, - uintp start_thread, - uintp end_thread, - const std::vector &dims, - uintp index) { - // Number of threads used for this dimension. - uintp num_threads = (end_thread - start_thread) + 1; - - assert(num_threads >= 1); - // If there is only one thread left then it gets all the remaining work. - if(num_threads == 1) { - assert(build.size() <= dims.size()); - - // build holds the ongoing constructed range of iterations in each dimension. - // If the length of build is the number of dims then we have a complete allocation - // so store it in assignments. - if(build.size() == dims.size()) { - assignments[start_thread] = isfRangeToActual(build); - } else { - // There are still more dimenions to add. - // Create a copy of the incoming build. 
- std::vector new_build(build.begin()+0, build.begin()+index); - // Add an entry to new_build for this thread to handle the entire current dimension. - new_build.push_back(isf_range(dims[index].dim, full_iteration_space.start[dims[index].dim], full_iteration_space.end[dims[index].dim])); - // Recursively process. - divide_work(full_iteration_space, assignments, new_build, start_thread, end_thread, dims, index+1); - } - } else { - // There is more than 1 thread for handling this dimension so need to split the dimension between the threads. - assert(index < dims.size()); - intp total_len = 0; - // Compute the total number of iterations in the remaining dimensions to be processed, including the current one. - for(uintp i = index; i < dims.size(); ++i) total_len += dims[i].length > 1 ? dims[i].length : 0; - uintp divisions_for_this_dim; - if(total_len == 0) { - divisions_for_this_dim = num_threads; - } else { - // We allocate the remaining threads proportionally to the ratio of the current dimension length to the total. - divisions_for_this_dim = intp(guround(num_threads * ((float)dims[index].length / total_len))); - } - - // These are used to divide the iteration space. - intp chunkstart = full_iteration_space.start[dims[index].dim]; - intp chunkend = full_iteration_space.end[dims[index].dim]; - - // These are used to divide threads. - intp threadstart = start_thread; - intp threadend = end_thread; - - // for each division of the current dimension... - for(uintp i = 0; i < divisions_for_this_dim; ++i) { - chunk_info chunk_thread = chunk(threadstart, threadend, divisions_for_this_dim - i); - // Number of threads used for this division. - uintp threads_used_here = (1 + (chunk_thread.m_b - chunk_thread.m_a)); - chunk_info chunk_index = equalizing_chunk(chunkstart, chunkend, divisions_for_this_dim - i, threads_used_here / (float)num_threads); - // Remember that the next division has threads_used_here fewer threads to allocate. 
- num_threads -= threads_used_here; - // m_c contains the next start value so update the iteration space and thread space in preparation for next iteration of this loop. - chunkstart = chunk_index.m_c; - threadstart = chunk_thread.m_c; - // Copy the incoming build to new_build. - std::vector new_build(build.begin()+0, build.begin()+index); - // Add this dimension to new_build to handle start=m_a to end=m_b. - new_build.push_back(isf_range(dims[index].dim, chunk_index.m_a, chunk_index.m_b)); - // Recursively process the next dimension. - divide_work(full_iteration_space, assignments, new_build, chunk_thread.m_a, chunk_thread.m_b, dims, index+1); - } - } -} - -/* - * Convert from internal format of vector of ranges to a flattened 2D-array usable by Python. - */ -template -void flatten_schedule(const std::vector &sched, T *out_sched) { - uintp outer = sched.size(); - uintp inner = sched[0].start.size(); - for(uintp i = 0; i < outer; ++i) { - for(uintp j = 0; j < inner; ++j) { - out_sched[(i*inner*2) + j] = sched[i].start[j]; - } - for(uintp j = 0; j < inner; ++j) { - out_sched[(i*inner*2) + j + inner] = sched[i].end[j]; - } - } -} - -/* - * Main routine that computes a static schedule. - * full_space is the iteration space in each dimension. - * num_sched is the number of worker threads. - */ -std::vector create_schedule(const RangeActual &full_space, uintp num_sched) { - // Compute the number of iterations to be run for each dimension. - std::vector ipd = full_space.iters_per_dim(); - - // We special-case one dimensional. - if(full_space.ndim() == 1) { - // Get the number of iterations for the single dimension. - intp ra_len = ipd[0]; - // If there are fewer iterations for the single dimension than there are threads... - if(ra_len < 0 || (uintp)ra_len <= num_sched) { - std::vector ret; - for(uintp i = 0; i < num_sched; ++i) { - // If the amount of iterations is less than the current thread then give it no work, - // signified by start of 1 and end of 0. 
- if(ra_len < 0 || (uintp)ra_len <= i) { - ret.push_back(RangeActual((intp)1, (intp)0)); - } else { - // Give just i'th iteration to thread i. - ret.push_back(RangeActual(full_space.start[0] + i, full_space.start[0] + i)); - } - } - return ret; - } else { - // There are more iterations than threads. - // Compute the modal number of iterations to assign to each thread. - intp ilen = ra_len / num_sched; - - std::vector ret; - // For each thread... - for(uintp i = 0; i < num_sched; ++i) { - // Compute the start iteration number for that thread as the start iteration - // plus the modal number of iterations times the thread number. - intp start = full_space.start[0] + (ilen * i); - intp end; - // If this isn't the last thread then the end iteration number is one less - // than the start iteration number of the next thread. If it is the last - // thread then assign all remaining iterations to it. - if(i < num_sched-1) { - end = full_space.start[0] + (ilen * (i+1)) - 1; - } else { - end = full_space.end[0]; - } - // Record the iteration start and end in the schedule. - ret.push_back(RangeActual(start, end)); - } - return ret; - } - } else { - // Two or more dimensions are handled generically here. - std::vector dims; - // Create a vector of objects associating dimensional index to length. - for(uintp i = 0; i < ipd.size(); ++i) dims.push_back(dimlength(i, ipd[i])); - // Sort the dimensions in the reverse order of their length. - std::sort(dims.begin(), dims.end(), dimlength_by_length_reverse()); - std::vector assignments(num_sched, RangeActual((intp)1,(intp)0)); - std::vector build; - // Compute the division of work across dimensinos and threads. - divide_work(full_space, assignments, build, 0, num_sched-1, dims, 0); - return assignments; - } -} - -/* - num_dim (D) is the number of dimensions of the iteration space. - starts is the range-start of each of those dimensions, inclusive. - ends is the range-end of each of those dimensions, inclusive. 
- num_threads is the number (N) of chunks to break the iteration space into - sched is pre-allocated memory for the schedule to be stored in and is of size NxD. - debug is non-zero if DEBUG_ARRAY_OPT is turned on. -*/ -extern "C" void do_scheduling_signed(uintp num_dim, intp *starts, intp *ends, uintp num_threads, intp *sched, intp debug) { - if (debug) { - printf("num_dim = %d\n", (int)num_dim); - printf("ranges = ("); - for (unsigned i = 0; i < num_dim; i++) { - printf("[%d, %d], ", (int)starts[i], (int)ends[i]); - } - printf(")\n"); - printf("num_threads = %d\n", (int)num_threads); - } - - if (num_threads == 0) return; - - RangeActual full_space(num_dim, starts, ends); - std::vector ret = create_schedule(full_space, num_threads); - flatten_schedule(ret, sched); -} - -extern "C" void do_scheduling_unsigned(uintp num_dim, intp *starts, intp *ends, uintp num_threads, uintp *sched, intp debug) { - if (debug) { - printf("num_dim = %d\n", (int)num_dim); - printf("ranges = ("); - for (unsigned i = 0; i < num_dim; i++) { - printf("[%d, %d], ", (int)starts[i], (int)ends[i]); - } - printf(")\n"); - printf("num_threads = %d\n", (int)num_threads); - } - - if (num_threads == 0) return; - - RangeActual full_space(num_dim, starts, ends); - std::vector ret = create_schedule(full_space, num_threads); - flatten_schedule(ret, sched); -} diff --git a/numba/numba/npyufunc/gufunc_scheduler.h b/numba/numba/npyufunc/gufunc_scheduler.h deleted file mode 100644 index f5f0a4920..000000000 --- a/numba/numba/npyufunc/gufunc_scheduler.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2017 Intel Corporation - * SPDX-License-Identifier: BSD-2-Clause - */ - -#ifndef GUFUNC_SCHEDULER -#define GUFUNC_SCHEDULER - -/* define int64_t and uint64_t for Visual Studio, where stdint only available > VS2008 */ -#ifdef _MSC_VER - #define int64_t signed __int64 - #define uint64_t unsigned __int64 -#else - #include -#endif - -#ifndef __SIZEOF_POINTER__ - /* MSVC doesn't define __SIZEOF_POINTER__ */ - 
#if defined(_WIN64) - #define intp int64_t - #define uintp uint64_t - #elif defined(_WIN32) - #define intp int - #define uintp unsigned - #else - #error "cannot determine size of intp" - #endif -#elif __SIZEOF_POINTER__ == 8 - #define intp int64_t - #define uintp uint64_t -#else - #define intp int - #define uintp unsigned -#endif - -#ifdef __cplusplus -extern "C" -{ -#endif - -void do_scheduling_signed(uintp num_dim, intp *starts, intp *ends, uintp num_threads, intp *sched, intp debug); -void do_scheduling_unsigned(uintp num_dim, intp *starts, intp *ends, uintp num_threads, uintp *sched, intp debug); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/numba/numba/npyufunc/parallel.py b/numba/numba/npyufunc/parallel.py deleted file mode 100644 index 37a6825a8..000000000 --- a/numba/numba/npyufunc/parallel.py +++ /dev/null @@ -1,437 +0,0 @@ -""" -This file implements the code-generator for parallel-vectorize. - -ParallelUFunc is the platform independent base class for generating -the thread dispatcher. This thread dispatcher launches threads -that execute the generated function of UFuncCore. -UFuncCore is subclassed to specialize for the input/output types. -The actual workload is invoked inside the function generated by UFuncCore. -UFuncCore also defines a work-stealing mechanism that allows idle threads -to steal works from other threads. -""" -from __future__ import print_function, absolute_import - -import sys -import os - -import numpy as np - -import llvmlite.llvmpy.core as lc -import llvmlite.binding as ll - -from numba.npyufunc import ufuncbuilder -from numba.numpy_support import as_dtype -from numba import types, utils, cgutils, config - -def get_thread_count(): - """ - Gets the available thread count. 
- """ - t = config.NUMBA_NUM_THREADS - if t < 1: - raise ValueError("Number of threads specified must be > 0.") - return t - -NUM_THREADS = get_thread_count() - - -class ParallelUFuncBuilder(ufuncbuilder.UFuncBuilder): - def build(self, cres, sig): - _launch_threads() - _init() - - # Buider wrapper for ufunc entry point - ctx = cres.target_context - signature = cres.signature - library = cres.library - fname = cres.fndesc.llvm_func_name - - ptr = build_ufunc_wrapper(library, ctx, fname, signature, cres) - # Get dtypes - dtypenums = [np.dtype(a.name).num for a in signature.args] - dtypenums.append(np.dtype(signature.return_type.name).num) - keepalive = () - return dtypenums, ptr, keepalive - - -def build_ufunc_wrapper(library, ctx, fname, signature, cres): - innerfunc = ufuncbuilder.build_ufunc_wrapper(library, ctx, fname, - signature, objmode=False, - cres=cres) - return build_ufunc_kernel(library, ctx, innerfunc, signature) - - -def build_ufunc_kernel(library, ctx, innerfunc, sig): - """Wrap the original CPU ufunc with a parallel dispatcher. - - Args - ---- - ctx - numba's codegen context - - innerfunc - llvm function of the original CPU ufunc - - sig - type signature of the ufunc - - Details - ------- - - Generate a function of the following signature: - - void ufunc_kernel(char **args, npy_intp *dimensions, npy_intp* steps, - void* data) - - Divide the work equally across all threads and let the last thread take all - the left over. - - - """ - # Declare types and function - byte_t = lc.Type.int(8) - byte_ptr_t = lc.Type.pointer(byte_t) - - intp_t = ctx.get_value_type(types.intp) - - fnty = lc.Type.function(lc.Type.void(), [lc.Type.pointer(byte_ptr_t), - lc.Type.pointer(intp_t), - lc.Type.pointer(intp_t), - byte_ptr_t]) - wrapperlib = ctx.codegen().create_library('parallelufuncwrapper') - mod = wrapperlib.create_ir_module('parallel.ufunc.wrapper') - lfunc = mod.add_function(fnty, name=".kernel." 
+ str(innerfunc)) - - bb_entry = lfunc.append_basic_block('') - - # Function body starts - builder = lc.Builder(bb_entry) - - args, dimensions, steps, data = lfunc.args - - # Release the GIL (and ensure we have the GIL) - # Note: numpy ufunc may not always release the GIL; thus, - # we need to ensure we have the GIL. - pyapi = ctx.get_python_api(builder) - gil_state = pyapi.gil_ensure() - thread_state = pyapi.save_thread() - - # Distribute work - total = builder.load(dimensions) - ncpu = lc.Constant.int(total.type, NUM_THREADS) - - count = builder.udiv(total, ncpu) - - count_list = [] - remain = total - - for i in range(NUM_THREADS): - space = builder.alloca(intp_t) - count_list.append(space) - - if i == NUM_THREADS - 1: - # Last thread takes all leftover - builder.store(remain, space) - else: - builder.store(count, space) - remain = builder.sub(remain, count) - - # Array count is input signature plus 1 (due to output array) - array_count = len(sig.args) + 1 - - # Get the increment step for each array - steps_list = [] - for i in range(array_count): - ptr = builder.gep(steps, [lc.Constant.int(lc.Type.int(), i)]) - step = builder.load(ptr) - steps_list.append(step) - - # Get the array argument set for each thread - args_list = [] - for i in range(NUM_THREADS): - space = builder.alloca(byte_ptr_t, - size=lc.Constant.int(lc.Type.int(), array_count)) - args_list.append(space) - - for j in range(array_count): - # For each array, compute subarray pointer - dst = builder.gep(space, [lc.Constant.int(lc.Type.int(), j)]) - src = builder.gep(args, [lc.Constant.int(lc.Type.int(), j)]) - - baseptr = builder.load(src) - base = builder.ptrtoint(baseptr, intp_t) - multiplier = lc.Constant.int(count.type, i) - offset = builder.mul(steps_list[j], builder.mul(count, multiplier)) - addr = builder.inttoptr(builder.add(base, offset), baseptr.type) - - builder.store(addr, dst) - - # Declare external functions - add_task_ty = lc.Type.function(lc.Type.void(), [byte_ptr_t] * 5) - empty_fnty 
= lc.Type.function(lc.Type.void(), ()) - add_task = mod.get_or_insert_function(add_task_ty, name='numba_add_task') - synchronize = mod.get_or_insert_function(empty_fnty, - name='numba_synchronize') - ready = mod.get_or_insert_function(empty_fnty, name='numba_ready') - - # Add tasks for queue; one per thread - as_void_ptr = lambda arg: builder.bitcast(arg, byte_ptr_t) - - # Note: the runtime address is taken and used as a constant in the function. - fnptr = ctx.get_constant(types.uintp, innerfunc).inttoptr(byte_ptr_t) - for each_args, each_dims in zip(args_list, count_list): - innerargs = [as_void_ptr(x) for x - in [each_args, each_dims, steps, data]] - - builder.call(add_task, [fnptr] + innerargs) - - # Signal worker that we are ready - builder.call(ready, ()) - - # Wait for workers - builder.call(synchronize, ()) - - # Work is done. Reacquire the GIL - pyapi.restore_thread(thread_state) - pyapi.gil_release(gil_state) - - builder.ret_void() - - # Link and compile - wrapperlib.add_ir_module(mod) - wrapperlib.add_linking_library(library) - return wrapperlib.get_pointer_to_function(lfunc.name) - - -# --------------------------------------------------------------------------- - -class ParallelGUFuncBuilder(ufuncbuilder.GUFuncBuilder): - def __init__(self, py_func, signature, identity=None, cache=False, - targetoptions={}): - # Force nopython mode - targetoptions.update(dict(nopython=True)) - super(ParallelGUFuncBuilder, self).__init__(py_func=py_func, - signature=signature, - identity=identity, - cache=cache, - targetoptions=targetoptions) - - def build(self, cres): - """ - Returns (dtype numbers, function ptr, EnvironmentObject) - """ - _launch_threads() - _init() - - # Build wrapper for ufunc entry point - ptr, env, wrapper_name = build_gufunc_wrapper(self.py_func, cres, self.sin, self.sout, - cache=self.cache) - - # Get dtypes - dtypenums = [] - for a in cres.signature.args: - if isinstance(a, types.Array): - ty = a.dtype - else: - ty = a - 
dtypenums.append(as_dtype(ty).num) - - return dtypenums, ptr, env - - -def build_gufunc_wrapper(py_func, cres, sin, sout, cache): - library = cres.library - ctx = cres.target_context - signature = cres.signature - innerfunc, env, wrapper_name = ufuncbuilder.build_gufunc_wrapper(py_func, cres, sin, sout, - cache=cache) - sym_in = set(sym for term in sin for sym in term) - sym_out = set(sym for term in sout for sym in term) - inner_ndim = len(sym_in | sym_out) - - ptr, name = build_gufunc_kernel(library, ctx, innerfunc, signature, inner_ndim) - - return ptr, env, name - - -def build_gufunc_kernel(library, ctx, innerfunc, sig, inner_ndim): - """Wrap the original CPU gufunc with a parallel dispatcher. - - Args - ---- - ctx - numba's codegen context - - innerfunc - llvm function of the original CPU gufunc - - sig - type signature of the gufunc - - inner_ndim - inner dimension of the gufunc - - Details - ------- - - Generate a function of the following signature: - - void ufunc_kernel(char **args, npy_intp *dimensions, npy_intp* steps, - void* data) - - Divide the work equally across all threads and let the last thread take all - the left over. - - - """ - # Declare types and function - byte_t = lc.Type.int(8) - byte_ptr_t = lc.Type.pointer(byte_t) - - intp_t = ctx.get_value_type(types.intp) - - fnty = lc.Type.function(lc.Type.void(), [lc.Type.pointer(byte_ptr_t), - lc.Type.pointer(intp_t), - lc.Type.pointer(intp_t), - byte_ptr_t]) - wrapperlib = ctx.codegen().create_library('parallelufuncwrapper') - mod = wrapperlib.create_ir_module('parallel.gufunc.wrapper') - lfunc = mod.add_function(fnty, name=".kernel." + str(innerfunc)) - - bb_entry = lfunc.append_basic_block('') - - # Function body starts - builder = lc.Builder(bb_entry) - - args, dimensions, steps, data = lfunc.args - - # Release the GIL (and ensure we have the GIL) - # Note: numpy ufunc may not always release the GIL; thus, - # we need to ensure we have the GIL. 
- pyapi = ctx.get_python_api(builder) - gil_state = pyapi.gil_ensure() - thread_state = pyapi.save_thread() - - # Distribute work - total = builder.load(dimensions) - ncpu = lc.Constant.int(total.type, NUM_THREADS) - - count = builder.udiv(total, ncpu) - - count_list = [] - remain = total - - for i in range(NUM_THREADS): - space = cgutils.alloca_once(builder, intp_t, size=inner_ndim + 1) - cgutils.memcpy(builder, space, dimensions, - count=lc.Constant.int(intp_t, inner_ndim + 1)) - count_list.append(space) - - if i == NUM_THREADS - 1: - # Last thread takes all leftover - builder.store(remain, space) - else: - builder.store(count, space) - remain = builder.sub(remain, count) - - # Array count is input signature plus 1 (due to output array) - array_count = len(sig.args) + 1 - - # Get the increment step for each array - steps_list = [] - for i in range(array_count): - ptr = builder.gep(steps, [lc.Constant.int(lc.Type.int(), i)]) - step = builder.load(ptr) - steps_list.append(step) - - # Get the array argument set for each thread - args_list = [] - for i in range(NUM_THREADS): - space = builder.alloca(byte_ptr_t, - size=lc.Constant.int(lc.Type.int(), array_count)) - args_list.append(space) - - for j in range(array_count): - # For each array, compute subarray pointer - dst = builder.gep(space, [lc.Constant.int(lc.Type.int(), j)]) - src = builder.gep(args, [lc.Constant.int(lc.Type.int(), j)]) - - baseptr = builder.load(src) - base = builder.ptrtoint(baseptr, intp_t) - multiplier = lc.Constant.int(count.type, i) - offset = builder.mul(steps_list[j], builder.mul(count, multiplier)) - addr = builder.inttoptr(builder.add(base, offset), baseptr.type) - - builder.store(addr, dst) - - # Declare external functions - add_task_ty = lc.Type.function(lc.Type.void(), [byte_ptr_t] * 5) - empty_fnty = lc.Type.function(lc.Type.void(), ()) - add_task = mod.get_or_insert_function(add_task_ty, name='numba_add_task') - synchronize = mod.get_or_insert_function(empty_fnty, - 
name='numba_synchronize') - ready = mod.get_or_insert_function(empty_fnty, name='numba_ready') - - # Add tasks for queue; one per thread - as_void_ptr = lambda arg: builder.bitcast(arg, byte_ptr_t) - - # Note: the runtime address is taken and used as a constant in the function. - fnptr = ctx.get_constant(types.uintp, innerfunc).inttoptr(byte_ptr_t) - for each_args, each_dims in zip(args_list, count_list): - innerargs = [as_void_ptr(x) for x - in [each_args, each_dims, steps, data]] - builder.call(add_task, [fnptr] + innerargs) - - # Signal worker that we are ready - builder.call(ready, ()) - # Wait for workers - builder.call(synchronize, ()) - # Release the GIL - pyapi.restore_thread(thread_state) - pyapi.gil_release(gil_state) - - builder.ret_void() - - wrapperlib.add_ir_module(mod) - wrapperlib.add_linking_library(library) - return wrapperlib.get_pointer_to_function(lfunc.name), lfunc.name - - -# --------------------------------------------------------------------------- - - -def _launch_threads(): - """ - Initialize work queues and workers - """ - from . import workqueue as lib - from ctypes import CFUNCTYPE, c_int - - launch_threads = CFUNCTYPE(None, c_int)(lib.launch_threads) - launch_threads(NUM_THREADS) - - -_is_initialized = False - -def _init(): - from . 
import workqueue as lib - from ctypes import CFUNCTYPE, c_void_p - - global _is_initialized - if _is_initialized: - return - - ll.add_symbol('numba_add_task', lib.add_task) - ll.add_symbol('numba_synchronize', lib.synchronize) - ll.add_symbol('numba_ready', lib.ready) - ll.add_symbol('do_scheduling_signed', lib.do_scheduling_signed) - ll.add_symbol('do_scheduling_unsigned', lib.do_scheduling_unsigned) - - _is_initialized = True - - -_DYLD_WORKAROUND_SET = 'NUMBA_DYLD_WORKAROUND' in os.environ -_DYLD_WORKAROUND_VAL = int(os.environ.get('NUMBA_DYLD_WORKAROUND', 0)) - -if _DYLD_WORKAROUND_SET and _DYLD_WORKAROUND_VAL: - _launch_threads() diff --git a/numba/numba/npyufunc/parfor.py b/numba/numba/npyufunc/parfor.py deleted file mode 100644 index 8bc3006e7..000000000 --- a/numba/numba/npyufunc/parfor.py +++ /dev/null @@ -1,990 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import ast -from collections import defaultdict, OrderedDict -import sys -import copy -import numpy as np - -import llvmlite.llvmpy.core as lc -import llvmlite.ir.values as liv - -import numba -from .. import compiler, ir, types, six, cgutils, sigutils, lowering, parfor -from numba.ir_utils import (add_offset_to_labels, replace_var_names, - remove_dels, legalize_names, mk_unique_var, - rename_labels, get_name_var_table, visit_vars_inner, - get_definition, guard, find_callname, - get_call_table, is_pure, - get_unused_var_name, find_potential_aliases) -from numba.analysis import (compute_use_defs, compute_live_map, - compute_dead_maps, compute_cfg_from_blocks) -from ..typing import signature -from numba import config -from numba.targets.cpu import ParallelOptions -from numba.six import exec_ - - -def _lower_parfor_parallel(lowerer, parfor): - """Lowerer that handles LLVM code generation for parfor. - This function lowers a parfor IR node to LLVM. 
- The general approach is as follows: - 1) The code from the parfor's init block is lowered normally - in the context of the current function. - 2) The body of the parfor is transformed into a gufunc function. - 3) Code is inserted into the main function that calls do_scheduling - to divide the iteration space for each thread, allocates - reduction arrays, calls the gufunc function, and then invokes - the reduction function across the reduction arrays to produce - the final reduction values. - """ - typingctx = lowerer.context.typing_context - targetctx = lowerer.context - typemap = lowerer.fndesc.typemap - - if config.DEBUG_ARRAY_OPT: - print("_lower_parfor_parallel") - parfor.dump() - - # produce instructions for init_block - if config.DEBUG_ARRAY_OPT: - print("init_block = ", parfor.init_block, " ", type(parfor.init_block)) - for instr in parfor.init_block.body: - if config.DEBUG_ARRAY_OPT: - print("lower init_block instr = ", instr) - lowerer.lower_inst(instr) - - alias_map = {} - arg_aliases = {} - numba.parfor.find_potential_aliases_parfor(parfor, parfor.params, typemap, - lowerer.func_ir, alias_map, arg_aliases) - if config.DEBUG_ARRAY_OPT: - print("alias_map", alias_map) - print("arg_aliases", arg_aliases) - - # run get_parfor_outputs() and get_parfor_reductions() before gufunc creation - # since Jumps are modified so CFG of loop_body dict will become invalid - assert parfor.params != None - - parfor_output_arrays = numba.parfor.get_parfor_outputs( - parfor, parfor.params) - parfor_redvars, parfor_reddict = numba.parfor.get_parfor_reductions( - parfor, parfor.params, lowerer.fndesc.calltypes) - # compile parfor body as a separate function to be used with GUFuncWrapper - flags = copy.copy(parfor.flags) - flags.set('error_model', 'numpy') - # Can't get here unless flags.set('auto_parallel', ParallelOptions(True)) - index_var_typ = typemap[parfor.loop_nests[0].index_variable.name] - # index variables should have the same type, check rest of indices - for l in 
parfor.loop_nests[1:]: - assert typemap[l.index_variable.name] == index_var_typ - numba.parfor.sequential_parfor_lowering = True - func, func_args, func_sig = _create_gufunc_for_parfor_body( - lowerer, parfor, typemap, typingctx, targetctx, flags, {}, - bool(alias_map), index_var_typ) - numba.parfor.sequential_parfor_lowering = False - - # get the shape signature - get_shape_classes = parfor.get_shape_classes - func_args = ['sched'] + func_args - num_reductions = len(parfor_redvars) - num_inputs = len(func_args) - len(parfor_output_arrays) - num_reductions - if config.DEBUG_ARRAY_OPT: - print("num_inputs = ", num_inputs) - print("parfor_outputs = ", parfor_output_arrays) - print("parfor_redvars = ", parfor_redvars) - gu_signature = _create_shape_signature( - get_shape_classes, - num_inputs, - num_reductions, - func_args, - func_sig) - if config.DEBUG_ARRAY_OPT: - print("gu_signature = ", gu_signature) - - # call the func in parallel by wrapping it with ParallelGUFuncBuilder - loop_ranges = [(l.start, l.stop, l.step) for l in parfor.loop_nests] - if config.DEBUG_ARRAY_OPT: - print("loop_nests = ", parfor.loop_nests) - print("loop_ranges = ", loop_ranges) - call_parallel_gufunc( - lowerer, - func, - gu_signature, - func_sig, - func_args, - loop_ranges, - parfor_redvars, - parfor_reddict, - parfor.init_block, - index_var_typ) - if config.DEBUG_ARRAY_OPT: - sys.stdout.flush() - - -# A work-around to prevent circular imports -lowering.lower_extensions[parfor.Parfor] = _lower_parfor_parallel - - -def _create_shape_signature( - get_shape_classes, - num_inputs, - num_reductions, - args, - func_sig): - '''Create shape signature for GUFunc - ''' - if config.DEBUG_ARRAY_OPT: - print("_create_shape_signature", num_inputs, num_reductions, args, func_sig) - for i in args[1:]: - print("argument", i, type(i), get_shape_classes(i)) - - num_inouts = len(args) - num_reductions - # maximum class number for array shapes - classes = [get_shape_classes(var) for var in args[1:]] - 
class_set = set() - for _class in classes: - if _class: - for i in _class: - class_set.add(i) - max_class = max(class_set) + 1 if class_set else 0 - classes.insert(0, (max_class,)) # force set the class of 'sched' argument - class_set.add(max_class) - class_map = {} - # TODO: use prefix + class number instead of single char - alphabet = ord('a') - for n in class_set: - if n >= 0: - class_map[n] = chr(alphabet) - alphabet += 1 - - alpha_dict = {'latest_alpha' : alphabet} - - def bump_alpha(c, class_map): - if c >= 0: - return class_map[c] - else: - alpha_dict['latest_alpha'] += 1 - return chr(alpha_dict['latest_alpha']) - - gu_sin = [] - gu_sout = [] - count = 0 - syms_sin = () - for cls in classes: - # print("create_shape_signature: var = ", var, " typ = ", typ) - count = count + 1 - if cls: - dim_syms = tuple(bump_alpha(c, class_map) for c in cls) - else: - dim_syms = () - if (count > num_inouts): - # assume all reduction vars are scalar - gu_sout.append(()) - elif count > num_inputs and all([s in syms_sin for s in dim_syms]): - # only when dim_syms are found in gu_sin, we consider this as - # output - gu_sout.append(dim_syms) - else: - gu_sin.append(dim_syms) - syms_sin += dim_syms - return (gu_sin, gu_sout) - -def _print_block(block): - for i, inst in enumerate(block.body): - print(" ", i, " ", inst) - -def _print_body(body_dict): - '''Pretty-print a set of IR blocks. 
- ''' - for label, block in body_dict.items(): - print("label: ", label) - _print_block(block) - - -def wrap_loop_body(loop_body): - blocks = loop_body.copy() # shallow copy is enough - first_label = min(blocks.keys()) - last_label = max(blocks.keys()) - loc = blocks[last_label].loc - blocks[last_label].body.append(ir.Jump(first_label, loc)) - return blocks - -def unwrap_loop_body(loop_body): - last_label = max(loop_body.keys()) - loop_body[last_label].body = loop_body[last_label].body[:-1] - -def compute_def_once_block(block, def_once, def_more): - assignments = block.find_insts(ir.Assign) - for one_assign in assignments: - a_def = one_assign.target.name - if a_def in def_more: - pass - elif a_def in def_once: - def_more.add(a_def) - def_once.remove(a_def) - else: - def_once.add(a_def) - -def compute_def_once_internal(loop_body, def_once, def_more): - for label, block in loop_body.items(): - compute_def_once_block(block, def_once, def_more) - for inst in block.body: - if isinstance(inst, parfor.Parfor): - compute_def_once_block(inst.init_block, def_once, def_more) - compute_def_once_internal(inst.loop_body, def_once, def_more) - -def compute_def_once(loop_body): - def_once = set() - def_more = set() - compute_def_once_internal(loop_body, def_once, def_more) - return def_once - -def find_vars(var, varset): - assert isinstance(var, ir.Var) - varset.add(var.name) - return var - -def _hoist_internal(inst, dep_on_param, call_table, hoisted, typemap): - uses = set() - visit_vars_inner(inst.value, find_vars, uses) - diff = uses.difference(dep_on_param) - if len(diff) == 0 and is_pure(inst.value, None, call_table): - if config.DEBUG_ARRAY_OPT == 1: - print("Will hoist instruction", inst) - hoisted.append(inst) - if not isinstance(typemap[inst.target.name], types.npytypes.Array): - dep_on_param += [inst.target.name] - return True - elif config.DEBUG_ARRAY_OPT == 1: - if len(diff) > 0: - print("Instruction", inst, " could not be hoisted because of a dependency.") - else: - 
print("Instruction", inst, " could not be hoisted because it isn't pure.") - return False - -def find_setitems_block(setitems, block): - for inst in block.body: - if isinstance(inst, ir.StaticSetItem) or isinstance(inst, ir.SetItem): - setitems.add(inst.target.name) - elif isinstance(inst, parfor.Parfor): - find_setitems_block(setitems, inst.init_block) - find_setitems_body(setitems, inst.loop_body) - -def find_setitems_body(setitems, loop_body): - for label, block in loop_body.items(): - find_setitems_block(setitems, block) - -def hoist(parfor_params, loop_body, typemap, wrapped_blocks): - dep_on_param = copy.copy(parfor_params) - hoisted = [] - - def_once = compute_def_once(loop_body) - (call_table, reverse_call_table) = get_call_table(wrapped_blocks) - - setitems = set() - find_setitems_body(setitems, loop_body) - dep_on_param = list(set(dep_on_param).difference(setitems)) - - for label, block in loop_body.items(): - new_block = [] - for inst in block.body: - if isinstance(inst, ir.Assign) and inst.target.name in def_once: - if _hoist_internal(inst, dep_on_param, call_table, - hoisted, typemap): - # don't add this instuction to the block since it is hoisted - continue - elif isinstance(inst, parfor.Parfor): - new_init_block = [] - if config.DEBUG_ARRAY_OPT == 1: - print("parfor") - inst.dump() - for ib_inst in inst.init_block.body: - if (isinstance(ib_inst, ir.Assign) and - ib_inst.target.name in def_once): - if _hoist_internal(ib_inst, dep_on_param, call_table, - hoisted, typemap): - # don't add this instuction to the block since it is hoisted - continue - new_init_block.append(ib_inst) - inst.init_block.body = new_init_block - - new_block.append(inst) - block.body = new_block - return hoisted - -def _create_gufunc_for_parfor_body( - lowerer, - parfor, - typemap, - typingctx, - targetctx, - flags, - locals, - has_aliases, - index_var_typ): - ''' - Takes a parfor and creates a gufunc function for its body. - There are two parts to this function. 
- 1) Code to iterate across the iteration space as defined by the schedule. - 2) The parfor body that does the work for a single point in the iteration space. - Part 1 is created as Python text for simplicity with a sentinel assignment to mark the point - in the IR where the parfor body should be added. - This Python text is 'exec'ed into existence and its IR retrieved with run_frontend. - The IR is scanned for the sentinel assignment where that basic block is split and the IR - for the parfor body inserted. - ''' - - # The parfor body and the main function body share ir.Var nodes. - # We have to do some replacements of Var names in the parfor body to make them - # legal parameter names. If we don't copy then the Vars in the main function also - # would incorrectly change their name. - loop_body = copy.copy(parfor.loop_body) - remove_dels(loop_body) - - parfor_dim = len(parfor.loop_nests) - loop_indices = [l.index_variable.name for l in parfor.loop_nests] - - # Get all the parfor params. - parfor_params = parfor.params - # Get just the outputs of the parfor. - parfor_outputs = numba.parfor.get_parfor_outputs(parfor, parfor_params) - # Get all parfor reduction vars, and operators. - parfor_redvars, parfor_reddict = numba.parfor.get_parfor_reductions( - parfor, parfor_params, lowerer.fndesc.calltypes) - # Compute just the parfor inputs as a set difference. - parfor_inputs = sorted( - list( - set(parfor_params) - - set(parfor_outputs) - - set(parfor_redvars))) - - if config.DEBUG_ARRAY_OPT == 1: - print("parfor_params = ", parfor_params, " ", type(parfor_params)) - print("parfor_outputs = ", parfor_outputs, " ", type(parfor_outputs)) - print("parfor_inputs = ", parfor_inputs, " ", type(parfor_inputs)) - print("parfor_redvars = ", parfor_redvars, " ", type(parfor_redvars)) - - # Reduction variables are represented as arrays, so they go under - # different names. 
- parfor_redarrs = [] - for var in parfor_redvars: - arr = var + "_arr" - parfor_redarrs.append(arr) - typemap[arr] = types.npytypes.Array(typemap[var], 1, "C") - - # Reorder all the params so that inputs go first then outputs. - parfor_params = parfor_inputs + parfor_outputs + parfor_redarrs - - if config.DEBUG_ARRAY_OPT == 1: - print("parfor_params = ", parfor_params, " ", type(parfor_params)) - print("loop_indices = ", loop_indices, " ", type(loop_indices)) - print("loop_body = ", loop_body, " ", type(loop_body)) - _print_body(loop_body) - - # Some Var are not legal parameter names so create a dict of potentially illegal - # param name to guaranteed legal name. - param_dict = legalize_names(parfor_params + parfor_redvars) - if config.DEBUG_ARRAY_OPT == 1: - print( - "param_dict = ", - sorted( - param_dict.items()), - " ", - type(param_dict)) - - # Some loop_indices are not legal parameter names so create a dict of potentially illegal - # loop index to guaranteed legal name. - ind_dict = legalize_names(loop_indices) - # Compute a new list of legal loop index names. - legal_loop_indices = [ind_dict[v] for v in loop_indices] - if config.DEBUG_ARRAY_OPT == 1: - print("ind_dict = ", sorted(ind_dict.items()), " ", type(ind_dict)) - print( - "legal_loop_indices = ", - legal_loop_indices, - " ", - type(legal_loop_indices)) - for pd in parfor_params: - print("pd = ", pd) - print("pd type = ", typemap[pd], " ", type(typemap[pd])) - - # Get the types of each parameter. - param_types = [typemap[v] for v in parfor_params] - # if config.DEBUG_ARRAY_OPT==1: - # param_types_dict = { v:typemap[v] for v in parfor_params } - # print("param_types_dict = ", param_types_dict, " ", type(param_types_dict)) - # print("param_types = ", param_types, " ", type(param_types)) - - # Replace illegal parameter names in the loop body with legal ones. 
- replace_var_names(loop_body, param_dict) - # remember the name before legalizing as the actual arguments - parfor_args = parfor_params - # Change parfor_params to be legal names. - parfor_params = [param_dict[v] for v in parfor_params] - parfor_params_orig = parfor_params - - parfor_params = [] - ascontig = False - for pindex in range(len(parfor_params_orig)): - if ascontig and pindex < len(parfor_inputs) and isinstance(param_types[pindex], types.npytypes.Array): - parfor_params.append(parfor_params_orig[pindex]+"param") - else: - parfor_params.append(parfor_params_orig[pindex]) - - # Change parfor body to replace illegal loop index vars with legal ones. - replace_var_names(loop_body, ind_dict) - loop_body_var_table = get_name_var_table(loop_body) - sentinel_name = get_unused_var_name("__sentinel__", loop_body_var_table) - - if config.DEBUG_ARRAY_OPT == 1: - print( - "legal parfor_params = ", - parfor_params, - " ", - type(parfor_params)) - - # Determine the unique names of the scheduling and gufunc functions. - # sched_func_name = "__numba_parfor_sched_%s" % (hex(hash(parfor)).replace("-", "_")) - gufunc_name = "__numba_parfor_gufunc_%s" % ( - hex(hash(parfor)).replace("-", "_")) - if config.DEBUG_ARRAY_OPT: - # print("sched_func_name ", type(sched_func_name), " ", sched_func_name) - print("gufunc_name ", type(gufunc_name), " ", gufunc_name) - - gufunc_txt = "" - - # Create the gufunc function. 
- gufunc_txt += "def " + gufunc_name + \ - "(sched, " + (", ".join(parfor_params)) + "):\n" - - for pindex in range(len(parfor_inputs)): - if ascontig and isinstance(param_types[pindex], types.npytypes.Array): - gufunc_txt += (" " + parfor_params_orig[pindex] - + " = np.ascontiguousarray(" + parfor_params[pindex] + ")\n") - - # Add initialization of reduction variables - for arr, var in zip(parfor_redarrs, parfor_redvars): - gufunc_txt += " " + param_dict[var] + \ - "=" + param_dict[arr] + "[0]\n" - - # For each dimension of the parfor, create a for loop in the generated gufunc function. - # Iterate across the proper values extracted from the schedule. - # The form of the schedule is start_dim0, start_dim1, ..., start_dimN, end_dim0, - # end_dim1, ..., end_dimN - for eachdim in range(parfor_dim): - for indent in range(eachdim + 1): - gufunc_txt += " " - sched_dim = eachdim - gufunc_txt += ("for " + - legal_loop_indices[eachdim] + - " in range(sched[" + - str(sched_dim) + - "], sched[" + - str(sched_dim + - parfor_dim) + - "] + np.uint8(1)):\n") - - if config.DEBUG_ARRAY_OPT_RUNTIME: - for indent in range(parfor_dim + 1): - gufunc_txt += " " - gufunc_txt += "print(" - for eachdim in range(parfor_dim): - gufunc_txt += "\"" + legal_loop_indices[eachdim] + "\"," + legal_loop_indices[eachdim] + "," - gufunc_txt += ")\n" - - # Add the sentinel assignment so that we can find the loop body position - # in the IR. - for indent in range(parfor_dim + 1): - gufunc_txt += " " - gufunc_txt += sentinel_name + " = 0\n" - # Add assignments of reduction variables (for returning the value) - for arr, var in zip(parfor_redarrs, parfor_redvars): - gufunc_txt += " " + param_dict[arr] + \ - "[0] = " + param_dict[var] + "\n" - gufunc_txt += " return None\n" - - if config.DEBUG_ARRAY_OPT: - print("gufunc_txt = ", type(gufunc_txt), "\n", gufunc_txt) - # Force gufunc outline into existence. 
- globls = {"np": np} - locls = {} - exec_(gufunc_txt, globls, locls) - gufunc_func = locls[gufunc_name] - - if config.DEBUG_ARRAY_OPT: - print("gufunc_func = ", type(gufunc_func), "\n", gufunc_func) - # Get the IR for the gufunc outline. - gufunc_ir = compiler.run_frontend(gufunc_func) - - if config.DEBUG_ARRAY_OPT: - print("gufunc_ir dump ", type(gufunc_ir)) - gufunc_ir.dump() - print("loop_body dump ", type(loop_body)) - _print_body(loop_body) - - # rename all variables in gufunc_ir afresh - var_table = get_name_var_table(gufunc_ir.blocks) - new_var_dict = {} - reserved_names = [sentinel_name] + \ - list(param_dict.values()) + legal_loop_indices - for name, var in var_table.items(): - if not (name in reserved_names): - new_var_dict[name] = mk_unique_var(name) - replace_var_names(gufunc_ir.blocks, new_var_dict) - if config.DEBUG_ARRAY_OPT: - print("gufunc_ir dump after renaming ") - gufunc_ir.dump() - - gufunc_param_types = [ - numba.types.npytypes.Array( - index_var_typ, 1, "C")] + param_types - if config.DEBUG_ARRAY_OPT: - print( - "gufunc_param_types = ", - type(gufunc_param_types), - "\n", - gufunc_param_types) - - gufunc_stub_last_label = max(gufunc_ir.blocks.keys()) + 1 - - # Add gufunc stub last label to each parfor.loop_body label to prevent - # label conflicts. - loop_body = add_offset_to_labels(loop_body, gufunc_stub_last_label) - # new label for splitting sentinel block - new_label = max(loop_body.keys()) + 1 - - # If enabled, add a print statement after every assignment. 
- if config.DEBUG_ARRAY_OPT_RUNTIME: - for label, block in loop_body.items(): - new_block = block.copy() - new_block.clear() - loc = block.loc - scope = block.scope - for inst in block.body: - new_block.append(inst) - # Append print after assignment - if isinstance(inst, ir.Assign): - # Only apply to numbers - if typemap[inst.target.name] not in types.number_domain: - continue - - # Make constant string - strval = "{} =".format(inst.target.name) - strconsttyp = types.Const(strval) - - lhs = ir.Var(scope, mk_unique_var("str_const"), loc) - assign_lhs = ir.Assign(value=ir.Const(value=strval, loc=loc), - target=lhs, loc=loc) - typemap[lhs.name] = strconsttyp - new_block.append(assign_lhs) - - # Make print node - print_node = ir.Print(args=[lhs, inst.target], vararg=None, loc=loc) - new_block.append(print_node) - sig = numba.typing.signature(types.none, - typemap[lhs.name], - typemap[inst.target.name]) - lowerer.fndesc.calltypes[print_node] = sig - loop_body[label] = new_block - - if config.DEBUG_ARRAY_OPT: - print("parfor loop body") - _print_body(loop_body) - - wrapped_blocks = wrap_loop_body(loop_body) - hoisted = hoist(parfor_params, loop_body, typemap, wrapped_blocks) - start_block = gufunc_ir.blocks[min(gufunc_ir.blocks.keys())] - start_block.body = start_block.body[:-1] + hoisted + [start_block.body[-1]] - unwrap_loop_body(loop_body) - - if config.DEBUG_ARRAY_OPT: - print("After hoisting") - _print_body(loop_body) - - # Search all the block in the gufunc outline for the sentinel assignment. - for label, block in gufunc_ir.blocks.items(): - for i, inst in enumerate(block.body): - if isinstance( - inst, - ir.Assign) and inst.target.name == sentinel_name: - # We found the sentinel assignment. - loc = inst.loc - scope = block.scope - # split block across __sentinel__ - # A new block is allocated for the statements prior to the sentinel - # but the new block maintains the current block label. 
- prev_block = ir.Block(scope, loc) - prev_block.body = block.body[:i] - # The current block is used for statements after the sentinel. - block.body = block.body[i + 1:] - # But the current block gets a new label. - body_first_label = min(loop_body.keys()) - - # The previous block jumps to the minimum labelled block of the - # parfor body. - prev_block.append(ir.Jump(body_first_label, loc)) - # Add all the parfor loop body blocks to the gufunc function's - # IR. - for (l, b) in loop_body.items(): - gufunc_ir.blocks[l] = b - body_last_label = max(loop_body.keys()) - gufunc_ir.blocks[new_label] = block - gufunc_ir.blocks[label] = prev_block - # Add a jump from the last parfor body block to the block containing - # statements after the sentinel. - gufunc_ir.blocks[body_last_label].append( - ir.Jump(new_label, loc)) - break - else: - continue - break - - if config.DEBUG_ARRAY_OPT: - print("gufunc_ir last dump before renaming") - gufunc_ir.dump() - - gufunc_ir.blocks = rename_labels(gufunc_ir.blocks) - remove_dels(gufunc_ir.blocks) - - if config.DEBUG_ARRAY_OPT: - print("gufunc_ir last dump") - gufunc_ir.dump() - print("flags", flags) - print("typemap", typemap) - - old_alias = flags.noalias - if not has_aliases: - if config.DEBUG_ARRAY_OPT: - print("No aliases found so adding noalias flag.") - flags.noalias = True - kernel_func = compiler.compile_ir( - typingctx, - targetctx, - gufunc_ir, - gufunc_param_types, - types.none, - flags, - locals) - - flags.noalias = old_alias - - kernel_sig = signature(types.none, *gufunc_param_types) - if config.DEBUG_ARRAY_OPT: - print("kernel_sig = ", kernel_sig) - - return kernel_func, parfor_args, kernel_sig - - -def call_parallel_gufunc(lowerer, cres, gu_signature, outer_sig, expr_args, - loop_ranges, redvars, reddict, init_block, index_var_typ): - ''' - Adds the call to the gufunc function from the main function. 
- ''' - context = lowerer.context - builder = lowerer.builder - library = lowerer.library - - from .parallel import (ParallelGUFuncBuilder, build_gufunc_wrapper, - get_thread_count, _launch_threads, _init) - - if config.DEBUG_ARRAY_OPT: - print("make_parallel_loop") - print("args = ", expr_args) - print("outer_sig = ", outer_sig.args, outer_sig.return_type, - outer_sig.recvr, outer_sig.pysig) - print("loop_ranges = ", loop_ranges) - - # Build the wrapper for GUFunc - args, return_type = sigutils.normalize_signature(outer_sig) - llvm_func = cres.library.get_function(cres.fndesc.llvm_func_name) - sin, sout = gu_signature - - # These are necessary for build_gufunc_wrapper to find external symbols - _launch_threads() - _init() - - wrapper_ptr, env, wrapper_name = build_gufunc_wrapper(llvm_func, cres, sin, - sout, {}) - cres.library._ensure_finalized() - - if config.DEBUG_ARRAY_OPT: - print("parallel function = ", wrapper_name, cres) - - # loadvars for loop_ranges - def load_range(v): - if isinstance(v, ir.Var): - return lowerer.loadvar(v.name) - else: - return context.get_constant(types.uintp, v) - - num_dim = len(loop_ranges) - for i in range(num_dim): - start, stop, step = loop_ranges[i] - start = load_range(start) - stop = load_range(stop) - assert(step == 1) # We do not support loop steps other than 1 - step = load_range(step) - loop_ranges[i] = (start, stop, step) - - if config.DEBUG_ARRAY_OPT: - print("call_parallel_gufunc loop_ranges[{}] = ".format(i), start, - stop, step) - cgutils.printf(builder, "loop range[{}]: %d %d (%d)\n".format(i), - start, stop, step) - - # Commonly used LLVM types and constants - byte_t = lc.Type.int(8) - byte_ptr_t = lc.Type.pointer(byte_t) - byte_ptr_ptr_t = lc.Type.pointer(byte_ptr_t) - intp_t = context.get_value_type(types.intp) - uintp_t = context.get_value_type(types.uintp) - intp_ptr_t = lc.Type.pointer(intp_t) - uintp_ptr_t = lc.Type.pointer(uintp_t) - zero = context.get_constant(types.uintp, 0) - one = 
context.get_constant(types.uintp, 1) - one_type = one.type - sizeof_intp = context.get_abi_sizeof(intp_t) - - # Prepare sched, first pop it out of expr_args, outer_sig, and gu_signature - sched_name = expr_args.pop(0) - sched_typ = outer_sig.args[0] - sched_sig = sin.pop(0) - - if config.DEBUG_ARRAY_OPT: - print("Parfor has potentially negative start", index_var_typ.signed) - - if index_var_typ.signed: - sched_type = intp_t - sched_ptr_type = intp_ptr_t - else: - sched_type = uintp_t - sched_ptr_type = uintp_ptr_t - - # Call do_scheduling with appropriate arguments - dim_starts = cgutils.alloca_once( - builder, sched_type, size=context.get_constant( - types.uintp, num_dim), name="dims") - dim_stops = cgutils.alloca_once( - builder, sched_type, size=context.get_constant( - types.uintp, num_dim), name="dims") - for i in range(num_dim): - start, stop, step = loop_ranges[i] - if start.type != one_type: - start = builder.sext(start, one_type) - if stop.type != one_type: - stop = builder.sext(stop, one_type) - if step.type != one_type: - step = builder.sext(step, one_type) - # substract 1 because do-scheduling takes inclusive ranges - stop = builder.sub(stop, one) - builder.store( - start, builder.gep( - dim_starts, [ - context.get_constant( - types.uintp, i)])) - builder.store(stop, builder.gep(dim_stops, - [context.get_constant(types.uintp, i)])) - - sched_size = get_thread_count() * num_dim * 2 - sched = cgutils.alloca_once( - builder, sched_type, size=context.get_constant( - types.uintp, sched_size), name="sched") - debug_flag = 1 if config.DEBUG_ARRAY_OPT else 0 - scheduling_fnty = lc.Type.function( - intp_ptr_t, [uintp_t, sched_ptr_type, sched_ptr_type, uintp_t, sched_ptr_type, intp_t]) - if index_var_typ.signed: - do_scheduling = builder.module.get_or_insert_function(scheduling_fnty, - name="do_scheduling_signed") - else: - do_scheduling = builder.module.get_or_insert_function(scheduling_fnty, - name="do_scheduling_unsigned") - - builder.call( - do_scheduling, [ - 
context.get_constant( - types.uintp, num_dim), dim_starts, dim_stops, context.get_constant( - types.uintp, get_thread_count()), sched, context.get_constant( - types.intp, debug_flag)]) - - # init reduction array allocation here. - nredvars = len(redvars) - ninouts = len(expr_args) - nredvars - redarrs = [] - for i in range(nredvars): - redvar_typ = lowerer.fndesc.typemap[redvars[i]] - # we need to use the default initial value instead of existing value in - # redvar if available - init_val = reddict[redvars[i]][0] - if init_val != None: - val = context.get_constant(redvar_typ, init_val) - else: - val = lowerer.loadvar(redvars[i]) - typ = context.get_value_type(redvar_typ) - size = get_thread_count() - arr = cgutils.alloca_once(builder, typ, - size=context.get_constant(types.uintp, size)) - redarrs.append(arr) - for j in range(size): - dst = builder.gep(arr, [context.get_constant(types.uintp, j)]) - builder.store(val, dst) - - if config.DEBUG_ARRAY_OPT: - for i in range(get_thread_count()): - cgutils.printf(builder, "sched[" + str(i) + "] = ") - for j in range(num_dim * 2): - cgutils.printf( - builder, "%d ", builder.load( - builder.gep( - sched, [ - context.get_constant( - types.intp, i * num_dim * 2 + j)]))) - cgutils.printf(builder, "\n") - - # Prepare arguments: args, shapes, steps, data - all_args = [lowerer.loadvar(x) for x in expr_args[:ninouts]] + redarrs - num_args = len(all_args) - num_inps = len(sin) + 1 - args = cgutils.alloca_once( - builder, - byte_ptr_t, - size=context.get_constant( - types.intp, - 1 + num_args), - name="pargs") - array_strides = [] - # sched goes first - builder.store(builder.bitcast(sched, byte_ptr_t), args) - array_strides.append(context.get_constant(types.intp, sizeof_intp)) - # followed by other arguments - for i in range(num_args): - arg = all_args[i] - aty = outer_sig.args[i + 1] # skip first argument sched - dst = builder.gep(args, [context.get_constant(types.intp, i + 1)]) - if i >= ninouts: # reduction variables - 
builder.store(builder.bitcast(arg, byte_ptr_t), dst) - elif isinstance(aty, types.ArrayCompatible): - ary = context.make_array(aty)(context, builder, arg) - strides = cgutils.unpack_tuple(builder, ary.strides, aty.ndim) - for j in range(len(strides)): - array_strides.append(strides[j]) - builder.store(builder.bitcast(ary.data, byte_ptr_t), dst) - else: - if i < num_inps: - # Scalar input, need to store the value in an array of size 1 - typ = context.get_data_type( - aty) if aty != types.boolean else lc.Type.int(1) - ptr = cgutils.alloca_once(builder, typ) - builder.store(arg, ptr) - else: - # Scalar output, must allocate - typ = context.get_data_type( - aty) if aty != types.boolean else lc.Type.int(1) - ptr = cgutils.alloca_once(builder, typ) - builder.store(builder.bitcast(ptr, byte_ptr_t), dst) - - # Next, we prepare the individual dimension info recorded in gu_signature - sig_dim_dict = {} - occurances = [] - occurances = [sched_sig[0]] - sig_dim_dict[sched_sig[0]] = context.get_constant(types.intp, 2 * num_dim) - for var, arg, aty, gu_sig in zip(expr_args[:ninouts], all_args[:ninouts], - outer_sig.args[1:], sin + sout): - if config.DEBUG_ARRAY_OPT: - print("var = ", var, " gu_sig = ", gu_sig) - i = 0 - for dim_sym in gu_sig: - if config.DEBUG_ARRAY_OPT: - print("var = ", var, " type = ", aty) - ary = context.make_array(aty)(context, builder, arg) - shapes = cgutils.unpack_tuple(builder, ary.shape, aty.ndim) - sig_dim_dict[dim_sym] = shapes[i] - if not (dim_sym in occurances): - if config.DEBUG_ARRAY_OPT: - print("dim_sym = ", dim_sym, ", i = ", i) - cgutils.printf(builder, dim_sym + " = %d\n", shapes[i]) - occurances.append(dim_sym) - i = i + 1 - - # Prepare shapes, which is a single number (outer loop size), followed by - # the size of individual shape variables. 
- nshapes = len(sig_dim_dict) + 1 - shapes = cgutils.alloca_once(builder, intp_t, size=nshapes, name="pshape") - # For now, outer loop size is the same as number of threads - builder.store(context.get_constant(types.intp, get_thread_count()), shapes) - # Individual shape variables go next - i = 1 - for dim_sym in occurances: - if config.DEBUG_ARRAY_OPT: - cgutils.printf(builder, dim_sym + " = %d\n", sig_dim_dict[dim_sym]) - builder.store( - sig_dim_dict[dim_sym], builder.gep( - shapes, [ - context.get_constant( - types.intp, i)])) - i = i + 1 - - # Prepare steps for each argument. Note that all steps are counted in - # bytes. - num_steps = num_args + 1 + len(array_strides) - steps = cgutils.alloca_once( - builder, intp_t, size=context.get_constant( - types.intp, num_steps), name="psteps") - # First goes the step size for sched, which is 2 * num_dim - builder.store(context.get_constant(types.intp, 2 * num_dim * sizeof_intp), - steps) - # The steps for all others are 0. (TODO: except reduction results) - for i in range(num_args): - if i >= ninouts: # steps for reduction vars are abi_sizeof(typ) - j = i - ninouts - typ = context.get_value_type(lowerer.fndesc.typemap[redvars[j]]) - sizeof = context.get_abi_sizeof(typ) - stepsize = context.get_constant(types.intp, sizeof) - else: - # steps are strides - stepsize = zero - dst = builder.gep(steps, [context.get_constant(types.intp, 1 + i)]) - builder.store(stepsize, dst) - for j in range(len(array_strides)): - dst = builder.gep( - steps, [ - context.get_constant( - types.intp, 1 + num_args + j)]) - builder.store(array_strides[j], dst) - - # prepare data - data = builder.inttoptr(zero, byte_ptr_t) - - fnty = lc.Type.function(lc.Type.void(), [byte_ptr_ptr_t, intp_ptr_t, - intp_ptr_t, byte_ptr_t]) - fn = builder.module.get_or_insert_function(fnty, name=wrapper_name) - if config.DEBUG_ARRAY_OPT: - cgutils.printf(builder, "before calling kernel %p\n", fn) - result = builder.call(fn, [args, shapes, steps, data]) - if 
config.DEBUG_ARRAY_OPT: - cgutils.printf(builder, "after calling kernel %p\n", fn) - - scope = init_block.scope - loc = init_block.loc - calltypes = lowerer.fndesc.calltypes - # Accumulate all reduction arrays back to a single value - for i in range(get_thread_count()): - for name, arr in zip(redvars, redarrs): - tmpname = mk_unique_var(name) - src = builder.gep(arr, [context.get_constant(types.intp, i)]) - val = builder.load(src) - vty = lowerer.fndesc.typemap[name] - lowerer.fndesc.typemap[tmpname] = vty - lowerer.storevar(val, tmpname) - tmpvar = ir.Var(scope, tmpname, loc) - tmp_assign = ir.Assign(tmpvar, ir.Var(scope, name+"#init", loc), loc) - if name+"#init" not in lowerer.fndesc.typemap: - lowerer.fndesc.typemap[name+"#init"] = vty - lowerer.lower_inst(tmp_assign) - # generate code for combining reduction variable with thread output - for inst in reddict[name][1]: - lowerer.lower_inst(inst) - - # TODO: scalar output must be assigned back to corresponding output - # variables - return diff --git a/numba/numba/npyufunc/sigparse.py b/numba/numba/npyufunc/sigparse.py deleted file mode 100644 index 722d26b1f..000000000 --- a/numba/numba/npyufunc/sigparse.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import absolute_import, print_function, division -import tokenize -import string -from numba import utils - - -def parse_signature(sig): - '''Parse generalized ufunc signature. - - NOTE: ',' (COMMA) is a delimiter; not separator. - This means trailing comma is legal. 
- ''' - def stripws(s): - return ''.join(c for c in s if c not in string.whitespace) - - def tokenizer(src): - def readline(): - yield src - gen = readline() - return tokenize.generate_tokens(lambda: next(gen)) - - def parse(src): - tokgen = tokenizer(src) - while True: - tok = next(tokgen) - if tok[1] == '(': - symbols = [] - while True: - tok = next(tokgen) - if tok[1] == ')': - break - elif tok[0] == tokenize.NAME: - symbols.append(tok[1]) - elif tok[1] == ',': - continue - else: - raise ValueError('bad token in signature "%s"' % tok[1]) - yield tuple(symbols) - tok = next(tokgen) - if tok[1] == ',': - continue - elif tokenize.ISEOF(tok[0]): - break - elif tokenize.ISEOF(tok[0]): - break - else: - raise ValueError('bad token in signature "%s"' % tok[1]) - - ins, _, outs = stripws(sig).partition('->') - inputs = list(parse(ins)) - outputs = list(parse(outs)) - - # check that all output symbols are defined in the inputs - isym = set() - osym = set() - for grp in inputs: - isym |= set(grp) - for grp in outputs: - osym |= set(grp) - - diff = osym.difference(isym) - if diff: - raise NameError('undefined output symbols: %s' % ','.join(sorted(diff))) - - return inputs, outputs diff --git a/numba/numba/npyufunc/tbbpool.cpp b/numba/numba/npyufunc/tbbpool.cpp deleted file mode 100644 index 39a7a363b..000000000 --- a/numba/numba/npyufunc/tbbpool.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* -Implement parallel vectorize workqueue on top of Intel TBB. 
-*/ - -#define TBB_PREVIEW_WAITING_FOR_WORKERS 1 -#include -#include -#include -#include "workqueue.h" -#include "../_pymodule.h" -#include "gufunc_scheduler.h" - -#if TBB_INTERFACE_VERSION >= 9106 - #define TSI_INIT(count) tbb::task_scheduler_init(count) - #define TSI_TERMINATE(tsi) tsi->blocking_terminate(std::nothrow) -#else -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - #define TSI_INIT(count) tbb::task_scheduler_init(count, 0, /*blocking termination*/true) - #define TSI_TERMINATE(tsi) tsi->terminate() -#else -#error This version of TBB does not support blocking terminate -#endif -#endif - -static tbb::task_group *tg = NULL; -static tbb::task_scheduler_init *tsi = NULL; -static int tsi_count = 0; - -static void -add_task(void *fn, void *args, void *dims, void *steps, void *data) { - tg->run([=]{ - auto func = reinterpret_cast(fn); - func(args, dims, steps, data); - }); -} - - -void ignore_blocking_terminate_assertion( const char*, int, const char*, const char * ) { - tbb::internal::runtime_warning("Unable to wait for threads to shut down before fork(). 
It can break multithreading in child process\n"); -} -void ignore_assertion( const char*, int, const char*, const char * ) {} - -static void prepare_fork(void) { - //puts("Suspending TBB: prepare fork"); - if(tsi) { - assertion_handler_type orig = tbb::set_assertion_handler(ignore_blocking_terminate_assertion); - TSI_TERMINATE(tsi); - tbb::set_assertion_handler(orig); - } -} - -static void reset_after_fork(void) { - //puts("Resuming TBB: after fork"); - if(tsi) - tsi->initialize(tsi_count); -} - -static void unload_tbb(void) { - if(tsi) { - delete tg; - tg = NULL; - //puts("Unloading TBB"); - assertion_handler_type orig = tbb::set_assertion_handler(ignore_assertion); - TSI_TERMINATE(tsi); - tbb::set_assertion_handler(orig); - delete tsi; - tsi = NULL; - } -} - -static void launch_threads(int count) { - if(tsi) - return; - if(count < 1) - count = tbb::task_scheduler_init::automatic; - tsi = new TSI_INIT(tsi_count = count); - tg = new tbb::task_group; - tg->run([]{}); // start creating threads asynchronously - -#ifndef _MSC_VER - pthread_atfork(prepare_fork, reset_after_fork, reset_after_fork); -#endif -} - -static void synchronize(void) { - tg->wait(); -} - -static void ready(void) { -} - -MOD_INIT(workqueue) { - PyObject *m; - MOD_DEF(m, "workqueue", "No docs", NULL) - if (m == NULL) - return MOD_ERROR_VAL; -#if PY_MAJOR_VERSION >= 3 - PyModuleDef *md = PyModule_GetDef(m); - if (md) { - md->m_free = (freefunc)unload_tbb; - } -#endif - - PyObject_SetAttrString(m, "launch_threads", - PyLong_FromVoidPtr((void*)&launch_threads)); - PyObject_SetAttrString(m, "synchronize", - PyLong_FromVoidPtr((void*)&synchronize)); - PyObject_SetAttrString(m, "ready", - PyLong_FromVoidPtr((void*)&ready)); - PyObject_SetAttrString(m, "add_task", - PyLong_FromVoidPtr((void*)&add_task)); - PyObject_SetAttrString(m, "do_scheduling_signed", - PyLong_FromVoidPtr((void*)&do_scheduling_signed)); - PyObject_SetAttrString(m, "do_scheduling_unsigned", - 
PyLong_FromVoidPtr((void*)&do_scheduling_unsigned)); - - - return MOD_SUCCESS_VAL(m); -} diff --git a/numba/numba/npyufunc/ufuncbuilder.py b/numba/numba/npyufunc/ufuncbuilder.py deleted file mode 100644 index 7dcbe16e9..000000000 --- a/numba/numba/npyufunc/ufuncbuilder.py +++ /dev/null @@ -1,335 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function, division, absolute_import - -import warnings -import inspect -from contextlib import contextmanager - -import numpy as np - -from numba.decorators import jit -from numba.targets.descriptors import TargetDescriptor -from numba.targets.options import TargetOptions -from numba.targets.registry import dispatcher_registry, cpu_target -from numba import utils, compiler, types, sigutils -from numba.numpy_support import as_dtype -from . import _internal -from .sigparse import parse_signature -from .wrappers import build_ufunc_wrapper, build_gufunc_wrapper -from numba.caching import FunctionCache, NullCache - - -import llvmlite.llvmpy.core as lc - -class UFuncTargetOptions(TargetOptions): - OPTIONS = { - "nopython" : bool, - "forceobj" : bool, - "fastmath" : bool, - } - - -class UFuncTarget(TargetDescriptor): - options = UFuncTargetOptions - - @property - def typing_context(self): - return cpu_target.typing_context - - @property - def target_context(self): - return cpu_target.target_context - -ufunc_target = UFuncTarget() - - -class UFuncDispatcher(object): - """ - An object handling compilation of various signatures for a ufunc. 
- """ - targetdescr = ufunc_target - - def __init__(self, py_func, locals={}, targetoptions={}): - self.py_func = py_func - self.overloads = utils.UniqueDict() - self.targetoptions = targetoptions - self.locals = locals - self.cache = NullCache() - - def enable_caching(self): - self.cache = FunctionCache(self.py_func) - - def compile(self, sig, locals={}, **targetoptions): - locs = self.locals.copy() - locs.update(locals) - - topt = self.targetoptions.copy() - topt.update(targetoptions) - - flags = compiler.Flags() - self.targetdescr.options.parse_as_flags(flags, topt) - - flags.set("no_cpython_wrapper") - flags.set("error_model", "numpy") - # Disable loop lifting - # The feature requires a real python function - flags.unset("enable_looplift") - - return self._compile_core(sig, flags, locals) - - def _compile_core(self, sig, flags, locals): - """ - Trigger the compiler on the core function or load a previously - compiled version from the cache. Returns the CompileResult. - """ - typingctx = self.targetdescr.typing_context - targetctx = self.targetdescr.target_context - - @contextmanager - def store_overloads_on_success(): - # use to ensure overloads are stored on success - try: - yield - except: - raise - else: - exists = self.overloads.get(cres.signature) - if exists is None: - self.overloads[cres.signature] = cres - - # Use cache and compiler in a critical section - with compiler.lock_compiler: - with store_overloads_on_success(): - # attempt look up of existing - cres = self.cache.load_overload(sig, targetctx) - if cres is not None: - return cres - - # Compile - args, return_type = sigutils.normalize_signature(sig) - cres = compiler.compile_extra(typingctx, targetctx, - self.py_func, args=args, - return_type=return_type, - flags=flags, locals=locals) - - # cache lookup failed before so safe to save - self.cache.save_overload(sig, cres) - - return cres - - -dispatcher_registry['npyufunc'] = UFuncDispatcher - - -# Utility functions - -def 
_compile_element_wise_function(nb_func, targetoptions, sig): - # Do compilation - # Return CompileResult to test - cres = nb_func.compile(sig, **targetoptions) - args, return_type = sigutils.normalize_signature(sig) - return cres, args, return_type - -def _finalize_ufunc_signature(cres, args, return_type): - '''Given a compilation result, argument types, and a return type, - build a valid Numba signature after validating that it doesn't - violate the constraints for the compilation mode. - ''' - if return_type is None: - if cres.objectmode: - # Object mode is used and return type is not specified - raise TypeError("return type must be specified for object mode") - else: - return_type = cres.signature.return_type - - assert return_type != types.pyobject - return return_type(*args) - -def _build_element_wise_ufunc_wrapper(cres, signature): - '''Build a wrapper for the ufunc loop entry point given by the - compilation result object, using the element-wise signature. - ''' - ctx = cres.target_context - library = cres.library - fname = cres.fndesc.llvm_func_name - - with compiler.lock_compiler: - ptr = build_ufunc_wrapper(library, ctx, fname, signature, - cres.objectmode, cres) - - # Get dtypes - dtypenums = [as_dtype(a).num for a in signature.args] - dtypenums.append(as_dtype(signature.return_type).num) - return dtypenums, ptr, cres.environment - - -_identities = { - 0: _internal.PyUFunc_Zero, - 1: _internal.PyUFunc_One, - None: _internal.PyUFunc_None, - "reorderable": _internal.PyUFunc_ReorderableNone, - } - -def parse_identity(identity): - """ - Parse an identity value and return the corresponding low-level value - for Numpy. 
- """ - try: - identity = _identities[identity] - except KeyError: - raise ValueError("Invalid identity value %r" % (identity,)) - return identity - - -# Class definitions - -class _BaseUFuncBuilder(object): - - def add(self, sig=None): - if hasattr(self, 'targetoptions'): - targetoptions = self.targetoptions - else: - targetoptions = self.nb_func.targetoptions - cres, args, return_type = _compile_element_wise_function( - self.nb_func, targetoptions, sig) - sig = self._finalize_signature(cres, args, return_type) - self._sigs.append(sig) - self._cres[sig] = cres - return cres - - def disable_compile(self): - """ - Disable the compilation of new signatures at call time. - """ - # Override this for implementations that support lazy compilation - - -class UFuncBuilder(_BaseUFuncBuilder): - - def __init__(self, py_func, identity=None, cache=False, targetoptions={}): - self.py_func = py_func - self.identity = parse_identity(identity) - self.nb_func = jit(target='npyufunc', cache=cache, **targetoptions)(py_func) - self._sigs = [] - self._cres = {} - - def _finalize_signature(self, cres, args, return_type): - '''Slated for deprecation, use ufuncbuilder._finalize_ufunc_signature() - instead. - ''' - return _finalize_ufunc_signature(cres, args, return_type) - - def build_ufunc(self): - dtypelist = [] - ptrlist = [] - if not self.nb_func: - raise TypeError("No definition") - - # Get signature in the order they are added - keepalive = [] - cres = None - for sig in self._sigs: - cres = self._cres[sig] - dtypenums, ptr, env = self.build(cres, sig) - dtypelist.append(dtypenums) - ptrlist.append(utils.longint(ptr)) - keepalive.append((cres.library, env)) - - datlist = [None] * len(ptrlist) - - if cres is None: - argspec = inspect.getargspec(self.py_func) - inct = len(argspec.args) - else: - inct = len(cres.signature.args) - outct = 1 - - # Becareful that fromfunc does not provide full error checking yet. - # If typenum is out-of-bound, we have nasty memory corruptions. 
- # For instance, -1 for typenum will cause segfault. - # If elements of type-list (2nd arg) is tuple instead, - # there will also memory corruption. (Seems like code rewrite.) - ufunc = _internal.fromfunc(self.py_func.__name__, self.py_func.__doc__, - ptrlist, dtypelist, inct, outct, datlist, - keepalive, self.identity) - - return ufunc - - def build(self, cres, signature): - '''Slated for deprecation, use - ufuncbuilder._build_element_wise_ufunc_wrapper(). - ''' - return _build_element_wise_ufunc_wrapper(cres, signature) - - -class GUFuncBuilder(_BaseUFuncBuilder): - - # TODO handle scalar - def __init__(self, py_func, signature, identity=None, cache=False, - targetoptions={}): - self.py_func = py_func - self.identity = parse_identity(identity) - self.nb_func = jit(target='npyufunc', cache=cache)(py_func) - self.signature = signature - self.sin, self.sout = parse_signature(signature) - self.targetoptions = targetoptions - self.cache = cache - self._sigs = [] - self._cres = {} - - def _finalize_signature(self, cres, args, return_type): - if not cres.objectmode and cres.signature.return_type != types.void: - raise TypeError("gufunc kernel must have void return type") - - if return_type is None: - return_type = types.void - - return return_type(*args) - - def build_ufunc(self): - dtypelist = [] - ptrlist = [] - if not self.nb_func: - raise TypeError("No definition") - - # Get signature in the order they are added - keepalive = [] - for sig in self._sigs: - cres = self._cres[sig] - dtypenums, ptr, env = self.build(cres) - dtypelist.append(dtypenums) - ptrlist.append(utils.longint(ptr)) - keepalive.append((cres.library, env)) - - datlist = [None] * len(ptrlist) - - inct = len(self.sin) - outct = len(self.sout) - - # Pass envs to fromfuncsig to bind to the lifetime of the ufunc object - ufunc = _internal.fromfunc(self.py_func.__name__, self.py_func.__doc__, - ptrlist, dtypelist, inct, outct, datlist, - keepalive, self.identity, self.signature) - return ufunc - - def 
build(self, cres): - """ - Returns (dtype numbers, function ptr, EnvironmentObject) - """ - # Buider wrapper for ufunc entry point - signature = cres.signature - with compiler.lock_compiler: - ptr, env, wrapper_name = build_gufunc_wrapper(self.py_func, cres, - self.sin, self.sout, - cache=self.cache) - - # Get dtypes - dtypenums = [] - for a in signature.args: - if isinstance(a, types.Array): - ty = a.dtype - else: - ty = a - dtypenums.append(as_dtype(ty).num) - return dtypenums, ptr, env diff --git a/numba/numba/npyufunc/workqueue.c b/numba/numba/npyufunc/workqueue.c deleted file mode 100644 index 3b153fabd..000000000 --- a/numba/numba/npyufunc/workqueue.c +++ /dev/null @@ -1,316 +0,0 @@ -/* -Implement parallel vectorize workqueue. - -This keeps a set of worker threads running all the time. -They wait and spin on a task queue for jobs. - -**WARNING** -This module is not thread-safe. Adding task to queue is not protected from -race condition. -*/ - -#ifdef _MSC_VER - /* Windows */ - #include - #include - #define NUMBA_WINTHREAD -#else - /* PThread */ - #include - #include - #define NUMBA_PTHREAD -#endif - -#include -#include -#include "workqueue.h" -#include "../_pymodule.h" -#include "gufunc_scheduler.h" - -/* As the thread-pool isn't inherited by children, - free the task-queue, too. */ -static void reset_after_fork(void); - -/* PThread */ -#ifdef NUMBA_PTHREAD - -typedef struct { - pthread_cond_t cond; - pthread_mutex_t mutex; -} queue_condition_t; - -static int -queue_condition_init(queue_condition_t *qc) -{ - int r; - if ((r = pthread_cond_init(&qc->cond, NULL))) - return r; - if ((r = pthread_mutex_init(&qc->mutex, NULL))) - return r; - return 0; -} - -static void -queue_condition_lock(queue_condition_t *qc) -{ - /* XXX errors? */ - pthread_mutex_lock(&qc->mutex); -} - -static void -queue_condition_unlock(queue_condition_t *qc) -{ - /* XXX errors? 
*/ - pthread_mutex_unlock(&qc->mutex); -} - -static void -queue_condition_signal(queue_condition_t *qc) -{ - /* XXX errors? */ - pthread_cond_signal(&qc->cond); -} - -static void -queue_condition_wait(queue_condition_t *qc) -{ - /* XXX errors? */ - pthread_cond_wait(&qc->cond, &qc->mutex); -} - -static thread_pointer -numba_new_thread(void *worker, void *arg) -{ - int status; - pthread_attr_t attr; - pthread_t th; - - pthread_atfork(0, 0, reset_after_fork); - - /* Create detached threads */ - pthread_attr_init(&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); - - status = pthread_create(&th, &attr, worker, arg); - - if (status != 0){ - return NULL; - } - - pthread_attr_destroy(&attr); - return (thread_pointer)th; -} - -#endif - -/* Win Thread */ -#ifdef NUMBA_WINTHREAD - -typedef struct { - CONDITION_VARIABLE cv; - CRITICAL_SECTION cs; -} queue_condition_t; - -static int -queue_condition_init(queue_condition_t *qc) -{ - InitializeConditionVariable(&qc->cv); - InitializeCriticalSection(&qc->cs); - return 0; -} - -static void -queue_condition_lock(queue_condition_t *qc) -{ - EnterCriticalSection(&qc->cs); -} - -static void -queue_condition_unlock(queue_condition_t *qc) -{ - LeaveCriticalSection(&qc->cs); -} - -static void -queue_condition_signal(queue_condition_t *qc) -{ - WakeConditionVariable(&qc->cv); -} - -static void -queue_condition_wait(queue_condition_t *qc) -{ - SleepConditionVariableCS(&qc->cv, &qc->cs, INFINITE); -} - -/* Adapted from Python/thread_nt.h */ -typedef struct { - void (*func)(void*); - void *arg; -} callobj; - -static unsigned __stdcall -bootstrap(void *call) -{ - callobj *obj = (callobj*)call; - void (*func)(void*) = obj->func; - void *arg = obj->arg; - HeapFree(GetProcessHeap(), 0, obj); - func(arg); - _endthreadex(0); - return 0; -} - -static thread_pointer -numba_new_thread(void *worker, void *arg) -{ - uintptr_t handle; - unsigned threadID; - callobj *obj; - - if (sizeof(handle) > sizeof(void*)) - return 0; - - obj = 
(callobj*)HeapAlloc(GetProcessHeap(), 0, sizeof(*obj)); - if (!obj) - return NULL; - - obj->func = worker; - obj->arg = arg; - - handle = _beginthreadex(NULL, 0, bootstrap, obj, 0, &threadID); - if (handle == -1) - return 0; - return (thread_pointer)handle; -} - -#endif - -typedef struct Task{ - void (*func)(void *args, void *dims, void *steps, void *data); - void *args, *dims, *steps, *data; -} Task; - -typedef struct { - queue_condition_t cond; - int state; - Task task; -} Queue; - - -static Queue *queues = NULL; -static int queue_count; -static int queue_pivot = 0; - -static void -queue_state_wait(Queue *queue, int old, int repl) -{ - queue_condition_t *cond = &queue->cond; - - queue_condition_lock(cond); - while (queue->state != old) { - queue_condition_wait(cond); - } - queue->state = repl; - queue_condition_signal(cond); - queue_condition_unlock(cond); -} - -static void -add_task(void *fn, void *args, void *dims, void *steps, void *data) { - void (*func)(void *args, void *dims, void *steps, void *data) = fn; - - Queue *queue = &queues[queue_pivot]; - - Task *task = &queue->task; - task->func = func; - task->args = args; - task->dims = dims; - task->steps = steps; - task->data = data; - - /* Move pivot */ - if ( ++queue_pivot == queue_count ) { - queue_pivot = 0; - } -} - -static -void thread_worker(void *arg) { - Queue *queue = (Queue*)arg; - Task *task; - - while (1) { - /* Wait for the queue to be in READY state (i.e. for some task - * to need running), and switch it to RUNNING. - */ - queue_state_wait(queue, READY, RUNNING); - - task = &queue->task; - task->func(task->args, task->dims, task->steps, task->data); - - /* Task is done. */ - queue_state_wait(queue, RUNNING, DONE); - } -} - -static void launch_threads(int count) { - if (!queues) { - /* If queues are not yet allocated, - create them, one for each thread. 
*/ - int i; - size_t sz = sizeof(Queue) * count; - - queues = malloc(sz); /* this memory will leak */ - /* Note this initializes the state to IDLE */ - memset(queues, 0, sz); - queue_count = count; - - for (i = 0; i < count; ++i) { - queue_condition_init(&queues[i].cond); - numba_new_thread(thread_worker, &queues[i]); - } - } -} - -static void synchronize(void) { - int i; - for (i = 0; i < queue_count; ++i) { - queue_state_wait(&queues[i], DONE, IDLE); - } -} - -static void ready(void) { - int i; - for (i = 0; i < queue_count; ++i) { - queue_state_wait(&queues[i], IDLE, READY); - } -} - -static void reset_after_fork(void) -{ - free(queues); - queues = NULL; -} - -MOD_INIT(workqueue) { - PyObject *m; - MOD_DEF(m, "workqueue", "No docs", NULL) - if (m == NULL) - return MOD_ERROR_VAL; - - PyObject_SetAttrString(m, "launch_threads", - PyLong_FromVoidPtr(&launch_threads)); - PyObject_SetAttrString(m, "synchronize", - PyLong_FromVoidPtr(&synchronize)); - PyObject_SetAttrString(m, "ready", - PyLong_FromVoidPtr(&ready)); - PyObject_SetAttrString(m, "add_task", - PyLong_FromVoidPtr(&add_task)); - PyObject_SetAttrString(m, "do_scheduling_signed", - PyLong_FromVoidPtr(&do_scheduling_signed)); - PyObject_SetAttrString(m, "do_scheduling_unsigned", - PyLong_FromVoidPtr(&do_scheduling_unsigned)); - - return MOD_SUCCESS_VAL(m); -} diff --git a/numba/numba/npyufunc/workqueue.h b/numba/numba/npyufunc/workqueue.h deleted file mode 100644 index e79fd15b4..000000000 --- a/numba/numba/npyufunc/workqueue.h +++ /dev/null @@ -1,38 +0,0 @@ -typedef struct opaque_thread * thread_pointer; - -enum QUEUE_STATE { - /* - The queue has 4 states: - - IDLE: not doing anything - READY: tasks enqueued; signal workers to start - RUNNING: workers running - DONE: workers completed - */ - IDLE = 0, READY, RUNNING, DONE -}; - -/* Launch new thread */ -static -thread_pointer numba_new_thread(void *worker, void *arg); - -/* Launch `count` number of threads and create the associated thread queue. 
-Must invoke once before each add_task() is used. -*Warning* queues memory are leaked at interpreter tear down! -*/ -static -void launch_threads(int count); - -/* Add task to queue -Automatically assigned to queues of different thread in a round robin fashion. -*/ -static -void add_task(void *fn, void *args, void *dims, void *steps, void *data); - -/* Wait until all tasks are done */ -static -void synchronize(void); - -/* Signal worker threads that tasks are added and it is ready to run */ -static -void ready(void); diff --git a/numba/numba/npyufunc/wrappers.py b/numba/numba/npyufunc/wrappers.py deleted file mode 100644 index 7c5a6e23b..000000000 --- a/numba/numba/npyufunc/wrappers.py +++ /dev/null @@ -1,698 +0,0 @@ -from __future__ import print_function, division, absolute_import -import numpy as np - -from llvmlite.llvmpy.core import Type, Builder, ICMP_EQ, Constant - -from numba import types, cgutils, compiler -from ..caching import make_library_cache, NullCache - - -def _build_ufunc_loop_body(load, store, context, func, builder, arrays, out, - offsets, store_offset, signature, pyapi, env): - elems = load() - - # Compute - status, retval = context.call_conv.call_function(builder, func, - signature.return_type, - signature.args, elems) - - # Store - with builder.if_else(status.is_ok, likely=True) as (if_ok, if_error): - with if_ok: - store(retval) - with if_error: - gil = pyapi.gil_ensure() - context.call_conv.raise_error(builder, pyapi, status) - pyapi.gil_release(gil) - - # increment indices - for off, ary in zip(offsets, arrays): - builder.store(builder.add(builder.load(off), ary.step), off) - - builder.store(builder.add(builder.load(store_offset), out.step), - store_offset) - - return status.code - - -def _build_ufunc_loop_body_objmode(load, store, context, func, builder, - arrays, out, offsets, store_offset, - signature, env, pyapi): - elems = load() - - # Compute - _objargs = [types.pyobject] * len(signature.args) - # We need to push the error indicator to 
avoid it messing with - # the ufunc's execution. We restore it unless the ufunc raised - # a new error. - with pyapi.err_push(keep_new=True): - status, retval = context.call_conv.call_function(builder, func, - types.pyobject, - _objargs, elems) - # Release owned reference to arguments - for elem in elems: - pyapi.decref(elem) - # NOTE: if an error occurred, it will be caught by the Numpy machinery - - # Store - store(retval) - - # increment indices - for off, ary in zip(offsets, arrays): - builder.store(builder.add(builder.load(off), ary.step), off) - - builder.store(builder.add(builder.load(store_offset), out.step), - store_offset) - - return status.code - - -def build_slow_loop_body(context, func, builder, arrays, out, offsets, - store_offset, signature, pyapi, env): - def load(): - elems = [ary.load_direct(builder.load(off)) - for off, ary in zip(offsets, arrays)] - return elems - - def store(retval): - out.store_direct(retval, builder.load(store_offset)) - - return _build_ufunc_loop_body(load, store, context, func, builder, arrays, - out, offsets, store_offset, signature, pyapi, - env=env) - - -def build_obj_loop_body(context, func, builder, arrays, out, offsets, - store_offset, signature, pyapi, envptr, env): - env_body = context.get_env_body(builder, envptr) - env_manager = pyapi.get_env_manager(env, env_body, envptr) - - def load(): - # Load - elems = [ary.load_direct(builder.load(off)) - for off, ary in zip(offsets, arrays)] - # Box - elems = [pyapi.from_native_value(t, v, env_manager) - for v, t in zip(elems, signature.args)] - return elems - - def store(retval): - is_ok = cgutils.is_not_null(builder, retval) - # If an error is raised by the object mode ufunc, it will - # simply get caught by the Numpy ufunc machinery. 
- with builder.if_then(is_ok, likely=True): - # Unbox - native = pyapi.to_native_value(signature.return_type, retval) - assert native.cleanup is None - # Store - out.store_direct(native.value, builder.load(store_offset)) - # Release owned reference - pyapi.decref(retval) - - return _build_ufunc_loop_body_objmode(load, store, context, func, builder, - arrays, out, offsets, store_offset, - signature, envptr, pyapi) - - -def build_fast_loop_body(context, func, builder, arrays, out, offsets, - store_offset, signature, ind, pyapi, env): - def load(): - elems = [ary.load_aligned(ind) - for ary in arrays] - return elems - - def store(retval): - out.store_aligned(retval, ind) - - return _build_ufunc_loop_body(load, store, context, func, builder, arrays, - out, offsets, store_offset, signature, pyapi, - env=env) - - -def build_ufunc_wrapper(library, context, fname, signature, objmode, cres): - """ - Wrap the scalar function with a loop that iterates over the arguments - """ - assert isinstance(fname, str) - byte_t = Type.int(8) - byte_ptr_t = Type.pointer(byte_t) - byte_ptr_ptr_t = Type.pointer(byte_ptr_t) - intp_t = context.get_value_type(types.intp) - intp_ptr_t = Type.pointer(intp_t) - - fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t, - intp_ptr_t, byte_ptr_t]) - - wrapperlib = context.codegen().create_library('ufunc_wrapper') - wrapper_module = wrapperlib.create_ir_module('') - if objmode: - func_type = context.call_conv.get_function_type( - types.pyobject, [types.pyobject] * len(signature.args)) - else: - func_type = context.call_conv.get_function_type( - signature.return_type, signature.args) - - func = wrapper_module.add_function(func_type, name=fname) - func.attributes.add("alwaysinline") - - wrapper = wrapper_module.add_function(fnty, "__ufunc__." 
+ func.name) - arg_args, arg_dims, arg_steps, arg_data = wrapper.args - arg_args.name = "args" - arg_dims.name = "dims" - arg_steps.name = "steps" - arg_data.name = "data" - - builder = Builder(wrapper.append_basic_block("entry")) - - # Prepare Environment - envname = context.get_env_name(cres.fndesc) - env = cres.environment - envptr = builder.load(context.declare_env_global(builder.module, envname)) - - # Emit loop - loopcount = builder.load(arg_dims, name="loopcount") - - # Prepare inputs - arrays = [] - for i, typ in enumerate(signature.args): - arrays.append(UArrayArg(context, builder, arg_args, arg_steps, i, typ)) - - # Prepare output - out = UArrayArg(context, builder, arg_args, arg_steps, len(arrays), - signature.return_type) - - # Setup indices - offsets = [] - zero = context.get_constant(types.intp, 0) - for _ in arrays: - p = cgutils.alloca_once(builder, intp_t) - offsets.append(p) - builder.store(zero, p) - - store_offset = cgutils.alloca_once(builder, intp_t) - builder.store(zero, store_offset) - - unit_strided = cgutils.true_bit - for ary in arrays: - unit_strided = builder.and_(unit_strided, ary.is_unit_strided) - - pyapi = context.get_python_api(builder) - if objmode: - # General loop - gil = pyapi.gil_ensure() - with cgutils.for_range(builder, loopcount, intp=intp_t): - slowloop = build_obj_loop_body(context, func, builder, - arrays, out, offsets, - store_offset, signature, - pyapi, envptr, env) - pyapi.gil_release(gil) - builder.ret_void() - - else: - with builder.if_else(unit_strided) as (is_unit_strided, is_strided): - with is_unit_strided: - with cgutils.for_range(builder, loopcount, intp=intp_t) as loop: - fastloop = build_fast_loop_body(context, func, builder, - arrays, out, offsets, - store_offset, signature, - loop.index, pyapi, - env=envptr) - - with is_strided: - # General loop - with cgutils.for_range(builder, loopcount, intp=intp_t): - slowloop = build_slow_loop_body(context, func, builder, - arrays, out, offsets, - store_offset, 
signature, - pyapi, env=envptr) - - builder.ret_void() - del builder - - # Link and finalize - wrapperlib.add_ir_module(wrapper_module) - wrapperlib.add_linking_library(library) - return wrapperlib.get_pointer_to_function(wrapper.name) - - -class UArrayArg(object): - def __init__(self, context, builder, args, steps, i, fe_type): - self.context = context - self.builder = builder - self.fe_type = fe_type - offset = self.context.get_constant(types.intp, i) - offseted_args = self.builder.load(builder.gep(args, [offset])) - data_type = context.get_data_type(fe_type) - self.dataptr = self.builder.bitcast(offseted_args, - data_type.as_pointer()) - sizeof = self.context.get_abi_sizeof(data_type) - self.abisize = self.context.get_constant(types.intp, sizeof) - offseted_step = self.builder.gep(steps, [offset]) - self.step = self.builder.load(offseted_step) - self.is_unit_strided = builder.icmp(ICMP_EQ, self.abisize, self.step) - self.builder = builder - - def load_direct(self, byteoffset): - """ - Generic load from the given *byteoffset*. load_aligned() is - preferred if possible. - """ - ptr = cgutils.pointer_add(self.builder, self.dataptr, byteoffset) - return self.context.unpack_value(self.builder, self.fe_type, ptr) - - def load_aligned(self, ind): - # Using gep() instead of explicit pointer addition helps LLVM - # vectorize the loop. 
- ptr = self.builder.gep(self.dataptr, [ind]) - return self.context.unpack_value(self.builder, self.fe_type, ptr) - - def store_direct(self, value, byteoffset): - ptr = cgutils.pointer_add(self.builder, self.dataptr, byteoffset) - self.context.pack_value(self.builder, self.fe_type, value, ptr) - - def store_aligned(self, value, ind): - ptr = self.builder.gep(self.dataptr, [ind]) - self.context.pack_value(self.builder, self.fe_type, value, ptr) - - -GufWrapperCache = make_library_cache('guf') - - -class _GufuncWrapper(object): - def __init__(self, py_func, cres, sin, sout, cache): - self.py_func = py_func - self.cres = cres - self.sin = sin - self.sout = sout - self.is_objectmode = self.signature.return_type == types.pyobject - self.cache = (GufWrapperCache(py_func=self.py_func) - if cache else NullCache()) - - @property - def library(self): - return self.cres.library - - @property - def context(self): - return self.cres.target_context - - @property - def call_conv(self): - return self.context.call_conv - - @property - def signature(self): - return self.cres.signature - - @property - def fndesc(self): - return self.cres.fndesc - - @property - def env(self): - return self.cres.environment - - def _build_wrapper(self, library, name): - """ - The LLVM IRBuilder code to create the gufunc wrapper. - The *library* arg is the CodeLibrary for which the wrapper should - be added to. The *name* arg is the name of the wrapper function being - created. 
- """ - byte_t = Type.int(8) - byte_ptr_t = Type.pointer(byte_t) - byte_ptr_ptr_t = Type.pointer(byte_ptr_t) - intp_t = self.context.get_value_type(types.intp) - intp_ptr_t = Type.pointer(intp_t) - - fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t, - intp_ptr_t, byte_ptr_t]) - - wrapper_module = library.create_ir_module('_gufunc_wrapper') - func_type = self.call_conv.get_function_type(self.fndesc.restype, - self.fndesc.argtypes) - fname = self.fndesc.llvm_func_name - func = wrapper_module.add_function(func_type, name=fname) - - func.attributes.add("alwaysinline") - wrapper = wrapper_module.add_function(fnty, name) - arg_args, arg_dims, arg_steps, arg_data = wrapper.args - arg_args.name = "args" - arg_dims.name = "dims" - arg_steps.name = "steps" - arg_data.name = "data" - - builder = Builder(wrapper.append_basic_block("entry")) - loopcount = builder.load(arg_dims, name="loopcount") - pyapi = self.context.get_python_api(builder) - - # Unpack shapes - unique_syms = set() - for grp in (self.sin, self.sout): - for syms in grp: - unique_syms |= set(syms) - - sym_map = {} - for syms in self.sin: - for s in syms: - if s not in sym_map: - sym_map[s] = len(sym_map) - - sym_dim = {} - for s, i in sym_map.items(): - sym_dim[s] = builder.load(builder.gep(arg_dims, - [self.context.get_constant( - types.intp, - i + 1)])) - - # Prepare inputs - arrays = [] - step_offset = len(self.sin) + len(self.sout) - for i, (typ, sym) in enumerate(zip(self.signature.args, - self.sin + self.sout)): - ary = GUArrayArg(self.context, builder, arg_args, - arg_steps, i, step_offset, typ, sym, sym_dim) - step_offset += len(sym) - arrays.append(ary) - - bbreturn = builder.append_basic_block('.return') - - # Prologue - self.gen_prologue(builder, pyapi) - - # Loop - with cgutils.for_range(builder, loopcount, intp=intp_t) as loop: - args = [a.get_array_at_offset(loop.index) for a in arrays] - innercall, error = self.gen_loop_body(builder, pyapi, func, args) - # If error, escape - 
cgutils.cbranch_or_continue(builder, error, bbreturn) - - builder.branch(bbreturn) - builder.position_at_end(bbreturn) - - # Epilogue - self.gen_epilogue(builder, pyapi) - - builder.ret_void() - - # Link - library.add_ir_module(wrapper_module) - library.add_linking_library(self.library) - - def build(self): - # Use cache and compiler in a critical section - with compiler.lock_compiler: - wrapperlib = self.cache.load_overload(self.cres.signature, self.cres.target_context) - wrapper_name = "__gufunc__." + self.fndesc.mangled_name - - if wrapperlib is None: - # Create library and enable caching - wrapperlib = self.context.codegen().create_library(str(self)) - wrapperlib.enable_object_caching() - # Build wrapper - self._build_wrapper(wrapperlib, wrapper_name) - # Cache - self.cache.save_overload(self.cres.signature, wrapperlib) - # Finalize and get function pointer - ptr = wrapperlib.get_pointer_to_function(wrapper_name) - return ptr, self.env, wrapper_name - - def gen_loop_body(self, builder, pyapi, func, args): - status, retval = self.call_conv.call_function( - builder, func, self.signature.return_type, self.signature.args, - args) - - with builder.if_then(status.is_error, likely=False): - gil = pyapi.gil_ensure() - self.context.call_conv.raise_error(builder, pyapi, status) - pyapi.gil_release(gil) - - return status.code, status.is_error - - def gen_prologue(self, builder, pyapi): - pass # Do nothing - - def gen_epilogue(self, builder, pyapi): - pass # Do nothing - - -class _GufuncObjectWrapper(_GufuncWrapper): - def gen_loop_body(self, builder, pyapi, func, args): - innercall, error = _prepare_call_to_object_mode(self.context, - builder, pyapi, func, - self.signature, - args) - return innercall, error - - def gen_prologue(self, builder, pyapi): - # Acquire the GIL - self.gil = pyapi.gil_ensure() - - def gen_epilogue(self, builder, pyapi): - # Release GIL - pyapi.gil_release(self.gil) - - -def build_gufunc_wrapper(py_func, cres, sin, sout, cache): - signature = 
cres.signature - wrapcls = (_GufuncObjectWrapper - if signature.return_type == types.pyobject - else _GufuncWrapper) - return wrapcls(py_func, cres, sin, sout, cache).build() - - -def _prepare_call_to_object_mode(context, builder, pyapi, func, - signature, args): - mod = builder.module - - bb_core_return = builder.append_basic_block('ufunc.core.return') - - # Call to - # PyObject* ndarray_new(int nd, - # npy_intp *dims, /* shape */ - # npy_intp *strides, - # void* data, - # int type_num, - # int itemsize) - - ll_int = context.get_value_type(types.int32) - ll_intp = context.get_value_type(types.intp) - ll_intp_ptr = Type.pointer(ll_intp) - ll_voidptr = context.get_value_type(types.voidptr) - ll_pyobj = context.get_value_type(types.pyobject) - fnty = Type.function(ll_pyobj, [ll_int, ll_intp_ptr, - ll_intp_ptr, ll_voidptr, - ll_int, ll_int]) - - fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new") - - # Convert each llarray into pyobject - error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error') - builder.store(cgutils.true_bit, error_pointer) - - # The PyObject* arguments to the kernel function - object_args = [] - object_pointers = [] - - for i, (arg, argty) in enumerate(zip(args, signature.args)): - # Allocate NULL-initialized slot for this argument - objptr = cgutils.alloca_once(builder, ll_pyobj, zfill=True) - object_pointers.append(objptr) - - if isinstance(argty, types.Array): - # Special case arrays: we don't need full-blown NRT reflection - # since the argument will be gone at the end of the kernel - arycls = context.make_array(argty) - array = arycls(context, builder, value=arg) - - zero = Constant.int(ll_int, 0) - - # Extract members of the llarray - nd = Constant.int(ll_int, argty.ndim) - dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero]) - strides = builder.gep(array._get_ptr_by_name('strides'), [zero, zero]) - data = builder.bitcast(array.data, ll_voidptr) - dtype = np.dtype(str(argty.dtype)) - - # 
Prepare other info for reconstruction of the PyArray - type_num = Constant.int(ll_int, dtype.num) - itemsize = Constant.int(ll_int, dtype.itemsize) - - # Call helper to reconstruct PyArray objects - obj = builder.call(fn_array_new, [nd, dims, strides, data, - type_num, itemsize]) - else: - # Other argument types => use generic boxing - obj = pyapi.from_native_value(argty, arg) - - builder.store(obj, objptr) - object_args.append(obj) - - obj_is_null = cgutils.is_null(builder, obj) - builder.store(obj_is_null, error_pointer) - cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return) - - # Call ufunc core function - object_sig = [types.pyobject] * len(object_args) - - status, retval = context.call_conv.call_function( - builder, func, types.pyobject, object_sig, - object_args) - builder.store(status.is_error, error_pointer) - - # Release returned object - pyapi.decref(retval) - - builder.branch(bb_core_return) - # At return block - builder.position_at_end(bb_core_return) - - # Release argument objects - for objptr in object_pointers: - pyapi.decref(builder.load(objptr)) - - innercall = status.code - return innercall, builder.load(error_pointer) - - -class GUArrayArg(object): - def __init__(self, context, builder, args, steps, i, step_offset, - typ, syms, sym_dim): - - self.context = context - self.builder = builder - - offset = context.get_constant(types.intp, i) - - data = builder.load(builder.gep(args, [offset], name="data.ptr"), - name="data") - self.data = data - - core_step_ptr = builder.gep(steps, [offset], name="core.step.ptr") - core_step = builder.load(core_step_ptr) - - if isinstance(typ, types.Array): - as_scalar = not syms - - # number of symbol in the shape spec should match the dimension - # of the array type. - if len(syms) != typ.ndim: - if len(syms) == 0 and typ.ndim == 1: - # This is an exception for handling scalar argument. - # The type can be 1D array for scalar. - # In the future, we may deprecate this exception. 
- pass - else: - raise TypeError("type and shape signature mismatch for arg " - "#{0}".format(i + 1)) - - ndim = typ.ndim - shape = [sym_dim[s] for s in syms] - strides = [] - - for j in range(ndim): - stepptr = builder.gep(steps, - [context.get_constant(types.intp, - step_offset + j)], - name="step.ptr") - step = builder.load(stepptr) - strides.append(step) - - ldcls = (_ArrayAsScalarArgLoader - if as_scalar - else _ArrayArgLoader) - - self._loader = ldcls(dtype=typ.dtype, - ndim=ndim, - core_step=core_step, - as_scalar=as_scalar, - shape=shape, - strides=strides) - else: - # If typ is not an array - if syms: - raise TypeError("scalar type {0} given for non scalar " - "argument #{1}".format(typ, i + 1)) - self._loader = _ScalarArgLoader(dtype=typ, stride=core_step) - - def get_array_at_offset(self, ind): - return self._loader.load(context=self.context, builder=self.builder, - data=self.data, ind=ind) - - -class _ScalarArgLoader(object): - """ - Handle GFunc argument loading where a scalar type is used in the core - function. - Note: It still has a stride because the input to the gufunc can be an array - for this argument. - """ - - def __init__(self, dtype, stride): - self.dtype = dtype - self.stride = stride - - def load(self, context, builder, data, ind): - # Load at base + ind * stride - data = builder.gep(data, [builder.mul(ind, self.stride)]) - dptr = builder.bitcast(data, - context.get_data_type(self.dtype).as_pointer()) - return builder.load(dptr) - - -class _ArrayArgLoader(object): - """ - Handle GUFunc argument loading where an array is expected. 
- """ - - def __init__(self, dtype, ndim, core_step, as_scalar, shape, strides): - self.dtype = dtype - self.ndim = ndim - self.core_step = core_step - self.as_scalar = as_scalar - self.shape = shape - self.strides = strides - - def load(self, context, builder, data, ind): - arytyp = types.Array(dtype=self.dtype, ndim=self.ndim, layout="A") - arycls = context.make_array(arytyp) - - array = arycls(context, builder) - offseted_data = cgutils.pointer_add(builder, - data, - builder.mul(self.core_step, - ind)) - - shape, strides = self._shape_and_strides(context, builder) - - itemsize = context.get_abi_sizeof(context.get_data_type(self.dtype)) - context.populate_array(array, - data=builder.bitcast(offseted_data, - array.data.type), - shape=shape, - strides=strides, - itemsize=context.get_constant(types.intp, - itemsize), - meminfo=None) - - return array._getvalue() - - def _shape_and_strides(self, context, builder): - shape = cgutils.pack_array(builder, self.shape) - strides = cgutils.pack_array(builder, self.strides) - return shape, strides - - -class _ArrayAsScalarArgLoader(_ArrayArgLoader): - """ - Handle GUFunc argument loading where the shape signature specifies - a scalar "()" but a 1D array is used for the type of the core function. 
- """ - - def _shape_and_strides(self, context, builder): - # Set shape and strides for a 1D size 1 array - one = context.get_constant(types.intp, 1) - zero = context.get_constant(types.intp, 0) - shape = cgutils.pack_array(builder, [one]) - strides = cgutils.pack_array(builder, [zero]) - return shape, strides diff --git a/numba/numba/numba_entry.py b/numba/numba/numba_entry.py deleted file mode 100644 index ce13efaba..000000000 --- a/numba/numba/numba_entry.py +++ /dev/null @@ -1,320 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import sys -import argparse -import os -import subprocess - - -def get_sys_info(): - # delay these imports until now as they are only needed in this - # function which then exits. - import platform - import json - from numba import config - from numba import cuda as cu - from numba.cuda import cudadrv - from numba.cuda.cudadrv.driver import driver as cudriver - from numba import roc - from numba.roc.hlc import hlc, libhlc - import textwrap as tw - import ctypes as ct - import llvmlite.binding as llvmbind - import locale - from datetime import datetime - from itertools import chain - from subprocess import check_output, CalledProcessError - - try: - fmt = "%-35s : %-s" - print("-" * 80) - print("__Time Stamp__") - print(datetime.utcnow()) - print("") - - print("__Hardware Information__") - print(fmt % ("Machine", platform.machine())) - print(fmt % ("CPU Name", llvmbind.get_host_cpu_name())) - try: - featuremap = llvmbind.get_host_cpu_features() - except RuntimeError: - print(fmt % ("CPU Features", "NA")) - else: - features = sorted([key for key, value in featuremap.items() - if value]) - cpu_feat = tw.fill(' '.join(features), 80) - print(fmt % ("CPU Features", "")) - print(cpu_feat) - print("") - - print("__OS Information__") - print(fmt % ("Platform", platform.platform(aliased=True))) - print(fmt % ("Release", platform.release())) - system_name = platform.system() - print(fmt % ("System Name", system_name)) - 
print(fmt % ("Version", platform.version())) - try: - if system_name == 'Linux': - info = platform.linux_distribution() - elif system_name == 'Windows': - info = platform.win32_ver() - elif system_name == 'Darwin': - info = platform.mac_ver() - else: - raise RuntimeError("Unknown system.") - buf = ''.join([x - if x != '' else ' ' - for x in list(chain.from_iterable(info))]) - print(fmt % ("OS specific info", buf)) - - if system_name == 'Linux': - print(fmt % ("glibc info", ' '.join(platform.libc_ver()))) - except: - print("Error: System name incorrectly identified or unknown.") - print("") - - print("__Python Information__") - print(fmt % ("Python Compiler", platform.python_compiler())) - print( - fmt % - ("Python Implementation", - platform.python_implementation())) - print(fmt % ("Python Version", platform.python_version())) - print( - fmt % - ("Python Locale ", ' '.join( - [x for x in locale.getdefaultlocale() if x is not None]))) - - print("") - print("__LLVM information__") - print( - fmt % - ("LLVM version", '.'.join( - [str(k) for k in llvmbind.llvm_version_info]))) - - print("") - print("__CUDA Information__") - # Look for GPUs - try: - cu.list_devices()[0] # will a device initialise? - except BaseException as e: - msg_not_found = "CUDA driver library cannot be found" - msg_disabled_by_user = "CUDA is disabled" - msg_end = " or no CUDA enabled devices are present." - msg_generic_problem = "Error: CUDA device intialisation problem." 
- msg = getattr(e, 'msg', None) - if msg is not None: - if msg_not_found in msg: - err_msg = msg_not_found + msg_end - elif msg_disabled_by_user in msg: - err_msg = msg_disabled_by_user + msg_end - else: - err_msg = msg_generic_problem + " Message:" + msg - else: - err_msg = msg_generic_problem + " " + str(e) - # Best effort error report - print("%s\nError class: %s" % (err_msg, str(type(e)))) - else: - try: - cu.detect() - dv = ct.c_int(0) - cudriver.cuDriverGetVersion(ct.byref(dv)) - print(fmt % ("CUDA driver version", dv.value)) - print("CUDA libraries:") - cudadrv.libs.test(sys.platform, print_paths=False) - except: - print( - "Error: Probing CUDA failed (device and driver present, runtime problem?)\n") - - print("") - print("__ROC Information__") - roc_is_available = roc.is_available() - print(fmt % ("ROC available", roc_is_available)) - - toolchains = [] - try: - libhlc.HLC() - toolchains.append('librocmlite library') - except: - pass - try: - cmd = hlc.CmdLine().check_tooling() - toolchains.append('ROC command line tools') - except: - pass - - # if no ROC try and report why - if not roc_is_available: - from numba.roc.hsadrv.driver import hsa - try: - hsa.is_available - except BaseException as e: - msg = str(e) - else: - msg = 'No ROC toolchains found.' 
- print(fmt % ("Error initialising ROC due to", msg)) - - if toolchains: - print(fmt % ("Available Toolchains", ', '.join(toolchains))) - - try: - # ROC might not be available due to lack of tool chain, but HSA - # agents may be listed - from numba.roc.hsadrv.driver import hsa, dgpu_count - print("\nFound %s HSA Agents:" % len(hsa.agents)) - for i, agent in enumerate(hsa.agents): - print('Agent id : %s' % i) - print(' vendor: %s' % agent.vendor_name) - print(' name: %s' % agent.name) - print(' type: %s' % agent.device) - print("") - - _dgpus = [] - for a in hsa.agents: - if a.is_component and a.device == 'GPU': - _dgpus.append(a.name) - print(fmt % ("Found %s discrete GPU(s)" % dgpu_count(), \ - ', '.join(_dgpus))) - except Exception as e: - print("No HSA Agents found, encountered exception when searching:") - print(e) - - - print("") - print("__SVML Information__") - # replicate some SVML detection logic from numba.__init__ here. - # if SVML load fails in numba.__init__ the splitting of the logic - # here will help diagnosis of the underlying issue - have_svml_library = True - try: - if sys.platform.startswith('linux'): - llvmbind.load_library_permanently("libsvml.so") - elif sys.platform.startswith('darwin'): - llvmbind.load_library_permanently("libsvml.dylib") - elif sys.platform.startswith('win'): - llvmbind.load_library_permanently("svml_dispmd") - else: - have_svml_library = False - except: - have_svml_library = False - func = getattr(llvmbind.targets, "has_svml", None) - llvm_svml_patched = func() if func is not None else False - svml_operational = (config.USING_SVML and llvm_svml_patched \ - and have_svml_library) - print(fmt % ("SVML state, config.USING_SVML", config.USING_SVML)) - print(fmt % ("SVML library found and loaded", have_svml_library)) - print(fmt % ("llvmlite using SVML patched LLVM", llvm_svml_patched)) - print(fmt % ("SVML operational:", svml_operational)) - - # Look for conda and conda information - print("") - print("__Conda Information__") 
- cmd = ["conda", "info", "--json"] - try: - conda_out = check_output(cmd) - except Exception as e: - print( - "Conda not present/not working.\nError was %s\n" % e) - else: - data = ''.join(conda_out.decode("utf-8").splitlines()) - jsond = json.loads(data) - keys = ['conda_build_version', - 'conda_env_version', - 'platform', - 'python_version', - 'root_writable'] - for k in keys: - try: - print(fmt % (k, jsond[k])) - except KeyError: - pass - - # get info about current environment - cmd = ["conda", "list"] - try: - conda_out = check_output(cmd) - except CalledProcessError as e: - print("Error: Conda command failed. Error was %s\n" % e.output) - else: - print("") - print("__Current Conda Env__") - data = conda_out.decode("utf-8").splitlines() - for k in data: - if k[0] != '#': # don't show where the env is, personal data - print(k) - - print("-" * 80) - - except Exception as e: - print("Error: The system reporting tool has failed unexpectedly.") - print("Exception was:") - print(e) - - finally: - print( - "%s" % - "If requested, please copy and paste the information between\n" - "the dashed (----) lines, or from a given specific section as\n" - "appropriate.\n\n" - "=============================================================\n" - "IMPORTANT: Please ensure that you are happy with sharing the\n" - "contents of the information present, any information that you\n" - "wish to keep private you should remove before sharing.\n" - "=============================================================\n") - - -def make_parser(): - parser = argparse.ArgumentParser() - parser.add_argument('--annotate', help='Annotate source', - action='store_true') - parser.add_argument('--dump-llvm', action="store_true", - help='Print generated llvm assembly') - parser.add_argument('--dump-optimized', action='store_true', - help='Dump the optimized llvm assembly') - parser.add_argument('--dump-assembly', action='store_true', - help='Dump the LLVM generated assembly') - 
parser.add_argument('--dump-cfg', action="store_true", - help='[Deprecated] Dump the control flow graph') - parser.add_argument('--dump-ast', action="store_true", - help='[Deprecated] Dump the AST') - parser.add_argument('--annotate-html', nargs=1, - help='Output source annotation as html') - parser.add_argument('-s', '--sysinfo', action="store_true", - help='Output system information for bug reporting') - parser.add_argument('filename', nargs='?', help='Python source filename') - return parser - - -def main(): - parser = make_parser() - args = parser.parse_args() - - if args.dump_cfg: - print("CFG dump is removed.") - sys.exit(1) - if args.dump_ast: - print("AST dump is removed. Numba no longer depends on AST.") - sys.exit(1) - - if args.sysinfo: - print("System info:") - get_sys_info() - sys.exit(0) - - os.environ['NUMBA_DUMP_ANNOTATION'] = str(int(args.annotate)) - if args.annotate_html is not None: - try: - from jinja2 import Template - except ImportError: - raise ImportError("Please install the 'jinja2' package") - os.environ['NUMBA_DUMP_HTML'] = str(args.annotate_html[0]) - os.environ['NUMBA_DUMP_LLVM'] = str(int(args.dump_llvm)) - os.environ['NUMBA_DUMP_OPTIMIZED'] = str(int(args.dump_optimized)) - os.environ['NUMBA_DUMP_ASSEMBLY'] = str(int(args.dump_assembly)) - - if args.filename: - cmd = [sys.executable, args.filename] - subprocess.call(cmd) - else: - print("numba: error: the following arguments are required: filename") - sys.exit(1) diff --git a/numba/numba/numpy_support.py b/numba/numba/numpy_support.py deleted file mode 100644 index 357939f37..000000000 --- a/numba/numba/numpy_support.py +++ /dev/null @@ -1,532 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import collections -import ctypes -import re - -import numpy as np - -from . 
import errors, types, config, npdatetime, utils - - -version = tuple(map(int, np.__version__.split('.')[:2])) -int_divbyzero_returns_zero = config.PYVERSION <= (3, 0) - -# Starting from Numpy 1.10, ufuncs accept argument conversion according -# to the "same_kind" rule (used to be "unsafe"). -strict_ufunc_typing = version >= (1, 10) - - -FROM_DTYPE = { - np.dtype('bool'): types.boolean, - np.dtype('int8'): types.int8, - np.dtype('int16'): types.int16, - np.dtype('int32'): types.int32, - np.dtype('int64'): types.int64, - - np.dtype('uint8'): types.uint8, - np.dtype('uint16'): types.uint16, - np.dtype('uint32'): types.uint32, - np.dtype('uint64'): types.uint64, - - np.dtype('float32'): types.float32, - np.dtype('float64'): types.float64, - - np.dtype('complex64'): types.complex64, - np.dtype('complex128'): types.complex128, -} - -re_typestr = re.compile(r'[<>=\|]([a-z])(\d+)?$', re.I) -re_datetimestr = re.compile(r'[<>=\|]([mM])8?(\[([a-z]+)\])?$', re.I) - -sizeof_unicode_char = np.dtype('U1').itemsize - - -def _from_str_dtype(dtype): - m = re_typestr.match(dtype.str) - if not m: - raise NotImplementedError(dtype) - groups = m.groups() - typecode = groups[0] - if typecode == 'U': - # unicode - if dtype.byteorder not in '=|': - raise NotImplementedError("Does not support non-native " - "byteorder") - count = dtype.itemsize // sizeof_unicode_char - assert count == int(groups[1]), "Unicode char size mismatch" - return types.UnicodeCharSeq(count) - - elif typecode == 'S': - # char - count = dtype.itemsize - assert count == int(groups[1]), "Char size mismatch" - return types.CharSeq(count) - - else: - raise NotImplementedError(dtype) - - -def _from_datetime_dtype(dtype): - m = re_datetimestr.match(dtype.str) - if not m: - raise NotImplementedError(dtype) - groups = m.groups() - typecode = groups[0] - unit = groups[2] or '' - if typecode == 'm': - return types.NPTimedelta(unit) - elif typecode == 'M': - return types.NPDatetime(unit) - else: - raise 
NotImplementedError(dtype) - - -def from_dtype(dtype): - """ - Return a Numba Type instance corresponding to the given Numpy *dtype*. - NotImplementedError is raised on unsupported Numpy dtypes. - """ - if dtype.fields is None: - try: - return FROM_DTYPE[dtype] - except KeyError: - if dtype.char in 'SU': - return _from_str_dtype(dtype) - if dtype.char in 'mM': - return _from_datetime_dtype(dtype) - if dtype.char in 'V': - subtype = from_dtype(dtype.subdtype[0]) - return types.NestedArray(subtype, dtype.shape) - raise NotImplementedError(dtype) - else: - return from_struct_dtype(dtype) - - -_as_dtype_letters = { - types.NPDatetime: 'M8', - types.NPTimedelta: 'm8', - types.CharSeq: 'S', - types.UnicodeCharSeq: 'U', -} - -def as_dtype(nbtype): - """ - Return a numpy dtype instance corresponding to the given Numba type. - NotImplementedError is if no correspondence is known. - """ - if isinstance(nbtype, (types.Complex, types.Integer, types.Float)): - return np.dtype(str(nbtype)) - if nbtype is types.bool_: - return np.dtype('?') - if isinstance(nbtype, (types.NPDatetime, types.NPTimedelta)): - letter = _as_dtype_letters[type(nbtype)] - if nbtype.unit: - return np.dtype('%s[%s]' % (letter, nbtype.unit)) - else: - return np.dtype(letter) - if isinstance(nbtype, (types.CharSeq, types.UnicodeCharSeq)): - letter = _as_dtype_letters[type(nbtype)] - return np.dtype('%s%d' % (letter, nbtype.count)) - if isinstance(nbtype, types.Record): - return nbtype.dtype - if isinstance(nbtype, types.EnumMember): - return as_dtype(nbtype.dtype) - raise NotImplementedError("%r cannot be represented as a Numpy dtype" - % (nbtype,)) - - -def is_arrayscalar(val): - return np.dtype(type(val)) in FROM_DTYPE - - -def map_arrayscalar_type(val): - if isinstance(val, np.generic): - # We can't blindly call np.dtype() as it loses information - # on some types, e.g. datetime64 and timedelta64. 
- dtype = val.dtype - else: - try: - dtype = np.dtype(type(val)) - except TypeError: - raise NotImplementedError("no corresponding numpy dtype for %r" % type(val)) - return from_dtype(dtype) - - -def is_array(val): - return isinstance(val, np.ndarray) - - -def map_layout(val): - if val.flags['C_CONTIGUOUS']: - layout = 'C' - elif val.flags['F_CONTIGUOUS']: - layout = 'F' - else: - layout = 'A' - return layout - - -def select_array_wrapper(inputs): - """ - Given the array-compatible input types to an operation (e.g. ufunc), - select the appropriate input for wrapping the operation output, - according to each input's __array_priority__. - - An index into *inputs* is returned. - """ - max_prio = float('-inf') - selected_input = None - selected_index = None - for index, ty in enumerate(inputs): - # Ties are broken by choosing the first winner, as in Numpy - if isinstance(ty, types.ArrayCompatible) and ty.array_priority > max_prio: - selected_input = ty - selected_index = index - max_prio = ty.array_priority - - assert selected_index is not None - return selected_index - - -def resolve_output_type(context, inputs, formal_output): - """ - Given the array-compatible input types to an operation (e.g. ufunc), - and the operation's formal output type (a types.Array instance), - resolve the actual output type using the typing *context*. - - This uses a mechanism compatible with Numpy's __array_priority__ / - __array_wrap__. - """ - selected_input = inputs[select_array_wrapper(inputs)] - args = selected_input, formal_output - sig = context.resolve_function_type('__array_wrap__', args, {}) - if sig is None: - if selected_input.array_priority == types.Array.array_priority: - # If it's the same priority as a regular array, assume we - # should return the output unchanged. 
- # (we can't define __array_wrap__ explicitly for types.Buffer, - # as that would be inherited by most array-compatible objects) - return formal_output - raise errors.TypingError("__array_wrap__ failed for %s" % (args,)) - return sig.return_type - - -def supported_ufunc_loop(ufunc, loop): - """Return whether the *loop* for the *ufunc* is supported -in nopython-. - - *loop* should be a UFuncLoopSpec instance, and *ufunc* a numpy ufunc. - - For ufuncs implemented using the ufunc_db, it is supported if the ufunc_db - contains a lowering definition for 'loop' in the 'ufunc' entry. - - For other ufuncs, it is type based. The loop will be considered valid if it - only contains the following letter types: '?bBhHiIlLqQfd'. Note this is - legacy and when implementing new ufuncs the ufunc_db should be preferred, - as it allows for a more fine-grained incremental support. - """ - from .targets import ufunc_db - loop_sig = loop.ufunc_sig - try: - # check if the loop has a codegen description in the - # ufunc_db. If so, we can proceed. - - # note that as of now not all ufuncs have an entry in the - # ufunc_db - supported_loop = loop_sig in ufunc_db.get_ufunc_info(ufunc) - except KeyError: - # for ufuncs not in ufunc_db, base the decision of whether the - # loop is supported on its types - loop_types = [x.char for x in loop.numpy_inputs + loop.numpy_outputs] - supported_types = '?bBhHiIlLqQfd' - # check if all the types involved in the ufunc loop are - # supported in this mode - supported_loop = all(t in supported_types for t in loop_types) - - return supported_loop - - -class UFuncLoopSpec(collections.namedtuple('_UFuncLoopSpec', - ('inputs', 'outputs', 'ufunc_sig'))): - """ - An object describing a ufunc loop's inner types. Properties: - - inputs: the inputs' Numba types - - outputs: the outputs' Numba types - - ufunc_sig: the string representing the ufunc's type signature, in - Numpy format (e.g. 
"ii->i") - """ - - __slots__ = () - - @property - def numpy_inputs(self): - return [as_dtype(x) for x in self.inputs] - - @property - def numpy_outputs(self): - return [as_dtype(x) for x in self.outputs] - - -def ufunc_can_cast(from_, to, has_mixed_inputs, casting='safe'): - """ - A variant of np.can_cast() that can allow casting any integer to - any real or complex type, in case the operation has mixed-kind - inputs. - - For example we want `np.power(float32, int32)` to be computed using - SP arithmetic and return `float32`. - However, `np.sqrt(int32)` should use DP arithmetic and return `float64`. - """ - from_ = np.dtype(from_) - to = np.dtype(to) - if has_mixed_inputs and from_.kind in 'iu' and to.kind in 'cf': - # Decide that all integers can cast to any real or complex type. - return True - return np.can_cast(from_, to, casting) - - -def ufunc_find_matching_loop(ufunc, arg_types): - """Find the appropriate loop to be used for a ufunc based on the types - of the operands - - ufunc - The ufunc we want to check - arg_types - The tuple of arguments to the ufunc, including any - explicit output(s). - return value - A UFuncLoopSpec identifying the loop, or None - if no matching loop is found. 
- """ - - # Separate logical input from explicit output arguments - input_types = arg_types[:ufunc.nin] - output_types = arg_types[ufunc.nin:] - assert(len(input_types) == ufunc.nin) - - try: - np_input_types = [as_dtype(x) for x in input_types] - except NotImplementedError: - return None - try: - np_output_types = [as_dtype(x) for x in output_types] - except NotImplementedError: - return None - - # Whether the inputs are mixed integer / floating-point - has_mixed_inputs = ( - any(dt.kind in 'iu' for dt in np_input_types) and - any(dt.kind in 'cf' for dt in np_input_types)) - - def choose_types(numba_types, ufunc_letters): - """ - Return a list of Numba types representing *ufunc_letters*, - except when the letter designates a datetime64 or timedelta64, - in which case the type is taken from *numba_types*. - """ - assert len(ufunc_letters) >= len(numba_types) - types = [tp if letter in 'mM' else from_dtype(np.dtype(letter)) - for tp, letter in zip(numba_types, ufunc_letters)] - # Add missing types (presumably implicit outputs) - types += [from_dtype(np.dtype(letter)) - for letter in ufunc_letters[len(numba_types):]] - return types - - # In NumPy, the loops are evaluated from first to last. The first one - # that is viable is the one used. One loop is viable if it is possible - # to cast every input operand to the one expected by the ufunc. - # Also under NumPy 1.10+ the output must be able to be cast back - # to a close enough type ("same_kind"). - - for candidate in ufunc.types: - ufunc_inputs = candidate[:ufunc.nin] - ufunc_outputs = candidate[-ufunc.nout:] - if 'O' in ufunc_inputs: - # Skip object arrays - continue - found = True - # Skip if any input or output argument is mismatching - for outer, inner in zip(np_input_types, ufunc_inputs): - # (outer is a dtype instance, inner is a type char) - if outer.char in 'mM' or inner in 'mM': - # For datetime64 and timedelta64, we want to retain - # precise typing (i.e. 
the units); therefore we look for - # an exact match. - if outer.char != inner: - found = False - break - elif not ufunc_can_cast(outer.char, inner, - has_mixed_inputs, 'safe'): - found = False - break - if found and strict_ufunc_typing: - # Can we cast the inner result to the outer result type? - for outer, inner in zip(np_output_types, ufunc_outputs): - if (outer.char not in 'mM' and not - ufunc_can_cast(inner, outer.char, - has_mixed_inputs, 'same_kind')): - found = False - break - if found: - # Found: determine the Numba types for the loop's inputs and - # outputs. - try: - inputs = choose_types(input_types, ufunc_inputs) - outputs = choose_types(output_types, ufunc_outputs) - except NotImplementedError: - # One of the selected dtypes isn't supported by Numba - # (e.g. float16), try other candidates - continue - else: - return UFuncLoopSpec(inputs, outputs, candidate) - - return None - - -def _is_aligned_struct(struct): - return struct.isalignedstruct - - -def from_struct_dtype(dtype): - if dtype.hasobject: - raise TypeError("Do not support dtype containing object") - - fields = {} - - for name, info in dtype.fields.items(): - # *info* may have 3 element if it has a "title", which can be ignored - [elemdtype, offset] = info[:2] - fields[name] = from_dtype(elemdtype), offset - - # Note: dtype.alignment is not consistent. - # It is different after passing into a recarray. - # recarray(N, dtype=mydtype).dtype.alignment != mydtype.alignment - size = dtype.itemsize - aligned = _is_aligned_struct(dtype) - - return types.Record(str(dtype.descr), fields, size, aligned, dtype) - - -def _get_bytes_buffer(ptr, nbytes): - """ - Get a ctypes array of *nbytes* starting at *ptr*. 
- """ - if isinstance(ptr, ctypes.c_void_p): - ptr = ptr.value - arrty = ctypes.c_byte * nbytes - return arrty.from_address(ptr) - -def _get_array_from_ptr(ptr, nbytes, dtype): - return np.frombuffer(_get_bytes_buffer(ptr, nbytes), dtype) - - -def carray(ptr, shape, dtype=None): - """ - Return a Numpy array view over the data pointed to by *ptr* with the - given *shape*, in C order. If *dtype* is given, it is used as the - array's dtype, otherwise the array's dtype is inferred from *ptr*'s type. - """ - from .typing.ctypes_utils import from_ctypes - - try: - # Use ctypes parameter protocol if available - ptr = ptr._as_parameter_ - except AttributeError: - pass - - # Normalize dtype, to accept e.g. "int64" or np.int64 - if dtype is not None: - dtype = np.dtype(dtype) - - if isinstance(ptr, ctypes.c_void_p): - if dtype is None: - raise TypeError("explicit dtype required for void* argument") - p = ptr - elif isinstance(ptr, ctypes._Pointer): - ptrty = from_ctypes(ptr.__class__) - assert isinstance(ptrty, types.CPointer) - ptr_dtype = as_dtype(ptrty.dtype) - if dtype is not None and dtype != ptr_dtype: - raise TypeError("mismatching dtype '%s' for pointer %s" - % (dtype, ptr)) - dtype = ptr_dtype - p = ctypes.cast(ptr, ctypes.c_void_p) - else: - raise TypeError("expected a ctypes pointer, got %r" % (ptr,)) - - nbytes = dtype.itemsize * np.product(shape, dtype=np.intp) - return _get_array_from_ptr(p, nbytes, dtype).reshape(shape) - - -def farray(ptr, shape, dtype=None): - """ - Return a Numpy array view over the data pointed to by *ptr* with the - given *shape*, in Fortran order. If *dtype* is given, it is used as the - array's dtype, otherwise the array's dtype is inferred from *ptr*'s type. - """ - if not isinstance(shape, utils.INT_TYPES): - shape = shape[::-1] - return carray(ptr, shape, dtype).T - - -def is_contiguous(dims, strides, itemsize): - """Is the given shape, strides, and itemsize of C layout? 
- - Note: The code is usable as a numba-compiled function - """ - nd = len(dims) - # Check and skip 1s or 0s in inner dims - innerax = nd - 1 - while innerax > -1 and dims[innerax] <= 1: - innerax -= 1 - - # Early exit if all axis are 1s or 0s - if innerax < 0: - return True - - # Check itemsize matches innermost stride - if itemsize != strides[innerax]: - return False - - # Check and skip 1s or 0s in outer dims - outerax = 0 - while outerax < innerax and dims[outerax] <= 1: - outerax += 1 - - # Check remaining strides to be contiguous - ax = innerax - while ax > outerax: - if strides[ax] * dims[ax] != strides[ax - 1]: - return False - ax -= 1 - return True - - -def is_fortran(dims, strides, itemsize): - """Is the given shape, strides, and itemsize of F layout? - - Note: The code is usable as a numba-compiled function - """ - nd = len(dims) - # Check and skip 1s or 0s in inner dims - firstax = 0 - while firstax < nd and dims[firstax] <= 1: - firstax += 1 - - # Early exit if all axis are 1s or 0s - if firstax >= nd: - return True - - # Check itemsize matches innermost stride - if itemsize != strides[firstax]: - return False - - # Check and skip 1s or 0s in outer dims - lastax = nd - 1 - while lastax > firstax and dims[lastax] <= 1: - lastax -= 1 - - # Check remaining strides to be contiguous - ax = firstax - while ax < lastax: - if strides[ax] * dims[ax] != strides[ax + 1]: - return False - ax += 1 - return True diff --git a/numba/numba/objmode.py b/numba/numba/objmode.py deleted file mode 100644 index ca3286003..000000000 --- a/numba/numba/objmode.py +++ /dev/null @@ -1,597 +0,0 @@ -""" -Lowering implementation for object mode. -""" - -from __future__ import print_function, division, absolute_import - -from llvmlite.llvmpy.core import Type, Constant -import llvmlite.llvmpy.core as lc - -from . 
import cgutils, generators, ir, types, utils -from .errors import ForbiddenConstruct -from .lowering import BaseLower -from .utils import builtins, intern - - -# Issue #475: locals() is unsupported as calling it naively would give -# out wrong results. -_unsupported_builtins = set([locals]) - -# Map operators to methods on the PythonAPI class -PYTHON_OPMAP = { - '+': "number_add", - '-': "number_subtract", - '*': "number_multiply", - '/?': "number_divide", - '/': "number_truedivide", - '//': "number_floordivide", - '%': "number_remainder", - '**': "number_power", - '@': "number_matrix_multiply", - '<<': "number_lshift", - '>>': "number_rshift", - '&': "number_and", - '|': "number_or", - '^': "number_xor", -} - - -class PyLower(BaseLower): - - GeneratorLower = generators.PyGeneratorLower - - def init(self): - # Strings to be frozen into the Environment object - self._frozen_strings = set() - - self._live_vars = set() - - def pre_lower(self): - super(PyLower, self).pre_lower() - self.init_pyapi() - # Pre-computed for later use - from .dispatcher import OmittedArg - self.omitted_typobj = self.pyapi.unserialize( - self.pyapi.serialize_object(OmittedArg)) - - def post_lower(self): - pass - - def pre_block(self, block): - self.init_vars(block) - - def lower_inst(self, inst): - if isinstance(inst, ir.Assign): - value = self.lower_assign(inst) - self.storevar(value, inst.target.name) - - elif isinstance(inst, ir.SetItem): - target = self.loadvar(inst.target.name) - index = self.loadvar(inst.index.name) - value = self.loadvar(inst.value.name) - ok = self.pyapi.object_setitem(target, index, value) - self.check_int_status(ok) - - elif isinstance(inst, ir.DelItem): - target = self.loadvar(inst.target.name) - index = self.loadvar(inst.index.name) - ok = self.pyapi.object_delitem(target, index) - self.check_int_status(ok) - - elif isinstance(inst, ir.SetAttr): - target = self.loadvar(inst.target.name) - value = self.loadvar(inst.value.name) - ok = 
self.pyapi.object_setattr(target, - self._freeze_string(inst.attr), - value) - self.check_int_status(ok) - - elif isinstance(inst, ir.DelAttr): - target = self.loadvar(inst.target.name) - ok = self.pyapi.object_delattr(target, - self._freeze_string(inst.attr)) - self.check_int_status(ok) - - elif isinstance(inst, ir.StoreMap): - dct = self.loadvar(inst.dct.name) - key = self.loadvar(inst.key.name) - value = self.loadvar(inst.value.name) - ok = self.pyapi.dict_setitem(dct, key, value) - self.check_int_status(ok) - - elif isinstance(inst, ir.Return): - retval = self.loadvar(inst.value.name) - if self.generator_info: - # StopIteration - # We own a reference to the "return value", but we - # don't return it. - self.pyapi.decref(retval) - self.genlower.return_from_generator(self) - return - # No need to incref() as the reference is already owned. - self.call_conv.return_value(self.builder, retval) - - elif isinstance(inst, ir.Branch): - cond = self.loadvar(inst.cond.name) - if cond.type == Type.int(1): - istrue = cond - else: - istrue = self.pyapi.object_istrue(cond) - zero = lc.Constant.null(istrue.type) - pred = self.builder.icmp(lc.ICMP_NE, istrue, zero) - tr = self.blkmap[inst.truebr] - fl = self.blkmap[inst.falsebr] - self.builder.cbranch(pred, tr, fl) - - elif isinstance(inst, ir.Jump): - target = self.blkmap[inst.target] - self.builder.branch(target) - - elif isinstance(inst, ir.Del): - self.delvar(inst.value) - - elif isinstance(inst, ir.Raise): - if inst.exception is not None: - exc = self.loadvar(inst.exception.name) - # A reference will be stolen by raise_object() and another - # by return_exception_raised(). 
- self.incref(exc) - else: - exc = None - self.pyapi.raise_object(exc) - self.return_exception_raised() - - else: - raise NotImplementedError(type(inst), inst) - - def lower_assign(self, inst): - """ - The returned object must have a new reference - """ - value = inst.value - if isinstance(value, (ir.Const, ir.FreeVar)): - return self.lower_const(value.value) - elif isinstance(value, ir.Var): - val = self.loadvar(value.name) - self.incref(val) - return val - elif isinstance(value, ir.Expr): - return self.lower_expr(value) - elif isinstance(value, ir.Global): - return self.lower_global(value.name, value.value) - elif isinstance(value, ir.Yield): - return self.lower_yield(value) - elif isinstance(value, ir.Arg): - obj = self.fnargs[value.index] - # When an argument is omitted, the dispatcher hands it as - # _OmittedArg() - typobj = self.pyapi.get_type(obj) - slot = cgutils.alloca_once_value(self.builder, obj) - is_omitted = self.builder.icmp_unsigned('==', typobj, - self.omitted_typobj) - with self.builder.if_else(is_omitted, likely=False) as (omitted, present): - with present: - self.incref(obj) - self.builder.store(obj, slot) - with omitted: - # The argument is omitted => get the default value - obj = self.pyapi.object_getattr_string(obj, 'value') - self.builder.store(obj, slot) - - return self.builder.load(slot) - else: - raise NotImplementedError(type(value), value) - - def lower_yield(self, inst): - yp = self.generator_info.yield_points[inst.index] - assert yp.inst is inst - self.genlower.init_generator_state(self) - - # Save live vars in state - # We also need to save live vars that are del'ed afterwards. 
- y = generators.LowerYield(self, yp, yp.live_vars | yp.weak_live_vars) - y.lower_yield_suspend() - # Yield to caller - val = self.loadvar(inst.value.name) - # Let caller own the reference - self.pyapi.incref(val) - self.call_conv.return_value(self.builder, val) - - # Resumption point - y.lower_yield_resume() - # None is returned by the yield expression - return self.pyapi.make_none() - - def lower_binop(self, expr, op, inplace=False): - lhs = self.loadvar(expr.lhs.name) - rhs = self.loadvar(expr.rhs.name) - if op in PYTHON_OPMAP: - fname = PYTHON_OPMAP[op] - fn = getattr(self.pyapi, fname) - res = fn(lhs, rhs, inplace=inplace) - else: - # Assumed to be rich comparison - res = self.pyapi.object_richcompare(lhs, rhs, expr.fn) - self.check_error(res) - return res - - def lower_expr(self, expr): - if expr.op == 'binop': - return self.lower_binop(expr, expr.fn, inplace=False) - elif expr.op == 'inplace_binop': - return self.lower_binop(expr, expr.immutable_fn, inplace=True) - elif expr.op == 'unary': - value = self.loadvar(expr.value.name) - if expr.fn == '-': - res = self.pyapi.number_negative(value) - elif expr.fn == '+': - res = self.pyapi.number_positive(value) - elif expr.fn == 'not': - res = self.pyapi.object_not(value) - self.check_int_status(res) - - longval = self.builder.zext(res, self.pyapi.long) - res = self.pyapi.bool_from_long(longval) - elif expr.fn == '~': - res = self.pyapi.number_invert(value) - else: - raise NotImplementedError(expr) - self.check_error(res) - return res - elif expr.op == 'call': - argvals = [self.loadvar(a.name) for a in expr.args] - fn = self.loadvar(expr.func.name) - args = self.pyapi.tuple_pack(argvals) - if expr.vararg: - # Expand *args - new_args = self.pyapi.number_add(args, - self.loadvar(expr.vararg.name)) - self.decref(args) - args = new_args - if not expr.kws: - # No named arguments - ret = self.pyapi.call(fn, args, None) - else: - # Named arguments - keyvalues = [(k, self.loadvar(v.name)) for k, v in expr.kws] - kws = 
self.pyapi.dict_pack(keyvalues) - ret = self.pyapi.call(fn, args, kws) - self.decref(kws) - self.decref(args) - self.check_error(ret) - return ret - elif expr.op == 'getattr': - obj = self.loadvar(expr.value.name) - res = self.pyapi.object_getattr(obj, self._freeze_string(expr.attr)) - self.check_error(res) - return res - elif expr.op == 'build_tuple': - items = [self.loadvar(it.name) for it in expr.items] - res = self.pyapi.tuple_pack(items) - self.check_error(res) - return res - elif expr.op == 'build_list': - items = [self.loadvar(it.name) for it in expr.items] - res = self.pyapi.list_pack(items) - self.check_error(res) - return res - elif expr.op == 'build_map': - res = self.pyapi.dict_new(expr.size) - self.check_error(res) - for k, v in expr.items: - key = self.loadvar(k.name) - value = self.loadvar(v.name) - ok = self.pyapi.dict_setitem(res, key, value) - self.check_int_status(ok) - return res - elif expr.op == 'build_set': - items = [self.loadvar(it.name) for it in expr.items] - res = self.pyapi.set_new() - self.check_error(res) - for it in items: - ok = self.pyapi.set_add(res, it) - self.check_int_status(ok) - return res - elif expr.op == 'getiter': - obj = self.loadvar(expr.value.name) - res = self.pyapi.object_getiter(obj) - self.check_error(res) - return res - elif expr.op == 'iternext': - iterobj = self.loadvar(expr.value.name) - item = self.pyapi.iter_next(iterobj) - is_valid = cgutils.is_not_null(self.builder, item) - pair = self.pyapi.tuple_new(2) - with self.builder.if_else(is_valid) as (then, otherwise): - with then: - self.pyapi.tuple_setitem(pair, 0, item) - with otherwise: - self.check_occurred() - # Make the tuple valid by inserting None as dummy - # iteration "result" (it will be ignored). 
- self.pyapi.tuple_setitem(pair, 0, self.pyapi.make_none()) - self.pyapi.tuple_setitem(pair, 1, self.pyapi.bool_from_bool(is_valid)) - return pair - elif expr.op == 'pair_first': - pair = self.loadvar(expr.value.name) - first = self.pyapi.tuple_getitem(pair, 0) - self.incref(first) - return first - elif expr.op == 'pair_second': - pair = self.loadvar(expr.value.name) - second = self.pyapi.tuple_getitem(pair, 1) - self.incref(second) - return second - elif expr.op == 'exhaust_iter': - iterobj = self.loadvar(expr.value.name) - tup = self.pyapi.sequence_tuple(iterobj) - self.check_error(tup) - # Check tuple size is as expected - tup_size = self.pyapi.tuple_size(tup) - expected_size = self.context.get_constant(types.intp, expr.count) - has_wrong_size = self.builder.icmp(lc.ICMP_NE, - tup_size, expected_size) - with cgutils.if_unlikely(self.builder, has_wrong_size): - self.return_exception(ValueError) - return tup - elif expr.op == 'getitem': - value = self.loadvar(expr.value.name) - index = self.loadvar(expr.index.name) - res = self.pyapi.object_getitem(value, index) - self.check_error(res) - return res - elif expr.op == 'static_getitem': - value = self.loadvar(expr.value.name) - index = self.context.get_constant(types.intp, expr.index) - indexobj = self.pyapi.long_from_ssize_t(index) - self.check_error(indexobj) - res = self.pyapi.object_getitem(value, indexobj) - self.decref(indexobj) - self.check_error(res) - return res - elif expr.op == 'getslice': - target = self.loadvar(expr.target.name) - start = self.loadvar(expr.start.name) - stop = self.loadvar(expr.stop.name) - - slicefn = self.get_builtin_obj("slice") - sliceobj = self.pyapi.call_function_objargs(slicefn, (start, stop)) - self.decref(slicefn) - self.check_error(sliceobj) - - res = self.pyapi.object_getitem(target, sliceobj) - self.check_error(res) - - return res - - elif expr.op == 'cast': - val = self.loadvar(expr.value.name) - self.incref(val) - return val - - else: - raise NotImplementedError(expr) - - 
def lower_const(self, const): - # All constants are frozen inside the environment - index = self.env_manager.add_const(const) - ret = self.env_manager.read_const(index) - self.check_error(ret) - self.incref(ret) - return ret - - def lower_global(self, name, value): - """ - 1) Check global scope dictionary. - 2) Check __builtins__. - 2a) is it a dictionary (for non __main__ module) - 2b) is it a module (for __main__ module) - """ - moddict = self.get_module_dict() - obj = self.pyapi.dict_getitem(moddict, self._freeze_string(name)) - self.incref(obj) # obj is borrowed - - try: - if value in _unsupported_builtins: - raise ForbiddenConstruct("builtins %s() is not supported" - % name, loc=self.loc) - except TypeError: - # `value` is unhashable, ignore - pass - - if hasattr(builtins, name): - obj_is_null = self.is_null(obj) - bbelse = self.builder.basic_block - - with self.builder.if_then(obj_is_null): - mod = self.pyapi.dict_getitem(moddict, - self._freeze_string("__builtins__")) - builtin = self.builtin_lookup(mod, name) - bbif = self.builder.basic_block - - retval = self.builder.phi(self.pyapi.pyobj) - retval.add_incoming(obj, bbelse) - retval.add_incoming(builtin, bbif) - - else: - retval = obj - with cgutils.if_unlikely(self.builder, self.is_null(retval)): - self.pyapi.raise_missing_global_error(name) - self.return_exception_raised() - - return retval - - # ------------------------------------------------------------------------- - - def get_module_dict(self): - return self.env_body.globals - - def get_builtin_obj(self, name): - # XXX The builtins dict could be bound into the environment - moddict = self.get_module_dict() - mod = self.pyapi.dict_getitem(moddict, - self._freeze_string("__builtins__")) - return self.builtin_lookup(mod, name) - - def builtin_lookup(self, mod, name): - """ - Args - ---- - mod: - The __builtins__ dictionary or module, as looked up in - a module's globals. 
- name: str - The object to lookup - """ - fromdict = self.pyapi.dict_getitem(mod, self._freeze_string(name)) - self.incref(fromdict) # fromdict is borrowed - bbifdict = self.builder.basic_block - - with cgutils.if_unlikely(self.builder, self.is_null(fromdict)): - # This happen if we are using the __main__ module - frommod = self.pyapi.object_getattr(mod, self._freeze_string(name)) - - with cgutils.if_unlikely(self.builder, self.is_null(frommod)): - self.pyapi.raise_missing_global_error(name) - self.return_exception_raised() - - bbifmod = self.builder.basic_block - - builtin = self.builder.phi(self.pyapi.pyobj) - builtin.add_incoming(fromdict, bbifdict) - builtin.add_incoming(frommod, bbifmod) - - return builtin - - def check_occurred(self): - """ - Return if an exception occurred. - """ - err_occurred = cgutils.is_not_null(self.builder, - self.pyapi.err_occurred()) - - with cgutils.if_unlikely(self.builder, err_occurred): - self.return_exception_raised() - - def check_error(self, obj): - """ - Return if *obj* is NULL. - """ - with cgutils.if_unlikely(self.builder, self.is_null(obj)): - self.return_exception_raised() - - return obj - - def check_int_status(self, num, ok_value=0): - """ - Raise an exception if *num* is smaller than *ok_value*. - """ - ok = lc.Constant.int(num.type, ok_value) - pred = self.builder.icmp(lc.ICMP_SLT, num, ok) - with cgutils.if_unlikely(self.builder, pred): - self.return_exception_raised() - - def is_null(self, obj): - return cgutils.is_null(self.builder, obj) - - def return_exception_raised(self): - """ - Return with the currently raised exception. - """ - self.cleanup_vars() - self.call_conv.return_exc(self.builder) - - def init_vars(self, block): - """ - Initialize live variables for *block*. 
- """ - self._live_vars = set(self.func_ir.get_block_entry_vars(block)) - - def _getvar(self, name, ltype=None): - if name not in self.varmap: - self.varmap[name] = self.alloca(name, ltype=ltype) - return self.varmap[name] - - def loadvar(self, name): - """ - Load the llvm value of the variable named *name*. - """ - # If this raises then the live variables analysis is wrong - assert name in self._live_vars, name - ptr = self.varmap[name] - val = self.builder.load(ptr) - with cgutils.if_unlikely(self.builder, self.is_null(val)): - self.pyapi.raise_missing_name_error(name) - self.return_exception_raised() - return val - - def delvar(self, name): - """ - Delete the variable slot with the given name. This will decref - the corresponding Python object. - """ - # If this raises then the live variables analysis is wrong - self._live_vars.remove(name) - ptr = self._getvar(name) # initializes `name` if not already - self.decref(self.builder.load(ptr)) - # This is a safety guard against double decref's, but really - # the IR should be correct and have only one Del per variable - # and code path. - self.builder.store(cgutils.get_null_value(ptr.type.pointee), ptr) - - def storevar(self, value, name, clobber=False): - """ - Stores a llvm value and allocate stack slot if necessary. - The llvm value can be of arbitrary type. - """ - is_redefine = name in self._live_vars and not clobber - ptr = self._getvar(name, ltype=value.type) - if is_redefine: - old = self.builder.load(ptr) - else: - self._live_vars.add(name) - assert value.type == ptr.type.pointee, (str(value.type), - str(ptr.type.pointee)) - self.builder.store(value, ptr) - # Safe to call decref even on non python object - if is_redefine: - self.decref(old) - - def cleanup_vars(self): - """ - Cleanup live variables. - """ - for name in self._live_vars: - ptr = self._getvar(name) - self.decref(self.builder.load(ptr)) - - def alloca(self, name, ltype=None): - """ - Allocate a stack slot and initialize it to NULL. 
- The default is to allocate a pyobject pointer. - Use ``ltype`` to override. - """ - if ltype is None: - ltype = self.context.get_value_type(types.pyobject) - with self.builder.goto_block(self.entry_block): - ptr = self.builder.alloca(ltype, name=name) - self.builder.store(cgutils.get_null_value(ltype), ptr) - return ptr - - def incref(self, value): - self.pyapi.incref(value) - - def decref(self, value): - """ - This is allow to be called on non pyobject pointer, in which case - no code is inserted. - """ - lpyobj = self.context.get_value_type(types.pyobject) - if value.type == lpyobj: - self.pyapi.decref(value) - - def _freeze_string(self, string): - """ - Freeze a Python string object into the code. - """ - return self.lower_const(string) diff --git a/numba/numba/parfor.py b/numba/numba/parfor.py deleted file mode 100644 index c76339fff..000000000 --- a/numba/numba/parfor.py +++ /dev/null @@ -1,3064 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -""" -This module transforms data-parallel operations such as Numpy calls into -'Parfor' nodes, which are nested loops that can be parallelized. -It also implements optimizations such as loop fusion, and extends the rest of -compiler analysis and optimizations to support Parfors. -This is similar to ParallelAccelerator package in Julia: -https://github.com/IntelLabs/ParallelAccelerator.jl -'Parallelizing Julia with a Non-invasive DSL', T. Anderson et al., ECOOP'17. 
-""" -from __future__ import print_function, division, absolute_import -import types as pytypes # avoid confusion with numba.types -import sys, math -from functools import reduce -from collections import defaultdict -from contextlib import contextmanager - -import numba -from numba import ir, ir_utils, types, typing, rewrites, config, analysis, prange, pndindex -from numba import array_analysis, postproc, typeinfer -from numba.numpy_support import as_dtype -from numba.typing.templates import infer_global, AbstractTemplate -from numba import stencilparfor -from numba.stencilparfor import StencilPass - - -from numba.ir_utils import ( - mk_unique_var, - next_label, - mk_alloc, - get_np_ufunc_typ, - mk_range_block, - mk_loop_header, - find_op_typ, - get_name_var_table, - replace_vars, - replace_vars_inner, - visit_vars, - visit_vars_inner, - remove_dels, - remove_dead, - copy_propagate, - get_block_copies, - apply_copy_propagate, - dprint_func_ir, - find_topo_order, - get_stmt_writes, - rename_labels, - get_call_table, - simplify, - simplify_CFG, - has_no_side_effect, - canonicalize_array_math, - add_offset_to_labels, - find_callname, - find_build_sequence, - guard, - require, - GuardException, - compile_to_numba_ir, - get_definition, - build_definitions, - replace_arg_nodes, - replace_returns, - is_getitem, - is_setitem, - is_get_setitem, - index_var_of_get_setitem, - set_index_var_of_get_setitem) - -from numba.analysis import (compute_use_defs, compute_live_map, - compute_dead_maps, compute_cfg_from_blocks) -from numba.controlflow import CFGraph -from numba.typing import npydecl, signature -from numba.types.functions import Function -from numba.array_analysis import (random_int_args, random_1arg_size, - random_2arg_sizelast, random_3arg_sizelast, - random_calls, assert_equiv) -from numba.extending import overload -import copy -import numpy -import numpy as np -# circular dependency: import numba.npyufunc.dufunc.DUFunc - -sequential_parfor_lowering = False - -# 
init_prange is a sentinel call that specifies the start of the initialization -# code for the computation in the upcoming prange call -# This lets the prange pass to put the code in the generated parfor's init_block -def init_prange(): - return - -@overload(init_prange) -def init_prange_overload(): - def no_op(): - return - return no_op - -class internal_prange(object): - - def __new__(cls, *args): - return range(*args) - -def min_parallel_impl(return_type, arg): - # XXX: use prange for 1D arrays since pndindex returns a 1-tuple instead of - # integer. This causes type and fusion issues. - if arg.ndim == 1: - def min_1(in_arr): - numba.parfor.init_prange() - val = numba.targets.builtins.get_type_max_value(in_arr.dtype) - for i in numba.parfor.internal_prange(len(in_arr)): - val = min(val, in_arr[i]) - return val - else: - def min_1(in_arr): - numba.parfor.init_prange() - val = numba.targets.builtins.get_type_max_value(in_arr.dtype) - for i in numba.pndindex(in_arr.shape): - val = min(val, in_arr[i]) - return val - return min_1 - -def max_parallel_impl(return_type, arg): - if arg.ndim == 1: - def max_1(in_arr): - numba.parfor.init_prange() - val = numba.targets.builtins.get_type_min_value(in_arr.dtype) - for i in numba.parfor.internal_prange(len(in_arr)): - val = max(val, in_arr[i]) - return val - else: - def max_1(in_arr): - numba.parfor.init_prange() - val = numba.targets.builtins.get_type_min_value(in_arr.dtype) - for i in numba.pndindex(in_arr.shape): - val = max(val, in_arr[i]) - return val - return max_1 - -def argmin_parallel_impl(in_arr): - numba.parfor.init_prange() - A = in_arr.ravel() - init_val = numba.targets.builtins.get_type_max_value(A.dtype) - ival = numba.typing.builtins.IndexValue(0, init_val) - for i in numba.parfor.internal_prange(len(A)): - curr_ival = numba.typing.builtins.IndexValue(i, A[i]) - ival = min(ival, curr_ival) - return ival.index - -def argmax_parallel_impl(in_arr): - numba.parfor.init_prange() - A = in_arr.ravel() - init_val = 
numba.targets.builtins.get_type_min_value(A.dtype) - ival = numba.typing.builtins.IndexValue(0, init_val) - for i in numba.parfor.internal_prange(len(A)): - curr_ival = numba.typing.builtins.IndexValue(i, A[i]) - ival = max(ival, curr_ival) - return ival.index - -def dotvv_parallel_impl(a, b): - numba.parfor.init_prange() - l = a.shape[0] - m = b.shape[0] - # TODO: investigate assert_equiv - #assert_equiv("sizes of l, m do not match", l, m) - s = 0 - for i in numba.parfor.internal_prange(l): - s += a[i] * b[i] - return s - -def dotvm_parallel_impl(a, b): - numba.parfor.init_prange() - l = a.shape - m, n = b.shape - # TODO: investigate assert_equiv - #assert_equiv("Sizes of l, m do not match", l, m) - c = np.zeros(n, a.dtype) - # TODO: evaluate dotvm implementation options - #for i in prange(n): - # s = 0 - # for j in range(m): - # s += a[j] * b[j, i] - # c[i] = s - for i in numba.parfor.internal_prange(m): - c += a[i] * b[i, :] - return c - -def dotmv_parallel_impl(a, b): - numba.parfor.init_prange() - m, n = a.shape - l = b.shape - # TODO: investigate assert_equiv - #assert_equiv("sizes of n, l do not match", n, l) - c = np.empty(m, a.dtype) - for i in numba.parfor.internal_prange(m): - s = 0 - for j in range(n): - s += a[i, j] * b[j] - c[i] = s - return c - -def dot_parallel_impl(return_type, atyp, btyp): - # Note that matrix matrix multiply is not translated. 
- if (isinstance(atyp, types.npytypes.Array) and - isinstance(btyp, types.npytypes.Array)): - if atyp.ndim == btyp.ndim == 1: - return dotvv_parallel_impl - # TODO: evaluate support for dotvm and enable - #elif atyp.ndim == 1 and btyp.ndim == 2: - # return dotvm_parallel_impl - elif atyp.ndim == 2 and btyp.ndim == 1: - return dotmv_parallel_impl - -def sum_parallel_impl(return_type, arg): - zero = return_type(0) - - if arg.ndim == 1: - def sum_1(in_arr): - numba.parfor.init_prange() - val = zero - for i in numba.parfor.internal_prange(len(in_arr)): - val += in_arr[i] - return val - else: - def sum_1(in_arr): - numba.parfor.init_prange() - val = zero - for i in numba.pndindex(in_arr.shape): - val += in_arr[i] - return val - return sum_1 - -def prod_parallel_impl(return_type, arg): - one = return_type(1) - - if arg.ndim == 1: - def prod_1(in_arr): - numba.parfor.init_prange() - val = one - for i in numba.parfor.internal_prange(len(in_arr)): - val *= in_arr[i] - return val - else: - def prod_1(in_arr): - numba.parfor.init_prange() - val = one - for i in numba.pndindex(in_arr.shape): - val *= in_arr[i] - return val - return prod_1 - - -def mean_parallel_impl(return_type, arg): - # can't reuse sum since output type is different - zero = return_type(0) - - if arg.ndim == 1: - def mean_1(in_arr): - numba.parfor.init_prange() - val = zero - for i in numba.parfor.internal_prange(len(in_arr)): - val += in_arr[i] - return val/len(in_arr) - else: - def mean_1(in_arr): - numba.parfor.init_prange() - val = zero - for i in numba.pndindex(in_arr.shape): - val += in_arr[i] - return val/in_arr.size - return mean_1 - -def var_parallel_impl(return_type, arg): - - if arg.ndim == 1: - def var_1(in_arr): - # Compute the mean - m = in_arr.mean() - # Compute the sum of square diffs - numba.parfor.init_prange() - ssd = 0 - for i in numba.parfor.internal_prange(len(in_arr)): - val = in_arr[i] - m - ssd += np.real(val * np.conj(val)) - return ssd / len(in_arr) - else: - def var_1(in_arr): - # 
Compute the mean - m = in_arr.mean() - # Compute the sum of square diffs - numba.parfor.init_prange() - ssd = 0 - for i in numba.pndindex(in_arr.shape): - val = in_arr[i] - m - ssd += np.real(val * np.conj(val)) - return ssd / in_arr.size - return var_1 - -def std_parallel_impl(return_type, arg): - def std_1(in_arr): - return in_arr.var() ** 0.5 - return std_1 - -def arange_parallel_impl(return_type, *args): - dtype = as_dtype(return_type.dtype) - - def arange_1(stop): - return np.arange(0, stop, 1, dtype) - - def arange_2(start, stop): - return np.arange(start, stop, 1, dtype) - - def arange_3(start, stop, step): - return np.arange(start, stop, step, dtype) - - if any(isinstance(a, types.Complex) for a in args): - def arange_4(start, stop, step, dtype): - numba.parfor.init_prange() - nitems_c = (stop - start) / step - nitems_r = math.ceil(nitems_c.real) - nitems_i = math.ceil(nitems_c.imag) - nitems = int(max(min(nitems_i, nitems_r), 0)) - arr = np.empty(nitems, dtype) - for i in numba.parfor.internal_prange(nitems): - arr[i] = start + i * step - return arr - else: - def arange_4(start, stop, step, dtype): - numba.parfor.init_prange() - nitems_r = math.ceil((stop - start) / step) - nitems = int(max(nitems_r, 0)) - arr = np.empty(nitems, dtype) - val = start - for i in numba.parfor.internal_prange(nitems): - arr[i] = start + i * step - return arr - - if len(args) == 1: - return arange_1 - elif len(args) == 2: - return arange_2 - elif len(args) == 3: - return arange_3 - elif len(args) == 4: - return arange_4 - else: - raise ValueError("parallel arange with types {}".format(args)) - -def linspace_parallel_impl(return_type, *args): - dtype = as_dtype(return_type.dtype) - - def linspace_2(start, stop): - return np.linspace(start, stop, 50) - - def linspace_3(start, stop, num): - numba.parfor.init_prange() - arr = np.empty(num, dtype) - div = num - 1 - delta = stop - start - arr[0] = start - for i in numba.parfor.internal_prange(num): - arr[i] = start + delta * (i / 
div) - return arr - - if len(args) == 2: - return linspace_2 - elif len(args) == 3: - return linspace_3 - else: - raise ValueError("parallel linspace with types {}".format(args)) - -replace_functions_map = { - ('argmin', 'numpy'): lambda r,a: argmin_parallel_impl, - ('argmax', 'numpy'): lambda r,a: argmax_parallel_impl, - ('min', 'numpy'): min_parallel_impl, - ('max', 'numpy'): max_parallel_impl, - ('sum', 'numpy'): sum_parallel_impl, - ('prod', 'numpy'): prod_parallel_impl, - ('mean', 'numpy'): mean_parallel_impl, - ('var', 'numpy'): var_parallel_impl, - ('std', 'numpy'): std_parallel_impl, - ('dot', 'numpy'): dot_parallel_impl, - ('arange', 'numpy'): arange_parallel_impl, - ('linspace', 'numpy'): linspace_parallel_impl, -} - -class LoopNest(object): - - '''The LoopNest class holds information of a single loop including - the index variable (of a non-negative integer value), and the - range variable, e.g. range(r) is 0 to r-1 with step size 1. - ''' - - def __init__(self, index_variable, start, stop, step): - self.index_variable = index_variable - self.start = start - self.stop = stop - self.step = step - - - def __repr__(self): - return ("LoopNest(index_variable = {}, range = ({}, {}, {}))". 
- format(self.index_variable, self.start, self.stop, self.step)) - - def list_vars(self): - all_uses = [] - all_uses.append(self.index_variable) - if isinstance(self.start, ir.Var): - all_uses.append(self.start) - if isinstance(self.stop, ir.Var): - all_uses.append(self.stop) - if isinstance(self.step, ir.Var): - all_uses.append(self.step) - return all_uses - -class Parfor(ir.Expr, ir.Stmt): - - id_counter = 0 - - def __init__( - self, - loop_nests, - init_block, - loop_body, - loc, - index_var, - equiv_set, - pattern, - flags, - no_sequential_lowering=False): - super(Parfor, self).__init__( - op='parfor', - loc=loc - ) - - self.id = type(self).id_counter - type(self).id_counter += 1 - #self.input_info = input_info - #self.output_info = output_info - self.loop_nests = loop_nests - self.init_block = init_block - self.loop_body = loop_body - self.index_var = index_var - self.params = None # filled right before parallel lowering - self.equiv_set = equiv_set - # The parallel patterns this parfor was generated from and their options - # for example, a parfor could be from the stencil pattern with - # the neighborhood option - self.patterns = [pattern] - self.flags = flags - # if True, this parfor shouldn't be lowered sequentially even with the - # sequential lowering option - self.no_sequential_lowering = no_sequential_lowering - if config.DEBUG_ARRAY_OPT_STATS: - fmt = 'Parallel for-loop #{} is produced from pattern \'{}\' at {}' - print(fmt.format( - self.id, pattern, loc)) - - def __repr__(self): - return "id=" + str(self.id) + repr(self.loop_nests) + \ - repr(self.loop_body) + repr(self.index_var) - - def list_vars(self): - """list variables used (read/written) in this parfor by - traversing the body and combining block uses. 
- """ - all_uses = [] - for l, b in self.loop_body.items(): - for stmt in b.body: - all_uses += stmt.list_vars() - - for loop in self.loop_nests: - all_uses += loop.list_vars() - - for stmt in self.init_block.body: - all_uses += stmt.list_vars() - - return all_uses - - def get_shape_classes(self, var): - return self.equiv_set.get_shape_classes(var) - - def dump(self, file=None): - file = file or sys.stdout - print(("begin parfor {}".format(self.id)).center(20, '-'), file=file) - print("index_var = ", self.index_var, file=file) - for loopnest in self.loop_nests: - print(loopnest, file=file) - print("init block:", file=file) - self.init_block.dump(file) - for offset, block in sorted(self.loop_body.items()): - print('label %s:' % (offset,), file=file) - block.dump(file) - print(("end parfor {}".format(self.id)).center(20, '-'), file=file) - -def _analyze_parfor(parfor, equiv_set, typemap, array_analysis): - """Recursive array analysis for parfor nodes. - """ - func_ir = array_analysis.func_ir - parfor_blocks = wrap_parfor_blocks(parfor) - # Since init_block get label 0 after wrap, we need to save - # the equivset for the real block label 0. - backup_equivset = array_analysis.equiv_sets.get(0, None) - array_analysis.run(parfor_blocks, equiv_set) - unwrap_parfor_blocks(parfor, parfor_blocks) - parfor.equiv_set = array_analysis.equiv_sets[0] - # Restore equivset for block 0 after parfor is unwrapped - if backup_equivset: - array_analysis.equiv_sets[0] = backup_equivset - return [], [] - -array_analysis.array_analysis_extensions[Parfor] = _analyze_parfor - - -class PreParforPass(object): - """Preprocessing for the Parfor pass. It mostly inlines parallel - implementations of numpy functions if available. - """ - def __init__(self, func_ir, typemap, calltypes, typingctx, options): - self.func_ir = func_ir - self.typemap = typemap - self.calltypes = calltypes - self.typingctx = typingctx - self.options = options - - def run(self): - """Run pre-parfor processing pass. 
- """ - # e.g. convert A.sum() to np.sum(A) for easier match and optimization - canonicalize_array_math(self.func_ir, self.typemap, - self.calltypes, self.typingctx) - if self.options.numpy: - self._replace_parallel_functions(self.func_ir.blocks) - self.func_ir.blocks = simplify_CFG(self.func_ir.blocks) - - def _replace_parallel_functions(self, blocks): - """ - Replace functions with their parallel implemntation in - replace_functions_map if available. - The implementation code is inlined to enable more optimization. - """ - from numba.inline_closurecall import inline_closure_call - work_list = list(blocks.items()) - while work_list: - label, block = work_list.pop() - for i, instr in enumerate(block.body): - if isinstance(instr, ir.Assign): - lhs = instr.target - lhs_typ = self.typemap[lhs.name] - expr = instr.value - if isinstance(expr, ir.Expr) and expr.op == 'call': - # Try inline known calls with their parallel implementations - def replace_func(): - func_def = get_definition(self.func_ir, expr.func) - callname = find_callname(self.func_ir, expr) - repl_func = replace_functions_map.get(callname, None) - require(repl_func != None) - typs = tuple(self.typemap[x.name] for x in expr.args) - try: - new_func = repl_func(lhs_typ, *typs) - except: - new_func = None - require(new_func != None) - g = copy.copy(self.func_ir.func_id.func.__globals__) - g['numba'] = numba - g['np'] = numpy - g['math'] = math - # inline the parallel implementation - inline_closure_call(self.func_ir, g, - block, i, new_func, self.typingctx, typs, - self.typemap, self.calltypes, work_list) - return True - if guard(replace_func): - break - elif (isinstance(expr, ir.Expr) and expr.op == 'getattr' and - expr.attr == 'dtype'): - # Replace getattr call "A.dtype" with the actual type itself. - # This helps remove superfulous dependencies from parfor. 
- typ = self.typemap[expr.value.name] - if isinstance(typ, types.npytypes.Array): - dtype = typ.dtype - scope = block.scope - loc = instr.loc - g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) - self.typemap[g_np_var.name] = types.misc.Module(numpy) - g_np = ir.Global('np', numpy, loc) - g_np_assign = ir.Assign(g_np, g_np_var, loc) - typ_var = ir.Var(scope, mk_unique_var("$np_typ_var"), loc) - self.typemap[typ_var.name] = types.DType(dtype) - dtype_str = str(dtype) - if dtype_str == 'bool': - dtype_str = 'bool_' - np_typ_getattr = ir.Expr.getattr(g_np_var, dtype_str, loc) - typ_var_assign = ir.Assign(np_typ_getattr, typ_var, loc) - instr.value = typ_var - block.body.insert(0, typ_var_assign) - block.body.insert(0, g_np_assign) - break - - -class ParforPass(object): - - """ParforPass class is responsible for converting Numpy - calls in Numba intermediate representation to Parfors, which - will lower into either sequential or parallel loops during lowering - stage. - """ - - def __init__(self, func_ir, typemap, calltypes, return_type, typingctx, options, flags): - self.func_ir = func_ir - self.typemap = typemap - self.calltypes = calltypes - self.typingctx = typingctx - self.return_type = return_type - self.options = options - self.array_analysis = array_analysis.ArrayAnalysis(typingctx, func_ir, typemap, - calltypes) - ir_utils._max_label = max(func_ir.blocks.keys()) - self.flags = flags - - def run(self): - """run parfor conversion pass: replace Numpy calls - with Parfors when possible and optimize the IR.""" - # run array analysis, a pre-requisite for parfor translation - remove_dels(self.func_ir.blocks) - self.array_analysis.run(self.func_ir.blocks) - # run stencil translation to parfor - if self.options.stencil: - stencil_pass = StencilPass(self.func_ir, self.typemap, self.calltypes, - self.array_analysis, self.typingctx, self.flags) - stencil_pass.run() - if self.options.setitem: - self._convert_setitem(self.func_ir.blocks) - if self.options.numpy: - 
self._convert_numpy(self.func_ir.blocks) - if self.options.reduction: - self._convert_reduce(self.func_ir.blocks) - if self.options.prange: - self._convert_loop(self.func_ir.blocks) - dprint_func_ir(self.func_ir, "after parfor pass") - - # simplify CFG of parfor body loops since nested parfors with extra - # jumps can be created with prange conversion - simplify_parfor_body_CFG(self.func_ir.blocks) - # simplify before fusion - simplify(self.func_ir, self.typemap, self.calltypes) - # need two rounds of copy propagation to enable fusion of long sequences - # of parfors like test_fuse_argmin (some PYTHONHASHSEED values since - # apply_copies_parfor depends on set order for creating dummy assigns) - simplify(self.func_ir, self.typemap, self.calltypes) - - if self.options.fusion: - self.func_ir._definitions = build_definitions(self.func_ir.blocks) - self.array_analysis.equiv_sets = dict() - self.array_analysis.run(self.func_ir.blocks) - # reorder statements to maximize fusion - # push non-parfors down - maximize_fusion(self.func_ir, self.func_ir.blocks, - up_direction=False) - dprint_func_ir(self.func_ir, "after maximize fusion down") - self.fuse_parfors(self.array_analysis, self.func_ir.blocks) - # push non-parfors up - maximize_fusion(self.func_ir, self.func_ir.blocks) - dprint_func_ir(self.func_ir, "after maximize fusion up") - # try fuse again after maximize - self.fuse_parfors(self.array_analysis, self.func_ir.blocks) - dprint_func_ir(self.func_ir, "after fusion") - # simplify again - simplify(self.func_ir, self.typemap, self.calltypes) - # push function call variables inside parfors so gufunc function - # wouldn't need function variables as argument - push_call_vars(self.func_ir.blocks, {}, {}) - # simplify again - simplify(self.func_ir, self.typemap, self.calltypes) - dprint_func_ir(self.func_ir, "after optimization") - if config.DEBUG_ARRAY_OPT == 1: - print("variable types: ", sorted(self.typemap.items())) - print("call types: ", self.calltypes) - # run post 
processor again to generate Del nodes - post_proc = postproc.PostProcessor(self.func_ir) - post_proc.run() - if self.func_ir.is_generator: - fix_generator_types(self.func_ir.generator_info, self.return_type, - self.typemap) - if sequential_parfor_lowering: - lower_parfor_sequential( - self.typingctx, self.func_ir, self.typemap, self.calltypes) - else: - # prepare for parallel lowering - # add parfor params to parfors here since lowering is destructive - # changing the IR after this is not allowed - parfor_ids = get_parfor_params(self.func_ir.blocks, self.options.fusion) - if config.DEBUG_ARRAY_OPT_STATS: - name = self.func_ir.func_id.func_qualname - n_parfors = len(parfor_ids) - if n_parfors > 0: - after_fusion = ("After fusion" if self.options.fusion - else "With fusion disabled") - print(('{}, function {} has ' - '{} parallel for-loop(s) #{}.').format( - after_fusion, name, n_parfors, parfor_ids)) - else: - print('Function {} has no Parfor.'.format(name)) - return - - def _convert_numpy(self, blocks): - """ - Convert supported Numpy functions, as well as arrayexpr nodes, to - parfor nodes. 
- """ - topo_order = find_topo_order(blocks) - # variables available in the program so far (used for finding map - # functions in array_expr lowering) - avail_vars = [] - for label in topo_order: - block = blocks[label] - new_body = [] - equiv_set = self.array_analysis.get_equiv_set(label) - for instr in block.body: - if isinstance(instr, ir.Assign): - expr = instr.value - lhs = instr.target - if self._is_C_order(lhs.name): - # only translate C order since we can't allocate F - if guard(self._is_supported_npycall, expr): - instr = self._numpy_to_parfor(equiv_set, lhs, expr) - if isinstance(instr, tuple): - pre_stmts, instr = instr - new_body.extend(pre_stmts) - elif isinstance(expr, ir.Expr) and expr.op == 'arrayexpr': - instr = self._arrayexpr_to_parfor( - equiv_set, lhs, expr, avail_vars) - avail_vars.append(lhs.name) - new_body.append(instr) - block.body = new_body - - def _convert_reduce(self, blocks): - """ - Find reduce() calls and convert them to parfors. - """ - topo_order = find_topo_order(blocks) - for label in topo_order: - block = blocks[label] - new_body = [] - equiv_set = self.array_analysis.get_equiv_set(label) - for instr in block.body: - parfor = None - if isinstance(instr, ir.Assign): - loc = instr.loc - lhs = instr.target - expr = instr.value - callname = guard(find_callname, self.func_ir, expr) - if (callname == ('reduce', 'builtins') - or callname == ('reduce', '_functools')): - # reduce function with generic function - parfor = guard(self._reduce_to_parfor, equiv_set, lhs, - expr.args, loc) - if parfor: - instr = parfor - new_body.append(instr) - block.body = new_body - return - - def _convert_setitem(self, blocks): - # convert setitem expressions like A[C] = c or A[C] = B[C] to parfor, - # where C is a boolean array. 
- topo_order = find_topo_order(blocks) - # variables available in the program so far (used for finding map - # functions in array_expr lowering) - avail_vars = [] - for label in topo_order: - block = blocks[label] - new_body = [] - equiv_set = self.array_analysis.get_equiv_set(label) - for instr in block.body: - if isinstance(instr, ir.StaticSetItem) or isinstance(instr, ir.SetItem): - loc = instr.loc - target = instr.target - index = instr.index if isinstance(instr, ir.SetItem) else instr.index_var - value = instr.value - target_typ = self.typemap[target.name] - index_typ = self.typemap[index.name] - value_typ = self.typemap[value.name] - if isinstance(target_typ, types.npytypes.Array): - if (isinstance(index_typ, types.npytypes.Array) and - isinstance(index_typ.dtype, types.Boolean) and - target_typ.ndim == index_typ.ndim): - if isinstance(value_typ, types.Number): - instr = self._setitem_to_parfor(equiv_set, - loc, target, index, value) - elif isinstance(value_typ, types.npytypes.Array): - val_def = guard(get_definition, self.func_ir, - value.name) - if (isinstance(val_def, ir.Expr) and - val_def.op == 'getitem' and - val_def.index.name == index.name): - instr = self._setitem_to_parfor(equiv_set, - loc, target, index, val_def.value) - else: - shape = equiv_set.get_shape(instr) - if shape != None: - instr = self._setitem_to_parfor(equiv_set, - loc, target, index, value, shape=shape) - new_body.append(instr) - block.body = new_body - - def _convert_loop(self, blocks): - call_table, _ = get_call_table(blocks) - cfg = compute_cfg_from_blocks(blocks) - usedefs = compute_use_defs(blocks) - live_map = compute_live_map(cfg, blocks, usedefs.usemap, usedefs.defmap) - loops = cfg.loops() - sized_loops = [(loops[k], len(loops[k].body)) for k in loops.keys()] - moved_blocks = [] - # We go over all loops, smaller loops first (inner first) - for loop, s in sorted(sized_loops, key=lambda tup: tup[1]): - if len(loop.entries) != 1 or len(loop.exits) != 1: - continue - entry = 
list(loop.entries)[0] - for inst in blocks[entry].body: - # if prange or pndindex call - if (isinstance(inst, ir.Assign) - and isinstance(inst.value, ir.Expr) - and inst.value.op == 'call' - and self._is_parallel_loop(inst.value.func.name, call_table)): - body_labels = [ l for l in loop.body if - l in blocks and l != loop.header ] - args = inst.value.args - loop_kind = self._get_loop_kind(inst.value.func.name, - call_table) - # find loop index variable (pair_first in header block) - for stmt in blocks[loop.header].body: - if (isinstance(stmt, ir.Assign) - and isinstance(stmt.value, ir.Expr) - and stmt.value.op == 'pair_first'): - loop_index = stmt.target.name - break - # loop_index may be assigned to other vars - # get header copies to find all of them - cps, _ = get_block_copies({0: blocks[loop.header]}, - self.typemap) - cps = cps[0] - loop_index_vars = set(t for t, v in cps if v == loop_index) - loop_index_vars.add(loop_index) - - scope = blocks[entry].scope - loc = inst.loc - equiv_set = self.array_analysis.get_equiv_set(loop.header) - init_block = ir.Block(scope, loc) - init_block.body = self._get_prange_init_block(blocks[entry], - call_table, args) - # set l=l for remove dead prange call - inst.value = inst.target - loop_body = {l: blocks[l] for l in body_labels} - # Add an empty block to the end of loop body - end_label = next_label() - loop_body[end_label] = ir.Block(scope, loc) - # replace jumps to header block with the end block - for l in body_labels: - last_inst = loop_body[l].body[-1] - if (isinstance(last_inst, ir.Jump) and - last_inst.target == loop.header): - last_inst.target = end_label - - def find_indexed_arrays(): - """find expressions that involve getitem using the - index variable. Return both the arrays and expressions. 
- """ - indices = copy.copy(loop_index_vars) - for block in loop_body.values(): - for inst in block.find_insts(ir.Assign): - if (isinstance(inst.value, ir.Var) and - inst.value.name in indices): - indices.add(inst.target.name) - arrs = [] - exprs = [] - for block in loop_body.values(): - for inst in block.body: - lv = set(x.name for x in inst.list_vars()) - if lv & indices: - if lv.issubset(indices): - continue - require(isinstance(inst, ir.Assign)) - expr = inst.value - require(isinstance(expr, ir.Expr) and - expr.op in ['getitem', 'static_getitem']) - arrs.append(expr.value.name) - exprs.append(expr) - return arrs, exprs - - mask_var = None - mask_indices = None - def find_mask_from_size(size_var): - """Find the case where size_var is defined by A[M].shape, - where M is a boolean array. - """ - size_def = get_definition(self.func_ir, size_var) - require(size_def and isinstance(size_def, ir.Expr) and - size_def.op == 'getattr' and size_def.attr == 'shape') - arr_var = size_def.value - live_vars = set.union(*[live_map[l] for l in loop.exits]) - index_arrs, index_exprs = find_indexed_arrays() - require([arr_var.name] == list(index_arrs)) - # input array has to be dead after loop - require(arr_var.name not in live_vars) - # loop for arr's definition, where size = arr.shape - arr_def = get_definition(self.func_ir, size_def.value) - result = self._find_mask(arr_def) - # Found the mask. 
- # Replace B[i] with A[i], where B = A[M] - for expr in index_exprs: - expr.value = result[0] - return result - - # pndindex and prange are provably positive except when - # user provides negative start to prange() - unsigned_index = True - # TODO: support array mask optimization for prange - # TODO: refactor and simplify array mask optimization - if loop_kind == 'pndindex': - assert(equiv_set.has_shape(args[0])) - # see if input array to pndindex is output of array - # mask like B = A[M] - result = guard(find_mask_from_size, args[0]) - if result: - in_arr, mask_var, mask_typ, mask_indices = result - else: - in_arr = args[0] - size_vars = equiv_set.get_shape(in_arr - if mask_indices == None else mask_var) - index_vars, loops = self._mk_parfor_loops( - size_vars, scope, loc) - orig_index = index_vars - if mask_indices: - # replace mask indices if required; - # integer indices of original array should be used - # instead of parfor indices - index_vars = tuple(x if x else index_vars[0] - for x in mask_indices) - first_body_block = loop_body[min(loop_body.keys())] - body_block = ir.Block(scope, loc) - index_var, index_var_typ = self._make_index_var( - scope, index_vars, body_block) - body = body_block.body + first_body_block.body - first_body_block.body = body - if mask_indices: - orig_index_var = orig_index[0] - else: - orig_index_var = index_var - - # if masked array optimization is being applied, create - # the branch for array selection - if mask_var != None: - body_label = next_label() - # loop_body needs new labels greater than body_label - loop_body = add_offset_to_labels(loop_body, - body_label - min(loop_body.keys()) + 1) - labels = loop_body.keys() - true_label = min(labels) - false_label = max(labels) - body_block = ir.Block(scope, loc) - loop_body[body_label] = body_block - mask = ir.Var(scope, mk_unique_var("$mask_val"), loc) - self.typemap[mask.name] = mask_typ - mask_val = ir.Expr.getitem(mask_var, orig_index_var, loc) - body_block.body.extend([ - 
ir.Assign(mask_val, mask, loc), - ir.Branch(mask, true_label, false_label, loc) - ]) - else: # prange - start = 0 - step = 1 - size_var = args[0] - if len(args) == 2: - start = args[0] - size_var = args[1] - if len(args) == 3: - start = args[0] - size_var = args[1] - try: - step = self.func_ir.get_definition(args[2]) - except KeyError: - raise NotImplementedError( - "Only known step size is supported for prange") - if not isinstance(step, ir.Const): - raise NotImplementedError( - "Only constant step size is supported for prange") - step = step.value - if step != 1: - raise NotImplementedError( - "Only constant step size of 1 is supported for prange") - index_var = ir.Var(scope, mk_unique_var("parfor_index"), loc) - # assume user-provided start to prange can be negative - # this is the only case parfor can have negative index - if isinstance(start, int) and start >= 0: - index_var_typ = types.uintp - else: - index_var_typ = types.intp - unsigned_index = False - loops = [LoopNest(index_var, start, size_var, step)] - self.typemap[index_var.name] = index_var_typ - - index_var_map = {v: index_var for v in loop_index_vars} - replace_vars(loop_body, index_var_map) - if unsigned_index: - # need to replace signed array access indices to enable - # optimizations (see #2846) - self._replace_loop_access_indices( - loop_body, loop_index_vars, index_var) - parfor = Parfor(loops, init_block, loop_body, loc, - orig_index_var if mask_indices else index_var, - equiv_set, - ("prange", loop_kind), - self.flags) - # add parfor to entry block's jump target - jump = blocks[entry].body[-1] - jump.target = list(loop.exits)[0] - blocks[jump.target].body.insert(0, parfor) - # remove loop blocks from top level dict - blocks.pop(loop.header) - for l in body_labels: - blocks.pop(l) - - def _replace_loop_access_indices(self, loop_body, index_set, new_index): - """ - Replace array access indices in a loop body with a new index. - index_set has all the variables that are equivalent to loop index. 
- """ - # treat new index like others since replacing it with itself is ok - index_set.add(new_index.name) - - with dummy_return_in_loop_body(loop_body): - labels = find_topo_order(loop_body) - - first_label = labels[0] - added_indices = set() - - # traverse loop body and replace indices in getitem/setitem with - # new_index if possible. - # also, find equivalent indices defined in first block. - for l in labels: - block = loop_body[l] - for stmt in block.body: - if (isinstance(stmt, ir.Assign) - and isinstance(stmt.value, ir.Var)): - # the first block dominates others so we can use copies - # of indices safely - if (l == first_label and stmt.value.name in index_set - and stmt.target.name not in index_set): - index_set.add(stmt.target.name) - added_indices.add(stmt.target.name) - # make sure parallel index is not overwritten - elif stmt.target.name in index_set: - raise ValueError( - "Overwrite of parallel loop index at {}".format( - stmt.target.loc)) - - if is_get_setitem(stmt): - index = index_var_of_get_setitem(stmt) - # statics can have none indices - if index is None: - continue - ind_def = guard(get_definition, self.func_ir, - index, lhs_only=True) - if (index.name in index_set - or (ind_def is not None - and ind_def.name in index_set)): - set_index_var_of_get_setitem(stmt, new_index) - # corner case where one dimension of a multi-dim access - # should be replaced - guard(self._replace_multi_dim_ind, ind_def, index_set, - new_index) - - if isinstance(stmt, Parfor): - self._replace_loop_access_indices(stmt.loop_body, index_set, new_index) - - # remove added indices for currect recursive parfor handling - index_set -= added_indices - return - - def _replace_multi_dim_ind(self, ind_var, index_set, new_index): - """ - replace individual indices in multi-dimensional access variable, which - is a build_tuple - """ - require(ind_var is not None) - # check for Tuple instead of UniTuple since some dims could be slices - require(isinstance(self.typemap[ind_var.name], - 
(types.Tuple, types.UniTuple))) - ind_def_node = get_definition(self.func_ir, ind_var) - require(isinstance(ind_def_node, ir.Expr) - and ind_def_node.op == 'build_tuple') - ind_def_node.items = [new_index if v.name in index_set else v - for v in ind_def_node.items] - - def _find_mask(self, arr_def): - """check if an array is of B[...M...], where M is a - boolean array, and other indices (if available) are ints. - If found, return B, M, M's type, and a tuple representing mask indices. - Otherwise, raise GuardException. - """ - require(isinstance(arr_def, ir.Expr) and arr_def.op == 'getitem') - value = arr_def.value - index = arr_def.index - value_typ = self.typemap[value.name] - index_typ = self.typemap[index.name] - ndim = value_typ.ndim - require(isinstance(value_typ, types.npytypes.Array)) - if (isinstance(index_typ, types.npytypes.Array) and - isinstance(index_typ.dtype, types.Boolean) and - ndim == index_typ.ndim): - return value, index, index_typ.dtype, None - elif isinstance(index_typ, types.BaseTuple): - # Handle multi-dimension differently by requiring - # all indices to be constant except the one for mask. 
- seq, op = find_build_sequence(self.func_ir, index) - require(op == 'build_tuple' and len(seq) == ndim) - count_consts = 0 - mask_indices = [] - mask_var = None - for ind in seq: - index_typ = self.typemap[ind.name] - if (isinstance(index_typ, types.npytypes.Array) and - isinstance(index_typ.dtype, types.Boolean)): - mask_var = ind - mask_typ = index_typ.dtype - mask_indices.append(None) - elif (isinstance(index_typ, types.npytypes.Array) and - isinstance(index_typ.dtype, types.Integer)): - mask_var = ind - mask_typ = index_typ.dtype - mask_indices.append(None) - elif isinstance(index_typ, types.Integer): - count_consts += 1 - mask_indices.append(ind) - require(mask_var and count_consts == ndim - 1) - return value, mask_var, mask_typ, mask_indices - raise GuardException - - def _get_prange_init_block(self, entry_block, call_table, prange_args): - """ - If there is init_prange, find the code between init_prange and prange - calls. Remove the code from entry_block and return it. - """ - init_call_ind = -1 - prange_call_ind = -1 - init_body = [] - for i, inst in enumerate(entry_block.body): - # if init_prange call - if (isinstance(inst, ir.Assign) and isinstance(inst.value, ir.Expr) - and inst.value.op == 'call' - and self._is_prange_init(inst.value.func.name, call_table)): - init_call_ind = i - if (isinstance(inst, ir.Assign) and isinstance(inst.value, ir.Expr) - and inst.value.op == 'call' - and self._is_parallel_loop(inst.value.func.name, call_table)): - prange_call_ind = i - if init_call_ind != -1 and prange_call_ind != -1: - # we save instructions that are used to calculate prange call args - # in the entry block. 
The rest go to parfor init_block - arg_related_vars = {v.name for v in prange_args} - saved_nodes = [] - for i in reversed(range(init_call_ind+1, prange_call_ind)): - inst = entry_block.body[i] - inst_vars = {v.name for v in inst.list_vars()} - if arg_related_vars & inst_vars: - arg_related_vars |= inst_vars - saved_nodes.append(inst) - else: - init_body.append(inst) - - init_body.reverse() - saved_nodes.reverse() - entry_block.body = (entry_block.body[:init_call_ind] - + saved_nodes + entry_block.body[prange_call_ind+1:]) - - return init_body - - def _is_prange_init(self, func_var, call_table): - if func_var not in call_table: - return False - call = call_table[func_var] - return len(call) > 0 and (call[0] == 'init_prange' or call[0] == init_prange) - - def _is_parallel_loop(self, func_var, call_table): - # prange can be either getattr (numba.prange) or global (prange) - if func_var not in call_table: - return False - call = call_table[func_var] - return len(call) > 0 and (call[0] == 'prange' or call[0] == prange - or call[0] == 'internal_prange' or call[0] == internal_prange - or call[0] == 'pndindex' or call[0] == pndindex) - - def _get_loop_kind(self, func_var, call_table): - """see if prange is user prange or internal""" - # prange can be either getattr (numba.prange) or global (prange) - assert func_var in call_table - call = call_table[func_var] - assert len(call) > 0 - kind = 'user' - if call[0] == 'internal_prange' or call[0] == internal_prange: - kind = 'internal' - elif call[0] == 'pndindex' or call[0] == pndindex: - kind = 'pndindex' - return kind - - def _is_C_order(self, arr_name): - typ = self.typemap[arr_name] - return isinstance(typ, types.npytypes.Array) and typ.layout == 'C' and typ.ndim > 0 - - def _make_index_var(self, scope, index_vars, body_block): - ndims = len(index_vars) - loc = body_block.loc - if ndims > 1: - tuple_var = ir.Var(scope, mk_unique_var( - "$parfor_index_tuple_var"), loc) - self.typemap[tuple_var.name] = 
types.containers.UniTuple( - types.uintp, ndims) - tuple_call = ir.Expr.build_tuple(list(index_vars), loc) - tuple_assign = ir.Assign(tuple_call, tuple_var, loc) - body_block.body.append(tuple_assign) - return tuple_var, types.containers.UniTuple(types.uintp, ndims) - elif ndims == 1: - return index_vars[0], types.uintp - else: - raise NotImplementedError( - "Parfor does not handle arrays of dimension 0") - - def _mk_parfor_loops(self, size_vars, scope, loc): - """ - Create loop index variables and build LoopNest objects for a parfor. - """ - loopnests = [] - index_vars = [] - for size_var in size_vars: - index_var = ir.Var(scope, mk_unique_var("parfor_index"), loc) - index_vars.append(index_var) - self.typemap[index_var.name] = types.uintp - loopnests.append(LoopNest(index_var, 0, size_var, 1)) - return index_vars, loopnests - - def _arrayexpr_to_parfor(self, equiv_set, lhs, arrayexpr, avail_vars): - """generate parfor from arrayexpr node, which is essentially a - map with recursive tree. 
- """ - scope = lhs.scope - loc = lhs.loc - expr = arrayexpr.expr - arr_typ = self.typemap[lhs.name] - el_typ = arr_typ.dtype - - # generate loopnests and size variables from lhs correlations - size_vars = equiv_set.get_shape(lhs) - index_vars, loopnests = self._mk_parfor_loops(size_vars, scope, loc) - - # generate init block and body - init_block = ir.Block(scope, loc) - init_block.body = mk_alloc(self.typemap, self.calltypes, lhs, - tuple(size_vars), el_typ, scope, loc) - body_label = next_label() - body_block = ir.Block(scope, loc) - expr_out_var = ir.Var(scope, mk_unique_var("$expr_out_var"), loc) - self.typemap[expr_out_var.name] = el_typ - - index_var, index_var_typ = self._make_index_var( - scope, index_vars, body_block) - - body_block.body.extend( - _arrayexpr_tree_to_ir( - self.func_ir, - self.typingctx, - self.typemap, - self.calltypes, - equiv_set, - init_block, - expr_out_var, - expr, - index_var, - index_vars, - avail_vars)) - - parfor = Parfor(loopnests, init_block, {}, loc, index_var, equiv_set, - ('arrayexpr {}'.format(repr_arrayexpr(arrayexpr.expr)),), - self.flags) - - setitem_node = ir.SetItem(lhs, index_var, expr_out_var, loc) - self.calltypes[setitem_node] = signature( - types.none, self.typemap[lhs.name], index_var_typ, el_typ) - body_block.body.append(setitem_node) - parfor.loop_body = {body_label: body_block} - if config.DEBUG_ARRAY_OPT == 1: - parfor.dump() - return parfor - - def _setitem_to_parfor(self, equiv_set, loc, target, index, value, shape=None): - """generate parfor from setitem node with a boolean or slice array indices. - The value can be either a scalar or an array variable, and if a boolean index - is used for the latter case, the same index must be used for the value too. 
- """ - scope = target.scope - arr_typ = self.typemap[target.name] - el_typ = arr_typ.dtype - index_typ = self.typemap[index.name] - init_block = ir.Block(scope, loc) - - if shape: - # Slice index is being used on the target array, we'll have to create - # a sub-array so that the target dimension matches the given shape. - assert(isinstance(index_typ, types.BaseTuple) or - isinstance(index_typ, types.SliceType)) - # setitem has a custom target shape - size_vars = shape - # create a new target array via getitem - subarr_var = ir.Var(scope, mk_unique_var("$subarr"), loc) - getitem_call = ir.Expr.getitem(target, index, loc) - subarr_typ = typing.arraydecl.get_array_index_type( arr_typ, index_typ).result - self.typemap[subarr_var.name] = subarr_typ - self.calltypes[getitem_call] = signature(subarr_typ, arr_typ, - index_typ) - init_block.append(ir.Assign(getitem_call, subarr_var, loc)) - target = subarr_var - else: - # Otherwise it is a boolean array that is used as index. - assert(isinstance(index_typ, types.ArrayCompatible)) - size_vars = equiv_set.get_shape(target) - bool_typ = index_typ.dtype - - - # generate loopnests and size variables from lhs correlations - loopnests = [] - index_vars = [] - for size_var in size_vars: - index_var = ir.Var(scope, mk_unique_var("parfor_index"), loc) - index_vars.append(index_var) - self.typemap[index_var.name] = types.uintp - loopnests.append(LoopNest(index_var, 0, size_var, 1)) - - # generate body - body_label = next_label() - body_block = ir.Block(scope, loc) - index_var, index_var_typ = self._make_index_var( - scope, index_vars, body_block) - parfor = Parfor(loopnests, init_block, {}, loc, index_var, equiv_set, - ('setitem',), self.flags) - if shape: - # slice subarray - parfor.loop_body = {body_label: body_block} - true_block = body_block - end_label = None - else: - # boolean mask - true_label = next_label() - true_block = ir.Block(scope, loc) - end_label = next_label() - end_block = ir.Block(scope, loc) - parfor.loop_body = 
{body_label: body_block, - true_label: true_block, - end_label: end_block, - } - mask_var = ir.Var(scope, mk_unique_var("$mask_var"), loc) - self.typemap[mask_var.name] = bool_typ - mask_val = ir.Expr.getitem(index, index_var, loc) - body_block.body.extend([ - ir.Assign(mask_val, mask_var, loc), - ir.Branch(mask_var, true_label, end_label, loc) - ]) - - value_typ = self.typemap[value.name] - if isinstance(value_typ, types.npytypes.Array): - value_var = ir.Var(scope, mk_unique_var("$value_var"), loc) - self.typemap[value_var.name] = value_typ.dtype - getitem_call = ir.Expr.getitem(value, index_var, loc) - self.calltypes[getitem_call] = signature( - value_typ.dtype, value_typ, index_var_typ) - true_block.body.append(ir.Assign(getitem_call, value_var, loc)) - else: - value_var = value - setitem_node = ir.SetItem(target, index_var, value_var, loc) - self.calltypes[setitem_node] = signature( - types.none, self.typemap[target.name], index_var_typ, el_typ) - true_block.body.append(setitem_node) - if end_label: - true_block.body.append(ir.Jump(end_label, loc)) - - if config.DEBUG_ARRAY_OPT == 1: - parfor.dump() - return parfor - - def _is_supported_npycall(self, expr): - """check if we support parfor translation for - this Numpy call. 
- """ - call_name, mod_name = find_callname(self.func_ir, expr) - if not (isinstance(mod_name, str) and mod_name.startswith('numpy')): - return False - if call_name in ['zeros', 'ones']: - return True - if call_name in ['arange', 'linspace']: - return True - if mod_name == 'numpy.random' and call_name in random_calls: - return True - # TODO: add more calls - return False - - def _get_ndims(self, arr): - # return len(self.array_analysis.array_shape_classes[arr]) - return self.typemap[arr].ndim - - def _numpy_to_parfor(self, equiv_set, lhs, expr): - call_name, mod_name = find_callname(self.func_ir, expr) - args = expr.args - kws = dict(expr.kws) - if call_name in ['zeros', 'ones'] or mod_name == 'numpy.random': - return self._numpy_map_to_parfor(equiv_set, call_name, lhs, args, kws, expr) - # return error if we couldn't handle it (avoid rewrite infinite loop) - raise NotImplementedError("parfor translation failed for ", expr) - - def _numpy_map_to_parfor(self, equiv_set, call_name, lhs, args, kws, expr): - """generate parfor from Numpy calls that are maps. 
- """ - scope = lhs.scope - loc = lhs.loc - arr_typ = self.typemap[lhs.name] - el_typ = arr_typ.dtype - - # generate loopnests and size variables from lhs correlations - size_vars = equiv_set.get_shape(lhs) - index_vars, loopnests = self._mk_parfor_loops(size_vars, scope, loc) - - # generate init block and body - init_block = ir.Block(scope, loc) - init_block.body = mk_alloc(self.typemap, self.calltypes, lhs, - tuple(size_vars), el_typ, scope, loc) - body_label = next_label() - body_block = ir.Block(scope, loc) - expr_out_var = ir.Var(scope, mk_unique_var("$expr_out_var"), loc) - self.typemap[expr_out_var.name] = el_typ - - index_var, index_var_typ = self._make_index_var( - scope, index_vars, body_block) - - if call_name == 'zeros': - value = ir.Const(el_typ(0), loc) - elif call_name == 'ones': - value = ir.Const(el_typ(1), loc) - elif call_name in random_calls: - # remove size arg to reuse the call expr for single value - _remove_size_arg(call_name, expr) - # update expr type - new_arg_typs, new_kw_types = _get_call_arg_types( - expr, self.typemap) - self.calltypes.pop(expr) - self.calltypes[expr] = self.typemap[expr.func.name].get_call_type( - typing.Context(), new_arg_typs, new_kw_types) - value = expr - else: - NotImplementedError( - "Map of numpy.{} to parfor is not implemented".format(call_name)) - - value_assign = ir.Assign(value, expr_out_var, loc) - body_block.body.append(value_assign) - - parfor = Parfor(loopnests, init_block, {}, loc, index_var, equiv_set, - ('{} function'.format(call_name,)), self.flags) - - setitem_node = ir.SetItem(lhs, index_var, expr_out_var, loc) - self.calltypes[setitem_node] = signature( - types.none, self.typemap[lhs.name], index_var_typ, el_typ) - body_block.body.append(setitem_node) - parfor.loop_body = {body_label: body_block} - if config.DEBUG_ARRAY_OPT == 1: - print("generated parfor for numpy map:") - parfor.dump() - return parfor - - def _mk_reduction_body(self, call_name, scope, loc, - index_vars, in_arr, acc_var): - """ 
- Produce the body blocks for a reduction function indicated by call_name. - """ - from numba.inline_closurecall import check_reduce_func - reduce_func = get_definition(self.func_ir, call_name) - check_reduce_func(self.func_ir, reduce_func) - - arr_typ = self.typemap[in_arr.name] - in_typ = arr_typ.dtype - body_block = ir.Block(scope, loc) - index_var, index_var_type = self._make_index_var( - scope, index_vars, body_block) - - tmp_var = ir.Var(scope, mk_unique_var("$val"), loc) - self.typemap[tmp_var.name] = in_typ - getitem_call = ir.Expr.getitem(in_arr, index_var, loc) - self.calltypes[getitem_call] = signature( - in_typ, arr_typ, index_var_type) - body_block.append(ir.Assign(getitem_call, tmp_var, loc)) - - reduce_f_ir = compile_to_numba_ir(reduce_func, - self.func_ir.func_id.func.__globals__, - self.typingctx, - (in_typ, in_typ), - self.typemap, - self.calltypes) - loop_body = reduce_f_ir.blocks - end_label = next_label() - end_block = ir.Block(scope, loc) - loop_body[end_label] = end_block - first_reduce_label = min(reduce_f_ir.blocks.keys()) - first_reduce_block = reduce_f_ir.blocks[first_reduce_label] - body_block.body.extend(first_reduce_block.body) - first_reduce_block.body = body_block.body - replace_arg_nodes(first_reduce_block, [acc_var, tmp_var]) - replace_returns(loop_body, acc_var, end_label) - return index_var, loop_body - - def _reduce_to_parfor(self, equiv_set, lhs, args, loc): - """ - Convert a reduce call to a parfor. - The call arguments should be (call_name, array, init_value). 
- """ - scope = lhs.scope - call_name = args[0] - in_arr = args[1] - arr_def = get_definition(self.func_ir, in_arr.name) - - mask_var = None - mask_indices = None - result = guard(self._find_mask, arr_def) - if result: - in_arr, mask_var, mask_typ, mask_indices = result - - init_val = args[2] - size_vars = equiv_set.get_shape(in_arr if mask_indices == None else mask_var) - index_vars, loopnests = self._mk_parfor_loops(size_vars, scope, loc) - mask_index = index_vars - if mask_indices: - index_vars = tuple(x if x else index_vars[0] for x in mask_indices) - acc_var = lhs - - # init block has to init the reduction variable - init_block = ir.Block(scope, loc) - init_block.body.append(ir.Assign(init_val, acc_var, loc)) - - # produce loop body - body_label = next_label() - index_var, loop_body = self._mk_reduction_body(call_name, - scope, loc, index_vars, in_arr, acc_var) - if mask_indices: - index_var = mask_index[0] - - if mask_var != None: - true_label = min(loop_body.keys()) - false_label = max(loop_body.keys()) - body_block = ir.Block(scope, loc) - loop_body[body_label] = body_block - mask = ir.Var(scope, mk_unique_var("$mask_val"), loc) - self.typemap[mask.name] = mask_typ - mask_val = ir.Expr.getitem(mask_var, index_var, loc) - body_block.body.extend([ - ir.Assign(mask_val, mask, loc), - ir.Branch(mask, true_label, false_label, loc) - ]) - - parfor = Parfor(loopnests, init_block, loop_body, loc, index_var, - equiv_set, ('{} function'.format(call_name),), self.flags) - return parfor - - - def fuse_parfors(self, array_analysis, blocks): - for label, block in blocks.items(): - equiv_set = array_analysis.get_equiv_set(label) - fusion_happened = True - while fusion_happened: - fusion_happened = False - new_body = [] - i = 0 - while i < len(block.body) - 1: - stmt = block.body[i] - next_stmt = block.body[i + 1] - if isinstance(stmt, Parfor) and isinstance(next_stmt, Parfor): - # we have to update equiv_set since they have changed due to - # variables being renamed 
before fusion. - equiv_set = array_analysis.get_equiv_set(label) - stmt.equiv_set = equiv_set - next_stmt.equiv_set = equiv_set - fused_node = try_fuse(equiv_set, stmt, next_stmt) - if fused_node is not None: - fusion_happened = True - new_body.append(fused_node) - self.fuse_recursive_parfor(fused_node, equiv_set) - i += 2 - continue - new_body.append(stmt) - if isinstance(stmt, Parfor): - self.fuse_recursive_parfor(stmt, equiv_set) - i += 1 - new_body.append(block.body[-1]) - block.body = new_body - return - - def fuse_recursive_parfor(self, parfor, equiv_set): - blocks = wrap_parfor_blocks(parfor) - # print("in fuse_recursive parfor for ", parfor.id) - maximize_fusion(self.func_ir, blocks) - arr_analysis = array_analysis.ArrayAnalysis(self.typingctx, self.func_ir, - self.typemap, self.calltypes) - arr_analysis.run(blocks, equiv_set) - self.fuse_parfors(arr_analysis, blocks) - unwrap_parfor_blocks(parfor) - -def _remove_size_arg(call_name, expr): - "remove size argument from args or kws" - # remove size kwarg - kws = dict(expr.kws) - kws.pop('size', '') - expr.kws = tuple(kws.items()) - - # remove size arg if available - if call_name in random_1arg_size + random_int_args: - # these calls have only a "size" argument or list of ints - # so remove all args - expr.args = [] - - if call_name in random_3arg_sizelast: - # normal, uniform, ... 
have 3 args, last one is size - if len(expr.args) == 3: - expr.args.pop() - - if call_name in random_2arg_sizelast: - # have 2 args, last one is size - if len(expr.args) == 2: - expr.args.pop() - - if call_name == 'randint': - # has 4 args, 3rd one is size - if len(expr.args) == 3: - expr.args.pop() - if len(expr.args) == 4: - dt_arg = expr.args.pop() - expr.args.pop() # remove size - expr.args.append(dt_arg) - - if call_name == 'triangular': - # has 4 args, last one is size - if len(expr.args) == 4: - expr.args.pop() - - return - - -def _get_call_arg_types(expr, typemap): - new_arg_typs = [] - for arg in expr.args: - new_arg_typs.append(typemap[arg.name]) - - new_kw_types = {} - for name, arg in expr.kws: - new_kw_types[name] = typemap[arg.name] - - return tuple(new_arg_typs), new_kw_types - - -def _arrayexpr_tree_to_ir( - func_ir, - typingctx, - typemap, - calltypes, - equiv_set, - init_block, - expr_out_var, - expr, - parfor_index_tuple_var, - all_parfor_indices, - avail_vars): - """generate IR from array_expr's expr tree recursively. Assign output to - expr_out_var and returns the whole IR as a list of Assign nodes. 
- """ - el_typ = typemap[expr_out_var.name] - scope = expr_out_var.scope - loc = expr_out_var.loc - out_ir = [] - - if isinstance(expr, tuple): - op, arr_expr_args = expr - arg_vars = [] - for arg in arr_expr_args: - arg_out_var = ir.Var(scope, mk_unique_var("$arg_out_var"), loc) - typemap[arg_out_var.name] = el_typ - out_ir += _arrayexpr_tree_to_ir(func_ir, - typingctx, - typemap, - calltypes, - equiv_set, - init_block, - arg_out_var, - arg, - parfor_index_tuple_var, - all_parfor_indices, - avail_vars) - arg_vars.append(arg_out_var) - if op in npydecl.supported_array_operators: - el_typ1 = typemap[arg_vars[0].name] - if len(arg_vars) == 2: - el_typ2 = typemap[arg_vars[1].name] - func_typ = find_op_typ(op, [el_typ1, el_typ2]) - ir_expr = ir.Expr.binop(op, arg_vars[0], arg_vars[1], loc) - if op == '/': - func_typ, ir_expr = _gen_np_divide( - arg_vars[0], arg_vars[1], out_ir, typemap) - else: - func_typ = find_op_typ(op, [el_typ1]) - ir_expr = ir.Expr.unary(op, arg_vars[0], loc) - calltypes[ir_expr] = func_typ - el_typ = func_typ.return_type - out_ir.append(ir.Assign(ir_expr, expr_out_var, loc)) - for T in array_analysis.MAP_TYPES: - if isinstance(op, T): - # elif isinstance(op, (np.ufunc, DUFunc)): - # function calls are stored in variables which are not removed - # op is typing_key to the variables type - func_var_name = _find_func_var(typemap, op, avail_vars) - func_var = ir.Var(scope, mk_unique_var(func_var_name), loc) - typemap[func_var.name] = typemap[func_var_name] - func_var_def = func_ir.get_definition(func_var_name) - if isinstance(func_var_def, ir.Expr) and func_var_def.op == 'getattr' and func_var_def.attr == 'sqrt': - g_math_var = ir.Var(scope, mk_unique_var("$math_g_var"), loc) - typemap[g_math_var.name] = types.misc.Module(math) - g_math = ir.Global('math', math, loc) - g_math_assign = ir.Assign(g_math, g_math_var, loc) - func_var_def = ir.Expr.getattr(g_math_var, 'sqrt', loc) - out_ir.append(g_math_assign) -# out_ir.append(func_var_def) - ir_expr = 
ir.Expr.call(func_var, arg_vars, (), loc) - call_typ = typemap[func_var.name].get_call_type( - typing.Context(), [el_typ] * len(arg_vars), {}) - calltypes[ir_expr] = call_typ - el_typ = call_typ.return_type - #signature(el_typ, el_typ) - out_ir.append(ir.Assign(func_var_def, func_var, loc)) - out_ir.append(ir.Assign(ir_expr, expr_out_var, loc)) - elif isinstance(expr, ir.Var): - var_typ = typemap[expr.name] - if isinstance(var_typ, types.Array): - el_typ = var_typ.dtype - ir_expr = _gen_arrayexpr_getitem( - equiv_set, - expr, - parfor_index_tuple_var, - all_parfor_indices, - el_typ, - calltypes, - typingctx, - typemap, - init_block, - out_ir) - else: - # assert typemap[expr.name]==el_typ - el_typ = var_typ - ir_expr = expr - out_ir.append(ir.Assign(ir_expr, expr_out_var, loc)) - elif isinstance(expr, ir.Const): - el_typ = typing.Context().resolve_value_type(expr.value) - out_ir.append(ir.Assign(expr, expr_out_var, loc)) - - if len(out_ir) == 0: - raise NotImplementedError( - "Don't know how to translate array expression '%r'" % (expr,)) - typemap.pop(expr_out_var.name, None) - typemap[expr_out_var.name] = el_typ - return out_ir - - -def _gen_np_divide(arg1, arg2, out_ir, typemap): - """generate np.divide() instead of / for array_expr to get numpy error model - like inf for division by zero (test_division_by_zero). 
- """ - scope = arg1.scope - loc = arg1.loc - # g_np_var = Global(numpy) - g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) - typemap[g_np_var.name] = types.misc.Module(numpy) - g_np = ir.Global('np', numpy, loc) - g_np_assign = ir.Assign(g_np, g_np_var, loc) - # attr call: div_attr = getattr(g_np_var, divide) - div_attr_call = ir.Expr.getattr(g_np_var, "divide", loc) - attr_var = ir.Var(scope, mk_unique_var("$div_attr"), loc) - func_var_typ = get_np_ufunc_typ(numpy.divide) - typemap[attr_var.name] = func_var_typ - attr_assign = ir.Assign(div_attr_call, attr_var, loc) - # divide call: div_attr(arg1, arg2) - div_call = ir.Expr.call(attr_var, [arg1, arg2], (), loc) - func_typ = func_var_typ.get_call_type( - typing.Context(), [typemap[arg1.name], typemap[arg2.name]], {}) - out_ir.extend([g_np_assign, attr_assign]) - return func_typ, div_call - - -def _gen_arrayexpr_getitem( - equiv_set, - var, - parfor_index_tuple_var, - all_parfor_indices, - el_typ, - calltypes, - typingctx, - typemap, - init_block, - out_ir): - """if there is implicit dimension broadcast, generate proper access variable - for getitem. For example, if indices are (i1,i2,i3) but shape is (c1,0,c3), - generate a tuple with (i1,0,i3) for access. Another example: for (i1,i2,i3) - and (c1,c2) generate (i2,i3). 
- """ - loc = var.loc - index_var = parfor_index_tuple_var - var_typ = typemap[var.name] - ndims = typemap[var.name].ndim - num_indices = len(all_parfor_indices) - size_vars = equiv_set.get_shape(var) or [] - size_consts = [equiv_set.get_equiv_const(x) for x in size_vars] - if ndims == 0: - # call np.ravel - ravel_var = ir.Var(var.scope, mk_unique_var("$ravel"), loc) - ravel_typ = types.npytypes.Array(dtype=var_typ.dtype, ndim=1, layout='C') - typemap[ravel_var.name] = ravel_typ - stmts = ir_utils.gen_np_call('ravel', numpy.ravel, ravel_var, [var], typingctx, typemap, calltypes) - init_block.body.extend(stmts) - var = ravel_var - # Const(0) - const_node = ir.Const(0, var.loc) - const_var = ir.Var(var.scope, mk_unique_var("$const_ind_0"), loc) - typemap[const_var.name] = types.uintp - const_assign = ir.Assign(const_node, const_var, loc) - out_ir.append(const_assign) - index_var = const_var - elif ndims == 1: - # Use last index for 1D arrays - index_var = all_parfor_indices[-1] - elif any([x != None for x in size_consts]): - # Need a tuple as index - ind_offset = num_indices - ndims - tuple_var = ir.Var(var.scope, mk_unique_var( - "$parfor_index_tuple_var_bcast"), loc) - typemap[tuple_var.name] = types.containers.UniTuple(types.uintp, ndims) - # Just in case, const var for size 1 dim access index: $const0 = - # Const(0) - const_node = ir.Const(0, var.loc) - const_var = ir.Var(var.scope, mk_unique_var("$const_ind_0"), loc) - typemap[const_var.name] = types.uintp - const_assign = ir.Assign(const_node, const_var, loc) - out_ir.append(const_assign) - index_vars = [] - for i in reversed(range(ndims)): - size_var = size_vars[i] - size_const = size_consts[i] - if size_const == 1: - index_vars.append(const_var) - else: - index_vars.append(all_parfor_indices[ind_offset + i]) - index_vars = list(reversed(index_vars)) - tuple_call = ir.Expr.build_tuple(index_vars, loc) - tuple_assign = ir.Assign(tuple_call, tuple_var, loc) - out_ir.append(tuple_assign) - index_var = tuple_var - 
- ir_expr = ir.Expr.getitem(var, index_var, loc) - calltypes[ir_expr] = signature(el_typ, typemap[var.name], - typemap[index_var.name]) - return ir_expr - - -def _find_func_var(typemap, func, avail_vars): - """find variable in typemap which represents the function func. - """ - for v in avail_vars: - t = typemap[v] - # Function types store actual functions in typing_key. - if isinstance(t, Function) and t.typing_key == func: - return v - raise RuntimeError("ufunc call variable not found") - - -def lower_parfor_sequential(typingctx, func_ir, typemap, calltypes): - ir_utils._max_label = max(ir_utils._max_label, - ir_utils.find_max_label(func_ir.blocks)) - parfor_found = False - new_blocks = {} - for (block_label, block) in func_ir.blocks.items(): - block_label, parfor_found = _lower_parfor_sequential_block( - block_label, block, new_blocks, typemap, calltypes, parfor_found) - # old block stays either way - new_blocks[block_label] = block - func_ir.blocks = new_blocks - # rename only if parfor found and replaced (avoid test_flow_control error) - if parfor_found: - func_ir.blocks = rename_labels(func_ir.blocks) - dprint_func_ir(func_ir, "after parfor sequential lowering") - simplify(func_ir, typemap, calltypes) - dprint_func_ir(func_ir, "after parfor sequential simplify") - # add dels since simplify removes dels - post_proc = postproc.PostProcessor(func_ir) - post_proc.run() - return - - -def _lower_parfor_sequential_block( - block_label, - block, - new_blocks, - typemap, - calltypes, - parfor_found): - scope = block.scope - i = _find_first_parfor(block.body) - while i != -1: - parfor_found = True - inst = block.body[i] - loc = inst.init_block.loc - # split block across parfor - prev_block = ir.Block(scope, loc) - prev_block.body = block.body[:i] - block.body = block.body[i + 1:] - # previous block jump to parfor init block - init_label = next_label() - prev_block.body.append(ir.Jump(init_label, loc)) - new_blocks[init_label] = inst.init_block - new_blocks[block_label] 
= prev_block - block_label = next_label() - - ndims = len(inst.loop_nests) - for i in range(ndims): - loopnest = inst.loop_nests[i] - # create range block for loop - range_label = next_label() - header_label = next_label() - range_block = mk_range_block( - typemap, - loopnest.start, - loopnest.stop, - loopnest.step, - calltypes, - scope, - loc) - range_block.body[-1].target = header_label # fix jump target - phi_var = range_block.body[-2].target - new_blocks[range_label] = range_block - header_block = mk_loop_header(typemap, phi_var, calltypes, - scope, loc) - header_block.body[-2].target = loopnest.index_variable - new_blocks[header_label] = header_block - # jump to this new inner loop - if i == 0: - inst.init_block.body.append(ir.Jump(range_label, loc)) - header_block.body[-1].falsebr = block_label - else: - new_blocks[prev_header_label].body[-1].truebr = range_label - header_block.body[-1].falsebr = prev_header_label - prev_header_label = header_label # to set truebr next loop - - # last body block jump to inner most header - body_last_label = max(inst.loop_body.keys()) - inst.loop_body[body_last_label].body.append( - ir.Jump(header_label, loc)) - # inner most header jumps to first body block - body_first_label = min(inst.loop_body.keys()) - header_block.body[-1].truebr = body_first_label - # add parfor body to blocks - for (l, b) in inst.loop_body.items(): - l, parfor_found = _lower_parfor_sequential_block( - l, b, new_blocks, typemap, calltypes, parfor_found) - new_blocks[l] = b - i = _find_first_parfor(block.body) - return block_label, parfor_found - - -def _find_first_parfor(body): - for (i, inst) in enumerate(body): - if isinstance(inst, Parfor) and not inst.no_sequential_lowering: - return i - return -1 - - -def get_parfor_params(blocks, options_fusion): - """find variables used in body of parfors from outside and save them. - computed as live variables at entry of first block. 
- """ - - # since parfor wrap creates a back-edge to first non-init basic block, - # live_map[first_non_init_block] contains variables defined in parfor body - # that could be undefined before. So we only consider variables that are - # actually defined before the parfor body in the program. - parfor_ids = set() - pre_defs = set() - _, all_defs = compute_use_defs(blocks) - topo_order = find_topo_order(blocks) - for label in topo_order: - block = blocks[label] - for i, parfor in _find_parfors(block.body): - # find variable defs before the parfor in the same block - dummy_block = ir.Block(block.scope, block.loc) - dummy_block.body = block.body[:i] - before_defs = compute_use_defs({0: dummy_block}).defmap[0] - pre_defs |= before_defs - parfor.params = get_parfor_params_inner(parfor, pre_defs, options_fusion) - parfor_ids.add(parfor.id) - - pre_defs |= all_defs[label] - - return parfor_ids - - -def get_parfor_params_inner(parfor, pre_defs, options_fusion): - - blocks = wrap_parfor_blocks(parfor) - cfg = compute_cfg_from_blocks(blocks) - usedefs = compute_use_defs(blocks) - live_map = compute_live_map(cfg, blocks, usedefs.usemap, usedefs.defmap) - parfor_ids = get_parfor_params(blocks, options_fusion) - if config.DEBUG_ARRAY_OPT_STATS: - n_parfors = len(parfor_ids) - if n_parfors > 0: - after_fusion = ("After fusion" if options_fusion - else "With fusion disabled") - print(('After fusion, parallel for-loop {} has ' - '{} nested Parfor(s) #{}.').format( - after_fusion, parfor.id, n_parfors, parfor_ids)) - unwrap_parfor_blocks(parfor) - keylist = sorted(live_map.keys()) - init_block = keylist[0] - first_non_init_block = keylist[1] - - before_defs = usedefs.defmap[init_block] | pre_defs - params = live_map[first_non_init_block] & before_defs - return params - - -def _find_parfors(body): - for i, inst in enumerate(body): - if isinstance(inst, Parfor): - yield i, inst - - -def get_parfor_outputs(parfor, parfor_params): - """get arrays that are written to inside the parfor 
and need to be passed - as parameters to gufunc. - """ - # FIXME: The following assumes the target of all SetItem are outputs, - # which is wrong! - last_label = max(parfor.loop_body.keys()) - outputs = [] - for blk in parfor.loop_body.values(): - for stmt in blk.body: - if isinstance(stmt, ir.SetItem): - if stmt.index.name == parfor.index_var.name: - outputs.append(stmt.target.name) - # make sure these written arrays are in parfor parameters (live coming in) - outputs = list(set(outputs) & set(parfor_params)) - return sorted(outputs) - -def get_parfor_reductions(parfor, parfor_params, calltypes, reductions=None, - reduce_varnames=None, param_uses=None, param_nodes=None, - var_to_param=None): - """find variables that are updated using their previous values and an array - item accessed with parfor index, e.g. s = s+A[i] - """ - if reductions is None: - reductions = {} - if reduce_varnames is None: - reduce_varnames = [] - - # for each param variable, find what other variables are used to update it - # also, keep the related nodes - if param_uses is None: - param_uses = defaultdict(list) - if param_nodes is None: - param_nodes = defaultdict(list) - if var_to_param is None: - var_to_param = {} - - blocks = wrap_parfor_blocks(parfor) - topo_order = find_topo_order(blocks) - topo_order = topo_order[1:] # ignore init block - unwrap_parfor_blocks(parfor) - - for label in reversed(topo_order): - for stmt in reversed(parfor.loop_body[label].body): - if (isinstance(stmt, ir.Assign) - and (stmt.target.name in parfor_params - or stmt.target.name in var_to_param)): - lhs = stmt.target.name - rhs = stmt.value - cur_param = lhs if lhs in parfor_params else var_to_param[lhs] - used_vars = [] - if isinstance(rhs, ir.Var): - used_vars = [rhs.name] - elif isinstance(rhs, ir.Expr): - used_vars = [v.name for v in stmt.value.list_vars()] - param_uses[cur_param].extend(used_vars) - for v in used_vars: - var_to_param[v] = cur_param - # save copy of dependent stmt - stmt_cp = 
copy.deepcopy(stmt) - if stmt.value in calltypes: - calltypes[stmt_cp.value] = calltypes[stmt.value] - param_nodes[cur_param].append(stmt_cp) - if isinstance(stmt, Parfor): - # recursive parfors can have reductions like test_prange8 - get_parfor_reductions(stmt, parfor_params, calltypes, - reductions, reduce_varnames, param_uses, param_nodes, var_to_param) - for param, used_vars in param_uses.items(): - # a parameter is a reduction variable if its value is used to update it - # check reduce_varnames since recursive parfors might have processed - # param already - if param in used_vars and param not in reduce_varnames: - reduce_varnames.append(param) - param_nodes[param].reverse() - reduce_nodes = get_reduce_nodes(param, param_nodes[param]) - init_val = guard(get_reduction_init, reduce_nodes) - reductions[param] = (init_val, reduce_nodes) - return reduce_varnames, reductions - -def get_reduction_init(nodes): - """ - Get initial value for known reductions. - Currently, only += and *= are supported. We assume the inplace_binop node - is followed by an assignment. - """ - require(len(nodes) >=2) - require(isinstance(nodes[-1].value, ir.Var)) - require(nodes[-2].target.name == nodes[-1].value.name) - acc_expr = nodes[-2].value - require(isinstance(acc_expr, ir.Expr) and acc_expr.op=='inplace_binop') - if acc_expr.fn == '+=': - return 0 - if acc_expr.fn == '*=': - return 1 - return None - -def get_reduce_nodes(name, nodes): - """ - Get nodes that combine the reduction variable with a sentinel variable. - Recognizes the first node that combines the reduction variable with another - variable. 
- """ - reduce_nodes = None - defs = {} - - def lookup(var, varonly=True): - val = defs.get(var.name, None) - if isinstance(val, ir.Var): - return lookup(val) - else: - return var if (varonly or val == None) else val - - for i, stmt in enumerate(nodes): - lhs = stmt.target - rhs = stmt.value - defs[lhs.name] = rhs - if isinstance(rhs, ir.Var) and rhs.name in defs: - rhs = lookup(rhs) - if isinstance(rhs, ir.Expr): - in_vars = set(lookup(v, True).name for v in rhs.list_vars()) - if name in in_vars: - args = [ (x.name, lookup(x, True)) for x in get_expr_args(rhs) ] - non_red_args = [ x for (x, y) in args if y.name != name ] - assert len(non_red_args) == 1 - args = [ (x, y) for (x, y) in args if x != y.name ] - replace_dict = dict(args) - replace_dict[non_red_args[0]] = ir.Var(lhs.scope, name+"#init", lhs.loc) - replace_vars_inner(rhs, replace_dict) - reduce_nodes = nodes[i:] - break; - assert reduce_nodes, "Invalid reduction format" - return reduce_nodes - -def get_expr_args(expr): - """ - Get arguments of an expression node - """ - if expr.op in ['binop', 'inplace_binop']: - return [expr.lhs, expr.rhs] - if expr.op == 'call': - return [v for v in expr.args] - raise NotImplementedError("get arguments for expression {}".format(expr)) - -def visit_parfor_pattern_vars(parfor, callback, cbdata): - # currently, only stencil pattern has variables - for pattern in parfor.patterns: - if pattern[0] == 'stencil': - left_lengths = pattern[1][0] - for i in range(len(left_lengths)): - if isinstance(left_lengths[i], ir.Var): - left_lengths[i] = visit_vars_inner(left_lengths[i], - callback, cbdata) - right_lengths = pattern[1][1] - for i in range(len(right_lengths)): - if isinstance(right_lengths[i], ir.Var): - right_lengths[i] = visit_vars_inner(right_lengths[i], - callback, cbdata) - -def visit_vars_parfor(parfor, callback, cbdata): - if config.DEBUG_ARRAY_OPT == 1: - print("visiting parfor vars for:", parfor) - print("cbdata: ", sorted(cbdata.items())) - for l in 
parfor.loop_nests: - l.index_variable = visit_vars_inner(l.index_variable, callback, cbdata) - if isinstance(l.start, ir.Var): - l.start = visit_vars_inner(l.start, callback, cbdata) - if isinstance(l.stop, ir.Var): - l.stop = visit_vars_inner(l.stop, callback, cbdata) - if isinstance(l.step, ir.Var): - l.step = visit_vars_inner(l.step, callback, cbdata) - visit_vars({-1: parfor.init_block}, callback, cbdata) - visit_parfor_pattern_vars(parfor, callback, cbdata) - visit_vars(parfor.loop_body, callback, cbdata) - return - - -# add call to visit parfor variable -ir_utils.visit_vars_extensions[Parfor] = visit_vars_parfor - - -def parfor_defs(parfor, use_set=None, def_set=None): - """list variables written in this parfor by recursively - calling compute_use_defs() on body and combining block defs. - """ - if use_set is None: - use_set = set() - if def_set is None: - def_set = set() - blocks = wrap_parfor_blocks(parfor) - uses, defs = compute_use_defs(blocks) - cfg = compute_cfg_from_blocks(blocks) - last_label = max(blocks.keys()) - unwrap_parfor_blocks(parfor) - - # Conservatively, only add defs for blocks that are definitely executed - # Go through blocks in order, as if they are statements of the block that - # includes the parfor, and update uses/defs. 
- - # no need for topo order of ir_utils - topo_order = cfg.topo_order() - # blocks that dominate last block are definitely executed - definitely_executed = cfg.dominators()[last_label] - # except loop bodies that might not execute - for loop in cfg.loops().values(): - definitely_executed -= loop.body - for label in topo_order: - if label in definitely_executed: - # see compute_use_defs() in analysis.py - # variables defined in the block that includes the parfor are not - # uses of that block (are not potentially live in the beginning of - # the block) - use_set.update(uses[label] - def_set) - def_set.update(defs[label]) - else: - use_set.update(uses[label] - def_set) - - # treat loop variables and size variables as use - loop_vars = { - l.start.name for l in parfor.loop_nests if isinstance( - l.start, ir.Var)} - loop_vars |= { - l.stop.name for l in parfor.loop_nests if isinstance( - l.stop, ir.Var)} - loop_vars |= { - l.step.name for l in parfor.loop_nests if isinstance( - l.step, ir.Var)} - use_set.update(loop_vars) - use_set |= get_parfor_pattern_vars(parfor) - - return analysis._use_defs_result(usemap=use_set, defmap=def_set) - - -analysis.ir_extension_usedefs[Parfor] = parfor_defs - - -def parfor_insert_dels(parfor, curr_dead_set): - """insert dels in parfor. input: dead variable set right after parfor. - returns the variables for which del was inserted. 
- """ - blocks = wrap_parfor_blocks(parfor) - cfg = compute_cfg_from_blocks(blocks) - usedefs = compute_use_defs(blocks) - live_map = compute_live_map(cfg, blocks, usedefs.usemap, usedefs.defmap) - dead_map = compute_dead_maps(cfg, blocks, live_map, usedefs.defmap) - - # treat loop variables and size variables as live - loop_vars = { - l.start.name for l in parfor.loop_nests if isinstance( - l.start, ir.Var)} - loop_vars |= { - l.stop.name for l in parfor.loop_nests if isinstance( - l.stop, ir.Var)} - loop_vars |= { - l.step.name for l in parfor.loop_nests if isinstance( - l.step, ir.Var)} - loop_vars |= {l.index_variable.name for l in parfor.loop_nests} - # for var_list in parfor.array_analysis.array_size_vars.values(): - # loop_vars |= {v.name for v in var_list if isinstance(v, ir.Var)} - - dead_set = set() - for label in blocks.keys(): - # only kill vars that are actually dead at the parfor's block - dead_map.internal[label] &= curr_dead_set - dead_map.internal[label] -= loop_vars - dead_set |= dead_map.internal[label] - dead_map.escaping[label] &= curr_dead_set - dead_map.escaping[label] -= loop_vars - dead_set |= dead_map.escaping[label] - - # dummy class to replace func_ir. _patch_var_dels only accesses blocks - class DummyFuncIR(object): - - def __init__(self, blocks): - self.blocks = blocks - post_proc = postproc.PostProcessor(DummyFuncIR(blocks)) - post_proc._patch_var_dels(dead_map.internal, dead_map.escaping) - unwrap_parfor_blocks(parfor) - - return dead_set | loop_vars - - -postproc.ir_extension_insert_dels[Parfor] = parfor_insert_dels - - -def maximize_fusion(func_ir, blocks, up_direction=True): - """ - Reorder statements to maximize parfor fusion. Push all parfors up or down - so they are adjacent. 
- """ - call_table, _ = get_call_table(blocks) - for block in blocks.values(): - order_changed = True - while order_changed: - order_changed = maximize_fusion_inner(func_ir, block, - call_table, up_direction) - -def maximize_fusion_inner(func_ir, block, call_table, up_direction=True): - order_changed = False - i = 0 - # i goes to body[-3] (i+1 to body[-2]) since body[-1] is terminator and - # shouldn't be reordered - while i < len(block.body) - 2: - stmt = block.body[i] - next_stmt = block.body[i+1] - can_reorder = (_can_reorder_stmts(stmt, next_stmt, func_ir, call_table) - if up_direction else _can_reorder_stmts(next_stmt, stmt, - func_ir, call_table)) - if can_reorder: - block.body[i] = next_stmt - block.body[i+1] = stmt - order_changed = True - i += 1 - return order_changed - -def _can_reorder_stmts(stmt, next_stmt, func_ir, call_table): - """ - Check dependencies to determine if a parfor can be reordered in the IR block - with a non-parfor statement. - """ - # swap only parfors with non-parfors - # don't reorder calls with side effects (e.g. 
file close) - # only read-read dependencies are OK - # make sure there is no write-write, write-read dependencies - if (isinstance( - stmt, Parfor) and not isinstance( - next_stmt, Parfor) and not isinstance( - next_stmt, ir.Print) - and (not isinstance(next_stmt, ir.Assign) - or has_no_side_effect( - next_stmt.value, set(), call_table) - or guard(is_assert_equiv, func_ir, next_stmt.value))): - stmt_accesses = {v.name for v in stmt.list_vars()} - stmt_writes = get_parfor_writes(stmt) - next_accesses = {v.name for v in next_stmt.list_vars()} - next_writes = get_stmt_writes(next_stmt) - if len((stmt_writes & next_accesses) - | (next_writes & stmt_accesses)) == 0: - return True - return False - -def is_assert_equiv(func_ir, expr): - func_name, mod_name = find_callname(func_ir, expr) - return func_name == 'assert_equiv' - - -def get_parfor_writes(parfor): - assert isinstance(parfor, Parfor) - writes = set() - blocks = parfor.loop_body.copy() - blocks[-1] = parfor.init_block - for block in blocks.values(): - for stmt in block.body: - writes.update(get_stmt_writes(stmt)) - if isinstance(stmt, Parfor): - writes.update(get_parfor_writes(stmt)) - return writes - -def try_fuse(equiv_set, parfor1, parfor2): - """try to fuse parfors and return a fused parfor, otherwise return None - """ - dprint("try_fuse trying to fuse \n", parfor1, "\n", parfor2) - - # fusion of parfors with different dimensions not supported yet - if len(parfor1.loop_nests) != len(parfor2.loop_nests): - dprint("try_fuse parfors number of dimensions mismatch") - return None - - ndims = len(parfor1.loop_nests) - # all loops should be equal length - - def is_equiv(x, y): - return x == y or equiv_set.is_equiv(x, y) - - for i in range(ndims): - nest1 = parfor1.loop_nests[i] - nest2 = parfor2.loop_nests[i] - if not (is_equiv(nest1.start, nest2.start) and - is_equiv(nest1.stop, nest2.stop) and - is_equiv(nest1.step, nest2.step)): - dprint("try_fuse parfor dimension correlation mismatch", i) - return None - - # 
TODO: make sure parfor1's reduction output is not used in parfor2 - # only data parallel loops - if has_cross_iter_dep(parfor1) or has_cross_iter_dep(parfor2): - dprint("try_fuse parfor cross iteration dependency found") - return None - - # find parfor1's defs, only body is considered since init_block will run - # first after fusion as well - p1_body_usedefs = compute_use_defs(parfor1.loop_body) - p1_body_defs = set() - for defs in p1_body_usedefs.defmap.values(): - p1_body_defs |= defs - - p2_usedefs = compute_use_defs(parfor2.loop_body) - p2_uses = compute_use_defs({0: parfor2.init_block}).usemap[0] - for uses in p2_usedefs.usemap.values(): - p2_uses |= uses - - if not p1_body_defs.isdisjoint(p2_uses): - dprint("try_fuse parfor2 depends on parfor1 body") - return None - - return fuse_parfors_inner(parfor1, parfor2) - - -def fuse_parfors_inner(parfor1, parfor2): - # fuse parfor2 into parfor1 - # append parfor2's init block on parfor1's - parfor1.init_block.body.extend(parfor2.init_block.body) - - # append parfor2's first block to parfor1's last block - parfor2_first_label = min(parfor2.loop_body.keys()) - parfor2_first_block = parfor2.loop_body[parfor2_first_label].body - parfor1_first_label = min(parfor1.loop_body.keys()) - parfor1_last_label = max(parfor1.loop_body.keys()) - parfor1.loop_body[parfor1_last_label].body.extend(parfor2_first_block) - - # add parfor2 body blocks to parfor1's except first - parfor1.loop_body.update(parfor2.loop_body) - parfor1.loop_body.pop(parfor2_first_label) - - # replace parfor2 indices with parfor1's - ndims = len(parfor1.loop_nests) - index_dict = {parfor2.index_var.name: parfor1.index_var} - for i in range(ndims): - index_dict[parfor2.loop_nests[i].index_variable.name] = parfor1.loop_nests[ - i].index_variable - replace_vars(parfor1.loop_body, index_dict) - - # re-order labels from min to max - blocks = wrap_parfor_blocks(parfor1, entry_label=parfor1_first_label) - blocks = rename_labels(blocks) - unwrap_parfor_blocks(parfor1, 
blocks) - - nameset = set(x.name for x in index_dict.values()) - remove_duplicate_definitions(parfor1.loop_body, nameset) - parfor1.patterns.extend(parfor2.patterns) - if config.DEBUG_ARRAY_OPT_STATS: - print('Parallel for-loop #{} is fused into for-loop #{}.'.format( - parfor2.id, parfor1.id)) - - return parfor1 - - -def remove_duplicate_definitions(blocks, nameset): - """Remove duplicated definition for variables in the given nameset, which - is often a result of parfor fusion. - """ - for label, block in blocks.items(): - body = block.body - new_body = [] - defined = set() - for inst in body: - if isinstance(inst, ir.Assign): - name = inst.target.name - if name in nameset: - if name in defined: - continue - defined.add(name) - new_body.append(inst) - block.body = new_body - return - - -def has_cross_iter_dep(parfor): - # we consevatively assume there is cross iteration dependency when - # the parfor index is used in any expression since the expression could - # be used for indexing arrays - # TODO: make it more accurate using ud-chains - indices = {l.index_variable for l in parfor.loop_nests} - for b in parfor.loop_body.values(): - for stmt in b.body: - # GetItem/SetItem nodes are fine since can't have expression inside - # and only simple indices are possible - if isinstance(stmt, (ir.SetItem, ir.StaticSetItem)): - continue - # tuples are immutable so no expression on parfor possible - if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr): - op = stmt.value.op - if op in ['build_tuple', 'getitem', 'static_getitem']: - continue - # other statements can have potential violations - if not indices.isdisjoint(stmt.list_vars()): - dprint("has_cross_iter_dep found", indices, stmt) - return True - return False - - -def dprint(*s): - if config.DEBUG_ARRAY_OPT == 1: - print(*s) - - -def get_parfor_pattern_vars(parfor): - """ get the variables used in parfor pattern information - """ - out = set() - # currently, only stencil pattern has variables - for 
pattern in parfor.patterns: - if pattern[0] == 'stencil': - left_lengths = pattern[1][0] - right_lengths = pattern[1][1] - for v in left_lengths+right_lengths: - if isinstance(v, ir.Var): - out.add(v.name) - return out - -def remove_dead_parfor(parfor, lives, arg_aliases, alias_map, func_ir, typemap): - """ remove dead code inside parfor including get/sets - """ - - with dummy_return_in_loop_body(parfor.loop_body): - labels = find_topo_order(parfor.loop_body) - - # get/setitem replacement should ideally use dataflow to propagate setitem - # saved values, but for simplicity we handle the common case of propagating - # setitems in the first block (which is dominant) if the array is not - # potentially changed in any way - first_label = labels[0] - first_block_saved_values = {} - _update_parfor_get_setitems( - parfor.loop_body[first_label].body, - parfor.index_var, alias_map, - first_block_saved_values, - lives - ) - - # remove saved first block setitems if array potentially changed later - saved_arrs = set(first_block_saved_values.keys()) - for l in labels: - if l == first_label: - continue - for stmt in parfor.loop_body[l].body: - if (isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr) - and stmt.value.op == 'getitem' - and stmt.value.index.name == parfor.index_var.name): - continue - varnames = set(v.name for v in stmt.list_vars()) - rm_arrs = varnames & saved_arrs - for a in rm_arrs: - first_block_saved_values.pop(a, None) - - - # replace getitems with available value - # e.g. A[i] = v; ... 
s = A[i] -> s = v - for l in labels: - if l == first_label: - continue - block = parfor.loop_body[l] - saved_values = first_block_saved_values.copy() - _update_parfor_get_setitems(block.body, parfor.index_var, alias_map, - saved_values, lives) - - - # after getitem replacement, remove extra setitems - blocks = parfor.loop_body.copy() # shallow copy is enough - last_label = max(blocks.keys()) - return_label, tuple_var = _add_liveness_return_block(blocks, lives, typemap) - # jump to return label - jump = ir.Jump(return_label, ir.Loc("parfors_dummy", -1)) - blocks[last_label].body.append(jump) - cfg = compute_cfg_from_blocks(blocks) - usedefs = compute_use_defs(blocks) - live_map = compute_live_map(cfg, blocks, usedefs.usemap, usedefs.defmap) - alias_set = set(alias_map.keys()) - - for label, block in blocks.items(): - new_body = [] - in_lives = {v.name for v in block.terminator.list_vars()} - # find live variables at the end of block - for out_blk, _data in cfg.successors(label): - in_lives |= live_map[out_blk] - for stmt in reversed(block.body): - # aliases of lives are also live for setitems - alias_lives = in_lives & alias_set - for v in alias_lives: - in_lives |= alias_map[v] - if (isinstance(stmt, ir.SetItem) and stmt.index.name == - parfor.index_var.name and stmt.target.name not in in_lives and - stmt.target.name not in arg_aliases): - continue - in_lives |= {v.name for v in stmt.list_vars()} - new_body.append(stmt) - new_body.reverse() - block.body = new_body - - typemap.pop(tuple_var.name) # remove dummy tuple type - blocks[last_label].body.pop() # remove jump - - - # process parfor body recursively - remove_dead_parfor_recursive( - parfor, lives, arg_aliases, alias_map, func_ir, typemap) - - # remove parfor if empty - is_empty = len(parfor.init_block.body) == 0 - for block in parfor.loop_body.values(): - is_empty &= len(block.body) == 0 - if is_empty: - return None - return parfor - -def _update_parfor_get_setitems(block_body, index_var, alias_map, - 
saved_values, lives): - """ - replace getitems of a previously set array in a block of parfor loop body - """ - for stmt in block_body: - if (isinstance(stmt, ir.SetItem) and stmt.index.name == - index_var.name and stmt.target.name not in lives): - # saved values of aliases of SetItem target array are invalid - for w in alias_map.get(stmt.target.name, []): - saved_values.pop(w, None) - # set saved value after invalidation since alias_map may - # contain the array itself (e.g. pi example) - saved_values[stmt.target.name] = stmt.value - continue - if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr): - rhs = stmt.value - if rhs.op == 'getitem' and isinstance(rhs.index, ir.Var): - if rhs.index.name == index_var.name: - # replace getitem if value saved - stmt.value = saved_values.get(rhs.value.name, rhs) - continue - # conservative assumption: array is modified if referenced - # remove all referenced arrays - for v in stmt.list_vars(): - saved_values.pop(v.name, None) - # aliases are potentially modified as well - for w in alias_map.get(v.name, []): - saved_values.pop(w, None) - - return - -ir_utils.remove_dead_extensions[Parfor] = remove_dead_parfor - - -def remove_dead_parfor_recursive(parfor, lives, arg_aliases, alias_map, - func_ir, typemap): - """create a dummy function from parfor and call remove dead recursively - """ - blocks = parfor.loop_body.copy() # shallow copy is enough - first_body_block = min(blocks.keys()) - assert first_body_block > 0 # we are using 0 for init block here - last_label = max(blocks.keys()) - - return_label, tuple_var = _add_liveness_return_block(blocks, lives, typemap) - - # branch back to first body label to simulate loop - branch = ir.Branch(0, first_body_block, return_label, ir.Loc("parfors_dummy", -1)) - blocks[last_label].body.append(branch) - - # add dummy jump in init_block for CFG to work - blocks[0] = parfor.init_block - blocks[0].body.append(ir.Jump(first_body_block, ir.Loc("parfors_dummy", -1))) - - # args var 
including aliases is ok - remove_dead(blocks, arg_aliases, func_ir, typemap, alias_map, arg_aliases) - typemap.pop(tuple_var.name) # remove dummy tuple type - blocks[0].body.pop() # remove dummy jump - blocks[last_label].body.pop() # remove branch - return - -def _add_liveness_return_block(blocks, lives, typemap): - last_label = max(blocks.keys()) - return_label = last_label + 1 - - loc = blocks[last_label].loc - scope = blocks[last_label].scope - blocks[return_label] = ir.Block(scope, loc) - - # add lives in a dummpy return to last block to avoid their removal - tuple_var = ir.Var(scope, mk_unique_var("$tuple_var"), loc) - # dummy type for tuple_var - typemap[tuple_var.name] = types.containers.UniTuple( - types.uintp, 2) - live_vars = [ir.Var(scope, v, loc) for v in lives] - tuple_call = ir.Expr.build_tuple(live_vars, loc) - blocks[return_label].body.append(ir.Assign(tuple_call, tuple_var, loc)) - blocks[return_label].body.append(ir.Return(tuple_var, loc)) - return return_label, tuple_var - - -def find_potential_aliases_parfor(parfor, args, typemap, func_ir, alias_map, arg_aliases): - blocks = wrap_parfor_blocks(parfor) - ir_utils.find_potential_aliases( - blocks, args, typemap, func_ir, alias_map, arg_aliases) - unwrap_parfor_blocks(parfor) - return - -ir_utils.alias_analysis_extensions[Parfor] = find_potential_aliases_parfor - -def simplify_parfor_body_CFG(blocks): - """simplify CFG of body loops in parfors""" - for block in blocks.values(): - for stmt in block.body: - if isinstance(stmt, Parfor): - parfor = stmt - # add dummy return to enable CFG creation - # can't use dummy_return_in_loop_body since body changes - last_block = parfor.loop_body[max(parfor.loop_body.keys())] - last_block.body.append(ir.Return(0, ir.Loc("parfors_dummy", -1))) - parfor.loop_body = simplify_CFG(parfor.loop_body) - last_block = parfor.loop_body[max(parfor.loop_body.keys())] - last_block.body.pop() - # call on body recursively - simplify_parfor_body_CFG(parfor.loop_body) - - -def 
wrap_parfor_blocks(parfor, entry_label = None): - """wrap parfor blocks for analysis/optimization like CFG""" - blocks = parfor.loop_body.copy() # shallow copy is enough - if entry_label == None: - entry_label = min(blocks.keys()) - assert entry_label > 0 # we are using 0 for init block here - - # add dummy jump in init_block for CFG to work - blocks[0] = parfor.init_block - blocks[0].body.append(ir.Jump(entry_label, blocks[0].loc)) - for block in blocks.values(): - if len(block.body) == 0 or (not block.body[-1].is_terminator): - block.body.append(ir.Jump(entry_label, block.loc)) - return blocks - - -def unwrap_parfor_blocks(parfor, blocks=None): - """ - unwrap parfor blocks after analysis/optimization. - Allows changes to the parfor loop. - """ - if blocks is not None: - # make sure init block isn't removed - init_block_label = min(blocks.keys()) - # update loop body blocks - blocks.pop(init_block_label) - parfor.loop_body = blocks - - # make sure dummy jump to loop body isn't altered - first_body_label = min(parfor.loop_body.keys()) - assert isinstance(parfor.init_block.body[-1], ir.Jump) - - # remove dummy jump to loop body - parfor.init_block.body.pop() - - # make sure dummy jump back to loop body isn't altered - for block in parfor.loop_body.values(): - if (isinstance(block.body[-1], ir.Jump) and - block.body[-1].target == first_body_label): - # remove dummy jump back to loop - block.body.pop() - return - - -def get_copies_parfor(parfor, typemap): - """find copies generated/killed by parfor""" - blocks = wrap_parfor_blocks(parfor) - in_copies_parfor, out_copies_parfor = copy_propagate(blocks, typemap) - in_gen_copies, in_extra_kill = get_block_copies(blocks, typemap) - unwrap_parfor_blocks(parfor) - - # parfor's extra kill is kills of its init block, - # and all possible gens and kills of it's body loop. 
- # body doesn't gen and only kills since it may or may not run - # TODO: save copies that are repeated in parfor - kill_set = in_extra_kill[0] - for label in parfor.loop_body.keys(): - kill_set |= {l for l, r in in_gen_copies[label]} - kill_set |= in_extra_kill[label] - - # gen copies is copies generated by init that are not killed by body - last_label = max(parfor.loop_body.keys()) - gens = out_copies_parfor[last_label] & in_gen_copies[0] - - if config.DEBUG_ARRAY_OPT == 1: - print("copy propagate parfor gens:", gens, "kill_set", kill_set) - return gens, kill_set - - -ir_utils.copy_propagate_extensions[Parfor] = get_copies_parfor - - -def apply_copies_parfor(parfor, var_dict, name_var_table, - typemap, calltypes, save_copies): - """apply copy propagate recursively in parfor""" - # replace variables in pattern metadata like stencil neighborhood - for i, pattern in enumerate(parfor.patterns): - if pattern[0] == 'stencil': - parfor.patterns[i] = ('stencil', - replace_vars_inner(pattern[1], var_dict)) - - # replace loop boundary variables - for l in parfor.loop_nests: - l.start = replace_vars_inner(l.start, var_dict) - l.stop = replace_vars_inner(l.stop, var_dict) - l.step = replace_vars_inner(l.step, var_dict) - - blocks = wrap_parfor_blocks(parfor) - # add dummy assigns for each copy - assign_list = [] - for lhs_name, rhs in var_dict.items(): - assign_list.append(ir.Assign(rhs, name_var_table[lhs_name], - ir.Loc("dummy", -1))) - blocks[0].body = assign_list + blocks[0].body - in_copies_parfor, out_copies_parfor = copy_propagate(blocks, typemap) - apply_copy_propagate(blocks, in_copies_parfor, name_var_table, typemap, - calltypes, save_copies) - unwrap_parfor_blocks(parfor) - # remove dummy assignments - blocks[0].body = blocks[0].body[len(assign_list):] - return - - -ir_utils.apply_copy_propagate_extensions[Parfor] = apply_copies_parfor - - -def push_call_vars(blocks, saved_globals, saved_getattrs): - """push call variables to right before their call site. 
- assuming one global/getattr is created for each call site and control flow - doesn't change it. - """ - for block in blocks.values(): - new_body = [] - # global/attr variables that are defined in this block already, - # no need to reassign them - block_defs = set() - for stmt in block.body: - def process_assign(stmt): - if isinstance(stmt, ir.Assign): - rhs = stmt.value - lhs = stmt.target - if (isinstance(rhs, ir.Global)): - saved_globals[lhs.name] = stmt - block_defs.add(lhs.name) - elif isinstance(rhs, ir.Expr) and rhs.op == 'getattr': - if (rhs.value.name in saved_globals - or rhs.value.name in saved_getattrs): - saved_getattrs[lhs.name] = stmt - block_defs.add(lhs.name) - - if isinstance(stmt, Parfor): - for s in stmt.init_block.body: - process_assign(s) - pblocks = stmt.loop_body.copy() - push_call_vars(pblocks, saved_globals, saved_getattrs) - new_body.append(stmt) - continue - else: - process_assign(stmt) - for v in stmt.list_vars(): - new_body += _get_saved_call_nodes(v.name, saved_globals, - saved_getattrs, block_defs) - new_body.append(stmt) - block.body = new_body - - return - - -def _get_saved_call_nodes(fname, saved_globals, saved_getattrs, block_defs): - nodes = [] - while (fname not in block_defs and (fname in saved_globals - or fname in saved_getattrs)): - if fname in saved_globals: - nodes.append(saved_globals[fname]) - block_defs.add(saved_globals[fname].target.name) - fname = '_PA_DONE' - elif fname in saved_getattrs: - up_name = saved_getattrs[fname].value.value.name - nodes.append(saved_getattrs[fname]) - block_defs.add(saved_getattrs[fname].target.name) - fname = up_name - nodes.reverse() - return nodes - -def repr_arrayexpr(arrayexpr): - """Extract operators from arrayexpr to represent it abstractly as a string. 
- """ - if isinstance(arrayexpr, tuple): - opr = arrayexpr[0] - # sometimes opr is not string like '+', but is a ufunc object - if not isinstance(opr, str): - if hasattr(opr, '__name__'): - opr = opr.__name__ - else: - opr = '_' # can return dummy since repr is not critical - args = arrayexpr[1] - if len(args) == 1: - return '({}{})'.format(opr, repr_arrayexpr(args[0])) - else: - return '({})'.format(opr.join([ repr_arrayexpr(x) for x in args ])) - else: - return '_' - -def fix_generator_types(generator_info, return_type, typemap): - """postproc updates generator_info with live variables after transformations - but generator variables have types in return_type that are updated here. - """ - new_state_types = [] - for v in generator_info.state_vars: - new_state_types.append(typemap[v]) - return_type.state_types = tuple(new_state_types) - return - - -def get_parfor_call_table(parfor, call_table=None, reverse_call_table=None): - if call_table is None: - call_table = {} - if reverse_call_table is None: - reverse_call_table = {} - blocks = wrap_parfor_blocks(parfor) - call_table, reverse_call_table = get_call_table(blocks, call_table, - reverse_call_table) - unwrap_parfor_blocks(parfor) - return call_table, reverse_call_table - - -ir_utils.call_table_extensions[Parfor] = get_parfor_call_table - - -def get_parfor_tuple_table(parfor, tuple_table=None): - if tuple_table is None: - tuple_table = {} - blocks = wrap_parfor_blocks(parfor) - tuple_table = ir_utils.get_tuple_table(blocks, tuple_table) - unwrap_parfor_blocks(parfor) - return tuple_table - - -ir_utils.tuple_table_extensions[Parfor] = get_parfor_tuple_table - - -def get_parfor_array_accesses(parfor, accesses=None): - if accesses is None: - accesses = set() - blocks = wrap_parfor_blocks(parfor) - accesses = ir_utils.get_array_accesses(blocks, accesses) - unwrap_parfor_blocks(parfor) - return accesses - - -# parfor handler is same as -ir_utils.array_accesses_extensions[Parfor] = get_parfor_array_accesses - - -def 
parfor_add_offset_to_labels(parfor, offset): - blocks = wrap_parfor_blocks(parfor) - blocks = add_offset_to_labels(blocks, offset) - blocks[0] = blocks[offset] - blocks.pop(offset) - unwrap_parfor_blocks(parfor, blocks) - return - - -ir_utils.add_offset_to_labels_extensions[Parfor] = parfor_add_offset_to_labels - - -def parfor_find_max_label(parfor): - blocks = wrap_parfor_blocks(parfor) - max_label = ir_utils.find_max_label(blocks) - unwrap_parfor_blocks(parfor) - return max_label - -ir_utils.find_max_label_extensions[Parfor] = parfor_find_max_label - - -def parfor_typeinfer(parfor, typeinferer): - save_blocks = typeinferer.blocks - blocks = wrap_parfor_blocks(parfor) - index_vars = [l.index_variable for l in parfor.loop_nests] - # no need to handle parfor.index_var (tuple of variables), since it will be - # assigned to a tuple from individual indices - first_block = min(blocks.keys()) - loc = blocks[first_block].loc - index_assigns = [ir.Assign(ir.Const(1, loc), v, loc) for v in index_vars] - save_first_block_body = blocks[first_block].body - blocks[first_block].body = index_assigns + blocks[first_block].body - typeinferer.blocks = blocks - typeinferer.build_constraint() - typeinferer.blocks = save_blocks - blocks[first_block].body = save_first_block_body - unwrap_parfor_blocks(parfor) - - -typeinfer.typeinfer_extensions[Parfor] = parfor_typeinfer - -def build_parfor_definitions(parfor, definitions=None): - """get variable definition table for parfors""" - if definitions is None: - definitions = defaultdict(list) - - # avoid wrap_parfor_blocks() since build_definitions is called inside - # find_potential_aliases_parfor where the parfor is already wrapped - build_definitions(parfor.loop_body, definitions) - build_definitions({0: parfor.init_block}, definitions) - return definitions - -ir_utils.build_defs_extensions[Parfor] = build_parfor_definitions - -@contextmanager -def dummy_return_in_loop_body(loop_body): - """adds dummy return to last block of parfor loop 
body for CFG computation - """ - # max is last block since we add it manually for prange - last_label = max(loop_body.keys()) - loop_body[last_label].body.append( - ir.Return(0, ir.Loc("parfors_dummy", -1))) - yield - # remove dummy return - loop_body[last_label].body.pop() - -@infer_global(reduce) -class ReduceInfer(AbstractTemplate): - def generic(self, args, kws): - assert not kws - assert len(args) == 3 - assert isinstance(args[1], types.Array) - return signature(args[1].dtype, *args) diff --git a/numba/numba/postproc.py b/numba/numba/postproc.py deleted file mode 100644 index 40435648c..000000000 --- a/numba/numba/postproc.py +++ /dev/null @@ -1,213 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from . import analysis, ir, transforms, utils - - -class YieldPoint(object): - - def __init__(self, block, inst): - assert isinstance(block, ir.Block) - assert isinstance(inst, ir.Yield) - self.block = block - self.inst = inst - self.live_vars = None - self.weak_live_vars = None - - -class GeneratorInfo(object): - - def __init__(self): - # { index: YieldPoint } - self.yield_points = {} - # Ordered list of variable names - self.state_vars = [] - - def get_yield_points(self): - """ - Return an iterable of YieldPoint instances. 
- """ - return self.yield_points.values() - - -class VariableLifetime(object): - """ - For lazily building information of variable lifetime - """ - def __init__(self, blocks): - self._blocks = blocks - - @utils.cached_property - def cfg(self): - return analysis.compute_cfg_from_blocks(self._blocks) - - @utils.cached_property - def usedefs(self): - return analysis.compute_use_defs(self._blocks) - - @utils.cached_property - def livemap(self): - return analysis.compute_live_map(self.cfg, self._blocks, - self.usedefs.usemap, - self.usedefs.defmap) - - @utils.cached_property - def deadmaps(self): - return analysis.compute_dead_maps(self.cfg, self._blocks, self.livemap, - self.usedefs.defmap) - -# other packages that define new nodes add calls for inserting dels -# format: {type:function} -ir_extension_insert_dels = {} - -class PostProcessor(object): - """ - A post-processor for Numba IR. - """ - - def __init__(self, func_ir): - self.func_ir = func_ir - - def run(self): - """ - Run the following passes over Numba IR: - - canonicalize the CFG - - emit explicit `del` instructions for variables - - compute lifetime of variables - - compute generator info (if function is a generator function) - """ - self.func_ir.blocks = transforms.canonicalize_cfg(self.func_ir.blocks) - vlt = VariableLifetime(self.func_ir.blocks) - self.func_ir.variable_lifetime = vlt - - # Emit del nodes - self._insert_var_dels() - - bev = analysis.compute_live_variables(vlt.cfg, self.func_ir.blocks, - vlt.usedefs.defmap, - vlt.deadmaps.combined) - for offset, ir_block in self.func_ir.blocks.items(): - self.func_ir.block_entry_vars[ir_block] = bev[offset] - - if self.func_ir.is_generator: - self.func_ir.generator_info = GeneratorInfo() - self._compute_generator_info() - else: - self.func_ir.generator_info = None - - def _populate_generator_info(self): - """ - Fill `index` for the Yield instruction and create YieldPoints. 
- """ - dct = self.func_ir.generator_info.yield_points - assert not dct, 'rerunning _populate_generator_info' - for block in self.func_ir.blocks.values(): - for inst in block.body: - if isinstance(inst, ir.Assign): - yieldinst = inst.value - if isinstance(yieldinst, ir.Yield): - index = len(dct) + 1 - yieldinst.index = index - yp = YieldPoint(block, yieldinst) - dct[yieldinst.index] = yp - - def _compute_generator_info(self): - """ - Compute the generator's state variables as the union of live variables - at all yield points. - """ - self._populate_generator_info() - - gi = self.func_ir.generator_info - for yp in gi.get_yield_points(): - live_vars = set(self.func_ir.get_block_entry_vars(yp.block)) - weak_live_vars = set() - stmts = iter(yp.block.body) - for stmt in stmts: - if isinstance(stmt, ir.Assign): - if stmt.value is yp.inst: - break - live_vars.add(stmt.target.name) - elif isinstance(stmt, ir.Del): - live_vars.remove(stmt.value) - else: - assert 0, "couldn't find yield point" - # Try to optimize out any live vars that are deleted immediately - # after the yield point. - for stmt in stmts: - if isinstance(stmt, ir.Del): - name = stmt.value - if name in live_vars: - live_vars.remove(name) - weak_live_vars.add(name) - else: - break - yp.live_vars = live_vars - yp.weak_live_vars = weak_live_vars - - st = set() - for yp in gi.get_yield_points(): - st |= yp.live_vars - st |= yp.weak_live_vars - gi.state_vars = sorted(st) - - def _insert_var_dels(self): - """ - Insert del statements for each variable. - Returns a 2-tuple of (variable definition map, variable deletion map) - which indicates variables defined and deleted in each block. - - The algorithm avoids relying on explicit knowledge on loops and - distinguish between variables that are defined locally vs variables that - come from incoming blocks. - We start with simple usage (variable reference) and definition (variable - creation) maps on each block. 
Propagate the liveness info to predecessor - blocks until it stabilize, at which point we know which variables must - exist before entering each block. Then, we compute the end of variable - lives and insert del statements accordingly. Variables are deleted after - the last use. Variable referenced by terminators (e.g. conditional - branch and return) are deleted by the successors or the caller. - """ - vlt = self.func_ir.variable_lifetime - self._patch_var_dels(vlt.deadmaps.internal, vlt.deadmaps.escaping) - - def _patch_var_dels(self, internal_dead_map, escaping_dead_map): - """ - Insert delete in each block - """ - for offset, ir_block in self.func_ir.blocks.items(): - # for each internal var, insert delete after the last use - internal_dead_set = internal_dead_map[offset].copy() - delete_pts = [] - # for each statement in reverse order - for stmt in reversed(ir_block.body[:-1]): - # internal vars that are used here - live_set = set(v.name for v in stmt.list_vars()) - dead_set = live_set & internal_dead_set - for T, def_func in ir_extension_insert_dels.items(): - if isinstance(stmt, T): - done_dels = def_func(stmt, dead_set) - dead_set -= done_dels - internal_dead_set -= done_dels - # used here but not afterwards - delete_pts.append((stmt, dead_set)) - internal_dead_set -= dead_set - - # rewrite body and insert dels - body = [] - lastloc = ir_block.loc - for stmt, delete_set in reversed(delete_pts): - lastloc = stmt.loc - # Ignore dels (assuming no user inserted deletes) - if not isinstance(stmt, ir.Del): - body.append(stmt) - # note: the reverse sort is not necessary for correctness - # it is just to minimize changes to test for now - for var_name in sorted(delete_set, reverse=True): - body.append(ir.Del(var_name, loc=lastloc)) - body.append(ir_block.body[-1]) # terminator - ir_block.body = body - - # vars to delete at the start - escape_dead_set = escaping_dead_map[offset] - for var_name in sorted(escape_dead_set): - ir_block.prepend(ir.Del(var_name, 
loc=ir_block.body[0].loc)) diff --git a/numba/numba/pretty_annotate.py b/numba/numba/pretty_annotate.py deleted file mode 100644 index b31bcaa0f..000000000 --- a/numba/numba/pretty_annotate.py +++ /dev/null @@ -1,282 +0,0 @@ -""" -This module implements code highlighting of numba function annotations. - -Example: - - >>> import numba - ... from numba_annotate import Annotate - ... @numba.jit - ... def test(q): - ... res = 0 - ... for i in range(q): - ... res += i - ... return res - ... - ... test(10) - ... Annotate(test) - -The last line will return an HTML and/or ANSI representation that will be -displayed accordingly in IPython/Jupyter. - -""" - -from warnings import warn - -warn("The pretty_annotate functionality is experimental and might change API", - FutureWarning) - -def hllines(code, style): - try: - from pygments import highlight - from pygments.lexers import PythonLexer - from pygments.formatters import HtmlFormatter - except ImportError: - raise ImportError("please install the 'pygments' package") - pylex = PythonLexer() - "Given a code string, return a list of html-highlighted lines" - hf = HtmlFormatter(noclasses=True, style=style, nowrap=True) - res = highlight(code, pylex, hf) - return res.splitlines() - - -def htlines(code, style): - try: - from pygments import highlight - from pygments.lexers import PythonLexer - # TerminalFormatter does not support themes, Terminal256 should, - # but seem to not work. 
- from pygments.formatters import TerminalFormatter - except ImportError: - raise ImportError("please install the 'pygments' package") - pylex = PythonLexer() - "Given a code string, return a list of ANSI-highlighted lines" - hf = TerminalFormatter(style=style) - res = highlight(code, pylex, hf) - return res.splitlines() - -def get_ansi_template(): - try: - from jinja2 import Template - except ImportError: - raise ImportError("please install the 'jinja2' package") - return Template(""" - {%- for func_key in func_data.keys() -%} - Function name: \x1b[34m{{func_data[func_key]['funcname']}}\x1b[39;49;00m - {%- if func_data[func_key]['filename'] -%} - {{'\n'}}In file: \x1b[34m{{func_data[func_key]['filename'] -}}\x1b[39;49;00m - {%- endif -%} - {{'\n'}}With signature: \x1b[34m{{func_key[1]}}\x1b[39;49;00m - {{- "\n" -}} - {%- for num, line, hl, hc in func_data[func_key]['pygments_lines'] -%} - {{-'\n'}}{{ num}}: {{hc-}} - {%- if func_data[func_key]['ir_lines'][num] -%} - {%- for ir_line, ir_line_type in func_data[func_key]['ir_lines'][num] %} - {{-'\n'}}--{{- ' '*func_data[func_key]['python_indent'][num]}} - {{- ' '*(func_data[func_key]['ir_indent'][num][loop.index0]+4) - }}{{ir_line }}\x1b[41m{{ir_line_type-}}\x1b[39;49;00m - {%- endfor -%} - {%- endif -%} - {%- endfor -%} - {%- endfor -%} - """) - return ansi_template - -def get_html_template(): - try: - from jinja2 import Template - except ImportError: - raise ImportError("please install the 'jinja2' package") - return Template(""" - - - - - - - {% for func_key in func_data.keys() %} - -
- - {%- for num, line, hl, hc in func_data[func_key]['pygments_lines'] -%} - {%- if func_data[func_key]['ir_lines'][num] %} - - {% else -%} - - {%- endif -%} - {%- endfor -%} -
-
- - - {{num}}: - {{' '*func_data[func_key]['python_indent'][num]}}{{hl}} - - - - - {%- for ir_line, ir_line_type in func_data[func_key]['ir_lines'][num] %} - - - - {%- endfor -%} - -
-   - {{- ' '*func_data[func_key]['python_indent'][num]}} - {{ ' '*func_data[func_key]['ir_indent'][num][loop.index0]}}{{ir_line|e -}} - {{ir_line_type}} - -
-
-
- - {{num}}: - {{' '*func_data[func_key]['python_indent'][num]}}{{hl}} - -
-
- {% endfor %} - - - """) - - -def reform_code(annotation): - """ - Extract the code from the Numba annotation datastructure. - - Pygments can only highlight full multi-line strings, the Numba - annotation is list of single lines, with indentation removed. - """ - ident_dict = annotation['python_indent'] - s= '' - for n,l in annotation['python_lines']: - s = s+' '*ident_dict[n]+l+'\n' - return s - - -class Annotate: - """ - Construct syntax highlighted annotation for a given jitted function: - - Example: - - >>> import numba - ... from numba_annotate import Annotate - ... @numba.jit - ... def test(q): - ... res = 0 - ... for i in range(q): - ... res += i - ... return res - ... - ... test(10) - ... Annotate(test) - - Function annotations persist across compilation for newly encountered - type signatures and as a result annotations are shown for all signatures. - - """ - def __init__(self, function, **kwargs): - - style = kwargs.get('style', 'default') - if not function.signatures: - raise ValueError('function need to be jitted for at least one signature') - for sig in function.signatures: - ann = function.get_annotation_info(sig) - self.ann = ann - - for k,v in ann.items(): - res = hllines(reform_code(v), style) - rest = htlines(reform_code(v), style) - v['pygments_lines'] = [(a,b,c, d) for (a,b),c, d in zip(v['python_lines'], res, rest)] - - def _repr_html_(self): - return get_html_template().render(func_data=self.ann) - - def __repr__(self): - return get_ansi_template().render(func_data=self.ann) diff --git a/numba/numba/pycc/__init__.py b/numba/numba/pycc/__init__.py deleted file mode 100644 index de5af1e3a..000000000 --- a/numba/numba/pycc/__init__.py +++ /dev/null @@ -1,101 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import print_function, division, absolute_import - -import os -import logging -import subprocess -import tempfile -import sys - -# Public API -from .cc import CC -from .decorators import export, exportmany - - -def get_ending(args): - if 
args.llvm: - return ".bc" - elif args.olibs: - return ".o" - elif args.python: - return find_pyext_ending() - else: - return find_shared_ending() - - -def main(args=None): - import argparse - - from .compiler import ModuleCompiler - from .platform import Toolchain, find_shared_ending, find_pyext_ending - from . import decorators - - parser = argparse.ArgumentParser( - description="DEPRECATED - Compile Python modules to a single shared library") - parser.add_argument("inputs", nargs='+', help="Input file(s)") - parser.add_argument("-o", nargs=1, dest="output", - help="Output file (default is name of first input -- with new ending)") - - group = parser.add_mutually_exclusive_group() - group.add_argument("-c", action="store_true", dest="olibs", - help="Create object file from each input instead of shared-library") - group.add_argument("--llvm", action="store_true", - help="Emit llvm instead of native code") - - parser.add_argument('--header', action="store_true", - help="Emit C header file with function signatures") - parser.add_argument('--python', action='store_true', - help='Emit additionally generated Python wrapper and ' - 'extension module code in output') - parser.add_argument('-d', '--debug', action='store_true', - help='Print extra debug information') - - args = parser.parse_args(args) - - logger = logging.getLogger(__name__) - if args.debug: - logger.setLevel(logging.DEBUG) - - logger.warn("The 'pycc' script is DEPRECATED; " - "please use the numba.pycc.CC API instead") - - if args.output: - args.output = args.output[0] - output_base = os.path.split(args.output)[1] - module_name = os.path.splitext(output_base)[0] - else: - input_base = os.path.splitext(args.inputs[0])[0] - module_name = os.path.split(input_base)[1] - args.output = input_base + get_ending(args) - logger.debug('args.output --> %s', args.output) - - if args.header: - print('ERROR: pycc --header has been disabled in this release due to a known issue') - sys.exit(1) - - logger.debug('inputs --> 
%s', args.inputs) - decorators.process_input_files(args.inputs) - - compiler = ModuleCompiler(decorators.export_registry, module_name=module_name) - if args.llvm: - logger.debug('emit llvm') - compiler.write_llvm_bitcode(args.output, wrap=args.python) - elif args.olibs: - logger.debug('emit object file') - compiler.write_native_object(args.output, wrap=args.python) - else: - logger.debug('emit shared library') - logger.debug('write to temporary object file %s', tempfile.gettempdir()) - - toolchain = Toolchain() - toolchain.debug = args.debug - temp_obj = (tempfile.gettempdir() + os.sep + - os.path.basename(args.output) + '.o') - compiler.write_native_object(temp_obj, wrap=args.python) - libraries = toolchain.get_python_libraries() - toolchain.link_shared(args.output, [temp_obj], - toolchain.get_python_libraries(), - toolchain.get_python_library_dirs(), - export_symbols=compiler.dll_exports) - os.remove(temp_obj) diff --git a/numba/numba/pycc/cc.py b/numba/numba/pycc/cc.py deleted file mode 100644 index 957345649..000000000 --- a/numba/numba/pycc/cc.py +++ /dev/null @@ -1,296 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from distutils import dir_util, log -from distutils.command import build_ext -from distutils.extension import Extension -import os -import shutil -import sys -import tempfile - -from numba import sigutils, typing -from .compiler import ModuleCompiler, ExportEntry -from .platform import Toolchain - - -class CC(object): - """ - An ahead-of-time compiler to create extension modules that don't - depend on Numba. - """ - - # NOTE: using ccache can speed up repetitive builds - # (especially for the mixin modules) - - _mixin_sources = ['modulemixin.c', '../_math_c99.c'] - - # -flto strips all unused helper functions, which 1) makes the - # produced output much smaller and 2) can make the linking step faster. 
- # (the Windows linker seems to do this by default, judging by the results) - - _extra_cflags = { - # Comment out due to odd behavior with GCC 4.9+ with LTO - # 'posix': ['-flto'], - } - - _extra_ldflags = { - # Comment out due to odd behavior with GCC 4.9+ with LTO - # 'posix': ['-flto'], - } - - def __init__(self, extension_name, source_module=None): - if '.' in extension_name: - raise ValueError("basename should be a simple module name, not " - "qualified name") - - self._basename = extension_name - self._init_function = 'pycc_init_' + extension_name - self._exported_functions = {} - # Resolve source module name and directory - f = sys._getframe(1) - if source_module is None: - dct = f.f_globals - source_module = dct['__name__'] - elif hasattr(source_module, '__name__'): - dct = source_module.__dict__ - source_module = source_module.__name__ - else: - dct = sys.modules[source_module].__dict__ - - self._source_path = dct.get('__file__', '') - self._source_module = source_module - self._toolchain = Toolchain() - self._verbose = False - # By default, output in directory of caller module - self._output_dir = os.path.dirname(self._source_path) - self._output_file = self._toolchain.get_ext_filename(extension_name) - self._use_nrt = True - self._target_cpu = '' - - @property - def name(self): - """ - The name of the extension module to create. - """ - return self._basename - - @property - def output_file(self): - """ - The specific output file (a DLL) that will be generated. - """ - return self._output_file - - @output_file.setter - def output_file(self, value): - self._output_file = value - - @property - def output_dir(self): - """ - The directory the output file will be put in. 
- """ - return self._output_dir - - @output_dir.setter - def output_dir(self, value): - self._output_dir = value - - @property - def use_nrt(self): - return self._use_nrt - - @use_nrt.setter - def use_nrt(self, value): - self._use_nrt = value - - @property - def target_cpu(self): - """ - The target CPU model for code generation. - """ - return self._target_cpu - - @target_cpu.setter - def target_cpu(self, value): - self._target_cpu = value - - @property - def verbose(self): - """ - Whether to display detailed information when compiling. - """ - return self._verbose - - @verbose.setter - def verbose(self, value): - self._verbose = value - - def export(self, exported_name, sig): - """ - Mark a function for exporting in the extension module. - """ - fn_args, fn_retty = sigutils.normalize_signature(sig) - sig = typing.signature(fn_retty, *fn_args) - if exported_name in self._exported_functions: - raise KeyError("duplicated export symbol %s" % (exported_name)) - - def decorator(func): - entry = ExportEntry(exported_name, sig, func) - self._exported_functions[exported_name] = entry - return func - - return decorator - - @property - def _export_entries(self): - return sorted(self._exported_functions.values(), - key=lambda entry: entry.symbol) - - def _get_mixin_sources(self): - here = os.path.dirname(__file__) - mixin_sources = self._mixin_sources[:] - if self._use_nrt: - mixin_sources.append('../runtime/nrt.c') - return [os.path.join(here, f) for f in mixin_sources] - - def _get_mixin_defines(self): - # Macro definitions required by modulemixin.c - return [ - ('PYCC_MODULE_NAME', self._basename), - ('PYCC_USE_NRT', int(self._use_nrt)), - ] - - def _get_extra_cflags(self): - extra_cflags = self._extra_cflags.get(sys.platform, []) - if not extra_cflags: - extra_cflags = self._extra_cflags.get(os.name, []) - return extra_cflags - - def _get_extra_ldflags(self): - extra_ldflags = self._extra_ldflags.get(sys.platform, []) - if not extra_ldflags: - extra_ldflags = 
self._extra_ldflags.get(os.name, []) - return extra_ldflags - - def _compile_mixins(self, build_dir): - sources = self._get_mixin_sources() - macros = self._get_mixin_defines() - include_dirs = self._toolchain.get_python_include_dirs() - - extra_cflags = self._get_extra_cflags() - # XXX distutils creates a whole subtree inside build_dir, - # e.g. /tmp/test_pycc/home/antoine/numba/numba/pycc/modulemixin.o - objects = self._toolchain.compile_objects(sources, build_dir, - include_dirs=include_dirs, - macros=macros, - extra_cflags=extra_cflags) - return objects - - def _compile_object_files(self, build_dir): - compiler = ModuleCompiler(self._export_entries, self._basename, - self._use_nrt, cpu_name=self._target_cpu) - compiler.external_init_function = self._init_function - temp_obj = os.path.join(build_dir, - os.path.splitext(self._output_file)[0] + '.o') - log.info("generating LLVM code for '%s' into %s", - self._basename, temp_obj) - compiler.write_native_object(temp_obj, wrap=True) - return [temp_obj], compiler.dll_exports - - def compile(self): - """ - Compile the extension module. - """ - self._toolchain.verbose = self.verbose - build_dir = tempfile.mkdtemp(prefix='pycc-build-%s-' % self._basename) - - # Compile object file - objects, dll_exports = self._compile_object_files(build_dir) - - # Compile mixins - objects += self._compile_mixins(build_dir) - - # Then create shared library - extra_ldflags = self._get_extra_ldflags() - output_dll = os.path.join(self._output_dir, self._output_file) - libraries = self._toolchain.get_python_libraries() - library_dirs = self._toolchain.get_python_library_dirs() - self._toolchain.link_shared(output_dll, objects, - libraries, library_dirs, - export_symbols=dll_exports, - extra_ldflags=extra_ldflags) - - shutil.rmtree(build_dir) - - def distutils_extension(self, **kwargs): - """ - Create a distutils extension object that can be used in your - setup.py. 
- """ - macros = kwargs.pop('macros', []) + self._get_mixin_defines() - depends = kwargs.pop('depends', []) + [self._source_path] - extra_compile_args = (kwargs.pop('extra_compile_args', []) - + self._get_extra_cflags()) - extra_link_args = (kwargs.pop('extra_link_args', []) - + self._get_extra_ldflags()) - include_dirs = (kwargs.pop('include_dirs', []) - + self._toolchain.get_python_include_dirs()) - libraries = (kwargs.pop('libraries', []) - + self._toolchain.get_python_libraries()) - library_dirs = (kwargs.pop('library_dirs', []) - + self._toolchain.get_python_library_dirs()) - - ext = _CCExtension(name=self._basename, - sources=self._get_mixin_sources(), - depends=depends, - define_macros=macros, - include_dirs=include_dirs, - libraries=libraries, - library_dirs=library_dirs, - extra_compile_args=extra_compile_args, - extra_link_args=extra_link_args, - **kwargs) - ext.monkey_patch_distutils() - ext._cc = self - return ext - - -class _CCExtension(Extension): - """ - A Numba-specific Extension subclass to LLVM-compile pure Python code - to an extension module. - """ - - _cc = None - _distutils_monkey_patched = False - - def _prepare_object_files(self, build_ext): - cc = self._cc - dir_util.mkpath(build_ext.build_temp) - objects, _ = cc._compile_object_files(build_ext.build_temp) - # Add generated object files for linking - self.extra_objects = objects - - @classmethod - def monkey_patch_distutils(cls): - """ - Monkey-patch distutils with our own build_ext class knowing - about pycc-compiled extensions modules. 
- """ - if cls._distutils_monkey_patched: - return - - _orig_build_ext = build_ext.build_ext - - class _CC_build_ext(_orig_build_ext): - - def build_extension(self, ext): - if isinstance(ext, _CCExtension): - ext._prepare_object_files(self) - - _orig_build_ext.build_extension(self, ext) - - build_ext.build_ext = _CC_build_ext - - cls._distutils_monkey_patched = True diff --git a/numba/numba/pycc/compiler.py b/numba/numba/pycc/compiler.py deleted file mode 100644 index 48eb7fca9..000000000 --- a/numba/numba/pycc/compiler.py +++ /dev/null @@ -1,523 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function, division, absolute_import - -import logging -import os -import sys - -from llvmlite import ir -import llvmlite.llvmpy.core as lc - -from numba import cgutils -from numba.utils import IS_PY3 -from . import llvm_types as lt -from numba.compiler import compile_extra, Flags -from numba.targets.registry import cpu_target -from numba.runtime import nrtdynmod - - -logger = logging.getLogger(__name__) - -__all__ = ['Compiler'] - -NULL = lc.Constant.null(lt._void_star) -ZERO = lc.Constant.int(lt._int32, 0) -ONE = lc.Constant.int(lt._int32, 1) -METH_VARARGS_AND_KEYWORDS = lc.Constant.int(lt._int32, 1|2) - - -def get_header(): - import numpy - import textwrap - - return textwrap.dedent("""\ - #include - - #ifndef HAVE_LONGDOUBLE - #define HAVE_LONGDOUBLE %d - #endif - - typedef struct { - float real; - float imag; - } complex64; - - typedef struct { - double real; - double imag; - } complex128; - - #if HAVE_LONGDOUBLE - typedef struct { - long double real; - long double imag; - } complex256; - #endif - - typedef float float32; - typedef double float64; - #if HAVE_LONGDOUBLE - typedef long double float128; - #endif - """ % hasattr(numpy, 'complex256')) - - -class ExportEntry(object): - """ - A simple record for exporting symbols. 
- """ - - def __init__(self, symbol, signature, function): - self.symbol = symbol - self.signature = signature - self.function = function - - def __repr__(self): - return "ExportEntry(%r, %r)" % (self.symbol, self.signature) - - -class _ModuleCompiler(object): - """A base class to compile Python modules to a single shared library or - extension module. - - :param export_entries: a list of ExportEntry instances. - :param module_name: the name of the exported module. - """ - - #: Structure used to describe a method of an extension type. - #: struct PyMethodDef { - #: const char *ml_name; /* The name of the built-in function/method */ - #: PyCFunction ml_meth; /* The C function that implements it */ - #: int ml_flags; /* Combination of METH_xxx flags, which mostly - #: describe the args expected by the C func */ - #: const char *ml_doc; /* The __doc__ attribute, or NULL */ - #: }; - method_def_ty = lc.Type.struct((lt._int8_star, - lt._void_star, - lt._int32, - lt._int8_star)) - - method_def_ptr = lc.Type.pointer(method_def_ty) - - env_def_ty = lc.Type.struct((lt._void_star, lt._int32)) - env_def_ptr = lc.Type.pointer(env_def_ty) - - def __init__(self, export_entries, module_name, use_nrt=False, - **aot_options): - self.module_name = module_name - self.export_python_wrap = False - self.dll_exports = [] - self.export_entries = export_entries - # Used by the CC API but not the legacy API - self.external_init_function = None - self.use_nrt = use_nrt - - self.typing_context = cpu_target.typing_context - self.context = cpu_target.target_context.with_aot_codegen( - self.module_name, **aot_options) - - def _mangle_method_symbol(self, func_name): - return "._pycc_method_%s" % (func_name,) - - def _emit_python_wrapper(self, llvm_module): - """Emit generated Python wrapper and extension module code. - """ - raise NotImplementedError - - def _cull_exports(self): - """Read all the exported functions/modules in the translator - environment, and join them into a single LLVM module. 
- """ - self.exported_function_types = {} - self.function_environments = {} - self.environment_gvs = {} - - codegen = self.context.codegen() - library = codegen.create_library(self.module_name) - - # Generate IR for all exported functions - flags = Flags() - flags.set("no_compile") - if not self.export_python_wrap: - flags.set("no_cpython_wrapper") - if self.use_nrt: - flags.set("nrt") - # Compile NRT helpers - nrt_module, _ = nrtdynmod.create_nrt_module(self.context) - library.add_ir_module(nrt_module) - - for entry in self.export_entries: - cres = compile_extra(self.typing_context, self.context, - entry.function, - entry.signature.args, - entry.signature.return_type, flags, - locals={}, library=library) - - func_name = cres.fndesc.llvm_func_name - llvm_func = cres.library.get_function(func_name) - - if self.export_python_wrap: - llvm_func.linkage = lc.LINKAGE_INTERNAL - wrappername = cres.fndesc.llvm_cpython_wrapper_name - wrapper = cres.library.get_function(wrappername) - wrapper.name = self._mangle_method_symbol(entry.symbol) - wrapper.linkage = lc.LINKAGE_EXTERNAL - fnty = cres.target_context.call_conv.get_function_type( - cres.fndesc.restype, cres.fndesc.argtypes) - self.exported_function_types[entry] = fnty - self.function_environments[entry] = cres.environment - self.environment_gvs[entry] = cres.fndesc.env_name - else: - llvm_func.name = entry.symbol - self.dll_exports.append(entry.symbol) - - if self.export_python_wrap: - wrapper_module = library.create_ir_module("wrapper") - self._emit_python_wrapper(wrapper_module) - library.add_ir_module(wrapper_module) - - # Hide all functions in the DLL except those explicitly exported - library.finalize() - for fn in library.get_defined_functions(): - if fn.name not in self.dll_exports: - fn.visibility = "hidden" - - return library - - def write_llvm_bitcode(self, output, wrap=False, **kws): - self.export_python_wrap = wrap - library = self._cull_exports() - with open(output, 'wb') as fout: - 
fout.write(library.emit_bitcode()) - - def write_native_object(self, output, wrap=False, **kws): - self.export_python_wrap = wrap - library = self._cull_exports() - with open(output, 'wb') as fout: - fout.write(library.emit_native_object()) - - def emit_type(self, tyobj): - ret_val = str(tyobj) - if 'int' in ret_val: - if ret_val.endswith(('8', '16', '32', '64')): - ret_val += "_t" - return ret_val - - def emit_header(self, output): - fname, ext = os.path.splitext(output) - with open(fname + '.h', 'w') as fout: - fout.write(get_header()) - fout.write("\n/* Prototypes */\n") - for export_entry in self.export_entries: - name = export_entry.symbol - restype = self.emit_type(export_entry.signature.return_type) - args = ", ".join(self.emit_type(argtype) - for argtype in export_entry.signature.args) - fout.write("extern %s %s(%s);\n" % (restype, name, args)) - - def _emit_method_array(self, llvm_module): - """ - Collect exported methods and emit a PyMethodDef array. - - :returns: a pointer to the PyMethodDef array. 
- """ - method_defs = [] - for entry in self.export_entries: - name = entry.symbol - llvm_func_name = self._mangle_method_symbol(name) - fnty = self.exported_function_types[entry] - lfunc = llvm_module.add_function(fnty, name=llvm_func_name) - - method_name = self.context.insert_const_string(llvm_module, name) - method_def_const = lc.Constant.struct((method_name, - lc.Constant.bitcast(lfunc, lt._void_star), - METH_VARARGS_AND_KEYWORDS, - NULL)) - method_defs.append(method_def_const) - - sentinel = lc.Constant.struct([NULL, NULL, ZERO, NULL]) - method_defs.append(sentinel) - method_array_init = lc.Constant.array(self.method_def_ty, method_defs) - method_array = llvm_module.add_global_variable(method_array_init.type, - '.module_methods') - method_array.initializer = method_array_init - method_array.linkage = lc.LINKAGE_INTERNAL - method_array_ptr = lc.Constant.gep(method_array, [ZERO, ZERO]) - return method_array_ptr - - def _emit_environment_array(self, llvm_module, builder, pyapi): - """ - Emit an array of env_def_t structures (see modulemixin.c) - storing the pickled environment constants for each of the - exported functions. - """ - env_defs = [] - for entry in self.export_entries: - env = self.function_environments[entry] - # Constants may be unhashable so avoid trying to cache them - env_def = pyapi.serialize_uncached(env.consts) - env_defs.append(env_def) - env_defs_init = lc.Constant.array(self.env_def_ty, env_defs) - gv = self.context.insert_unique_const(llvm_module, - '.module_environments', - env_defs_init) - return gv.gep([ZERO, ZERO]) - - def _emit_envgvs_array(self, llvm_module, builder, pyapi): - """ - Emit an array of Environment pointers that needs to be filled at - initialization. 
- """ - env_setters = [] - for entry in self.export_entries: - envgv_name = self.environment_gvs[entry] - gv = self.context.declare_env_global(llvm_module, envgv_name) - envgv = gv.bitcast(lt._void_star) - env_setters.append(envgv) - - env_setters_init = lc.Constant.array(lt._void_star, env_setters) - gv = self.context.insert_unique_const(llvm_module, - '.module_envgvs', - env_setters_init) - return gv.gep([ZERO, ZERO]) - - def _emit_module_init_code(self, llvm_module, builder, modobj, - method_array, env_array, envgv_array): - """ - Emit call to "external" init function, if any. - """ - if self.external_init_function: - fnty = ir.FunctionType(lt._int32, - [modobj.type, self.method_def_ptr, - self.env_def_ptr, envgv_array.type]) - fn = llvm_module.add_function(fnty, self.external_init_function) - return builder.call(fn, [modobj, method_array, env_array, - envgv_array]) - else: - return None - - -class ModuleCompilerPy2(_ModuleCompiler): - - @property - def module_create_definition(self): - """Return the signature and name of the function to initialize the module. - """ - signature = lc.Type.function(lt._pyobject_head_p, - (lt._int8_star, - self.method_def_ptr, - lt._int8_star, - lt._pyobject_head_p, - lt._int32)) - - name = "Py_InitModule4" - - if lt._trace_refs_: - name += "TraceRefs" - if lt._plat_bits == 64: - name += "_64" - - return signature, name - - @property - def module_init_definition(self): - """Return the signature and name of the function to initialize the extension. - """ - return lc.Type.function(lc.Type.void(), ()), "init" + self.module_name - - def _emit_python_wrapper(self, llvm_module): - - # Define the module initialization function. - mod_init_fn = llvm_module.add_function(*self.module_init_definition) - entry = mod_init_fn.append_basic_block('Entry') - builder = lc.Builder(entry) - pyapi = self.context.get_python_api(builder) - - # Python C API module creation function. 
- create_module_fn = llvm_module.add_function(*self.module_create_definition) - create_module_fn.linkage = lc.LINKAGE_EXTERNAL - - # Define a constant string for the module name. - mod_name_const = self.context.insert_const_string(llvm_module, - self.module_name) - - method_array = self._emit_method_array(llvm_module) - - mod = builder.call(create_module_fn, - (mod_name_const, - method_array, - NULL, - lc.Constant.null(lt._pyobject_head_p), - lc.Constant.int(lt._int32, sys.api_version))) - - env_array = self._emit_environment_array(llvm_module, builder, pyapi) - envgv_array = self._emit_envgvs_array(llvm_module, builder, pyapi) - - self._emit_module_init_code(llvm_module, builder, mod, - method_array, env_array, envgv_array) - # XXX No way to notify failure to caller... - - builder.ret_void() - - self.dll_exports.append(mod_init_fn.name) - - -class ModuleCompilerPy3(_ModuleCompiler): - - _ptr_fun = lambda ret, *args: lc.Type.pointer(lc.Type.function(ret, args)) - - #: typedef int (*visitproc)(PyObject *, void *); - visitproc_ty = _ptr_fun(lt._int8, - lt._pyobject_head_p) - - #: typedef int (*inquiry)(PyObject *); - inquiry_ty = _ptr_fun(lt._int8, - lt._pyobject_head_p) - - #: typedef int (*traverseproc)(PyObject *, visitproc, void *); - traverseproc_ty = _ptr_fun(lt._int8, - lt._pyobject_head_p, - visitproc_ty, - lt._void_star) - - # typedef void (*freefunc)(void *) - freefunc_ty = _ptr_fun(lt._int8, - lt._void_star) - - # PyObject* (*m_init)(void); - m_init_ty = _ptr_fun(lt._int8) - - _char_star = lt._int8_star - - #: typedef struct PyModuleDef_Base { - #: PyObject_HEAD - #: PyObject* (*m_init)(void); - #: Py_ssize_t m_index; - #: PyObject* m_copy; - #: } PyModuleDef_Base; - module_def_base_ty = lc.Type.struct((lt._pyobject_head, - m_init_ty, - lt._llvm_py_ssize_t, - lt._pyobject_head_p)) - - #: This struct holds all information that is needed to create a module object. 
- #: typedef struct PyModuleDef{ - #: PyModuleDef_Base m_base; - #: const char* m_name; - #: const char* m_doc; - #: Py_ssize_t m_size; - #: PyMethodDef *m_methods; - #: inquiry m_reload; - #: traverseproc m_traverse; - #: inquiry m_clear; - #: freefunc m_free; - #: }PyModuleDef; - module_def_ty = lc.Type.struct((module_def_base_ty, - _char_star, - _char_star, - lt._llvm_py_ssize_t, - _ModuleCompiler.method_def_ptr, - inquiry_ty, - traverseproc_ty, - inquiry_ty, - freefunc_ty)) - - @property - def module_create_definition(self): - """ - Return the signature and name of the Python C API function to - initialize the module. - """ - signature = lc.Type.function(lt._pyobject_head_p, - (lc.Type.pointer(self.module_def_ty), - lt._int32)) - - name = "PyModule_Create2" - if lt._trace_refs_: - name += "TraceRefs" - - return signature, name - - @property - def module_init_definition(self): - """ - Return the name and signature of the module's initialization function. - """ - signature = lc.Type.function(lt._pyobject_head_p, ()) - - return signature, "PyInit_" + self.module_name - - def _emit_python_wrapper(self, llvm_module): - # Figure out the Python C API module creation function, and - # get a LLVM function for it. - create_module_fn = llvm_module.add_function(*self.module_create_definition) - create_module_fn.linkage = lc.LINKAGE_EXTERNAL - - # Define a constant string for the module name. 
- mod_name_const = self.context.insert_const_string(llvm_module, - self.module_name) - - mod_def_base_init = lc.Constant.struct( - (lt._pyobject_head_init, # PyObject_HEAD - lc.Constant.null(self.m_init_ty), # m_init - lc.Constant.null(lt._llvm_py_ssize_t), # m_index - lc.Constant.null(lt._pyobject_head_p), # m_copy - ) - ) - mod_def_base = llvm_module.add_global_variable(mod_def_base_init.type, - '.module_def_base') - mod_def_base.initializer = mod_def_base_init - mod_def_base.linkage = lc.LINKAGE_INTERNAL - - method_array = self._emit_method_array(llvm_module) - - mod_def_init = lc.Constant.struct( - (mod_def_base_init, # m_base - mod_name_const, # m_name - lc.Constant.null(self._char_star), # m_doc - lc.Constant.int(lt._llvm_py_ssize_t, -1), # m_size - method_array, # m_methods - lc.Constant.null(self.inquiry_ty), # m_reload - lc.Constant.null(self.traverseproc_ty), # m_traverse - lc.Constant.null(self.inquiry_ty), # m_clear - lc.Constant.null(self.freefunc_ty) # m_free - ) - ) - - # Define a constant string for the module name. - mod_def = llvm_module.add_global_variable(mod_def_init.type, - '.module_def') - mod_def.initializer = mod_def_init - mod_def.linkage = lc.LINKAGE_INTERNAL - - # Define the module initialization function. - mod_init_fn = llvm_module.add_function(*self.module_init_definition) - entry = mod_init_fn.append_basic_block('Entry') - builder = lc.Builder(entry) - pyapi = self.context.get_python_api(builder) - - mod = builder.call(create_module_fn, - (mod_def, - lc.Constant.int(lt._int32, sys.api_version))) - - # Test if module has been created correctly. 
- # (XXX for some reason comparing with the NULL constant fails llvm - # with an assertion in pydebug mode) - with builder.if_then(cgutils.is_null(builder, mod)): - builder.ret(NULL.bitcast(mod_init_fn.type.pointee.return_type)) - - env_array = self._emit_environment_array(llvm_module, builder, pyapi) - envgv_array = self._emit_envgvs_array(llvm_module, builder, pyapi) - ret = self._emit_module_init_code(llvm_module, builder, mod, - method_array, env_array, envgv_array) - if ret is not None: - with builder.if_then(cgutils.is_not_null(builder, ret)): - # Init function errored out - builder.ret(lc.Constant.null(mod.type)) - - builder.ret(mod) - - self.dll_exports.append(mod_init_fn.name) - - -ModuleCompiler = ModuleCompilerPy3 if IS_PY3 else ModuleCompilerPy2 diff --git a/numba/numba/pycc/decorators.py b/numba/numba/pycc/decorators.py deleted file mode 100644 index b2e81f8a4..000000000 --- a/numba/numba/pycc/decorators.py +++ /dev/null @@ -1,75 +0,0 @@ -from __future__ import print_function, absolute_import - -import re -import warnings - -from numba import sigutils, typing -from .compiler import ExportEntry -from numba.six import exec_ - -# Registry is okay to be a global because we are using pycc as a standalone -# commandline tool. 
-export_registry = [] - - -def export(prototype): - warnings.warn("export() is deprecated, use the numba.pycc.CC API instead", - DeprecationWarning, stacklevel=2) - - sym, sig = parse_prototype(prototype) - - def wrappped(func): - fn_argtys, fn_retty = sigutils.normalize_signature(sig) - signature = typing.signature(fn_retty, *fn_argtys) - entry = ExportEntry(symbol=sym, signature=signature, function=func) - export_registry.append(entry) - - return wrappped - - -def exportmany(prototypes): - warnings.warn("exportmany() is deprecated, use the numba.pycc.CC API instead", - DeprecationWarning, stacklevel=2) - - def wrapped(func): - for proto in prototypes: - export(proto)(func) - return wrapped - - -def process_input_files(inputs): - """ - Read input source files for execution of legacy @export / @exportmany - decorators. - """ - for ifile in inputs: - with open(ifile) as fin: - exec_(compile(fin.read(), ifile, 'exec')) - - -def clear_export_registry(): - export_registry[:] = [] - - -# --------------------------------- Internal --------------------------------- - -re_symbol = re.compile(r'[_a-z][_a-z0-9]*', re.I) - - -def parse_prototype(text): - """Separate the symbol and function-type in a a string with - "symbol function-type" (e.g. 
"mult float(float, float)") - - Returns - --------- - (symbol_string, functype_string) - """ - m = re_symbol.match(text) - if not m: - raise ValueError("Invalid function name for export prototype") - s = m.start(0) - e = m.end(0) - symbol = text[s:e] - functype = text[e + 1:] - return symbol, functype - diff --git a/numba/numba/pycc/llvm_types.py b/numba/numba/pycc/llvm_types.py deleted file mode 100644 index 2257e82e8..000000000 --- a/numba/numba/pycc/llvm_types.py +++ /dev/null @@ -1,36 +0,0 @@ -import sys -import ctypes -import struct as struct_ -from llvmlite.llvmpy.core import Type, Constant - -_trace_refs_ = hasattr(sys, 'getobjects') -_plat_bits = struct_.calcsize('@P') * 8 - -_int8 = Type.int(8) -_int32 = Type.int(32) - -_void_star = Type.pointer(_int8) - -_int8_star = _void_star - -_sizeof_py_ssize_t = ctypes.sizeof(getattr(ctypes, 'c_size_t')) -_llvm_py_ssize_t = Type.int(_sizeof_py_ssize_t * 8) - -if _trace_refs_: - _pyobject_head = Type.struct([_void_star, _void_star, - _llvm_py_ssize_t, _void_star]) - _pyobject_head_init = Constant.struct([ - Constant.null(_void_star), # _ob_next - Constant.null(_void_star), # _ob_prev - Constant.int(_llvm_py_ssize_t, 1), # ob_refcnt - Constant.null(_void_star), # ob_type - ]) - -else: - _pyobject_head = Type.struct([_llvm_py_ssize_t, _void_star]) - _pyobject_head_init = Constant.struct([ - Constant.int(_llvm_py_ssize_t, 1), # ob_refcnt - Constant.null(_void_star), # ob_type - ]) - -_pyobject_head_p = Type.pointer(_pyobject_head) diff --git a/numba/numba/pycc/modulemixin.c b/numba/numba/pycc/modulemixin.c deleted file mode 100644 index 82fd40bad..000000000 --- a/numba/numba/pycc/modulemixin.c +++ /dev/null @@ -1,200 +0,0 @@ -/* - * This C file is compiled and linked into pycc-generated shared objects. - * It provides the Numba helper functions for runtime use in pycc-compiled - * functions. 
- */ - -#include "../_numba_common.h" -#include "../_pymodule.h" - -/* Define all runtime-required symbols in this C module, but do not - export them outside the shared library if possible. */ - -#define NUMBA_EXPORT_FUNC(_rettype) VISIBILITY_HIDDEN _rettype -#define NUMBA_EXPORT_DATA(_vartype) VISIBILITY_HIDDEN _vartype - -#include "../_helperlib.c" -#include "../_dynfunc.c" - -#if PYCC_USE_NRT -#include "../runtime/_nrt_python.c" -#include "../runtime/nrt.h" -#endif - - -/* NOTE: import_array() is macro, not a function. It returns NULL on - failure on py3, but nothing on py2. */ -#if PY_MAJOR_VERSION >= 3 - static void * - wrap_import_array(void) { - import_array(); - return (void *) 1; - } -#else - static void - wrap_import_array(void) { - import_array(); - } -#endif - - -static int -init_numpy(void) { - #if PY_MAJOR_VERSION >= 3 - return wrap_import_array() != NULL; - #else - wrap_import_array(); - return 1; /* always succeed */ - #endif -} - - -#ifndef PYCC_MODULE_NAME -#error PYCC_MODULE_NAME must be defined -#endif - -/* Preprocessor trick: need to use two levels of macros otherwise - PYCC_MODULE_NAME would not get expanded */ -#define __PYCC(prefix, modname) prefix ## modname -#define _PYCC(prefix, modname) __PYCC(prefix, modname) -#define PYCC(prefix) _PYCC(prefix, PYCC_MODULE_NAME) - -/* Silence warnings about unused functions */ -VISIBILITY_HIDDEN void **PYCC(_unused_) = { - (void *) Numba_make_generator, -}; - -/* The LLVM-generated functions for atomic refcounting */ -extern void *nrt_atomic_add, *nrt_atomic_sub; - -/* The structure type constructed by PythonAPI.serialize_uncached() */ -typedef struct { - const char *data; - int len; -} env_def_t; - -/* Environment GlobalVariable address type */ -typedef void **env_gv_t; - -/* - * Recreate an environment object from a env_def_t structure. 
- */ -static EnvironmentObject * -recreate_environment(PyObject *module, env_def_t env) -{ - EnvironmentObject *envobj; - PyObject *env_consts; - - env_consts = numba_unpickle(env.data, env.len); - if (env_consts == NULL) - return NULL; - if (!PyList_Check(env_consts)) { - PyErr_Format(PyExc_TypeError, - "environment constants should be a list, got '%s'", - Py_TYPE(env_consts)->tp_name); - Py_DECREF(env_consts); - return NULL; - } - - envobj = env_new_empty(&EnvironmentType); - if (envobj == NULL) { - Py_DECREF(env_consts); - return NULL; - } - envobj->consts = env_consts; - envobj->globals = PyModule_GetDict(module); - if (envobj->globals == NULL) { - Py_DECREF(envobj); - return NULL; - } - Py_INCREF(envobj->globals); - return envobj; -} - -/* - * Subroutine to initialize all resources required for running the - * pycc-compiled functions. - */ - -int -PYCC(pycc_init_) (PyObject *module, PyMethodDef *defs, - env_def_t *envs, - env_gv_t *envgvs) -{ - PyMethodDef *fdef; - PyObject *modname = NULL; - PyObject *docobj = NULL; - int i; - - if (!init_numpy()) { - goto error; - } - if (init_dynfunc_module(module)) { - goto error; - } - /* Initialize random generation. */ - numba_rnd_ensure_global_init(); - -#if PYCC_USE_NRT - NRT_MemSys_init(); - NRT_MemSys_set_atomic_inc_dec((NRT_atomic_inc_dec_func) &nrt_atomic_add, - (NRT_atomic_inc_dec_func) &nrt_atomic_sub); - if (init_nrt_python_module(module)) { - goto error; - } -#endif - - modname = PyObject_GetAttrString(module, "__name__"); - if (modname == NULL) { - goto error; - } - - /* Empty docstring for all compiled functions */ - docobj = PyString_FromString(""); - if (docobj == NULL) { - goto error; - } - - /* Overwrite C method objects with our own Closure objects, in order - * to make their environments available to the compiled functions. 
- */ - for (i = 0, fdef = defs; fdef->ml_name != NULL; i++, fdef++) { - PyObject *func; - PyObject *nameobj; - EnvironmentObject *envobj; - - envobj = recreate_environment(module, envs[i]); - if (envobj == NULL) { - goto error; - } - nameobj = PyString_FromString(fdef->ml_name); - if (nameobj == NULL) { - Py_DECREF(envobj); - goto error; - } - // Store the environment pointer into the global - *envgvs[i] = envobj; - - func = pycfunction_new(module, nameobj, docobj, - fdef->ml_meth, envobj, NULL); - Py_DECREF(envobj); - Py_DECREF(nameobj); - - if (func == NULL) { - goto error; - } - if (PyObject_SetAttrString(module, fdef->ml_name, func)) { - Py_DECREF(func); - goto error; - } - Py_DECREF(func); - } - Py_DECREF(docobj); - Py_DECREF(modname); - return 0; - -error: - Py_XDECREF(docobj); - Py_XDECREF(modname); - return -1; -} diff --git a/numba/numba/pycc/platform.py b/numba/numba/pycc/platform.py deleted file mode 100644 index 4698d6f77..000000000 --- a/numba/numba/pycc/platform.py +++ /dev/null @@ -1,258 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from distutils.ccompiler import CCompiler, new_compiler -from distutils.command.build_ext import build_ext -from distutils.sysconfig import customize_compiler -from distutils import log - -import numpy.distutils.misc_util as np_misc - -import functools -import os -import subprocess -import sys -from tempfile import NamedTemporaryFile, gettempdir - -_configs = { - # DLL suffix, Python C extension suffix - 'win': ('.dll', '.pyd'), - 'default': ('.so', '.so'), -} - - -def get_configs(arg): - return _configs.get(sys.platform[:3], _configs['default'])[arg] - - -find_shared_ending = functools.partial(get_configs, 0) -find_pyext_ending = functools.partial(get_configs, 1) - - -def _check_external_compiler(): - # see if the external compiler bound in numpy.distutil is present - # and working - compiler = new_compiler() - customize_compiler(compiler) - for suffix in ['.c', '.cxx']: - with 
NamedTemporaryFile('wt', suffix=suffix) as ntf: - simple_c = "int main(void) { return 0; }" - ntf.write(simple_c) - ntf.flush() - try: - # *output_dir* is set to avoid the compiler putting temp files - # in the current directory. - compiler.compile([ntf.name], output_dir=gettempdir()) - except Exception: # likely CompileError - return False - return True - - -# boolean on whether the externally provided compiler is present and -# functioning correctly -_external_compiler_ok = _check_external_compiler() - - -class _DummyExtension(object): - libraries = [] - - -class Toolchain(object): - - def __init__(self): - if not _external_compiler_ok: - self._raise_external_compiler_error() - - # Need to import it here since setuptools may monkeypatch it - from distutils.dist import Distribution - self._verbose = False - self._compiler = new_compiler() - customize_compiler(self._compiler) - self._build_ext = build_ext(Distribution()) - self._build_ext.finalize_options() - self._py_lib_dirs = self._build_ext.library_dirs - self._py_include_dirs = self._build_ext.include_dirs - self._math_info = np_misc.get_info('npymath') - - @property - def verbose(self): - return self._verbose - - @verbose.setter - def verbose(self, value): - self._verbose = value - # DEBUG will let Numpy spew many messages, so stick to INFO - # to print commands executed by distutils - log.set_threshold(log.INFO if value else log.WARN) - - def _raise_external_compiler_error(self): - basemsg = ("Attempted to compile AOT function without the " - "compiler used by `numpy.distutils` present.") - conda_msg = "If using conda try:\n\n#> conda install %s" - plt = sys.platform - if plt.startswith('linux'): - if sys.maxsize <= 2 ** 32: - compilers = ['gcc_linux-32', 'gxx_linux-32'] - else: - compilers = ['gcc_linux-64', 'gxx_linux-64'] - msg = "%s %s" % (basemsg, conda_msg % ' '.join(compilers)) - elif plt.startswith('darwin'): - compilers = ['clang_osx-64', 'clangxx_osx-64'] - msg = "%s %s" % (basemsg, conda_msg % ' 
'.join(compilers)) - elif plt.startswith('win32'): - winmsg = "Cannot find suitable msvc." - msg = "%s %s" % (basemsg, winmsg) - else: - msg = "Unknown platform %s" % plt - raise RuntimeError(msg) - - def compile_objects(self, sources, output_dir, - include_dirs=(), depends=(), macros=(), - extra_cflags=None): - """ - Compile the given source files into a separate object file each, - all beneath the *output_dir*. A list of paths to object files - is returned. - - *macros* has the same format as in distutils: a list of 1- or 2-tuples. - If a 1-tuple (name,), the given name is considered undefined by - the C preprocessor. - If a 2-tuple (name, value), the given name is expanded into the - given value by the C preprocessor. - """ - objects = self._compiler.compile(sources, - output_dir=output_dir, - include_dirs=include_dirs, - depends=depends, - macros=macros or [], - extra_preargs=extra_cflags) - return objects - - def link_shared(self, output, objects, libraries=(), - library_dirs=(), export_symbols=(), - extra_ldflags=None): - """ - Create a shared library *output* linking the given *objects* - and *libraries* (all strings). - """ - output_dir, output_filename = os.path.split(output) - self._compiler.link(CCompiler.SHARED_OBJECT, objects, - output_filename, output_dir, - libraries, library_dirs, - export_symbols=export_symbols, - extra_preargs=extra_ldflags) - - def get_python_libraries(self): - """ - Get the library arguments necessary to link with Python. - """ - libs = self._build_ext.get_libraries(_DummyExtension()) - if sys.platform == 'win32': - # Under Windows, need to link explicitly against the CRT, - # as the MSVC compiler would implicitly do. - # (XXX msvcrtd in pydebug mode?) - libs = libs + ['msvcrt'] - return libs + self._math_info['libraries'] - - def get_python_library_dirs(self): - """ - Get the library directories necessary to link with Python. 
- """ - return list(self._py_lib_dirs) + self._math_info['library_dirs'] - - def get_python_include_dirs(self): - """ - Get the include directories necessary to compile against the Python - and Numpy C APIs. - """ - return list(self._py_include_dirs) + self._math_info['include_dirs'] - - def get_ext_filename(self, ext_name): - """ - Given a C extension's module name, return its intended filename. - """ - return self._build_ext.get_ext_filename(ext_name) - - -# -# Patch Numpy's exec_command() to avoid random crashes on Windows in test_pycc -# see https://github.com/numpy/numpy/pull/7614 -# and https://github.com/numpy/numpy/pull/7862 -# - -def _patch_exec_command(): - # Patch the internal worker _exec_command() - import numpy.distutils.exec_command as mod - orig_exec_command = mod._exec_command - mod._exec_command = _exec_command - - -def _exec_command(command, use_shell=None, use_tee=None, **env): - """ - Internal workhorse for exec_command(). - Code from https://github.com/numpy/numpy/pull/7862 - """ - if use_shell is None: - use_shell = os.name == 'posix' - if use_tee is None: - use_tee = os.name == 'posix' - - executable = None - - if os.name == 'posix' and use_shell: - # On POSIX, subprocess always uses /bin/sh, override - sh = os.environ.get('SHELL', '/bin/sh') - if _is_sequence(command): - command = [sh, '-c', ' '.join(command)] - else: - command = [sh, '-c', command] - use_shell = False - - elif os.name == 'nt' and _is_sequence(command): - # On Windows, join the string for CreateProcess() ourselves as - # subprocess does it a bit differently - command = ' '.join(_quote_arg(arg) for arg in command) - - # Inherit environment by default - env = env or None - try: - proc = subprocess.Popen(command, shell=use_shell, env=env, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True) - except EnvironmentError: - # Return 127, as os.spawn*() and /bin/sh do - return '', 127 - text, err = proc.communicate() - # Only append stderr if the command 
failed, as otherwise - # the output may become garbled for parsing - if proc.returncode: - if text: - text += "\n" - text += err - # Another historical oddity - if text[-1:] == '\n': - text = text[:-1] - if use_tee: - print(text) - return proc.returncode, text - - -def _quote_arg(arg): - """ - Quote the argument for safe use in a shell command line. - """ - # If there is a quote in the string, assume relevants parts of the - # string are already quoted (e.g. '-I"C:\\Program Files\\..."') - if '"' not in arg and ' ' in arg: - return '"%s"' % arg - return arg - - -def _is_sequence(arg): - if isinstance(arg, (str, bytes)): - return False - try: - len(arg) - return True - except Exception: - return False diff --git a/numba/numba/pycc/pycc b/numba/numba/pycc/pycc deleted file mode 100644 index 272e4e2ca..000000000 --- a/numba/numba/pycc/pycc +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env python -from numba.pycc import main -main() diff --git a/numba/numba/pythonapi.py b/numba/numba/pythonapi.py deleted file mode 100644 index 6637f57d8..000000000 --- a/numba/numba/pythonapi.py +++ /dev/null @@ -1,1476 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from collections import namedtuple -import contextlib -import pickle - -from llvmlite import ir -from llvmlite.llvmpy.core import Type, Constant -import llvmlite.llvmpy.core as lc - -from numba.config import PYVERSION -import numba.ctypes_support as ctypes -from numba import config -from numba import types, utils, cgutils, lowering, _helperlib - - -class _Registry(object): - - def __init__(self): - self.functions = {} - - def register(self, typeclass): - assert issubclass(typeclass, types.Type) - def decorator(func): - if typeclass in self.functions: - raise KeyError("duplicate registration for %s" % (typeclass,)) - self.functions[typeclass] = func - return func - return decorator - - def lookup(self, typeclass, default=None): - assert issubclass(typeclass, types.Type) - for cls in 
typeclass.__mro__: - func = self.functions.get(cls) - if func is not None: - return func - return default - -# Registries of boxing / unboxing implementations -_boxers = _Registry() -_unboxers = _Registry() -_reflectors = _Registry() - -box = _boxers.register -unbox = _unboxers.register -reflect = _reflectors.register - -class _BoxContext(namedtuple("_BoxContext", - ("context", "builder", "pyapi", "env_manager"))): - """ - The facilities required by boxing implementations. - """ - __slots__ = () - - def box(self, typ, val): - return self.pyapi.from_native_value(typ, val, self.env_manager) - - -class _UnboxContext(namedtuple("_UnboxContext", - ("context", "builder", "pyapi"))): - """ - The facilities required by unboxing implementations. - """ - __slots__ = () - - def unbox(self, typ, obj): - return self.pyapi.to_native_value(typ, obj) - - -class _ReflectContext(namedtuple("_ReflectContext", - ("context", "builder", "pyapi", "env_manager", - "is_error"))): - """ - The facilities required by reflection implementations. - """ - __slots__ = () - - # XXX the error bit is currently unused by consumers (e.g. PyCallWrapper) - def set_error(self): - self.builder.store(self.is_error, cgutils.true_bit) - - def box(self, typ, val): - return self.pyapi.from_native_value(typ, val, self.env_manager) - - def reflect(self, typ, val): - return self.pyapi.reflect_native_value(typ, val, self.env_manager) - - -class NativeValue(object): - """ - Encapsulate the result of converting a Python object to a native value, - recording whether the conversion was successful and how to cleanup. 
- """ - - def __init__(self, value, is_error=None, cleanup=None): - self.value = value - self.is_error = is_error if is_error is not None else cgutils.false_bit - self.cleanup = cleanup - - -class EnvironmentManager(object): - - def __init__(self, pyapi, env, env_body, env_ptr): - assert isinstance(env, lowering.Environment) - self.pyapi = pyapi - self.env = env - self.env_body = env_body - self.env_ptr = env_ptr - - def add_const(self, const): - """ - Add a constant to the environment, return its index. - """ - # All constants are frozen inside the environment - if isinstance(const, str): - const = utils.intern(const) - for index, val in enumerate(self.env.consts): - if val is const: - break - else: - index = len(self.env.consts) - self.env.consts.append(const) - return index - - def read_const(self, index): - """ - Look up constant number *index* inside the environment body. - A borrowed reference is returned. - """ - assert index < len(self.env.consts) - return self.pyapi.list_getitem(self.env_body.consts, index) - - -_IteratorLoop = namedtuple('_IteratorLoop', ('value', 'do_break')) - - -class PythonAPI(object): - """ - Code generation facilities to call into the CPython C API (and related - helpers). 
- """ - - def __init__(self, context, builder): - """ - Note: Maybe called multiple times when lowering a function - """ - from numba.targets import boxing - self.context = context - self.builder = builder - - self.module = builder.basic_block.function.module - # A unique mapping of serialized objects in this module - try: - self.module.__serialized - except AttributeError: - self.module.__serialized = {} - - # Initialize types - self.pyobj = self.context.get_argument_type(types.pyobject) - self.pyobjptr = self.pyobj.as_pointer() - self.voidptr = Type.pointer(Type.int(8)) - self.long = Type.int(ctypes.sizeof(ctypes.c_long) * 8) - self.ulong = self.long - self.longlong = Type.int(ctypes.sizeof(ctypes.c_ulonglong) * 8) - self.ulonglong = self.longlong - self.double = Type.double() - self.py_ssize_t = self.context.get_value_type(types.intp) - self.cstring = Type.pointer(Type.int(8)) - self.gil_state = Type.int(_helperlib.py_gil_state_size * 8) - self.py_buffer_t = ir.ArrayType(ir.IntType(8), _helperlib.py_buffer_size) - if PYVERSION >= (3, 0): - self.py_hash_t = self.py_ssize_t - else: - self.py_hash_t = self.long - - def get_env_manager(self, env, env_body, env_ptr): - return EnvironmentManager(self, env, env_body, env_ptr) - - def emit_environment_sentry(self, envptr, return_pyobject=False): - """Emits LLVM code to ensure the `envptr` is not NULL - """ - is_null = cgutils.is_null(self.builder, envptr) - with cgutils.if_unlikely(self.builder, is_null): - if return_pyobject: - fnty = self.builder.function.type.pointee - assert fnty.return_type == self.pyobj - self.err_set_string("PyExc_RuntimeError", - "missing Environment") - self.builder.ret(self.get_null_object()) - else: - self.context.call_conv.return_user_exc(self.builder, - RuntimeError, - ("missing Environment",)) - - # ------ Python API ----- - - # - # Basic object API - # - - def incref(self, obj): - fnty = Type.function(Type.void(), [self.pyobj]) - fn = self._get_function(fnty, name="Py_IncRef") - 
self.builder.call(fn, [obj]) - - def decref(self, obj): - fnty = Type.function(Type.void(), [self.pyobj]) - fn = self._get_function(fnty, name="Py_DecRef") - self.builder.call(fn, [obj]) - - def get_type(self, obj): - fnty = Type.function(self.pyobj, [self.pyobj]) - fn = self._get_function(fnty, name="numba_py_type") - return self.builder.call(fn, [obj]) - - # - # Argument unpacking - # - - def parse_tuple_and_keywords(self, args, kws, fmt, keywords, *objs): - charptr = Type.pointer(Type.int(8)) - charptrary = Type.pointer(charptr) - argtypes = [self.pyobj, self.pyobj, charptr, charptrary] - fnty = Type.function(Type.int(), argtypes, var_arg=True) - fn = self._get_function(fnty, name="PyArg_ParseTupleAndKeywords") - return self.builder.call(fn, [args, kws, fmt, keywords] + list(objs)) - - def parse_tuple(self, args, fmt, *objs): - charptr = Type.pointer(Type.int(8)) - argtypes = [self.pyobj, charptr] - fnty = Type.function(Type.int(), argtypes, var_arg=True) - fn = self._get_function(fnty, name="PyArg_ParseTuple") - return self.builder.call(fn, [args, fmt] + list(objs)) - - def unpack_tuple(self, args, name, n_min, n_max, *objs): - charptr = Type.pointer(Type.int(8)) - argtypes = [self.pyobj, charptr, self.py_ssize_t, self.py_ssize_t] - fnty = Type.function(Type.int(), argtypes, var_arg=True) - fn = self._get_function(fnty, name="PyArg_UnpackTuple") - n_min = Constant.int(self.py_ssize_t, n_min) - n_max = Constant.int(self.py_ssize_t, n_max) - if isinstance(name, str): - name = self.context.insert_const_string(self.builder.module, name) - return self.builder.call(fn, [args, name, n_min, n_max] + list(objs)) - - # - # Exception and errors - # - - def err_occurred(self): - fnty = Type.function(self.pyobj, ()) - fn = self._get_function(fnty, name="PyErr_Occurred") - return self.builder.call(fn, ()) - - def err_clear(self): - fnty = Type.function(Type.void(), ()) - fn = self._get_function(fnty, name="PyErr_Clear") - return self.builder.call(fn, ()) - - def 
err_set_string(self, exctype, msg): - fnty = Type.function(Type.void(), [self.pyobj, self.cstring]) - fn = self._get_function(fnty, name="PyErr_SetString") - if isinstance(exctype, str): - exctype = self.get_c_object(exctype) - if isinstance(msg, str): - msg = self.context.insert_const_string(self.module, msg) - return self.builder.call(fn, (exctype, msg)) - - def err_format(self, exctype, msg, *format_args): - fnty = Type.function(Type.void(), [self.pyobj, self.cstring], var_arg=True) - fn = self._get_function(fnty, name="PyErr_Format") - if isinstance(exctype, str): - exctype = self.get_c_object(exctype) - if isinstance(msg, str): - msg = self.context.insert_const_string(self.module, msg) - return self.builder.call(fn, (exctype, msg) + tuple(format_args)) - - def raise_object(self, exc=None): - """ - Raise an arbitrary exception (type or value or (type, args) - or None - if reraising). A reference to the argument is consumed. - """ - fnty = Type.function(Type.void(), [self.pyobj]) - fn = self._get_function(fnty, name="numba_do_raise") - if exc is None: - exc = self.make_none() - return self.builder.call(fn, (exc,)) - - def err_set_object(self, exctype, excval): - fnty = Type.function(Type.void(), [self.pyobj, self.pyobj]) - fn = self._get_function(fnty, name="PyErr_SetObject") - if isinstance(exctype, str): - exctype = self.get_c_object(exctype) - return self.builder.call(fn, (exctype, excval)) - - def err_set_none(self, exctype): - fnty = Type.function(Type.void(), [self.pyobj]) - fn = self._get_function(fnty, name="PyErr_SetNone") - if isinstance(exctype, str): - exctype = self.get_c_object(exctype) - return self.builder.call(fn, (exctype,)) - - def err_write_unraisable(self, obj): - fnty = Type.function(Type.void(), [self.pyobj]) - fn = self._get_function(fnty, name="PyErr_WriteUnraisable") - return self.builder.call(fn, (obj,)) - - def err_fetch(self, pty, pval, ptb): - fnty = Type.function(Type.void(), [self.pyobjptr] * 3) - fn = self._get_function(fnty, 
name="PyErr_Fetch") - return self.builder.call(fn, (pty, pval, ptb)) - - def err_restore(self, ty, val, tb): - fnty = Type.function(Type.void(), [self.pyobj] * 3) - fn = self._get_function(fnty, name="PyErr_Restore") - return self.builder.call(fn, (ty, val, tb)) - - @contextlib.contextmanager - def err_push(self, keep_new=False): - """ - Temporarily push the current error indicator while the code - block is executed. If *keep_new* is True and the code block - raises a new error, the new error is kept, otherwise the old - error indicator is restored at the end of the block. - """ - pty, pval, ptb = [cgutils.alloca_once(self.builder, self.pyobj) - for i in range(3)] - self.err_fetch(pty, pval, ptb) - yield - ty = self.builder.load(pty) - val = self.builder.load(pval) - tb = self.builder.load(ptb) - if keep_new: - new_error = cgutils.is_not_null(self.builder, self.err_occurred()) - with self.builder.if_else(new_error, likely=False) as (if_error, if_ok): - with if_error: - # Code block raised an error, keep it - self.decref(ty) - self.decref(val) - self.decref(tb) - with if_ok: - # Restore previous error - self.err_restore(ty, val, tb) - else: - self.err_restore(ty, val, tb) - - def get_c_object(self, name): - """ - Get a Python object through its C-accessible *name* - (e.g. "PyExc_ValueError"). The underlying variable must be - a `PyObject *`, and the value of that pointer is returned. - """ - # A LLVM global variable is implicitly a pointer to the declared - # type, so fix up by using pyobj.pointee. 
- return self.context.get_c_value(self.builder, self.pyobj.pointee, name, - dllimport=True) - - def raise_missing_global_error(self, name): - msg = "global name '%s' is not defined" % name - cstr = self.context.insert_const_string(self.module, msg) - self.err_set_string("PyExc_NameError", cstr) - - def raise_missing_name_error(self, name): - msg = "name '%s' is not defined" % name - cstr = self.context.insert_const_string(self.module, msg) - self.err_set_string("PyExc_NameError", cstr) - - def fatal_error(self, msg): - fnty = Type.function(Type.void(), [self.cstring]) - fn = self._get_function(fnty, name="Py_FatalError") - fn.attributes.add("noreturn") - cstr = self.context.insert_const_string(self.module, msg) - self.builder.call(fn, (cstr,)) - - # - # Concrete dict API - # - - def dict_getitem_string(self, dic, name): - """Lookup name inside dict - - Returns a borrowed reference - """ - fnty = Type.function(self.pyobj, [self.pyobj, self.cstring]) - fn = self._get_function(fnty, name="PyDict_GetItemString") - cstr = self.context.insert_const_string(self.module, name) - return self.builder.call(fn, [dic, cstr]) - - def dict_getitem(self, dic, name): - """Lookup name inside dict - - Returns a borrowed reference - """ - fnty = Type.function(self.pyobj, [self.pyobj, self.pyobj]) - fn = self._get_function(fnty, name="PyDict_GetItem") - return self.builder.call(fn, [dic, name]) - - def dict_new(self, presize=0): - if presize == 0: - fnty = Type.function(self.pyobj, ()) - fn = self._get_function(fnty, name="PyDict_New") - return self.builder.call(fn, ()) - else: - fnty = Type.function(self.pyobj, [self.py_ssize_t]) - fn = self._get_function(fnty, name="_PyDict_NewPresized") - return self.builder.call(fn, - [Constant.int(self.py_ssize_t, presize)]) - - def dict_setitem(self, dictobj, nameobj, valobj): - fnty = Type.function(Type.int(), (self.pyobj, self.pyobj, - self.pyobj)) - fn = self._get_function(fnty, name="PyDict_SetItem") - return self.builder.call(fn, (dictobj, 
nameobj, valobj)) - - def dict_setitem_string(self, dictobj, name, valobj): - fnty = Type.function(Type.int(), (self.pyobj, self.cstring, - self.pyobj)) - fn = self._get_function(fnty, name="PyDict_SetItemString") - cstr = self.context.insert_const_string(self.module, name) - return self.builder.call(fn, (dictobj, cstr, valobj)) - - def dict_pack(self, keyvalues): - """ - Args - ----- - keyvalues: iterable of (str, llvm.Value of PyObject*) - """ - dictobj = self.dict_new() - with self.if_object_ok(dictobj): - for k, v in keyvalues: - self.dict_setitem_string(dictobj, k, v) - return dictobj - - # - # Concrete number APIs - # - - def float_from_double(self, fval): - fnty = Type.function(self.pyobj, [self.double]) - fn = self._get_function(fnty, name="PyFloat_FromDouble") - return self.builder.call(fn, [fval]) - - def number_as_ssize_t(self, numobj): - fnty = Type.function(self.py_ssize_t, [self.pyobj, self.pyobj]) - fn = self._get_function(fnty, name="PyNumber_AsSsize_t") - # We don't want any clipping, so pass OverflowError as the 2nd arg - exc_class = self.get_c_object("PyExc_OverflowError") - return self.builder.call(fn, [numobj, exc_class]) - - def number_long(self, numobj): - fnty = Type.function(self.pyobj, [self.pyobj]) - fn = self._get_function(fnty, name="PyNumber_Long") - return self.builder.call(fn, [numobj]) - - def long_as_ulonglong(self, numobj): - fnty = Type.function(self.ulonglong, [self.pyobj]) - fn = self._get_function(fnty, name="PyLong_AsUnsignedLongLong") - return self.builder.call(fn, [numobj]) - - def long_as_longlong(self, numobj): - fnty = Type.function(self.ulonglong, [self.pyobj]) - fn = self._get_function(fnty, name="PyLong_AsLongLong") - return self.builder.call(fn, [numobj]) - - def long_as_voidptr(self, numobj): - """ - Convert the given Python integer to a void*. This is recommended - over number_as_ssize_t as it isn't affected by signedness. 
- """ - fnty = Type.function(self.voidptr, [self.pyobj]) - fn = self._get_function(fnty, name="PyLong_AsVoidPtr") - return self.builder.call(fn, [numobj]) - - def _long_from_native_int(self, ival, func_name, native_int_type, - signed): - fnty = Type.function(self.pyobj, [native_int_type]) - fn = self._get_function(fnty, name=func_name) - resptr = cgutils.alloca_once(self.builder, self.pyobj) - - if PYVERSION < (3, 0): - # Under Python 2, we try to return a PyInt object whenever - # the given number fits in a C long. - pyint_fnty = Type.function(self.pyobj, [self.long]) - pyint_fn = self._get_function(pyint_fnty, name="PyInt_FromLong") - long_max = Constant.int(native_int_type, _helperlib.long_max) - if signed: - long_min = Constant.int(native_int_type, _helperlib.long_min) - use_pyint = self.builder.and_( - self.builder.icmp(lc.ICMP_SGE, ival, long_min), - self.builder.icmp(lc.ICMP_SLE, ival, long_max), - ) - else: - use_pyint = self.builder.icmp(lc.ICMP_ULE, ival, long_max) - - with self.builder.if_else(use_pyint) as (then, otherwise): - with then: - downcast_ival = self.builder.trunc(ival, self.long) - res = self.builder.call(pyint_fn, [downcast_ival]) - self.builder.store(res, resptr) - with otherwise: - res = self.builder.call(fn, [ival]) - self.builder.store(res, resptr) - else: - fn = self._get_function(fnty, name=func_name) - self.builder.store(self.builder.call(fn, [ival]), resptr) - - return self.builder.load(resptr) - - def long_from_long(self, ival): - if PYVERSION < (3, 0): - func_name = "PyInt_FromLong" - else: - func_name = "PyLong_FromLong" - fnty = Type.function(self.pyobj, [self.long]) - fn = self._get_function(fnty, name=func_name) - return self.builder.call(fn, [ival]) - - def long_from_ulong(self, ival): - return self._long_from_native_int(ival, "PyLong_FromUnsignedLong", - self.long, signed=False) - - def long_from_ssize_t(self, ival): - return self._long_from_native_int(ival, "PyLong_FromSsize_t", - self.py_ssize_t, signed=True) - - def 
long_from_longlong(self, ival): - return self._long_from_native_int(ival, "PyLong_FromLongLong", - self.longlong, signed=True) - - def long_from_ulonglong(self, ival): - return self._long_from_native_int(ival, "PyLong_FromUnsignedLongLong", - self.ulonglong, signed=False) - - def long_from_signed_int(self, ival): - """ - Return a Python integer from any native integer value. - """ - bits = ival.type.width - if bits <= self.long.width: - return self.long_from_long(self.builder.sext(ival, self.long)) - elif bits <= self.longlong.width: - return self.long_from_longlong(self.builder.sext(ival, self.longlong)) - else: - raise OverflowError("integer too big (%d bits)" % (bits)) - - def long_from_unsigned_int(self, ival): - """ - Same as long_from_signed_int, but for unsigned values. - """ - bits = ival.type.width - if bits <= self.ulong.width: - return self.long_from_ulong(self.builder.zext(ival, self.ulong)) - elif bits <= self.ulonglong.width: - return self.long_from_ulonglong(self.builder.zext(ival, self.ulonglong)) - else: - raise OverflowError("integer too big (%d bits)" % (bits)) - - def _get_number_operator(self, name): - fnty = Type.function(self.pyobj, [self.pyobj, self.pyobj]) - fn = self._get_function(fnty, name="PyNumber_%s" % name) - return fn - - def _call_number_operator(self, name, lhs, rhs, inplace=False): - if inplace: - name = "InPlace" + name - fn = self._get_number_operator(name) - return self.builder.call(fn, [lhs, rhs]) - - def number_add(self, lhs, rhs, inplace=False): - return self._call_number_operator("Add", lhs, rhs, inplace=inplace) - - def number_subtract(self, lhs, rhs, inplace=False): - return self._call_number_operator("Subtract", lhs, rhs, inplace=inplace) - - def number_multiply(self, lhs, rhs, inplace=False): - return self._call_number_operator("Multiply", lhs, rhs, inplace=inplace) - - def number_divide(self, lhs, rhs, inplace=False): - assert PYVERSION < (3, 0) - return self._call_number_operator("Divide", lhs, rhs, inplace=inplace) 
- - def number_truedivide(self, lhs, rhs, inplace=False): - return self._call_number_operator("TrueDivide", lhs, rhs, inplace=inplace) - - def number_floordivide(self, lhs, rhs, inplace=False): - return self._call_number_operator("FloorDivide", lhs, rhs, inplace=inplace) - - def number_remainder(self, lhs, rhs, inplace=False): - return self._call_number_operator("Remainder", lhs, rhs, inplace=inplace) - - def number_matrix_multiply(self, lhs, rhs, inplace=False): - assert PYVERSION >= (3, 5) - return self._call_number_operator("MatrixMultiply", lhs, rhs, inplace=inplace) - - def number_lshift(self, lhs, rhs, inplace=False): - return self._call_number_operator("Lshift", lhs, rhs, inplace=inplace) - - def number_rshift(self, lhs, rhs, inplace=False): - return self._call_number_operator("Rshift", lhs, rhs, inplace=inplace) - - def number_and(self, lhs, rhs, inplace=False): - return self._call_number_operator("And", lhs, rhs, inplace=inplace) - - def number_or(self, lhs, rhs, inplace=False): - return self._call_number_operator("Or", lhs, rhs, inplace=inplace) - - def number_xor(self, lhs, rhs, inplace=False): - return self._call_number_operator("Xor", lhs, rhs, inplace=inplace) - - def number_power(self, lhs, rhs, inplace=False): - fnty = Type.function(self.pyobj, [self.pyobj] * 3) - fname = "PyNumber_InPlacePower" if inplace else "PyNumber_Power" - fn = self._get_function(fnty, fname) - return self.builder.call(fn, [lhs, rhs, self.borrow_none()]) - - def number_negative(self, obj): - fnty = Type.function(self.pyobj, [self.pyobj]) - fn = self._get_function(fnty, name="PyNumber_Negative") - return self.builder.call(fn, (obj,)) - - def number_positive(self, obj): - fnty = Type.function(self.pyobj, [self.pyobj]) - fn = self._get_function(fnty, name="PyNumber_Positive") - return self.builder.call(fn, (obj,)) - - def number_float(self, val): - fnty = Type.function(self.pyobj, [self.pyobj]) - fn = self._get_function(fnty, name="PyNumber_Float") - return self.builder.call(fn, 
[val]) - - def number_invert(self, obj): - fnty = Type.function(self.pyobj, [self.pyobj]) - fn = self._get_function(fnty, name="PyNumber_Invert") - return self.builder.call(fn, (obj,)) - - def float_as_double(self, fobj): - fnty = Type.function(self.double, [self.pyobj]) - fn = self._get_function(fnty, name="PyFloat_AsDouble") - return self.builder.call(fn, [fobj]) - - def bool_from_bool(self, bval): - """ - Get a Python bool from a LLVM boolean. - """ - longval = self.builder.zext(bval, self.long) - return self.bool_from_long(longval) - - def bool_from_long(self, ival): - fnty = Type.function(self.pyobj, [self.long]) - fn = self._get_function(fnty, name="PyBool_FromLong") - return self.builder.call(fn, [ival]) - - def complex_from_doubles(self, realval, imagval): - fnty = Type.function(self.pyobj, [Type.double(), Type.double()]) - fn = self._get_function(fnty, name="PyComplex_FromDoubles") - return self.builder.call(fn, [realval, imagval]) - - def complex_real_as_double(self, cobj): - fnty = Type.function(Type.double(), [self.pyobj]) - fn = self._get_function(fnty, name="PyComplex_RealAsDouble") - return self.builder.call(fn, [cobj]) - - def complex_imag_as_double(self, cobj): - fnty = Type.function(Type.double(), [self.pyobj]) - fn = self._get_function(fnty, name="PyComplex_ImagAsDouble") - return self.builder.call(fn, [cobj]) - - # - # Concrete slice API - # - - def slice_as_ints(self, obj): - """ - Read the members of a slice of integers. - - Returns a (ok, start, stop, step) tuple where ok is a boolean and - the following members are pointer-sized ints. 
- """ - pstart = cgutils.alloca_once(self.builder, self.py_ssize_t) - pstop = cgutils.alloca_once(self.builder, self.py_ssize_t) - pstep = cgutils.alloca_once(self.builder, self.py_ssize_t) - fnty = Type.function(Type.int(), - [self.pyobj] + [self.py_ssize_t.as_pointer()] * 3) - fn = self._get_function(fnty, name="numba_unpack_slice") - res = self.builder.call(fn, (obj, pstart, pstop, pstep)) - start = self.builder.load(pstart) - stop = self.builder.load(pstop) - step = self.builder.load(pstep) - return cgutils.is_null(self.builder, res), start, stop, step - - # - # List and sequence APIs - # - - def sequence_getslice(self, obj, start, stop): - fnty = Type.function(self.pyobj, [self.pyobj, self.py_ssize_t, - self.py_ssize_t]) - fn = self._get_function(fnty, name="PySequence_GetSlice") - return self.builder.call(fn, (obj, start, stop)) - - def sequence_tuple(self, obj): - fnty = Type.function(self.pyobj, [self.pyobj]) - fn = self._get_function(fnty, name="PySequence_Tuple") - return self.builder.call(fn, [obj]) - - def list_new(self, szval): - fnty = Type.function(self.pyobj, [self.py_ssize_t]) - fn = self._get_function(fnty, name="PyList_New") - return self.builder.call(fn, [szval]) - - def list_size(self, lst): - fnty = Type.function(self.py_ssize_t, [self.pyobj]) - fn = self._get_function(fnty, name="PyList_Size") - return self.builder.call(fn, [lst]) - - def list_append(self, lst, val): - fnty = Type.function(Type.int(), [self.pyobj, self.pyobj]) - fn = self._get_function(fnty, name="PyList_Append") - return self.builder.call(fn, [lst, val]) - - def list_setitem(self, lst, idx, val): - """ - Warning: Steals reference to ``val`` - """ - fnty = Type.function(Type.int(), [self.pyobj, self.py_ssize_t, - self.pyobj]) - fn = self._get_function(fnty, name="PyList_SetItem") - return self.builder.call(fn, [lst, idx, val]) - - def list_getitem(self, lst, idx): - """ - Returns a borrowed reference. 
- """ - fnty = Type.function(self.pyobj, [self.pyobj, self.py_ssize_t]) - fn = self._get_function(fnty, name="PyList_GetItem") - if isinstance(idx, int): - idx = self.context.get_constant(types.intp, idx) - return self.builder.call(fn, [lst, idx]) - - def list_setslice(self, lst, start, stop, obj): - if obj is None: - obj = self.get_null_object() - fnty = Type.function(Type.int(), [self.pyobj, self.py_ssize_t, - self.py_ssize_t, self.pyobj]) - fn = self._get_function(fnty, name="PyList_SetSlice") - return self.builder.call(fn, (lst, start, stop, obj)) - - - # - # Concrete tuple API - # - - def tuple_getitem(self, tup, idx): - """ - Borrow reference - """ - fnty = Type.function(self.pyobj, [self.pyobj, self.py_ssize_t]) - fn = self._get_function(fnty, name="PyTuple_GetItem") - idx = self.context.get_constant(types.intp, idx) - return self.builder.call(fn, [tup, idx]) - - def tuple_pack(self, items): - fnty = Type.function(self.pyobj, [self.py_ssize_t], var_arg=True) - fn = self._get_function(fnty, name="PyTuple_Pack") - n = self.context.get_constant(types.intp, len(items)) - args = [n] - args.extend(items) - return self.builder.call(fn, args) - - def tuple_size(self, tup): - fnty = Type.function(self.py_ssize_t, [self.pyobj]) - fn = self._get_function(fnty, name="PyTuple_Size") - return self.builder.call(fn, [tup]) - - def tuple_new(self, count): - fnty = Type.function(self.pyobj, [Type.int()]) - fn = self._get_function(fnty, name='PyTuple_New') - return self.builder.call(fn, [self.context.get_constant(types.int32, - count)]) - - def tuple_setitem(self, tuple_val, index, item): - """ - Steals a reference to `item`. 
- """ - fnty = Type.function(Type.int(), [self.pyobj, Type.int(), self.pyobj]) - setitem_fn = self._get_function(fnty, name='PyTuple_SetItem') - index = self.context.get_constant(types.int32, index) - self.builder.call(setitem_fn, [tuple_val, index, item]) - - # - # Concrete set API - # - - def set_new(self, iterable=None): - if iterable is None: - iterable = self.get_null_object() - fnty = Type.function(self.pyobj, [self.pyobj]) - fn = self._get_function(fnty, name="PySet_New") - return self.builder.call(fn, [iterable]) - - def set_add(self, set, value): - fnty = Type.function(Type.int(), [self.pyobj, self.pyobj]) - fn = self._get_function(fnty, name="PySet_Add") - return self.builder.call(fn, [set, value]) - - def set_clear(self, set): - fnty = Type.function(Type.int(), [self.pyobj]) - fn = self._get_function(fnty, name="PySet_Clear") - return self.builder.call(fn, [set]) - - def set_size(self, set): - fnty = Type.function(self.py_ssize_t, [self.pyobj]) - fn = self._get_function(fnty, name="PySet_Size") - return self.builder.call(fn, [set]) - - def set_update(self, set, iterable): - fnty = Type.function(Type.int(), [self.pyobj, self.pyobj]) - fn = self._get_function(fnty, name="_PySet_Update") - return self.builder.call(fn, [set, iterable]) - - def set_next_entry(self, set, posptr, keyptr, hashptr): - fnty = Type.function(Type.int(), - [self.pyobj, self.py_ssize_t.as_pointer(), - self.pyobj.as_pointer(), self.py_hash_t.as_pointer()]) - fn = self._get_function(fnty, name="_PySet_NextEntry") - return self.builder.call(fn, (set, posptr, keyptr, hashptr)) - - @contextlib.contextmanager - def set_iterate(self, set): - builder = self.builder - - hashptr = cgutils.alloca_once(builder, self.py_hash_t, name="hashptr") - keyptr = cgutils.alloca_once(builder, self.pyobj, name="keyptr") - posptr = cgutils.alloca_once_value(builder, - ir.Constant(self.py_ssize_t, 0), - name="posptr") - - bb_body = builder.append_basic_block("bb_body") - bb_end = 
builder.append_basic_block("bb_end") - - builder.branch(bb_body) - def do_break(): - builder.branch(bb_end) - - with builder.goto_block(bb_body): - r = self.set_next_entry(set, posptr, keyptr, hashptr) - finished = cgutils.is_null(builder, r) - with builder.if_then(finished, likely=False): - builder.branch(bb_end) - yield _IteratorLoop(builder.load(keyptr), do_break) - builder.branch(bb_body) - - builder.position_at_end(bb_end) - - # - # GIL APIs - # - - def gil_ensure(self): - """ - Ensure the GIL is acquired. - The returned value must be consumed by gil_release(). - """ - gilptrty = Type.pointer(self.gil_state) - fnty = Type.function(Type.void(), [gilptrty]) - fn = self._get_function(fnty, "numba_gil_ensure") - gilptr = cgutils.alloca_once(self.builder, self.gil_state) - self.builder.call(fn, [gilptr]) - return gilptr - - def gil_release(self, gil): - """ - Release the acquired GIL by gil_ensure(). - Must be paired with a gil_ensure(). - """ - gilptrty = Type.pointer(self.gil_state) - fnty = Type.function(Type.void(), [gilptrty]) - fn = self._get_function(fnty, "numba_gil_release") - return self.builder.call(fn, [gil]) - - def save_thread(self): - """ - Release the GIL and return the former thread state - (an opaque non-NULL pointer). - """ - fnty = Type.function(self.voidptr, []) - fn = self._get_function(fnty, name="PyEval_SaveThread") - return self.builder.call(fn, []) - - def restore_thread(self, thread_state): - """ - Restore the given thread state by reacquiring the GIL. - """ - fnty = Type.function(Type.void(), [self.voidptr]) - fn = self._get_function(fnty, name="PyEval_RestoreThread") - self.builder.call(fn, [thread_state]) - - # - # Generic object private data (a way of associating an arbitrary void * - # pointer to an arbitrary Python object). 
- # - - def object_get_private_data(self, obj): - fnty = Type.function(self.voidptr, [self.pyobj]) - fn = self._get_function(fnty, name="numba_get_pyobject_private_data") - return self.builder.call(fn, (obj,)) - - def object_set_private_data(self, obj, ptr): - fnty = Type.function(Type.void(), [self.pyobj, self.voidptr]) - fn = self._get_function(fnty, name="numba_set_pyobject_private_data") - return self.builder.call(fn, (obj, ptr)) - - def object_reset_private_data(self, obj): - fnty = Type.function(Type.void(), [self.pyobj]) - fn = self._get_function(fnty, name="numba_reset_pyobject_private_data") - return self.builder.call(fn, (obj,)) - - - # - # Other APIs (organize them better!) - # - - def import_module_noblock(self, modname): - fnty = Type.function(self.pyobj, [self.cstring]) - fn = self._get_function(fnty, name="PyImport_ImportModuleNoBlock") - return self.builder.call(fn, [modname]) - - def call_function_objargs(self, callee, objargs): - fnty = Type.function(self.pyobj, [self.pyobj], var_arg=True) - fn = self._get_function(fnty, name="PyObject_CallFunctionObjArgs") - args = [callee] + list(objargs) - args.append(self.context.get_constant_null(types.pyobject)) - return self.builder.call(fn, args) - - def call_method(self, callee, method, objargs=()): - cname = self.context.insert_const_string(self.module, method) - fnty = Type.function(self.pyobj, [self.pyobj, self.cstring, self.cstring], - var_arg=True) - fn = self._get_function(fnty, name="PyObject_CallMethod") - fmt = 'O' * len(objargs) - cfmt = self.context.insert_const_string(self.module, fmt) - args = [callee, cname, cfmt] - if objargs: - args.extend(objargs) - args.append(self.context.get_constant_null(types.pyobject)) - return self.builder.call(fn, args) - - def call(self, callee, args=None, kws=None): - if args is None: - args = self.get_null_object() - if kws is None: - kws = self.get_null_object() - fnty = Type.function(self.pyobj, [self.pyobj] * 3) - fn = self._get_function(fnty, 
name="PyObject_Call") - return self.builder.call(fn, (callee, args, kws)) - - def object_istrue(self, obj): - fnty = Type.function(Type.int(), [self.pyobj]) - fn = self._get_function(fnty, name="PyObject_IsTrue") - return self.builder.call(fn, [obj]) - - def object_not(self, obj): - fnty = Type.function(Type.int(), [self.pyobj]) - fn = self._get_function(fnty, name="PyObject_Not") - return self.builder.call(fn, [obj]) - - def object_richcompare(self, lhs, rhs, opstr): - """ - Refer to Python source Include/object.h for macros definition - of the opid. - """ - ops = ['<', '<=', '==', '!=', '>', '>='] - if opstr in ops: - opid = ops.index(opstr) - fnty = Type.function(self.pyobj, [self.pyobj, self.pyobj, Type.int()]) - fn = self._get_function(fnty, name="PyObject_RichCompare") - lopid = self.context.get_constant(types.int32, opid) - return self.builder.call(fn, (lhs, rhs, lopid)) - elif opstr == 'is': - bitflag = self.builder.icmp(lc.ICMP_EQ, lhs, rhs) - return self.from_native_value(types.boolean, bitflag) - elif opstr == 'is not': - bitflag = self.builder.icmp(lc.ICMP_NE, lhs, rhs) - return self.from_native_value(types.boolean, bitflag) - elif opstr in ('in', 'not in'): - fnty = Type.function(Type.int(), [self.pyobj, self.pyobj]) - fn = self._get_function(fnty, name="PySequence_Contains") - status = self.builder.call(fn, (rhs, lhs)) - negone = self.context.get_constant(types.int32, -1) - is_good = self.builder.icmp(lc.ICMP_NE, status, negone) - # Stack allocate output and initialize to Null - outptr = cgutils.alloca_once_value(self.builder, - Constant.null(self.pyobj)) - # If PySequence_Contains returns non-error value - with cgutils.if_likely(self.builder, is_good): - if opstr == 'not in': - status = self.builder.not_(status) - # Store the status as a boolean object - truncated = self.builder.trunc(status, Type.int(1)) - self.builder.store(self.bool_from_bool(truncated), - outptr) - - return self.builder.load(outptr) - else: - raise NotImplementedError("Unknown 
operator {op!r}".format( - op=opstr)) - - def iter_next(self, iterobj): - fnty = Type.function(self.pyobj, [self.pyobj]) - fn = self._get_function(fnty, name="PyIter_Next") - return self.builder.call(fn, [iterobj]) - - def object_getiter(self, obj): - fnty = Type.function(self.pyobj, [self.pyobj]) - fn = self._get_function(fnty, name="PyObject_GetIter") - return self.builder.call(fn, [obj]) - - def object_getattr_string(self, obj, attr): - cstr = self.context.insert_const_string(self.module, attr) - fnty = Type.function(self.pyobj, [self.pyobj, self.cstring]) - fn = self._get_function(fnty, name="PyObject_GetAttrString") - return self.builder.call(fn, [obj, cstr]) - - def object_getattr(self, obj, attr): - fnty = Type.function(self.pyobj, [self.pyobj, self.pyobj]) - fn = self._get_function(fnty, name="PyObject_GetAttr") - return self.builder.call(fn, [obj, attr]) - - def object_setattr_string(self, obj, attr, val): - cstr = self.context.insert_const_string(self.module, attr) - fnty = Type.function(Type.int(), [self.pyobj, self.cstring, self.pyobj]) - fn = self._get_function(fnty, name="PyObject_SetAttrString") - return self.builder.call(fn, [obj, cstr, val]) - - def object_setattr(self, obj, attr, val): - fnty = Type.function(Type.int(), [self.pyobj, self.pyobj, self.pyobj]) - fn = self._get_function(fnty, name="PyObject_SetAttr") - return self.builder.call(fn, [obj, attr, val]) - - def object_delattr_string(self, obj, attr): - # PyObject_DelAttrString() is actually a C macro calling - # PyObject_SetAttrString() with value == NULL. - return self.object_setattr_string(obj, attr, self.get_null_object()) - - def object_delattr(self, obj, attr): - # PyObject_DelAttr() is actually a C macro calling - # PyObject_SetAttr() with value == NULL. 
- return self.object_setattr(obj, attr, self.get_null_object()) - - def object_getitem(self, obj, key): - """ - Return obj[key] - """ - fnty = Type.function(self.pyobj, [self.pyobj, self.pyobj]) - fn = self._get_function(fnty, name="PyObject_GetItem") - return self.builder.call(fn, (obj, key)) - - def object_setitem(self, obj, key, val): - """ - obj[key] = val - """ - fnty = Type.function(Type.int(), [self.pyobj, self.pyobj, self.pyobj]) - fn = self._get_function(fnty, name="PyObject_SetItem") - return self.builder.call(fn, (obj, key, val)) - - def object_delitem(self, obj, key): - """ - del obj[key] - """ - fnty = Type.function(Type.int(), [self.pyobj, self.pyobj]) - fn = self._get_function(fnty, name="PyObject_DelItem") - return self.builder.call(fn, (obj, key)) - - def string_as_string(self, strobj): - fnty = Type.function(self.cstring, [self.pyobj]) - if PYVERSION >= (3, 0): - fname = "PyUnicode_AsUTF8" - else: - fname = "PyString_AsString" - fn = self._get_function(fnty, name=fname) - return self.builder.call(fn, [strobj]) - - def string_as_string_and_size(self, strobj): - """ - Returns a tuple of ``(ok, buffer, length)``. - The ``ok`` is i1 value that is set if ok. - The ``buffer`` is a i8* of the output buffer. - The ``length`` is a i32/i64 (py_ssize_t) of the length of the buffer. 
- """ - - p_length = cgutils.alloca_once(self.builder, self.py_ssize_t) - if PYVERSION >= (3, 0): - fnty = Type.function(self.cstring, [self.pyobj, - self.py_ssize_t.as_pointer()]) - fname = "PyUnicode_AsUTF8AndSize" - fn = self._get_function(fnty, name=fname) - - buffer = self.builder.call(fn, [strobj, p_length]) - ok = self.builder.icmp_unsigned('!=', - ir.Constant(buffer.type, None), - buffer) - else: - fnty = Type.function(lc.Type.int(), [self.pyobj, - self.cstring.as_pointer(), - self.py_ssize_t.as_pointer()]) - fname = "PyString_AsStringAndSize" - fn = self._get_function(fnty, name=fname) - # Allocate space for the output parameters - p_buffer = cgutils.alloca_once(self.builder, self.cstring) - - status = self.builder.call(fn, [strobj, p_buffer, p_length]) - - negone = ir.Constant(status.type, -1) - ok = self.builder.icmp_signed("!=", status, negone) - buffer = self.builder.load(p_buffer) - - return (ok, buffer, self.builder.load(p_length)) - - def string_from_string_and_size(self, string, size): - fnty = Type.function(self.pyobj, [self.cstring, self.py_ssize_t]) - if PYVERSION >= (3, 0): - fname = "PyUnicode_FromStringAndSize" - else: - fname = "PyString_FromStringAndSize" - fn = self._get_function(fnty, name=fname) - return self.builder.call(fn, [string, size]) - - def string_from_string(self, string): - fnty = Type.function(self.pyobj, [self.cstring]) - if PYVERSION >= (3, 0): - fname = "PyUnicode_FromString" - else: - fname = "PyString_FromString" - fn = self._get_function(fnty, name=fname) - return self.builder.call(fn, [string]) - - def bytes_from_string_and_size(self, string, size): - fnty = Type.function(self.pyobj, [self.cstring, self.py_ssize_t]) - if PYVERSION >= (3, 0): - fname = "PyBytes_FromStringAndSize" - else: - fname = "PyString_FromStringAndSize" - fn = self._get_function(fnty, name=fname) - return self.builder.call(fn, [string, size]) - - def object_str(self, obj): - fnty = Type.function(self.pyobj, [self.pyobj]) - fn = 
self._get_function(fnty, name="PyObject_Str") - return self.builder.call(fn, [obj]) - - def make_none(self): - obj = self.borrow_none() - self.incref(obj) - return obj - - def borrow_none(self): - return self.get_c_object("_Py_NoneStruct") - - def sys_write_stdout(self, fmt, *args): - fnty = Type.function(Type.void(), [self.cstring], var_arg=True) - fn = self._get_function(fnty, name="PySys_WriteStdout") - return self.builder.call(fn, (fmt,) + args) - - def object_dump(self, obj): - """ - Dump a Python object on C stderr. For debugging purposes. - """ - fnty = Type.function(Type.void(), [self.pyobj]) - fn = self._get_function(fnty, name="_PyObject_Dump") - return self.builder.call(fn, (obj,)) - - # - # NRT (Numba runtime) APIs - # - - def nrt_adapt_ndarray_to_python(self, aryty, ary, dtypeptr): - assert self.context.enable_nrt, "NRT required" - - intty = ir.IntType(32) - fnty = Type.function(self.pyobj, - [self.voidptr, intty, intty, self.pyobj]) - fn = self._get_function(fnty, name="NRT_adapt_ndarray_to_python") - fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE) - - ndim = self.context.get_constant(types.int32, aryty.ndim) - writable = self.context.get_constant(types.int32, int(aryty.mutable)) - - aryptr = cgutils.alloca_once_value(self.builder, ary) - return self.builder.call(fn, [self.builder.bitcast(aryptr, - self.voidptr), - ndim, writable, dtypeptr]) - - def nrt_adapt_ndarray_from_python(self, ary, ptr): - assert self.context.enable_nrt - fnty = Type.function(Type.int(), [self.pyobj, self.voidptr]) - fn = self._get_function(fnty, name="NRT_adapt_ndarray_from_python") - fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE) - fn.args[1].add_attribute(lc.ATTR_NO_CAPTURE) - return self.builder.call(fn, (ary, ptr)) - - def nrt_adapt_buffer_from_python(self, buf, ptr): - assert self.context.enable_nrt - fnty = Type.function(Type.void(), [Type.pointer(self.py_buffer_t), - self.voidptr]) - fn = self._get_function(fnty, name="NRT_adapt_buffer_from_python") - 
fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE) - fn.args[1].add_attribute(lc.ATTR_NO_CAPTURE) - return self.builder.call(fn, (buf, ptr)) - - # ------ utils ----- - - def _get_function(self, fnty, name): - return self.module.get_or_insert_function(fnty, name=name) - - def alloca_obj(self): - return self.builder.alloca(self.pyobj) - - def alloca_buffer(self): - """ - Return a pointer to a stack-allocated, zero-initialized Py_buffer. - """ - # Treat the buffer as an opaque array of bytes - ptr = cgutils.alloca_once_value(self.builder, - lc.Constant.null(self.py_buffer_t)) - return ptr - - @contextlib.contextmanager - def if_object_ok(self, obj): - with cgutils.if_likely(self.builder, - cgutils.is_not_null(self.builder, obj)): - yield - - def print_object(self, obj): - strobj = self.object_str(obj) - cstr = self.string_as_string(strobj) - fmt = self.context.insert_const_string(self.module, "%s") - self.sys_write_stdout(fmt, cstr) - self.decref(strobj) - - def print_string(self, text): - fmt = self.context.insert_const_string(self.module, text) - self.sys_write_stdout(fmt) - - def get_null_object(self): - return Constant.null(self.pyobj) - - def return_none(self): - none = self.make_none() - self.builder.ret(none) - - def list_pack(self, items): - n = len(items) - seq = self.list_new(self.context.get_constant(types.intp, n)) - with self.if_object_ok(seq): - for i in range(n): - idx = self.context.get_constant(types.intp, i) - self.incref(items[i]) - self.list_setitem(seq, idx, items[i]) - return seq - - def unserialize(self, structptr): - """ - Unserialize some data. *structptr* should be a pointer to - a {i8* data, i32 length} structure. 
- """ - fnty = Type.function(self.pyobj, (self.voidptr, ir.IntType(32))) - fn = self._get_function(fnty, name="numba_unpickle") - ptr = self.builder.extract_value(self.builder.load(structptr), 0) - n = self.builder.extract_value(self.builder.load(structptr), 1) - return self.builder.call(fn, (ptr, n)) - - def serialize_uncached(self, obj): - """ - Same as serialize_object(), but don't create a global variable, - simply return a literal {i8* data, i32 length} structure. - """ - # First make the array constant - data = pickle.dumps(obj, protocol=-1) - assert len(data) < 2**31 - name = ".const.pickledata.%s" % (id(obj) if config.DIFF_IR == 0 else "DIFF_IR") - bdata = cgutils.make_bytearray(data) - arr = self.context.insert_unique_const(self.module, name, bdata) - # Then populate the structure constant - struct = ir.Constant.literal_struct([ - arr.bitcast(self.voidptr), - ir.Constant(ir.IntType(32), arr.type.pointee.count), - ]) - return struct - - def serialize_object(self, obj): - """ - Serialize the given object in the bitcode, and return it - as a pointer to a {i8* data, i32 length}, structure constant - (suitable for passing to unserialize()). - """ - try: - gv = self.module.__serialized[obj] - except KeyError: - struct = self.serialize_uncached(obj) - name = ".const.picklebuf.%s" % (id(obj) if config.DIFF_IR == 0 else "DIFF_IR") - gv = self.context.insert_unique_const(self.module, name, struct) - # Make the id() (and hence the name) unique while populating the module. - self.module.__serialized[obj] = gv - return gv - - def c_api_error(self): - return cgutils.is_not_null(self.builder, self.err_occurred()) - - def to_native_value(self, typ, obj): - """ - Unbox the Python object as the given Numba type. - A NativeValue instance is returned. 
- """ - from numba.targets.boxing import unbox_unsupported - - impl = _unboxers.lookup(typ.__class__, unbox_unsupported) - c = _UnboxContext(self.context, self.builder, self) - return impl(typ, obj, c) - - def from_native_return(self, typ, val, env_manager): - assert not isinstance(typ, types.Optional), "callconv should have " \ - "prevented the return of " \ - "optional value" - out = self.from_native_value(typ, val, env_manager) - return out - - def from_native_value(self, typ, val, env_manager=None): - """ - Box the native value of the given Numba type. A Python object - pointer is returned (NULL if an error occurred). - This method steals any native (NRT) reference embedded in *val*. - """ - from numba.targets.boxing import box_unsupported - - impl = _boxers.lookup(typ.__class__, box_unsupported) - - c = _BoxContext(self.context, self.builder, self, env_manager) - return impl(typ, val, c) - - def reflect_native_value(self, typ, val, env_manager=None): - """ - Reflect the native value onto its Python original, if any. - An error bit (as an LLVM value) is returned. - """ - impl = _reflectors.lookup(typ.__class__) - if impl is None: - # Reflection isn't needed for most types - return cgutils.false_bit - - is_error = cgutils.alloca_once_value(self.builder, cgutils.false_bit) - c = _ReflectContext(self.context, self.builder, self, env_manager, - is_error) - impl(typ, val, c) - return self.builder.load(c.is_error) - - def to_native_generator(self, obj, typ): - """ - Extract the generator structure pointer from a generator *obj* - (a _dynfunc.Generator instance). - """ - gen_ptr_ty = Type.pointer(self.context.get_data_type(typ)) - value = self.context.get_generator_state(self.builder, obj, gen_ptr_ty) - return NativeValue(value) - - def from_native_generator(self, val, typ, env=None): - """ - Make a Numba generator (a _dynfunc.Generator instance) from a - generator structure pointer *val*. 
- *env* is an optional _dynfunc.Environment instance to be wrapped - in the generator. - """ - llty = self.context.get_data_type(typ) - assert not llty.is_pointer - gen_struct_size = self.context.get_abi_sizeof(llty) - - gendesc = self.context.get_generator_desc(typ) - - # This is the PyCFunctionWithKeywords generated by PyCallWrapper - genfnty = Type.function(self.pyobj, [self.pyobj, self.pyobj, self.pyobj]) - genfn = self._get_function(genfnty, name=gendesc.llvm_cpython_wrapper_name) - - # This is the raw finalizer generated by _lower_generator_finalize_func() - finalizerty = Type.function(Type.void(), [self.voidptr]) - if typ.has_finalizer: - finalizer = self._get_function(finalizerty, name=gendesc.llvm_finalizer_name) - else: - finalizer = Constant.null(Type.pointer(finalizerty)) - - # PyObject *numba_make_generator(state_size, initial_state, nextfunc, finalizer, env) - fnty = Type.function(self.pyobj, [self.py_ssize_t, - self.voidptr, - Type.pointer(genfnty), - Type.pointer(finalizerty), - self.voidptr]) - fn = self._get_function(fnty, name="numba_make_generator") - - state_size = ir.Constant(self.py_ssize_t, gen_struct_size) - initial_state = self.builder.bitcast(val, self.voidptr) - if env is None: - env = self.get_null_object() - env = self.builder.bitcast(env, self.voidptr) - - return self.builder.call(fn, - (state_size, initial_state, genfn, finalizer, env)) - - def numba_array_adaptor(self, ary, ptr): - assert not self.context.enable_nrt - fnty = Type.function(Type.int(), [self.pyobj, self.voidptr]) - fn = self._get_function(fnty, name="numba_adapt_ndarray") - fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE) - fn.args[1].add_attribute(lc.ATTR_NO_CAPTURE) - return self.builder.call(fn, (ary, ptr)) - - def numba_buffer_adaptor(self, buf, ptr): - fnty = Type.function(Type.void(), - [ir.PointerType(self.py_buffer_t), self.voidptr]) - fn = self._get_function(fnty, name="numba_adapt_buffer") - fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE) - 
fn.args[1].add_attribute(lc.ATTR_NO_CAPTURE) - return self.builder.call(fn, (buf, ptr)) - - def complex_adaptor(self, cobj, cmplx): - fnty = Type.function(Type.int(), [self.pyobj, cmplx.type]) - fn = self._get_function(fnty, name="numba_complex_adaptor") - return self.builder.call(fn, [cobj, cmplx]) - - def extract_record_data(self, obj, pbuf): - fnty = Type.function(self.voidptr, - [self.pyobj, ir.PointerType(self.py_buffer_t)]) - fn = self._get_function(fnty, name="numba_extract_record_data") - return self.builder.call(fn, [obj, pbuf]) - - def get_buffer(self, obj, pbuf): - fnty = Type.function(Type.int(), - [self.pyobj, ir.PointerType(self.py_buffer_t)]) - fn = self._get_function(fnty, name="numba_get_buffer") - return self.builder.call(fn, [obj, pbuf]) - - def release_buffer(self, pbuf): - fnty = Type.function(Type.void(), [ir.PointerType(self.py_buffer_t)]) - fn = self._get_function(fnty, name="numba_release_buffer") - return self.builder.call(fn, [pbuf]) - - def extract_np_datetime(self, obj): - fnty = Type.function(Type.int(64), [self.pyobj]) - fn = self._get_function(fnty, name="numba_extract_np_datetime") - return self.builder.call(fn, [obj]) - - def extract_np_timedelta(self, obj): - fnty = Type.function(Type.int(64), [self.pyobj]) - fn = self._get_function(fnty, name="numba_extract_np_timedelta") - return self.builder.call(fn, [obj]) - - def create_np_datetime(self, val, unit_code): - unit_code = Constant.int(Type.int(), unit_code) - fnty = Type.function(self.pyobj, [Type.int(64), Type.int()]) - fn = self._get_function(fnty, name="numba_create_np_datetime") - return self.builder.call(fn, [val, unit_code]) - - def create_np_timedelta(self, val, unit_code): - unit_code = Constant.int(Type.int(), unit_code) - fnty = Type.function(self.pyobj, [Type.int(64), Type.int()]) - fn = self._get_function(fnty, name="numba_create_np_timedelta") - return self.builder.call(fn, [val, unit_code]) - - def recreate_record(self, pdata, size, dtype, env_manager): - fnty = 
Type.function(self.pyobj, [Type.pointer(Type.int(8)), - Type.int(), self.pyobj]) - fn = self._get_function(fnty, name="numba_recreate_record") - dtypeaddr = env_manager.read_const(env_manager.add_const(dtype)) - return self.builder.call(fn, [pdata, size, dtypeaddr]) - - def string_from_constant_string(self, string): - cstr = self.context.insert_const_string(self.module, string) - sz = self.context.get_constant(types.intp, len(string)) - return self.string_from_string_and_size(cstr, sz) diff --git a/numba/numba/rewrites/__init__.py b/numba/numba/rewrites/__init__.py deleted file mode 100644 index 84495b13e..000000000 --- a/numba/numba/rewrites/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -A subpackage hosting Numba IR rewrite passes. -""" - -from .registry import register_rewrite, rewrite_registry, Rewrite - -# Register various built-in rewrite passes -from . import static_getitem, static_raise, static_binop, ir_print, macros diff --git a/numba/numba/rewrites/ir_print.py b/numba/numba/rewrites/ir_print.py deleted file mode 100644 index e1d624eea..000000000 --- a/numba/numba/rewrites/ir_print.py +++ /dev/null @@ -1,82 +0,0 @@ -from __future__ import print_function - -from numba import ir, errors -from . import register_rewrite, Rewrite - - -@register_rewrite('before-inference') -class RewritePrintCalls(Rewrite): - """ - Rewrite calls to the print() global function to dedicated IR print() nodes. 
- """ - - def match(self, func_ir, block, typemap, calltypes): - self.prints = prints = {} - self.block = block - # Find all assignments with a right-hand print() call - for inst in block.find_insts(ir.Assign): - if isinstance(inst.value, ir.Expr) and inst.value.op == 'call': - expr = inst.value - if expr.kws: - # Only positional args are supported - continue - try: - callee = func_ir.infer_constant(expr.func) - except errors.ConstantInferenceError: - continue - if callee is print: - prints[inst] = expr - return len(prints) > 0 - - def apply(self): - """ - Rewrite `var = call (...)` as a sequence of - `print(...)` and `var = const(None)`. - """ - new_block = self.block.copy() - new_block.clear() - for inst in self.block.body: - if inst in self.prints: - expr = self.prints[inst] - print_node = ir.Print(args=expr.args, vararg=expr.vararg, - loc=expr.loc) - new_block.append(print_node) - assign_node = ir.Assign(value=ir.Const(None, loc=expr.loc), - target=inst.target, - loc=inst.loc) - new_block.append(assign_node) - else: - new_block.append(inst) - return new_block - - -@register_rewrite('before-inference') -class DetectConstPrintArguments(Rewrite): - """ - Detect and store constant arguments to print() nodes. - """ - - def match(self, func_ir, block, typemap, calltypes): - self.consts = consts = {} - self.block = block - for inst in block.find_insts(ir.Print): - if inst.consts: - # Already rewritten - continue - for idx, var in enumerate(inst.args): - try: - const = func_ir.infer_constant(var) - except errors.ConstantInferenceError: - continue - consts.setdefault(inst, {})[idx] = const - - return len(consts) > 0 - - def apply(self): - """ - Store detected constant arguments on their nodes. 
- """ - for inst in self.block.body: - if inst in self.consts: - inst.consts = self.consts[inst] - return self.block diff --git a/numba/numba/rewrites/macros.py b/numba/numba/rewrites/macros.py deleted file mode 100644 index 24d7f0e44..000000000 --- a/numba/numba/rewrites/macros.py +++ /dev/null @@ -1,131 +0,0 @@ -from numba import ir, errors -from . import register_rewrite, Rewrite - - -class Macro(object): - ''' - A macro object is expanded to a function call - - Args - ---- - name: str - Name of this Macro - func: function - Function that evaluates the macro expansion. - callable: bool - True if the macro is callable from Python code - (``func`` is then a Python callable returning the desired IR node). - False if the macro is not callable - (``func`` is then the name of a backend-specific function name - specifying the function to call at runtime). - argnames: list - If ``callable`` is True, this holds a list of the names of arguments - to the function. - ''' - - __slots__ = 'name', 'func', 'callable', 'argnames' - - def __init__(self, name, func, callable=False, argnames=None): - self.name = name - self.func = func - self.callable = callable - self.argnames = argnames - - def __repr__(self): - return ' %s>' % (self.name, self.func) - - -@register_rewrite('before-inference') -class ExpandMacros(Rewrite): - """ - Expand lookups and calls of Macro objects. - """ - - def match(self, func_ir, block, typemap, calltypes): - """ - Look for potential macros for expand and store their expansions. - """ - self.block = block - self.rewrites = rewrites = {} - - for inst in block.body: - if isinstance(inst, ir.Assign): - rhs = inst.value - if (isinstance(rhs, ir.Expr) and rhs.op == 'call' - and isinstance(rhs.func, ir.Var)): - # Is it a callable macro? 
- try: - const = func_ir.infer_constant(rhs.func) - except errors.ConstantInferenceError: - continue - if isinstance(const, Macro): - assert const.callable - new_expr = self._expand_callable_macro(func_ir, rhs, - const, rhs.loc) - rewrites[rhs] = new_expr - - elif isinstance(rhs, ir.Expr) and rhs.op == 'getattr': - # Is it a non-callable macro looked up as a constant attribute? - try: - const = func_ir.infer_constant(inst.target) - except errors.ConstantInferenceError: - continue - if isinstance(const, Macro) and not const.callable: - new_expr = self._expand_non_callable_macro(const, rhs.loc) - rewrites[rhs] = new_expr - - return len(rewrites) > 0 - - def _expand_non_callable_macro(self, macro, loc): - """ - Return the IR expression of expanding the non-callable macro. - """ - intr = ir.Intrinsic(macro.name, macro.func, args=()) - new_expr = ir.Expr.call(func=intr, args=(), - kws=(), loc=loc) - return new_expr - - def _expand_callable_macro(self, func_ir, call, macro, loc): - """ - Return the IR expression of expanding the macro call. - """ - assert macro.callable - - # Resolve all macro arguments as constants, or fail - args = [func_ir.infer_constant(arg.name) for arg in call.args] - kws = {} - for k, v in call.kws: - try: - kws[k] = func_ir.infer_constant(v) - except errors.ConstantInferenceError: - msg = "Argument {name!r} must be a " \ - "constant at {loc}".format(name=k, - loc=loc) - raise ValueError(msg) - - try: - result = macro.func(*args, **kws) - except Exception as e: - msg = str(e) - headfmt = "Macro expansion failed at {line}" - head = headfmt.format(line=loc) - newmsg = "{0}:\n{1}".format(head, msg) - raise errors.MacroError(newmsg) - - assert result is not None - - result.loc = call.loc - new_expr = ir.Expr.call(func=result, args=call.args, - kws=call.kws, loc=loc) - return new_expr - - def apply(self): - """ - Apply the expansions computed in .match(). 
- """ - block = self.block - rewrites = self.rewrites - for inst in block.body: - if isinstance(inst, ir.Assign) and inst.value in rewrites: - inst.value = rewrites[inst.value] - return block diff --git a/numba/numba/rewrites/registry.py b/numba/numba/rewrites/registry.py deleted file mode 100644 index 79b3a21d3..000000000 --- a/numba/numba/rewrites/registry.py +++ /dev/null @@ -1,90 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from collections import defaultdict - -from numba import config - - -class Rewrite(object): - '''Defines the abstract base class for Numba rewrites. - ''' - - def __init__(self, pipeline): - '''Constructor for the Rewrite class. - ''' - self.pipeline = pipeline - - def match(self, func_ir, block, typemap, calltypes): - '''Overload this method to check an IR block for matching terms in the - rewrite. - ''' - return False - - def apply(self): - '''Overload this method to return a rewritten IR basic block when a - match has been found. - ''' - raise NotImplementedError("Abstract Rewrite.apply() called!") - - -class RewriteRegistry(object): - '''Defines a registry for Numba rewrites. - ''' - _kinds = frozenset(['before-inference', 'after-inference']) - - def __init__(self): - '''Constructor for the rewrite registry. Initializes the rewrites - member to an empty list. - ''' - self.rewrites = defaultdict(list) - - def register(self, kind): - """ - Decorator adding a subclass of Rewrite to the registry for - the given *kind*. - """ - if kind not in self._kinds: - raise KeyError("invalid kind %r" % (kind,)) - def do_register(rewrite_cls): - if not issubclass(rewrite_cls, Rewrite): - raise TypeError('{0} is not a subclass of Rewrite'.format( - rewrite_cls)) - self.rewrites[kind].append(rewrite_cls) - return rewrite_cls - return do_register - - def apply(self, kind, pipeline, func_ir): - '''Given a pipeline and a dictionary of basic blocks, exhaustively - attempt to apply all registered rewrites to all basic blocks. 
- ''' - assert kind in self._kinds - blocks = func_ir.blocks - old_blocks = blocks.copy() - for rewrite_cls in self.rewrites[kind]: - # Exhaustively apply a rewrite until it stops matching. - rewrite = rewrite_cls(pipeline) - work_list = list(blocks.items()) - while work_list: - key, block = work_list.pop() - matches = rewrite.match(func_ir, block, pipeline.typemap, - pipeline.calltypes) - if matches: - if config.DEBUG or config.DUMP_IR: - print("_" * 70) - print("REWRITING (%s):" % rewrite_cls.__name__) - block.dump() - print("_" * 60) - new_block = rewrite.apply() - blocks[key] = new_block - work_list.append((key, new_block)) - if config.DEBUG or config.DUMP_IR: - new_block.dump() - print("_" * 70) - # If any blocks were changed, perform a sanity check. - for key, block in blocks.items(): - if block != old_blocks[key]: - block.verify() - - -rewrite_registry = RewriteRegistry() -register_rewrite = rewrite_registry.register diff --git a/numba/numba/rewrites/static_binop.py b/numba/numba/rewrites/static_binop.py deleted file mode 100644 index 330115c23..000000000 --- a/numba/numba/rewrites/static_binop.py +++ /dev/null @@ -1,35 +0,0 @@ -from numba import ir, errors -from . import register_rewrite, Rewrite - - -@register_rewrite('before-inference') -class DetectStaticBinops(Rewrite): - """ - Detect constant arguments to select binops. 
- """ - - # Those operators can benefit from a constant-inferred argument - rhs_operators = {'**'} - - def match(self, func_ir, block, typemap, calltypes): - self.static_lhs = {} - self.static_rhs = {} - self.block = block - # Find binop expressions with a constant lhs or rhs - for expr in block.find_exprs(op='binop'): - try: - if (expr.fn in self.rhs_operators - and expr.static_rhs is ir.UNDEFINED): - self.static_rhs[expr] = func_ir.infer_constant(expr.rhs) - except errors.ConstantInferenceError: - continue - - return len(self.static_lhs) > 0 or len(self.static_rhs) > 0 - - def apply(self): - """ - Store constant arguments that were detected in match(). - """ - for expr, rhs in self.static_rhs.items(): - expr.static_rhs = rhs - return self.block diff --git a/numba/numba/rewrites/static_getitem.py b/numba/numba/rewrites/static_getitem.py deleted file mode 100644 index 6a8eefc69..000000000 --- a/numba/numba/rewrites/static_getitem.py +++ /dev/null @@ -1,85 +0,0 @@ -from numba import ir, errors -from . import register_rewrite, Rewrite - - -@register_rewrite('before-inference') -class RewriteConstGetitems(Rewrite): - """ - Rewrite IR expressions of the kind `getitem(value=arr, index=$constXX)` - where `$constXX` is a known constant as - `static_getitem(value=arr, index=)`. - """ - - def match(self, func_ir, block, typemap, calltypes): - self.getitems = getitems = {} - self.block = block - # Detect all getitem expressions and find which ones can be - # rewritten - for expr in block.find_exprs(op='getitem'): - if expr.op == 'getitem': - try: - const = func_ir.infer_constant(expr.index) - except errors.ConstantInferenceError: - continue - getitems[expr] = const - - return len(getitems) > 0 - - def apply(self): - """ - Rewrite all matching getitems as static_getitems. 
- """ - new_block = self.block.copy() - new_block.clear() - for inst in self.block.body: - if isinstance(inst, ir.Assign): - expr = inst.value - if expr in self.getitems: - const = self.getitems[expr] - new_expr = ir.Expr.static_getitem(value=expr.value, - index=const, - index_var=expr.index, - loc=expr.loc) - inst = ir.Assign(value=new_expr, target=inst.target, - loc=inst.loc) - new_block.append(inst) - return new_block - - -@register_rewrite('before-inference') -class RewriteConstSetitems(Rewrite): - """ - Rewrite IR statements of the kind `setitem(target=arr, index=$constXX, ...)` - where `$constXX` is a known constant as - `static_setitem(target=arr, index=, ...)`. - """ - - def match(self, func_ir, block, typemap, calltypes): - self.setitems = setitems = {} - self.block = block - # Detect all setitem statements and find which ones can be - # rewritten - for inst in block.find_insts(ir.SetItem): - try: - const = func_ir.infer_constant(inst.index) - except errors.ConstantInferenceError: - continue - setitems[inst] = const - - return len(setitems) > 0 - - def apply(self): - """ - Rewrite all matching setitems as static_setitems. - """ - new_block = self.block.copy() - new_block.clear() - for inst in self.block.body: - if inst in self.setitems: - const = self.setitems[inst] - new_inst = ir.StaticSetItem(inst.target, const, - inst.index, inst.value, inst.loc) - new_block.append(new_inst) - else: - new_block.append(inst) - return new_block diff --git a/numba/numba/rewrites/static_raise.py b/numba/numba/rewrites/static_raise.py deleted file mode 100644 index 69b39a481..000000000 --- a/numba/numba/rewrites/static_raise.py +++ /dev/null @@ -1,62 +0,0 @@ -from numba import ir -from . 
import register_rewrite, Rewrite - - -@register_rewrite('before-inference') -class RewriteConstRaises(Rewrite): - """ - Rewrite IR statements of the kind `raise(value)` - where `value` is the result of instantiating an exception with - constant arguments - into `static_raise(exception_type, constant args)`. - - This allows lowering in nopython mode, where one can't instantiate - exception instances from runtime data. - """ - - def _is_exception_type(self, const): - return isinstance(const, type) and issubclass(const, Exception) - - def _break_constant(self, const): - """ - Break down constant exception. - """ - if isinstance(const, BaseException): - return const.__class__, const.args - elif self._is_exception_type(const): - return const, None - else: - raise NotImplementedError("unsupported exception constant %r" - % (const,)) - - def match(self, func_ir, block, typemap, calltypes): - self.raises = raises = {} - self.block = block - # Detect all raise statements and find which ones can be - # rewritten - for inst in block.find_insts(ir.Raise): - if inst.exception is None: - # re-reraise - exc_type, exc_args = None, None - else: - # raise => find the definition site for - const = func_ir.infer_constant(inst.exception) - exc_type, exc_args = self._break_constant(const) - raises[inst] = exc_type, exc_args - - return len(raises) > 0 - - def apply(self): - """ - Rewrite all matching setitems as static_setitems. 
- """ - new_block = self.block.copy() - new_block.clear() - for inst in self.block.body: - if inst in self.raises: - exc_type, exc_args = self.raises[inst] - new_inst = ir.StaticRaise(exc_type, exc_args, inst.loc) - new_block.append(new_inst) - else: - new_block.append(inst) - return new_block diff --git a/numba/numba/roc/README.md b/numba/numba/roc/README.md deleted file mode 100644 index 45ea2a0c6..000000000 --- a/numba/numba/roc/README.md +++ /dev/null @@ -1,36 +0,0 @@ -Setup ------ - -`libhsakmt.so.1`, `libhsa-runtime64.so`, `libhsa-runtime-ext64.so` must be in - the `LD_LIBRARY_PATH`. - -The standard location of these libraries are in `/opt/hsa/lib`. Thus, -user can simply do `export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/hsa/lib` - - -Run Tests ---------- - -The HSA test suite can be executed from the base of the source tree with: - -```bash -python runtests.py numba.hsa.tests -vb -``` - -The test suite can also be executed inside the python interpreter with: - -```python -import numba.hsa -numba.hsa.test("-vb") -``` - -Or directly from the terminal with: - -```bash -python -c 'import numba.hsa; numba.hsa.test("-vb")' -``` - -Note that the "-vb" flags are optional. The "-v" flag enables verbose mode -that will print the name of each test. The "-b" flag enables capturing -the stdout messages printed from within the tests. - diff --git a/numba/numba/roc/__init__.py b/numba/numba/roc/__init__.py deleted file mode 100644 index b4d17add0..000000000 --- a/numba/numba/roc/__init__.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -Module that deals with HSA in a high level way -""" -from __future__ import print_function, absolute_import, division -import os -import numba.testing -from .api import * -from .stubs import atomic -from . import initialize - - -def is_available(): - """Returns a boolean to indicate the availability of a HSA runtime. - - This will force initialization of the driver if it hasn't been - initialized. It also checks that a toolchain is present. 
- """ - from .hsadrv.driver import hsa - - from .hlc import hlc, libhlc - has_a_toolchain = False - - try: - libhlc.HLC() - has_a_toolchain = True - except: - try: - cmd = hlc.CmdLine().check_tooling() - has_a_toolchain = True - except: - pass - - return hsa.is_available and has_a_toolchain - - -def test(*args, **kwargs): - if not is_available(): - raise RuntimeError("HSA is not detected") - - return numba.testing.test("numba.hsa.tests", *args, **kwargs) diff --git a/numba/numba/roc/api.py b/numba/numba/roc/api.py deleted file mode 100644 index 83adeabce..000000000 --- a/numba/numba/roc/api.py +++ /dev/null @@ -1,196 +0,0 @@ -from __future__ import absolute_import, print_function - -import numpy as np -from numba import mviewbuf -from numba.roc.hsadrv.devices import get_context - -from .stubs import ( - get_global_id, - get_global_size, - get_local_id, - get_local_size, - get_group_id, - get_work_dim, - get_num_groups, - barrier, - mem_fence, - shared, - wavebarrier, - activelanepermute_wavewidth, - ds_permute, - ds_bpermute, -) - -from .decorators import ( - jit, -) - -from .enums import ( - CLK_LOCAL_MEM_FENCE, - CLK_GLOBAL_MEM_FENCE -) - -from .hsadrv.driver import hsa as _hsadrv -from .hsadrv import devicearray - -try: - long -except NameError: - long = int - -class _AutoDeregister(object): - def __init__(self, args): - self.args = args - - def __enter__(self): - pass - - def __exit__(self, exc_type, exc_val, exc_tb): - deregister(*self.args) - - -def register(*args): - """Register data into the HSA system - - Returns a contextmanager for use in with-context for auto deregistration. 
- - Use in context: - - with hsa.register(array): - do_work_on_HSA(array) - - """ - for data in args: - if isinstance(data, np.ndarray): - _hsadrv.hsa_memory_register(data.ctypes.data, data.nbytes) - else: - raise TypeError(type(data)) - return _AutoDeregister(args) - - -def deregister(*args): - """Deregister data from the HSA system - """ - for data in args: - if isinstance(data, np.ndarray): - _hsadrv.hsa_memory_deregister(data.ctypes.data, data.nbytes) - else: - raise TypeError(type(data)) - -def device_array(shape, dtype=np.float, strides=None, order='C'): - """device_array(shape, dtype=np.float, strides=None, order='C') - - Allocate an empty device ndarray. Similar to :meth:`numpy.empty`. - """ - shape, strides, dtype = _prepare_shape_strides_dtype(shape, strides, dtype, - order) - return devicearray.DeviceNDArray(shape=shape, strides=strides, dtype=dtype) - - -def device_array_like(ary): - """Call roc.devicearray() with information from the array. - """ - return device_array(shape=ary.shape, dtype=ary.dtype, strides=ary.strides) - - -def to_device(obj, stream=None, context=None, copy=True, to=None): - """to_device(obj, context, copy=True, to=None) - - Allocate and transfer a numpy ndarray or structured scalar to the device. - - To copy host->device a numpy array:: - - ary = numpy.arange(10) - d_ary = roc.to_device(ary) - - The resulting ``d_ary`` is a ``DeviceNDArray``. 
- - To copy device->host:: - - hary = d_ary.copy_to_host() - - To copy device->host to an existing array:: - - ary = numpy.empty(shape=d_ary.shape, dtype=d_ary.dtype) - d_ary.copy_to_host(ary) - - """ - context = context or get_context() - - if to is None: - to = devicearray.from_array_like(obj) - - if copy: - to.copy_to_device(obj, stream=stream, context=context) - return to - - -def stream(): - return _hsadrv.create_stream() - -def _fill_stride_by_order(shape, dtype, order): - nd = len(shape) - strides = [0] * nd - if order == 'C': - strides[-1] = dtype.itemsize - for d in reversed(range(nd - 1)): - strides[d] = strides[d + 1] * shape[d + 1] - elif order == 'F': - strides[0] = dtype.itemsize - for d in range(1, nd): - strides[d] = strides[d - 1] * shape[d - 1] - else: - raise ValueError('must be either C/F order') - return tuple(strides) - - -def _prepare_shape_strides_dtype(shape, strides, dtype, order): - dtype = np.dtype(dtype) - if isinstance(shape, (int, long)): - shape = (shape,) - if isinstance(strides, (int, long)): - strides = (strides,) - else: - if shape == (): - shape = (1,) - strides = strides or _fill_stride_by_order(shape, dtype, order) - return shape, strides, dtype - -def _memory_size_from_info(shape, strides, itemsize): - """Get the byte size of a contiguous memory buffer given the shape, strides - and itemsize. - """ - assert len(shape) == len(strides), "# dim mismatch" - ndim = len(shape) - s, e = mviewbuf.memoryview_get_extents_info(shape, strides, ndim, itemsize) - return e - s - -def _host_array(finegrain, shape, dtype, strides, order): - from .hsadrv import devices - shape, strides, dtype = _prepare_shape_strides_dtype(shape, strides, dtype, - order) - bytesize = _memory_size_from_info(shape, strides, dtype.itemsize) - # TODO does allowing access by all dGPUs really work in a multiGPU system? 
- agents = [c._agent for c in devices.get_all_contexts()] - buf = devices.get_cpu_context().memhostalloc(bytesize, finegrain=finegrain, - allow_access_to=agents) - arr = np.ndarray(shape=shape, strides=strides, dtype=dtype, order=order, - buffer=buf) - return arr.view(type=devicearray.HostArray) - - -def coarsegrain_array(shape, dtype=np.float, strides=None, order='C'): - """coarsegrain_array(shape, dtype=np.float, strides=None, order='C') - Similar to np.empty(). - """ - return _host_array(finegrain=False, shape=shape, dtype=dtype, - strides=strides, order=order) - - -def finegrain_array(shape, dtype=np.float, strides=None, order='C'): - """finegrain_array(shape, dtype=np.float, strides=None, order='C') - - Similar to np.empty(). - """ - return _host_array(finegrain=False, shape=shape, dtype=dtype, - strides=strides, order=order) diff --git a/numba/numba/roc/codegen.py b/numba/numba/roc/codegen.py deleted file mode 100644 index 03f37f4d0..000000000 --- a/numba/numba/roc/codegen.py +++ /dev/null @@ -1,50 +0,0 @@ -from __future__ import print_function, absolute_import - -from llvmlite import binding as ll -from llvmlite.llvmpy import core as lc -from numba import utils -from numba.targets.codegen import BaseCPUCodegen, CodeLibrary -from .hlc import DATALAYOUT, TRIPLE, hlc - - -class HSACodeLibrary(CodeLibrary): - def _optimize_functions(self, ll_module): - pass - - def _optimize_final_module(self): - pass - - def _finalize_specific(self): - pass - - def get_asm_str(self): - """ - Get the human-readable assembly. 
- """ - m = hlc.Module() - m.load_llvm(str(self._final_module)) - out = m.finalize() - return str(out.hsail) - - -class JITHSACodegen(BaseCPUCodegen): - _library_class = HSACodeLibrary - - def _init(self, llvm_module): - assert list(llvm_module.global_variables) == [], "Module isn't empty" - self._data_layout = DATALAYOUT[utils.MACHINE_BITS] - self._target_data = ll.create_target_data(self._data_layout) - - def _create_empty_module(self, name): - ir_module = lc.Module(name) - ir_module.triple = TRIPLE - return ir_module - - def _module_pass_manager(self): - raise NotImplementedError - - def _function_pass_manager(self, llvm_module): - raise NotImplementedError - - def _add_module(self, module): - pass diff --git a/numba/numba/roc/compiler.py b/numba/numba/roc/compiler.py deleted file mode 100644 index 82c74892f..000000000 --- a/numba/numba/roc/compiler.py +++ /dev/null @@ -1,464 +0,0 @@ -from __future__ import print_function, absolute_import -import copy -from collections import namedtuple -import re - -import numpy as np - -from numba.typing.templates import ConcreteTemplate -from numba import types, compiler -from .hlc import hlc -from .hsadrv import devices, driver, enums, drvapi -from .hsadrv.error import HsaKernelLaunchError -from . import gcn_occupancy -from numba.roc.hsadrv.driver import hsa, dgpu_present -from .hsadrv import devicearray -from numba.typing.templates import AbstractTemplate -from numba import ctypes_support as ctypes -from numba import config - - -def compile_hsa(pyfunc, return_type, args, debug): - # First compilation will trigger the initialization of the HSA backend. 
- from .descriptor import HSATargetDesc - - typingctx = HSATargetDesc.typingctx - targetctx = HSATargetDesc.targetctx - # TODO handle debug flag - flags = compiler.Flags() - # Do not compile (generate native code), just lower (to LLVM) - flags.set('no_compile') - flags.set('no_cpython_wrapper') - flags.unset('nrt') - # Run compilation pipeline - cres = compiler.compile_extra(typingctx=typingctx, - targetctx=targetctx, - func=pyfunc, - args=args, - return_type=return_type, - flags=flags, - locals={}) - - # Linking depending libraries - # targetctx.link_dependencies(cres.llvm_module, cres.target_context.linking) - library = cres.library - library.finalize() - - return cres - - -def compile_kernel(pyfunc, args, debug=False): - cres = compile_hsa(pyfunc, types.void, args, debug=debug) - func = cres.library.get_function(cres.fndesc.llvm_func_name) - kernel = cres.target_context.prepare_hsa_kernel(func, cres.signature.args) - hsakern = HSAKernel(llvm_module=kernel.module, - name=kernel.name, - argtypes=cres.signature.args) - return hsakern - - -def compile_device(pyfunc, return_type, args, debug=False): - cres = compile_hsa(pyfunc, return_type, args, debug=debug) - func = cres.library.get_function(cres.fndesc.llvm_func_name) - cres.target_context.mark_hsa_device(func) - devfn = DeviceFunction(cres) - - class device_function_template(ConcreteTemplate): - key = devfn - cases = [cres.signature] - - cres.typing_context.insert_user_function(devfn, device_function_template) - libs = [cres.library] - cres.target_context.insert_user_function(devfn, cres.fndesc, libs) - return devfn - - -def compile_device_template(pyfunc): - """Compile a DeviceFunctionTemplate - """ - from .descriptor import HSATargetDesc - - dft = DeviceFunctionTemplate(pyfunc) - - class device_function_template(AbstractTemplate): - key = dft - - def generic(self, args, kws): - assert not kws - return dft.compile(args) - - typingctx = HSATargetDesc.typingctx - typingctx.insert_user_function(dft, 
device_function_template) - return dft - - -class DeviceFunctionTemplate(object): - """Unmaterialized device function - """ - def __init__(self, pyfunc, debug=False): - self.py_func = pyfunc - self.debug = debug - # self.inline = inline - self._compileinfos = {} - - def compile(self, args): - """Compile the function for the given argument types. - - Each signature is compiled once by caching the compiled function inside - this object. - """ - if args not in self._compileinfos: - cres = compile_hsa(self.py_func, None, args, debug=self.debug) - func = cres.library.get_function(cres.fndesc.llvm_func_name) - cres.target_context.mark_hsa_device(func) - first_definition = not self._compileinfos - self._compileinfos[args] = cres - libs = [cres.library] - - if first_definition: - # First definition - cres.target_context.insert_user_function(self, cres.fndesc, - libs) - else: - cres.target_context.add_user_function(self, cres.fndesc, libs) - - else: - cres = self._compileinfos[args] - - return cres.signature - - -class DeviceFunction(object): - def __init__(self, cres): - self.cres = cres - - -def _ensure_list(val): - if not isinstance(val, (tuple, list)): - return [val] - else: - return list(val) - - -def _ensure_size_or_append(val, size): - n = len(val) - for _ in range(n, size): - val.append(1) - - -class HSAKernelBase(object): - """Define interface for configurable kernels - """ - - def __init__(self): - self.global_size = (1,) - self.local_size = (1,) - self.stream = None - - def copy(self): - return copy.copy(self) - - def configure(self, global_size, local_size=None, stream=None): - """Configure the OpenCL kernel - local_size can be None - """ - global_size = _ensure_list(global_size) - - if local_size is not None: - local_size = _ensure_list(local_size) - size = max(len(global_size), len(local_size)) - _ensure_size_or_append(global_size, size) - _ensure_size_or_append(local_size, size) - - clone = self.copy() - clone.global_size = tuple(global_size) - 
clone.local_size = tuple(local_size) if local_size else None - clone.stream = stream - - return clone - - def forall(self, nelem, local_size=64, stream=None): - """Simplified configuration for 1D kernel launch - """ - return self.configure(nelem, min(nelem, local_size), stream=stream) - - def __getitem__(self, args): - """Mimick CUDA python's square-bracket notation for configuration. - This assumes a the argument to be: - `griddim, blockdim, stream` - The blockdim maps directly to local_size. - The actual global_size is computed by multiplying the local_size to - griddim. - """ - griddim = _ensure_list(args[0]) - blockdim = _ensure_list(args[1]) - size = max(len(griddim), len(blockdim)) - _ensure_size_or_append(griddim, size) - _ensure_size_or_append(blockdim, size) - # Compute global_size - gs = [g * l for g, l in zip(griddim, blockdim)] - return self.configure(gs, blockdim, *args[2:]) - - -_CacheEntry = namedtuple("_CachedEntry", ['symbol', 'executable', - 'kernarg_region']) - - -class _CachedProgram(object): - def __init__(self, entry_name, binary): - self._entry_name = entry_name - self._binary = binary - # key: hsa context - self._cache = {} - - def get(self): - ctx = devices.get_context() - result = self._cache.get(ctx) - # The program does not exist as GCN yet. 
- if result is None: - - # generate GCN - symbol = '{0}'.format(self._entry_name) - agent = ctx.agent - - ba = bytearray(self._binary) - bblob = ctypes.c_byte * len(self._binary) - bas = bblob.from_buffer(ba) - - code_ptr = drvapi.hsa_code_object_t() - driver.hsa.hsa_code_object_deserialize( - ctypes.addressof(bas), - len(self._binary), - None, - ctypes.byref(code_ptr) - ) - - code = driver.CodeObject(code_ptr) - - ex = driver.Executable() - ex.load(agent, code) - ex.freeze() - symobj = ex.get_symbol(agent, symbol) - regions = agent.regions.globals - for reg in regions: - if reg.host_accessible: - if reg.supports(enums.HSA_REGION_GLOBAL_FLAG_KERNARG): - kernarg_region = reg - break - assert kernarg_region is not None - - # Cache the GCN program - result = _CacheEntry(symbol=symobj, executable=ex, - kernarg_region=kernarg_region) - self._cache[ctx] = result - - return ctx, result - - -class HSAKernel(HSAKernelBase): - """ - A HSA kernel object - """ - def __init__(self, llvm_module, name, argtypes): - super(HSAKernel, self).__init__() - self._llvm_module = llvm_module - self.assembly, self.binary = self._generateGCN() - self.entry_name = name - self.argument_types = tuple(argtypes) - self._argloc = [] - # cached program - self._cacheprog = _CachedProgram(entry_name=self.entry_name, - binary=self.binary) - self._parse_kernel_resource() - - def _parse_kernel_resource(self): - """ - Temporary workaround for register limit - """ - m = re.search(r"\bwavefront_sgpr_count\s*=\s*(\d+)", self.assembly) - self._wavefront_sgpr_count = int(m.group(1)) - m = re.search(r"\bworkitem_vgpr_count\s*=\s*(\d+)", self.assembly) - self._workitem_vgpr_count = int(m.group(1)) - - def _sentry_resource_limit(self): - # only check resource factprs if either sgpr or vgpr is non-zero - #if (self._wavefront_sgpr_count > 0 or self._workitem_vgpr_count > 0): - group_size = np.prod(self.local_size) - limits = gcn_occupancy.get_limiting_factors( - group_size=group_size, - 
vgpr_per_workitem=self._workitem_vgpr_count, - sgpr_per_wave=self._wavefront_sgpr_count) - if limits.reasons: - fmt = 'insufficient resources to launch kernel due to:\n{}' - msg = fmt.format('\n'.join(limits.suggestions)) - raise HsaKernelLaunchError(msg) - - def _generateGCN(self): - hlcmod = hlc.Module() - hlcmod.load_llvm(str(self._llvm_module)) - return hlcmod.generateGCN() - - def bind(self): - """ - Bind kernel to device - """ - ctx, entry = self._cacheprog.get() - if entry.symbol.kernarg_segment_size > 0: - sz = ctypes.sizeof(ctypes.c_byte) *\ - entry.symbol.kernarg_segment_size - kernargs = entry.kernarg_region.allocate(sz) - else: - kernargs = None - - return ctx, entry.symbol, kernargs, entry.kernarg_region - - def __call__(self, *args): - self._sentry_resource_limit() - - ctx, symbol, kernargs, kernarg_region = self.bind() - - # Unpack pyobject values into ctypes scalar values - expanded_values = [] - - # contains lambdas to execute on return - retr = [] - for ty, val in zip(self.argument_types, args): - _unpack_argument(ty, val, expanded_values, retr) - - # Insert kernel arguments - base = 0 - for av in expanded_values: - # Adjust for alignemnt - align = ctypes.sizeof(av) - pad = _calc_padding_for_alignment(align, base) - base += pad - # Move to offset - offseted = kernargs.value + base - asptr = ctypes.cast(offseted, ctypes.POINTER(type(av))) - # Assign value - asptr[0] = av - # Increment offset - base += align - - # Actual Kernel launch - qq = ctx.default_queue - - if self.stream is None: - hsa.implicit_sync() - - # Dispatch - signal = None - if self.stream is not None: - signal = hsa.create_signal(1) - qq.insert_barrier(self.stream._get_last_signal()) - - qq.dispatch(symbol, kernargs, workgroup_size=self.local_size, - grid_size=self.global_size, signal=signal) - - if self.stream is not None: - self.stream._add_signal(signal) - - # retrieve auto converted arrays - for wb in retr: - wb() - - # Free kernel region - if kernargs is not None: - if 
self.stream is None: - kernarg_region.free(kernargs) - else: - self.stream._add_callback(lambda: kernarg_region.free(kernargs)) - - -def _unpack_argument(ty, val, kernelargs, retr): - """ - Convert arguments to ctypes and append to kernelargs - """ - if isinstance(ty, types.Array): - c_intp = ctypes.c_ssize_t - # if a dgpu is present, move the data to the device. - if dgpu_present: - devary, conv = devicearray.auto_device(val, devices.get_context()) - if conv: - retr.append(lambda: devary.copy_to_host(val)) - data = devary.device_ctypes_pointer - else: - data = ctypes.c_void_p(val.ctypes.data) - - - meminfo = parent = ctypes.c_void_p(0) - nitems = c_intp(val.size) - itemsize = c_intp(val.dtype.itemsize) - kernelargs.append(meminfo) - kernelargs.append(parent) - kernelargs.append(nitems) - kernelargs.append(itemsize) - kernelargs.append(data) - for ax in range(val.ndim): - kernelargs.append(c_intp(val.shape[ax])) - for ax in range(val.ndim): - kernelargs.append(c_intp(val.strides[ax])) - - elif isinstance(ty, types.Integer): - cval = getattr(ctypes, "c_%s" % ty)(val) - kernelargs.append(cval) - - elif ty == types.float64: - cval = ctypes.c_double(val) - kernelargs.append(cval) - - elif ty == types.float32: - cval = ctypes.c_float(val) - kernelargs.append(cval) - - elif ty == types.boolean: - cval = ctypes.c_uint8(int(val)) - kernelargs.append(cval) - - elif ty == types.complex64: - kernelargs.append(ctypes.c_float(val.real)) - kernelargs.append(ctypes.c_float(val.imag)) - - elif ty == types.complex128: - kernelargs.append(ctypes.c_double(val.real)) - kernelargs.append(ctypes.c_double(val.imag)) - - else: - raise NotImplementedError(ty, val) - - -def _calc_padding_for_alignment(align, base): - """ - Returns byte padding required to move the base pointer into proper alignment - """ - rmdr = int(base) % align - if rmdr == 0: - return 0 - else: - return align - rmdr - - -class AutoJitHSAKernel(HSAKernelBase): - def __init__(self, func): - super(AutoJitHSAKernel, 
self).__init__() - self.py_func = func - self.definitions = {} - - from .descriptor import HSATargetDesc - - self.typingctx = HSATargetDesc.typingctx - - def __call__(self, *args): - kernel = self.specialize(*args) - cfg = kernel.configure(self.global_size, self.local_size, self.stream) - cfg(*args) - - def specialize(self, *args): - argtypes = tuple([self.typingctx.resolve_argument_type(a) - for a in args]) - kernel = self.definitions.get(argtypes) - if kernel is None: - kernel = compile_kernel(self.py_func, argtypes) - self.definitions[argtypes] = kernel - return kernel - diff --git a/numba/numba/roc/decorators.py b/numba/numba/roc/decorators.py deleted file mode 100644 index 452c23a21..000000000 --- a/numba/numba/roc/decorators.py +++ /dev/null @@ -1,59 +0,0 @@ -from __future__ import print_function, absolute_import, division -from numba import sigutils, types -from .compiler import (compile_kernel, compile_device, AutoJitHSAKernel, - compile_device_template) - - -def jit(signature=None, device=False): - """JIT compile a python function conforming to - the HSA-Python - """ - if signature is None: - return autojit(device=device) - elif not sigutils.is_signature(signature): - func = signature - return autojit(device=device)(func) - else: - if device: - return _device_jit(signature) - else: - return _kernel_jit(signature) - - -def autojit(device=False): - if device: - return _device_autojit - else: - return _kernel_autojit - - -def _device_jit(signature): - argtypes, restype = sigutils.normalize_signature(signature) - - def _wrapped(pyfunc): - return compile_device(pyfunc, restype, argtypes) - - return _wrapped - - -def _kernel_jit(signature): - argtypes, restype = sigutils.normalize_signature(signature) - if restype is not None and restype != types.void: - msg = "HSA kernel must have void return type but got {restype}" - raise TypeError(msg.format(restype=restype)) - - def _wrapped(pyfunc): - return compile_kernel(pyfunc, argtypes) - - return _wrapped - - -def 
_device_autojit(pyfunc): - return compile_device_template(pyfunc) - - -def _kernel_autojit(pyfunc): - return AutoJitHSAKernel(pyfunc) - - - diff --git a/numba/numba/roc/descriptor.py b/numba/numba/roc/descriptor.py deleted file mode 100644 index e607c9952..000000000 --- a/numba/numba/roc/descriptor.py +++ /dev/null @@ -1,14 +0,0 @@ -from __future__ import print_function, division, absolute_import -from numba.targets.descriptors import TargetDescriptor -from numba.targets.options import TargetOptions -from .target import HSATargetContext, HSATypingContext - - -class HSATargetOptions(TargetOptions): - OPTIONS = {} - - -class HSATargetDesc(TargetDescriptor): - options = HSATargetOptions - typingctx = HSATypingContext() - targetctx = HSATargetContext(typingctx) diff --git a/numba/numba/roc/dispatch.py b/numba/numba/roc/dispatch.py deleted file mode 100644 index bb8bf1a44..000000000 --- a/numba/numba/roc/dispatch.py +++ /dev/null @@ -1,150 +0,0 @@ -from __future__ import absolute_import, division, print_function - -import numpy as np - -from numba.npyufunc.deviceufunc import (UFuncMechanism, GenerializedUFunc, - GUFuncCallSteps) -from numba.roc.hsadrv.driver import dgpu_present -import numba.roc.hsadrv.devicearray as devicearray -import numba.roc.api as api - -class HsaUFuncDispatcher(object): - """ - Invoke the HSA ufunc specialization for the given inputs. - """ - - def __init__(self, types_to_retty_kernels): - self.functions = types_to_retty_kernels - - def __call__(self, *args, **kws): - """ - *args: numpy arrays - **kws: - stream -- hsa stream; when defined, asynchronous mode is used. - out -- output array. Can be a numpy array or DeviceArrayBase - depending on the input arguments. Type must match - the input arguments. 
- """ - return HsaUFuncMechanism.call(self.functions, args, kws) - - def reduce(self, arg, stream=0): - raise NotImplementedError - - -class HsaUFuncMechanism(UFuncMechanism): - """ - Provide OpenCL specialization - """ - DEFAULT_STREAM = 0 - ARRAY_ORDER = 'A' - - def is_device_array(self, obj): - if dgpu_present: - return devicearray.is_hsa_ndarray(obj) - else: - return isinstance(obj, np.ndarray) - - def is_host_array(self, obj): - if dgpu_present: - return False - else: - return isinstance(obj, np.ndarray) - - def to_device(self, hostary, stream): - if dgpu_present: - return api.to_device(hostary) - else: - return hostary - - def launch(self, func, count, stream, args): - # ILP must match vectorize kernel source - ilp = 4 - # Use more wavefront to allow hiding latency - tpb = 64 * 2 - count = (count + (ilp - 1)) // ilp - blockcount = (count + (tpb - 1)) // tpb - func[blockcount, tpb](*args) - - def device_array(self, shape, dtype, stream): - if dgpu_present: - return api.device_array(shape=shape, dtype=dtype) - else: - return np.empty(shape=shape, dtype=dtype) - - def broadcast_device(self, ary, shape): - if dgpu_present: - raise NotImplementedError('device broadcast_device NIY') - else: - ax_differs = [ax for ax in range(len(shape)) - if ax >= ary.ndim - or ary.shape[ax] != shape[ax]] - - missingdim = len(shape) - len(ary.shape) - strides = [0] * missingdim + list(ary.strides) - - for ax in ax_differs: - strides[ax] = 0 - - return np.ndarray(shape=shape, strides=strides, - dtype=ary.dtype, buffer=ary) - - -class _HsaGUFuncCallSteps(GUFuncCallSteps): - __slots__ = () - - def is_device_array(self, obj): - if dgpu_present: - return devicearray.is_hsa_ndarray(obj) - else: - return True - - def to_device(self, hostary): - if dgpu_present: - return api.to_device(hostary) - else: - return hostary - - def to_host(self, devary, hostary): - if dgpu_present: - out = devary.copy_to_host(hostary) - return out - else: - pass - - def device_array(self, shape, dtype): - if 
dgpu_present: - return api.device_array(shape=shape, dtype=dtype) - else: - return np.empty(shape=shape, dtype=dtype) - - def launch_kernel(self, kernel, nelem, args): - kernel.configure(nelem, min(nelem, 64))(*args) - - -class HSAGenerializedUFunc(GenerializedUFunc): - @property - def _call_steps(self): - return _HsaGUFuncCallSteps - - def _broadcast_scalar_input(self, ary, shape): - if dgpu_present: - return devicearray.DeviceNDArray(shape=shape, - strides=(0,), - dtype=ary.dtype, - dgpu_data=ary.dgpu_data) - else: - return np.lib.stride_tricks.as_strided(ary, shape=(shape,), - strides=(0,)) - - def _broadcast_add_axis(self, ary, newshape): - newax = len(newshape) - len(ary.shape) - # Add 0 strides for missing dimension - newstrides = (0,) * newax + ary.strides - if dgpu_present: - return devicearray.DeviceNDArray(shape=newshape, - strides=newstrides, - dtype=ary.dtype, - dgpu_data=ary.dgpu_data) - else: - raise NotImplementedError - diff --git a/numba/numba/roc/enums.py b/numba/numba/roc/enums.py deleted file mode 100644 index e60f08e6b..000000000 --- a/numba/numba/roc/enums.py +++ /dev/null @@ -1,4 +0,0 @@ -from __future__ import print_function, absolute_import, division - -CLK_LOCAL_MEM_FENCE = 0 -CLK_GLOBAL_MEM_FENCE = 1 diff --git a/numba/numba/roc/gcn_occupancy.py b/numba/numba/roc/gcn_occupancy.py deleted file mode 100644 index 241ab7c98..000000000 --- a/numba/numba/roc/gcn_occupancy.py +++ /dev/null @@ -1,90 +0,0 @@ -from __future__ import division, print_function - -import math -from collections import namedtuple - - -# GCN architecture specific info -simd_per_cu = 4 -wave_size = 64 -vector_register_file_size = 64 * 2**10 # 64 kB -byte_per_VGPR = 4 -vgpr_per_simd = vector_register_file_size // byte_per_VGPR -sgpr_per_simd = 512 -max_wave_count = 10 -max_inflight_wave_per_cu = max_wave_count * simd_per_cu - -# XXX due to limit in AMDGPU backend -max_group_size = 256 - - -_limits = namedtuple('_limits', ['allowed_wave_due_to_sgpr', - 
'allowed_wave_due_to_vgpr', - 'allowed_wave', - 'allowed_vgpr_per_workitem', - 'occupancy', - 'reasons', - 'suggestions']) - - -def get_limiting_factors(group_size, vgpr_per_workitem, sgpr_per_wave): - def _ceil(x): - return int(math.ceil(x)) - - # these might be zero, for resource limit treat as 1 - vgpr_per_workitem = vgpr_per_workitem if vgpr_per_workitem > 0 else 1 - sgpr_per_wave = sgpr_per_wave if sgpr_per_wave > 0 else 1 - - workitem_per_simd = group_size / simd_per_cu - required_wave_count_per_simd = _ceil(workitem_per_simd / wave_size) - required_vgpr_per_wave = vgpr_per_workitem * wave_size - # limiting factor - allowed_wave_due_to_sgpr = sgpr_per_simd // sgpr_per_wave - allowed_wave_due_to_vgpr = vgpr_per_simd // required_vgpr_per_wave - allowed_wave = min(allowed_wave_due_to_sgpr, max_wave_count, allowed_wave_due_to_vgpr) - allowed_vgpr_per_workitem = _ceil(vgpr_per_simd / required_wave_count_per_simd / wave_size) - # reasons - reasons = set() - if allowed_wave_due_to_sgpr < required_wave_count_per_simd: - reasons.add('allowed_wave_due_to_sgpr') - if allowed_wave_due_to_vgpr < required_wave_count_per_simd: - reasons.add('allowed_wave_due_to_vgpr') - if allowed_wave < required_wave_count_per_simd: - reasons.add('allowed_wave') - if group_size > max_group_size: - reasons.add('group_size') - - suggestions = [_suggestions[r] for r in sorted(reasons)] - - # occupancy - inflight_wave_per_cu = (0 if reasons else - required_wave_count_per_simd * simd_per_cu) - occupancy = inflight_wave_per_cu / max_inflight_wave_per_cu - - return _limits(allowed_wave_due_to_sgpr=allowed_wave_due_to_sgpr, - allowed_wave_due_to_vgpr=allowed_wave_due_to_vgpr, - allowed_wave=allowed_wave, - allowed_vgpr_per_workitem=allowed_vgpr_per_workitem, - occupancy=occupancy, - reasons=reasons, - suggestions=suggestions) - - -_suggestions = {} - -_suggestions['allowed_wave_due_to_sgpr'] = ( - "* Cannot allocate enough sGPRs for all resident wavefronts." 
-) - -_suggestions['allowed_wave_due_to_vgpr'] = ( - "* Cannot allocate enough vGPRs for all resident wavefronts." -) - -_suggestions['allowed_wave'] = ( - "* Launch requires too many wavefronts. Try reducing group-size." -) - -_suggestions['group_size'] = ( - "* Exceeds max group size (256)." -) - diff --git a/numba/numba/roc/hlc/__init__.py b/numba/numba/roc/hlc/__init__.py deleted file mode 100644 index dabfe520f..000000000 --- a/numba/numba/roc/hlc/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from __future__ import absolute_import - -import os - -# 32-bit private, local, and region pointers. 64-bit global, constant and flat. -# See: -# https://github.com/RadeonOpenCompute/llvm/blob/b20b796f65ab6ac12fac4ea32e1d89e1861dee6a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp#L270-L275 -# Alloc goes into addrspace(5) (private) -DATALAYOUT = { - 64: ("e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" - "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" - "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"), -} - -TRIPLE = "amdgcn--amdhsa" - -# Allow user to use "NUMBA_USE_LIBHLC" env-var to use cmdline HLC. -if os.environ.get('NUMBA_USE_LIBHLC', '').lower() not in ['0', 'no', 'false']: - from . import libhlc as hlc diff --git a/numba/numba/roc/hlc/common.py b/numba/numba/roc/hlc/common.py deleted file mode 100644 index 363143bd9..000000000 --- a/numba/numba/roc/hlc/common.py +++ /dev/null @@ -1,161 +0,0 @@ -""" -Shared code for the low level compiler tooling -""" - -from __future__ import print_function, division, absolute_import - -from abc import abstractmethod, ABCMeta -from numba import six -import re - -# These are for parsing labels and metadata -_re_labelname = re.compile(r"\n\.([0-9a-z_\.]+):", re.I) # label: .: -_re_regname = re.compile(r"%\.([0-9a-z_]+)", re.I) # register: %. 
-_re_metadata_def = re.compile(r"\!\d+\s*=") -_re_metadata_correct_usage = re.compile(r"metadata\s*\![{'\"]") -_re_metadata_ref = re.compile(r"\!\d+") - -# These are for parsing alloca instructions -_re_alloca_quoted = re.compile('(.*)"(\$.*)".*') -_re_alloca_parts = re.compile('(.*)=(.*alloca(.*))') - -def add_metadata_type(ir): - """ - Rewrite metadata since llvm3.6 dropped the "metadata" type prefix. - """ - buf = [] - for line in ir.splitlines(): - # If the line is a metadata - if _re_metadata_def.match(line): - # Does not contain any correct usage (Maybe already fixed) - if None is _re_metadata_correct_usage.search(line): - line = line.replace('!{', 'metadata !{') - line = line.replace('!"', 'metadata !"') - - def sub_metadata(m): - return "metadata {0}".format(m.group(0)) - - line = _re_metadata_ref.sub(sub_metadata, line) - line = line.lstrip('metadata ') - buf.append(line) - - return '\n'.join(buf) - - -def rename_register(llvmir): - """ - HLC does not like variable with '.' prefix. - """ - def repl(mat): - return '%_dot_.{0}'.format(mat.group(1)) - - return _re_regname.sub(repl, llvmir) - - -def rename_label(llvmir): - """ - HLC does not like a label with '.' prefix. - """ - def repl(mat): - return '_dot_.{0}:'.format(mat.group(1)) - - return _re_labelname.sub(repl, llvmir) - - -def adapt_llvm_version(llvmir): - """ - Adapt the LLVM IR to match the syntax required by HLC. - """ - llvmir = rename_register(llvmir) - llvmir = rename_label(llvmir) - # return add_metadata_type(llvmir) - return llvmir - - -def alloca_addrspace_correction(llvmir): - """ - rewrites llvmir such that alloca's go into addrspace(5) and are then - addrspacecast back to to addrspace(0). Alloca into 5 is a requirement of - the datalayout specification. 
- """ - lines = llvmir.splitlines() - mangle = '__tmp' - new_ir = [] - for l in lines: - # pluck lines containing alloca - if 'alloca' in l: - assignee, alloca_match, ptrty = _re_alloca_parts.match(l).groups() - q_match = _re_alloca_quoted.match(assignee) - if q_match: - start, var = q_match.groups() - var = var.strip() - name_fmt = '%s"%s"' - old_name = name_fmt % (start, var) - new_name = name_fmt % (start, var + mangle) - else: - old_name = assignee.strip() - new_name = old_name + mangle - allocaline = "%s = %s, addrspace(5)" % (new_name, alloca_match) - castline_fmt = ("%s = addrspacecast %s addrspace(5)* " - "%s to %s addrspace(0)*") - castline = castline_fmt % (old_name, ptrty, new_name, ptrty) - new_ir.append(allocaline) - new_ir.append(castline) - else: - new_ir.append(l) - return '\n'.join(new_ir) - - -@six.add_metaclass(ABCMeta) -class _AMDGCNModule(object): - """ - The AMDCGN LLVM module contract - """ - - @abstractmethod - def load_llvm(self, llvmir): - pass - - @abstractmethod - def link_builtins(self, main): - pass - - @abstractmethod - def generateGCN(self, llvmir): - pass - - -class AMDGCNModule(object): - """ - The AMDCGN LLVM module contract - """ - - bitcodes = [ - "opencl.amdgcn.bc", - "ocml.amdgcn.bc", - "ockl.amdgcn.bc", - "oclc_correctly_rounded_sqrt_off.amdgcn.bc", - "oclc_daz_opt_off.amdgcn.bc", - "oclc_finite_only_off.amdgcn.bc", - "oclc_isa_version_803.amdgcn.bc", - "oclc_unsafe_math_off.amdgcn.bc", - "irif.amdgcn.bc" - ] - - def __init__(self): - self._finalized = False - - def _preprocess(self, llvmir): - version_adapted = adapt_llvm_version(llvmir) - alloca_fixed = alloca_addrspace_correction(version_adapted) - return alloca_fixed - - def load_llvm(self, llvmir): - pass - - def link_builtins(self, main): - pass - - def generateGCN(self): - pass - diff --git a/numba/numba/roc/hlc/config.py b/numba/numba/roc/hlc/config.py deleted file mode 100644 index f98f975e9..000000000 --- a/numba/numba/roc/hlc/config.py +++ /dev/null @@ -1,8 +0,0 @@ 
-from __future__ import print_function, absolute_import - -import sys -import os - -# where ROCM bitcode is installed -DEFAULT_ROCM_BC_PATH = '/opt/rocm/opencl/lib/x86_64/bitcode/' -ROCM_BC_PATH = os.environ.get("NUMBA_ROCM_BC_PATH", DEFAULT_ROCM_BC_PATH) diff --git a/numba/numba/roc/hlc/hlc.py b/numba/numba/roc/hlc/hlc.py deleted file mode 100644 index b04e66b4c..000000000 --- a/numba/numba/roc/hlc/hlc.py +++ /dev/null @@ -1,306 +0,0 @@ -# A temporary wrapper to connect to the HLC LLVM binaries. -# Currently, connect to commandline interface. -from __future__ import print_function, absolute_import -import sys -from subprocess import check_call, check_output -import subprocess -import tempfile -import os -import re -from collections import namedtuple -from numba import config -from numba.roc.hsadrv import devices -from .common import AMDGCNModule -from .config import ROCM_BC_PATH -from . import TRIPLE -from datetime import datetime -from contextlib import contextmanager -from numba import utils -from numba.roc.hsadrv.error import HsaSupportError - -_real_check_call = check_call - -NOISY_CMDLINE = False - -@contextmanager -def error_pipe(): - if NOISY_CMDLINE: - yield subprocess.STDOUT - else: - if utils.IS_PY3: - yield subprocess.DEVNULL - else: - with open(os.devnull, 'wb') as devnull: - yield devnull - - -def check_call(*args, **kwargs): - # This is so that time is stamped against invocation - # such that correlations can be looked for against messages in the - # sys and kernel logs. 
- try: - with error_pipe() as stderr: - if NOISY_CMDLINE: - print(datetime.now().strftime("%b %d %H:%M:%S"), - file=sys.stdout) - print('CMD: ' + ';'.join(args), file=sys.stdout) - - ret = _real_check_call(*args, stderr=stderr, **kwargs) - - except subprocess.CalledProcessError as e: - print(e) - raise(e) - return ret - - -class CmdLine(object): - - def _initialize(self): - if not self.initialized: - dev_ctx = devices.get_context() - target_cpu = dev_ctx.agent.name.decode('UTF-8') - self.target_cpu = "-mcpu %s" % target_cpu - - self.CMD_OPT = ' '.join([ - self.opt, - "-O3", - self.triple_flag, - self.target_cpu, - "-disable-simplify-libcalls", - "-verify", - "-S", - "-o {fout}", - "{fin}"]) - - self.CMD_VERIFY = ' '.join([ - self.opt, - "-verify", - self.triple_flag, - self.target_cpu, - "-S", - "-o {fout}", - "{fin}"]) - - self.CMD_GEN_HSAIL = ' '.join([self.llc, - "-O2", - self.triple_flag, - self.target_cpu, - "-filetype=asm", - "-o {fout}", - "{fin}"]) - - self.CMD_GEN_BRIG = ' '.join([self.llc, - "-O2", - self.triple_flag, - self.target_cpu, - "-filetype=obj", - "-o {fout}", - "{fin}"]) - - self.CMD_LINK_BUILTINS = ' '.join([ - self.llvm_link, - "-S", - "-o {fout}", - "{fin}", - "{lib}"]) - - self.CMD_LINK_LIBS = ' '.join([self.llvm_link, - "-S", - "-o {fout}", - "{fin}"]) - - self.CMD_LINK_BRIG = ' '.join([self.ld_lld, - "-shared", - "-o {fout}", - "{fin}"]) - - def __init__(self): - self._binary_path = os.environ.get('HSAILBIN', None) - def _setup_path(tool): - if self._binary_path is not None: - return os.path.join(self._binary_path, tool) - else: - binpath = os.path.join(sys.prefix, 'bin', tool) - return binpath - self._triple = TRIPLE - - self.opt = _setup_path("opt") - self.llc = _setup_path("llc") - self.llvm_link = _setup_path("llvm-link") - self.ld_lld = _setup_path("ld.lld") - self.triple_flag = "-mtriple %s" % self._triple - self.initialized = False - - def check_tooling(self): - # make sure the llc can actually target amdgcn, ideally all tooling - 
# should be checked but most don't print anything useful and so - # compilation for AMDGCN would have to be tested instead. This is a - # smoke test like check. - try: - if not os.path.isfile(self.llc): - raise HsaSupportError('llc not found') - output = check_output([self.llc, '--version']) - olines = [x.strip() for x in output.splitlines()] - tgtidx = olines.index('Registered Targets:') - targets = olines[tgtidx + 1:] - for tgt in targets: - if 'amdgcn' in tgt: - break - else: - msg = 'Command line tooling does not support "amdgcn" target' - raise HsaSupportError(msg) - except BaseException as e: - raise - - def verify(self, ipath, opath): - if not self.initialized: - self._initialize() - check_call(self.CMD_VERIFY.format(fout=opath, fin=ipath), shell=True) - - def optimize(self, ipath, opath): - if not self.initialized: - self._initialize() - check_call(self.CMD_OPT.format(fout=opath, fin=ipath), shell=True) - - def generate_hsail(self, ipath, opath): - if not self.initialized: - self._initialize() - check_call(self.CMD_GEN_HSAIL.format(fout=opath, fin=ipath), shell=True) - - def generate_brig(self, ipath, opath): - if not self.initialized: - self._initialize() - check_call(self.CMD_GEN_BRIG.format(fout=opath, fin=ipath), shell=True) - - def link_libs(self, ipath, libpaths, opath): - if not self.initialized: - self._initialize() - cmdline = self.CMD_LINK_LIBS.format(fout=opath, fin=ipath) - cmdline += ' '.join(["{0}".format(lib) for lib in libpaths]) - check_call(cmdline, shell=True) - - def link_brig(self, ipath, opath): - if not self.initialized: - self._initialize() - check_call(self.CMD_LINK_BRIG.format(fout=opath, fin=ipath), shell=True) - - -class Module(AMDGCNModule): - def __init__(self): - """ - Setup - """ - self._tmpdir = tempfile.mkdtemp() - self._tempfiles = [] - self._linkfiles = [] - self._cmd = CmdLine() - AMDGCNModule.__init__(self) - - def __del__(self): - return - self.close() - - def close(self): - # Remove all temporary files - for afile in 
self._tempfiles: - os.unlink(afile) - #Remove directory - os.rmdir(self._tmpdir) - - def _create_temp_file(self, name, mode='wb'): - path = self._track_temp_file(name) - fobj = open(path, mode=mode) - return fobj, path - - def _track_temp_file(self, name): - path = os.path.join(self._tmpdir, - "{0}-{1}".format(len(self._tempfiles), name)) - self._tempfiles.append(path) - return path - - def load_llvm(self, llvmir): - """ - Load LLVM with HSAIL SPIR spec - """ - # Preprocess LLVM IR - llvmir = self._preprocess(llvmir) - - # Create temp file to store the input file - tmp_llvm_ir, fin = self._create_temp_file("dump-llvm-ir") - with tmp_llvm_ir: - tmp_llvm_ir.write(llvmir.encode('ascii')) - - # Create temp file for optimization - fout = self._track_temp_file("verified-llvm-ir") - self._cmd.verify(ipath=fin, opath=fout) - - if config.DUMP_OPTIMIZED: - with open(fout, 'rb') as fin_opt: - print(fin_opt.read().decode('ascii')) - - self._linkfiles.append(fout) - - def link_builtins(self, ipath, opath): - - # progressively link in all the bitcodes - for bc in self.bitcodes: - if bc != self.bitcodes[-1]: - tmp_opath = opath + bc.replace('/', '_').replace('.','_') - else: - tmp_opath = opath - lib = os.path.join(ROCM_BC_PATH, bc) - cmd = self._cmd.CMD_LINK_BUILTINS.format(fout=tmp_opath, fin=ipath, lib=lib) - check_call(cmd, shell=True) - ipath = tmp_opath - - def generateGCN(self): - """ - Generate GCN from a module and also return the HSAIL code. 
- """ - assert not self._finalized, "Module already has GCN generated" - - # Link dependencies libraries - llvmfile = self._linkfiles[0] - pre_builtin_path = self._track_temp_file("link-dep") - libpaths = self._linkfiles[1:] - self._cmd.link_libs(ipath=llvmfile, libpaths=libpaths, - opath=pre_builtin_path) - - # Link library with the builtin modules - linked_path = self._track_temp_file("linked-path") - self.link_builtins(ipath=pre_builtin_path, opath=linked_path) - - # Optimize - opt_path = self._track_temp_file("optimized-llvm-ir") - self._cmd.optimize(ipath=linked_path, opath=opt_path) - - if config.DUMP_OPTIMIZED: - with open(opt_path, 'rb') as fin: - print(fin.read().decode('ascii')) - - # Compile the llvm to HSAIL - hsail_path = self._track_temp_file("create-hsail") - self._cmd.generate_hsail(ipath=opt_path, opath=hsail_path) - - # Compile the llvm to BRIG - brig_path = self._track_temp_file("create-brig") - self._cmd.generate_brig(ipath=opt_path, opath=brig_path) - - # link - end_brig_path = self._track_temp_file("linked-brig") - self._cmd.link_brig(ipath = brig_path, opath=end_brig_path) - - self._finalized = True - - # Read HSAIL - with open(hsail_path, 'rb') as fin: - hsail = fin.read().decode('ascii') - - # Read BRIG - with open(end_brig_path, 'rb') as fin: - brig = fin.read() - - if config.DUMP_ASSEMBLY: - print(hsail) - - return namedtuple('FinalizerResult', ['hsail', 'brig'])(hsail, brig) diff --git a/numba/numba/roc/hlc/libhlc.py b/numba/numba/roc/hlc/libhlc.py deleted file mode 100644 index f0bdf7c4e..000000000 --- a/numba/numba/roc/hlc/libhlc.py +++ /dev/null @@ -1,247 +0,0 @@ -from __future__ import absolute_import, print_function - -import os -import sys -from collections import namedtuple -from ctypes import (c_size_t, byref, c_char_p, c_void_p, Structure, CDLL, - POINTER, create_string_buffer, c_int, addressof, - c_byte) -import tempfile -import os -import re -from numba import utils, config -from numba.roc.hsadrv import devices -from .common 
import AMDGCNModule - -from numba.roc.hlc.hlc import CmdLine - -# the CLI tooling is needed for the linking phase at present -cli = CmdLine() - - -class OpaqueModuleRef(Structure): - pass - - -moduleref_ptr = POINTER(OpaqueModuleRef) - - -def set_option(*opt): - """ - Use this for setting debug flags to libHLC using the same options - available to LLVM. - E.g -debug-pass=Structure - """ - inp = [create_string_buffer(x.encode('ascii')) for x in (('libhlc',) + opt)] - argc = len(inp) - argv = (c_char_p * argc)() - for i in range(argc): - argv[i] = addressof(inp[i]) - hlc.ROC_SetCommandLineOption(argc, byref(argv)) - - -class Error(Exception): - pass - - -class HLC(object): - """ - LibHLC wrapper interface - """ - hlc = None - - def __init__(self): - # Lazily load the libHLC library - bitcode_path = os.path.join(sys.prefix, 'share', 'rocmtools') - assert os.path.exists(bitcode_path) and os.path.isdir(bitcode_path) - self.bitcode_path = bitcode_path - dev_ctx = devices.get_context() - target_cpu = dev_ctx.agent.name - self.target_cpu = target_cpu - - if self.hlc is None: - try: - hlc = CDLL(os.path.join(sys.prefix, 'lib', 'librocmlite.so')) - except OSError: - raise ImportError("librocmlite.so cannot be found. 
Please " - "install the roctools package by: " - "conda install -c numba roctools") - - else: - hlc.ROC_ParseModule.restype = moduleref_ptr - hlc.ROC_ParseBitcode.restype = moduleref_ptr - hlc.ROC_ModuleEmitBRIG.restype = c_size_t - hlc.ROC_Initialize() - utils.finalize(hlc, hlc.ROC_Finalize) - - hlc.ROC_SetCommandLineOption.argtypes = [ - c_int, - c_void_p, - ] - - type(self).hlc = hlc - - def parse_assembly(self, ir): - if isinstance(ir, str): - ir = ir.encode("latin1") - buf = create_string_buffer(ir) - mod = self.hlc.ROC_ParseModule(buf) - if not mod: - raise Error("Failed to parse assembly") - return mod - - def parse_bitcode(self, bitcode): - buf = create_string_buffer(bitcode, len(bitcode)) - mod = self.hlc.ROC_ParseBitcode(buf, c_size_t(len(bitcode))) - if not mod: - raise Error("Failed to parse bitcode") - return mod - - def optimize(self, mod, opt=3, size=0, verify=1): - if not self.hlc.ROC_ModuleOptimize(mod, int(opt), int(size), - int(verify), c_char_p(self.target_cpu)): - raise Error("Failed to optimize module") - - def link(self, dst, src): - if not self.hlc.ROC_ModuleLinkIn(dst, src): - raise Error("Failed to link modules") - - def to_hsail(self, mod, opt=2): - buf = c_char_p(0) - if not self.hlc.ROC_ModuleEmitHSAIL(mod, int(opt), - c_char_p(self.target_cpu), byref(buf)): - raise Error("Failed to emit HSAIL") - ret = buf.value.decode("latin1") - self.hlc.ROC_DisposeString(buf) - return ret - - def _link_brig(self, upbrig_loc, patchedbrig_loc): - cli.link_brig(upbrig_loc, patchedbrig_loc) - - def to_brig(self, mod, opt=2): - bufptr = c_void_p(0) - size = self.hlc.ROC_ModuleEmitBRIG(mod, int(opt), - c_char_p(self.target_cpu), byref(bufptr)) - if not size: - raise Error("Failed to emit BRIG") - buf = (c_byte * size).from_address(bufptr.value) - try: - buffer - except NameError: - ret = bytes(buf) - else: - ret = bytes(buffer(buf)) - self.hlc.ROC_DisposeString(buf) - # Now we have an ELF, this needs patching with ld.lld which doesn't - # have an API. 
So we write out `ret` to a temporary file, then call - # the ld.lld ELF linker main() on it to generate a patched ELF - # temporary file output, which we read back in. - - # tmpdir, not using a ctx manager as debugging is easier without - tmpdir = tempfile.mkdtemp() - tmp_files = [] - - # write out unpatched BRIG - upbrig_file = "unpatched.brig" - upbrig_loc = os.path.join(tmpdir, upbrig_file) - with open(upbrig_loc, "wb") as up_brig_fobj: - up_brig_fobj.write(ret) - tmp_files.append(upbrig_loc) - - # record the location of the patched ELF - patchedbrig_file = "patched.brig" - patchedbrig_loc = os.path.join(tmpdir, patchedbrig_file) - - # call out to ld.lld to patch - self._link_brig(upbrig_loc, patchedbrig_loc) - - # read back in brig temporary. - with open(patchedbrig_loc, "rb") as p_brig_fobj: - patchedBrig = p_brig_fobj.read() - tmp_files.append(patchedbrig_loc) - - # Remove all temporary files - for afile in tmp_files: - os.unlink(afile) - # Remove directory - os.rmdir(tmpdir) - - return patchedBrig - - def to_string(self, mod): - buf = c_char_p(0) - self.hlc.ROC_ModulePrint(mod, byref(buf)) - ret = buf.value.decode("latin1") - self.hlc.ROC_DisposeString(buf) - return ret - - def destroy_module(self, mod): - self.hlc.ROC_ModuleDestroy(mod) - - -class Module(AMDGCNModule): - def __init__(self): - self._llvm_modules = [] - self._hlc = HLC() - AMDGCNModule.__init__(self) - - def load_llvm(self, llvmir): - """ - Load LLVM with HSAIL SPIR spec - """ - # Preprocess LLVM IR - # Because HLC does not handle dot in LLVM variable names - llvmir = self._preprocess(llvmir) - - mod = self._hlc.parse_assembly(llvmir) - - if config.DUMP_OPTIMIZED: - print(self._hlc.to_string(mod)) - - self._llvm_modules.append(mod) - - def link_builtins(self, main): - - for bc in self.bitcodes: - bc_path = os.path.join(self._hlc.bitcode_path, bc) - with open(bc_path, 'rb') as builtin: - buf = builtin.read() - mod = self._hlc.parse_bitcode(buf) - self._hlc.link(main, mod) - - - def 
generateGCN(self): - """ - Finalize module and return the HSAIL code - """ - assert not self._finalized, "Module finalized already" - - # Link dependencies - main = self._llvm_modules[0] - for dep in self._llvm_modules[1:]: - self._hlc.link(main, dep) - - # link bitcode - self.link_builtins(main) - - # Optimize - self._hlc.optimize(main) - - if config.DUMP_OPTIMIZED: - print(self._hlc.to_string(main)) - - # create HSAIL - hsail = self._hlc.to_hsail(main) - - # Finalize the llvm to BRIG - brig = self._hlc.to_brig(main) - - self._finalized = True - - # Clean up main; other modules are destroyed at linking - self._hlc.destroy_module(main) - - if config.DUMP_ASSEMBLY: - print(hsail) - - return namedtuple('FinalizerResult', ['hsail', 'brig'])(hsail, brig) diff --git a/numba/numba/roc/hsadecl.py b/numba/numba/roc/hsadecl.py deleted file mode 100644 index f0a2d8d1c..000000000 --- a/numba/numba/roc/hsadecl.py +++ /dev/null @@ -1,191 +0,0 @@ -from __future__ import print_function, division, absolute_import -from numba import types -from numba.typing.templates import (AttributeTemplate, ConcreteTemplate, - AbstractTemplate, - MacroTemplate, signature, Registry) -from numba import roc - -registry = Registry() -intrinsic = registry.register -intrinsic_attr = registry.register_attr -intrinsic_global = registry.register_global - - -# =============================== NOTE =============================== -# Even though the following functions return size_t in the OpenCL standard, -# It should be rare (and unrealistic) to have 2**63 number of work items. -# We are choosing to use intp (signed 64-bit in large model) due to potential -# loss of precision in coerce(intp, uintp) that results in double. 
- - -@intrinsic -class Hsa_get_global_id(ConcreteTemplate): - key = roc.get_global_id - cases = [signature(types.intp, types.uint32)] - - -@intrinsic -class Hsa_get_local_id(ConcreteTemplate): - key = roc.get_local_id - cases = [signature(types.intp, types.uint32)] - - -@intrinsic -class Hsa_get_group_id(ConcreteTemplate): - key = roc.get_group_id - cases = [signature(types.intp, types.uint32)] - - -@intrinsic -class Hsa_get_num_groups(ConcreteTemplate): - key = roc.get_num_groups - cases = [signature(types.intp, types.uint32)] - - -@intrinsic -class Hsa_get_work_dim(ConcreteTemplate): - key = roc.get_work_dim - cases = [signature(types.uint32)] - - -@intrinsic -class Hsa_get_global_size(ConcreteTemplate): - key = roc.get_global_size - cases = [signature(types.intp, types.uint32)] - - -@intrinsic -class Hsa_get_local_size(ConcreteTemplate): - key = roc.get_local_size - cases = [signature(types.intp, types.uint32)] - - -@intrinsic -class Hsa_barrier(ConcreteTemplate): - key = roc.barrier - cases = [signature(types.void, types.uint32), - signature(types.void)] - - -@intrinsic -class Hsa_mem_fence(ConcreteTemplate): - key = roc.mem_fence - cases = [signature(types.void, types.uint32)] - - -@intrinsic -class Hsa_wavebarrier(ConcreteTemplate): - key = roc.wavebarrier - cases = [signature(types.void)] - - -@intrinsic -class Hsa_activelanepermute_wavewidth(ConcreteTemplate): - key = roc.activelanepermute_wavewidth - # parameter: src, laneid, identity, useidentity - cases = [signature(ty, ty, types.uint32, ty, types.bool_) - for ty in (types.integer_domain|types.real_domain)] - -@intrinsic -class Hsa_ds_permute(ConcreteTemplate): - key = roc.ds_permute - cases = [signature(types.int32, types.int32, types.int32)] - -@intrinsic -class Hsa_ds_bpermute(ConcreteTemplate): - key = roc.ds_bpermute - cases = [signature(types.int32, types.int32, types.int32)] - -# hsa.shared submodule ------------------------------------------------------- - -class Hsa_shared_array(MacroTemplate): 
- key = roc.shared.array - - -@intrinsic_attr -class HsaSharedTemplate(AttributeTemplate): - key = types.Module(roc.shared) - - def resolve_array(self, mod): - return types.Macro(Hsa_shared_array) - - -# hsa.atomic submodule ------------------------------------------------------- - -@intrinsic -class Hsa_atomic_add(AbstractTemplate): - key = roc.atomic.add - - def generic(self, args, kws): - assert not kws - ary, idx, val = args - - if ary.ndim == 1: - return signature(ary.dtype, ary, types.intp, ary.dtype) - elif ary.ndim > 1: - return signature(ary.dtype, ary, idx, ary.dtype) - - -@intrinsic_attr -class HsaAtomicTemplate(AttributeTemplate): - key = types.Module(roc.atomic) - - def resolve_add(self, mod): - return types.Function(Hsa_atomic_add) - - -# hsa module ----------------------------------------------------------------- - -@intrinsic_attr -class HsaModuleTemplate(AttributeTemplate): - key = types.Module(roc) - - def resolve_get_global_id(self, mod): - return types.Function(Hsa_get_global_id) - - def resolve_get_local_id(self, mod): - return types.Function(Hsa_get_local_id) - - def resolve_get_global_size(self, mod): - return types.Function(Hsa_get_global_size) - - def resolve_get_local_size(self, mod): - return types.Function(Hsa_get_local_size) - - def resolve_get_num_groups(self, mod): - return types.Function(Hsa_get_num_groups) - - def resolve_get_work_dim(self, mod): - return types.Function(Hsa_get_work_dim) - - def resolve_get_group_id(self, mod): - return types.Function(Hsa_get_group_id) - - def resolve_barrier(self, mod): - return types.Function(Hsa_barrier) - - def resolve_mem_fence(self, mod): - return types.Function(Hsa_mem_fence) - - def resolve_wavebarrier(self, mod): - return types.Function(Hsa_wavebarrier) - - def resolve_activelanepermute_wavewidth(self, mod): - return types.Function(Hsa_activelanepermute_wavewidth) - - def resolve_ds_permute(self, mod): - return types.Function(Hsa_ds_permute) - - def resolve_ds_bpermute(self, mod): - return 
types.Function(Hsa_ds_bpermute) - - def resolve_shared(self, mod): - return types.Module(roc.shared) - - def resolve_atomic(self, mod): - return types.Module(roc.atomic) - - -# intrinsic - -intrinsic_global(roc, types.Module(roc)) diff --git a/numba/numba/roc/hsadrv/__init__.py b/numba/numba/roc/hsadrv/__init__.py deleted file mode 100644 index d11e66a67..000000000 --- a/numba/numba/roc/hsadrv/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""HSA driver - -This submodule contains low level bindings to HSA -""" diff --git a/numba/numba/roc/hsadrv/devicearray.py b/numba/numba/roc/hsadrv/devicearray.py deleted file mode 100644 index 277fde22e..000000000 --- a/numba/numba/roc/hsadrv/devicearray.py +++ /dev/null @@ -1,365 +0,0 @@ -""" -A HSA dGPU backed ND Array is recognized by checking the __hsa_memory__ -attribute on the object. If it exists and evaluate to True, it must define -shape, strides, dtype and size attributes similar to a NumPy ndarray. -""" -from __future__ import print_function, absolute_import, division -import warnings -import math -import copy -import weakref -from ctypes import c_void_p -import numpy as np -from numba.roc.hsadrv import driver as _driver -from . 
import devices -from numba import dummyarray, types, numpy_support -from .error import HsaContextMismatchError - -try: - long -except NameError: - long = int - - -def is_hsa_ndarray(obj): - "Check if an object is a HSA ndarray" - return getattr(obj, '__hsa_ndarray__', False) - - -def verify_hsa_ndarray_interface(obj): - "Verify the HSA ndarray interface for an obj" - require_hsa_ndarray(obj) - - def requires_attr(attr, typ): - if not hasattr(obj, attr): - raise AttributeError(attr) - if not isinstance(getattr(obj, attr), typ): - raise AttributeError('%s must be of type %s' % (attr, typ)) - - requires_attr('shape', tuple) - requires_attr('strides', tuple) - requires_attr('dtype', np.dtype) - requires_attr('size', (int, long)) - - -def require_hsa_ndarray(obj): - "Raises ValueError if is_hsa_ndarray(obj) evaluates False" - if not is_hsa_ndarray(obj): - raise ValueError('require an hsa ndarray object') - - -class DeviceNDArrayBase(object): - """Base class for an on dGPU NDArray representation cf. numpy.ndarray - """ - __hsa_memory__ = True - __hsa_ndarray__ = True # There must be dgpu_data attribute as a result - - def __init__(self, shape, strides, dtype, dgpu_data=None): - """ - Args - ---- - - shape - array shape. - strides - array strides. - dtype - data type as numpy.dtype. 
- dgpu_data - user provided device memory for the ndarray data buffer - """ - if isinstance(shape, (int, long)): - shape = (shape,) - if isinstance(strides, (int, long)): - strides = (strides,) - self.ndim = len(shape) - if len(strides) != self.ndim: - raise ValueError('strides not match ndim') - self._dummy = dummyarray.Array.from_desc(0, shape, strides, - dtype.itemsize) - self.shape = tuple(shape) - self.strides = tuple(strides) - self.dtype = np.dtype(dtype) - self.size = int(np.prod(self.shape)) - # prepare dgpu memory - if self.size > 0: - if dgpu_data is None: - from numba.roc.api import _memory_size_from_info - self.alloc_size = _memory_size_from_info(self.shape, - self.strides, self.dtype.itemsize) - # find a coarse region on the dGPU - dgpu_data = devices.get_context().mempoolalloc(self.alloc_size) - else: # we have some preallocated dgpu_memory - sz = getattr(dgpu_data, '_hsa_memsize_', None) - if sz is None: - raise ValueError('dgpu_data as no _hsa_memsize_ attribute') - assert sz >= 0 - self.alloc_size = sz - else: - dgpu_data = None - self.alloc_size = 0 - - self.dgpu_data = dgpu_data - - @property - def _context(self): - return self.dgpu_data.context - - @property - def _numba_type_(self): - """ - Magic attribute expected by Numba to get the numba type that - represents this object. - """ - dtype = numpy_support.from_dtype(self.dtype) - return types.Array(dtype, self.ndim, 'A') - - @property - def device_ctypes_pointer(self): - """Returns the ctypes pointer to the GPU data buffer - """ - if self.dgpu_data is None: - return c_void_p(0) - else: - return self.dgpu_data.device_ctypes_pointer - - def copy_to_device(self, ary, stream=None, context=None): - """Copy `ary` to `self`. - - If `ary` is a HSA memory, perform a device-to-device transfer. - Otherwise, perform a a host-to-device transfer. - - If `stream` is a stream object, an async copy to used. 
- """ - if ary.size == 0: - # Nothing to do - return - - if context is not None: - if self.dgpu_data is not None: - expect, got = self._context, context - if expect.unproxy != got.unproxy: - raise HsaContextMismatchError(expect=expect, got=got) - else: - context = self._context - - # TODO: Worry about multiple dGPUs - #if _driver.is_device_memory(ary): - # sz = min(self.alloc_size, ary.alloc_size) - # _driver.device_to_device(self, ary, sz) - #else: - # sz = min(_driver.host_memory_size(ary), self.alloc_size) - - sz = self.alloc_size - - # host_to_dGPU(context, dst, src, size): - if stream is None: - _driver.hsa.implicit_sync() - - if isinstance(ary, DeviceNDArray): - _driver.dGPU_to_dGPU(self._context, self, ary, sz) - else: - _driver.host_to_dGPU(self._context, self, ary, sz) - else: - if isinstance(ary, DeviceNDArray): - _driver.async_dGPU_to_dGPU(dst_ctx=self._context, - src_ctx=ary._context, - dst=self, src=ary, size=sz, - stream=stream) - else: - _driver.async_host_to_dGPU(dst_ctx=self._context, - src_ctx=devices.get_cpu_context(), - dst=self, src=ary, size=sz, - stream=stream) - - def copy_to_host(self, ary=None, stream=None): - """Copy ``self`` to ``ary`` or create a new Numpy ndarray - if ``ary`` is ``None``. - - The transfer is synchronous: the function returns after the copy - is finished. - - Always returns the host array. 
- - Example:: - - import numpy as np - from numba import hsa - - arr = np.arange(1000) - d_arr = hsa.to_device(arr) - - my_kernel[100, 100](d_arr) - - result_array = d_arr.copy_to_host() - """ - if ary is None: # destination does not exist - hostary = np.empty(shape=self.alloc_size, dtype=np.byte) - else: # destination does exist, it's `ary`, check it - if ary.dtype != self.dtype: - raise TypeError('incompatible dtype') - - if ary.shape != self.shape: - scalshapes = (), (1,) - if not (ary.shape in scalshapes and self.shape in scalshapes): - raise TypeError('incompatible shape; device %s; host %s' % - (self.shape, ary.shape)) - if ary.strides != self.strides: - scalstrides = (), (self.dtype.itemsize,) - if not (ary.strides in scalstrides and - self.strides in scalstrides): - raise TypeError('incompatible strides; device %s; host %s' % - (self.strides, ary.strides)) - hostary = ary # this is supposed to be a ptr for writing - - # a location for the data exists as `hostary` - assert self.alloc_size >= 0, "Negative memory size" - - context = self._context - - # copy the data from the device to the hostary - if self.alloc_size != 0: - sz = self.alloc_size - if stream is None: - _driver.hsa.implicit_sync() - _driver.dGPU_to_host(context, hostary, self, sz) - else: - _driver.async_dGPU_to_host(dst_ctx=devices.get_cpu_context(), - src_ctx=self._context, - dst=hostary, src=self, - size=sz, stream=stream) - - # if the location for the data was originally None - # then create a new ndarray and plumb in the new memory - if ary is None: - if self.size == 0: - hostary = np.ndarray(shape=self.shape, dtype=self.dtype, - buffer=hostary) - else: - hostary = np.ndarray(shape=self.shape, dtype=self.dtype, - strides=self.strides, buffer=hostary) - else: # else hostary points to ary and how has the right memory - hostary = ary - - return hostary - - def as_hsa_arg(self): - """Returns a device memory object that is used as the argument. 
- """ - return self.dgpu_data - - -class DeviceNDArray(DeviceNDArrayBase): - ''' - An on-dGPU array type - ''' - def is_f_contiguous(self): - ''' - Return true if the array is Fortran-contiguous. - ''' - return self._dummy.is_f_contig - - def is_c_contiguous(self): - ''' - Return true if the array is C-contiguous. - ''' - return self._dummy.is_c_contig - - def reshape(self, *newshape, **kws): - """ - Reshape the array without changing its contents, similarly to - :meth:`numpy.ndarray.reshape`. Example:: - - d_arr = d_arr.reshape(20, 50, order='F') - """ - if len(newshape) == 1 and isinstance(newshape[0], (tuple, list)): - newshape = newshape[0] - - cls = type(self) - if newshape == self.shape: - # nothing to do - return cls(shape=self.shape, strides=self.strides, - dtype=self.dtype, dgpu_data=self.dgpu_data) - - newarr, extents = self._dummy.reshape(*newshape, **kws) - - if extents == [self._dummy.extent]: - return cls(shape=newarr.shape, strides=newarr.strides, - dtype=self.dtype, dgpu_data=self.dgpu_data) - else: - raise NotImplementedError("operation requires copying") - - def ravel(self, order='C'): - ''' - Flatten the array without changing its contents, similar to - :meth:`numpy.ndarray.ravel`. - ''' - cls = type(self) - newarr, extents = self._dummy.ravel(order=order) - - if extents == [self._dummy.extent]: - return cls(shape=newarr.shape, strides=newarr.strides, - dtype=self.dtype, dgpu_data=self.dgpu_data) - - else: - raise NotImplementedError("operation requires copying") - - -class HostArray(np.ndarray): - __hsa_memory__ = True - - @property - def device_ctypes_pointer(self): - return self.ctypes.data_as(c_void_p) - - -def from_array_like(ary, dgpu_data=None): - "Create a DeviceNDArray object that is like ary." 
- if ary.ndim == 0: - ary = ary.reshape(1) - return DeviceNDArray(ary.shape, ary.strides, ary.dtype, - dgpu_data=dgpu_data) - - - -errmsg_contiguous_buffer = ("Array contains non-contiguous buffer and cannot " - "be transferred as a single memory region. Please " - "ensure contiguous buffer with numpy " - ".ascontiguousarray()") - - -def _single_buffer(ary): - i = np.argmax(ary.strides) - size = ary.strides[i] * ary.shape[i] - return size == ary.nbytes - - -def sentry_contiguous(ary): - if not ary.flags['C_CONTIGUOUS'] and not ary.flags['F_CONTIGUOUS']: - if ary.strides[0] == 0: - # Broadcasted, ensure inner contiguous - return sentry_contiguous(ary[0]) - - elif _single_buffer(ary): - return True - - else: - raise ValueError(errmsg_contiguous_buffer) - - -def auto_device(obj, context, stream=None, copy=True): - """ - Create a DeviceArray like obj and optionally copy data from - host to device. If obj already represents device memory, it is returned and - no copy is made. - """ - if _driver.is_device_memory(obj): # it's already on the dGPU - return obj, False - else: # needs to be copied to the dGPU - sentry_contiguous(obj) - devobj = from_array_like(obj) - if copy: - devobj.copy_to_device(obj, stream=stream, context=context) - return devobj, True - - diff --git a/numba/numba/roc/hsadrv/devices.py b/numba/numba/roc/hsadrv/devices.py deleted file mode 100644 index 904bad53e..000000000 --- a/numba/numba/roc/hsadrv/devices.py +++ /dev/null @@ -1,157 +0,0 @@ -""" -Expose each GPU device directly -""" -from __future__ import print_function, absolute_import, division -import functools -from numba import servicelib -from .driver import hsa as driver, Context as _Context - - -class _culist(object): - """A thread local list of GPU instances - """ - - def __init__(self): - self._lst = None - - @property - def _gpus(self): - if not self._lst: - self._lst = self._init_gpus() - return self._lst - - def _init_gpus(self): - gpus = [] - for com in driver.components: - 
gpus.append(CU(com)) - return gpus - - def __getitem__(self, item): - return self._gpus[item] - - def append(self, item): - return self._gpus.append(item) - - def __len__(self): - return len(self._gpus) - - def __nonzero__(self): - return bool(self._gpus) - - def __iter__(self): - return iter(self._gpus) - - __bool__ = __nonzero__ - - def reset(self): - for gpu in self: - gpu.reset() - - @property - def current(self): - """Get the current GPU object associated with the thread - """ - return _custack.top - - -cus = _culist() -del _culist - - -class CU(object): - def __init__(self, cu): - self._cu = cu - self._context = None - - def __getattr__(self, key): - """Redirect to self._gpu - """ - if key.startswith('_'): - raise AttributeError(key) - return getattr(self._cu, key) - - def __repr__(self): - return repr(self._cu) - - def associate_context(self): - """Associate the context of this GPU to the running thread - """ - # No context was created for this GPU - if self._context is None: - self._context = self._cu.create_context() - - return self._context - - def __enter__(self): - self.associate_context() - _custack.push(self) - - def __exit__(self, exc_type, exc_val, exc_tb): - assert _get_device() is self - self._context.pop() - _custack.pop() - - def reset(self): - if self._context: - self._context.reset() - self._context = None - - -_cpu_context = None - - -def get_cpu_context(): - global _cpu_context - if _cpu_context is None: - cpu_agent = [a for a in driver.agents if not a.is_component][0] - _cpu_context = _Context(cpu_agent) - return _cpu_context - - -def get_gpu(i): - return cus[i] - -def get_num_gpus(): - return len(cus) - - -_custack = servicelib.TLStack() - - -def _get_device(devnum=0): - """Get the current device or use a device by device number. - """ - if not _custack: - _custack.push(get_gpu(devnum)) - return _custack.top - - -def get_context(devnum=0): - """Get the current device or use a device by device number, and - return the HSA context. 
- """ - return _get_device(devnum=devnum).associate_context() - - -def get_all_contexts(): - return [get_context(i) for i in range(get_num_gpus())] - - -def require_context(fn): - """ - A decorator to ensure a context for the HSA subsystem - """ - - @functools.wraps(fn) - def _require_cu_context(*args, **kws): - get_context() - return fn(*args, **kws) - - return _require_cu_context - - -def reset(): - cus.reset() - _custack.clear() - - diff --git a/numba/numba/roc/hsadrv/driver.py b/numba/numba/roc/hsadrv/driver.py deleted file mode 100644 index b200da285..000000000 --- a/numba/numba/roc/hsadrv/driver.py +++ /dev/null @@ -1,1548 +0,0 @@ -""" -HSA driver bridge implementation -""" - -from __future__ import absolute_import, print_function, division - -import sys -import atexit -import os -import ctypes -import struct -import traceback -import weakref -import logging -from contextlib import contextmanager - -from collections import Sequence, defaultdict, deque -from numba.utils import total_ordering -from numba import mviewbuf -from numba import utils -from numba import config -from .error import HsaSupportError, HsaDriverError, HsaApiError -from . 
import enums, enums_ext, drvapi -from numba.utils import longint as long -import numpy as np - - -_logger = logging.getLogger(__name__) - - -class HsaKernelTimedOut(HsaDriverError): - pass - - -def _device_type_to_string(device): - try: - return ['CPU', 'GPU', 'DSP'][device] - except IndexError: - return 'Unknown' - - -DEFAULT_HSA_DRIVER = '/opt/rocm/lib/libhsa-runtime64.so' - - -def _find_driver(): - envpath = os.environ.get('NUMBA_HSA_DRIVER', DEFAULT_HSA_DRIVER) - if envpath == '0': - # Force fail - _raise_driver_not_found() - - # Determine DLL type - if (struct.calcsize('P') != 8 - or sys.platform == 'win32' - or sys.platform == 'darwin'): - _raise_platform_not_supported() - else: - # Assume to be *nix like and 64 bit - dlloader = ctypes.CDLL - dldir = ['/usr/lib', '/usr/lib64'] - dlname = 'libhsa-runtime64.so' - - if envpath is not None: - try: - envpath = os.path.abspath(envpath) - except ValueError: - raise HsaSupportError("NUMBA_HSA_DRIVER %s is not a valid path" % - envpath) - if not os.path.isfile(envpath): - raise HsaSupportError("NUMBA_HSA_DRIVER %s is not a valid file " - "path. Note it must be a filepath of the .so/" - ".dll/.dylib or the driver" % envpath) - candidates = [envpath] - else: - # First search for the name in the default library path. - # If that is not found, try the specific path. - candidates = [dlname] + [os.path.join(x, dlname) for x in dldir] - - # Load the driver; Collect driver error information - path_not_exist = [] - driver_load_error = [] - - for path in candidates: - try: - dll = dlloader(path) - except OSError as e: - # Problem opening the DLL - path_not_exist.append(not os.path.isfile(path)) - driver_load_error.append(e) - else: - return dll - - # Problem loading driver - if all(path_not_exist): - _raise_driver_not_found() - else: - errmsg = '\n'.join(str(e) for e in driver_load_error) - _raise_driver_error(errmsg) - - -PLATFORM_NOT_SUPPORTED_ERROR = """ -HSA is not currently supported on this platform ({0}). 
-""" - - -def _raise_platform_not_supported(): - raise HsaSupportError(PLATFORM_NOT_SUPPORTED_ERROR.format(sys.platform)) - - -DRIVER_NOT_FOUND_MSG = """ -The HSA runtime library cannot be found. - -If you are sure that the HSA is installed, try setting environment -variable NUMBA_HSA_DRIVER with the file path of the HSA runtime shared -library. -""" - - -def _raise_driver_not_found(): - raise HsaSupportError(DRIVER_NOT_FOUND_MSG) - - -DRIVER_LOAD_ERROR_MSG = """ -A HSA runtime library was found, but failed to load with error: -%s -""" - - -def _raise_driver_error(e): - raise HsaSupportError(DRIVER_LOAD_ERROR_MSG % e) - - -MISSING_FUNCTION_ERRMSG = """driver missing function: %s. -""" - - -class Recycler(object): - def __init__(self): - self._garbage = [] - self.enabled = True - - def free(self, obj): - self._garbage.append(obj) - self.service() - - def _cleanup(self): - for obj in self._garbage: - obj._finalizer(obj) - del self._garbage[:] - - def service(self): - if self.enabled: - if len(self._garbage) > 10: - self._cleanup() - - def drain(self): - self._cleanup() - self.enabled = False - - -# The Driver ########################################################### - - -class Driver(object): - """ - Driver API functions are lazily bound. 
- """ - _singleton = None - _agent_map = None - _api_prototypes = drvapi.API_PROTOTYPES # avoid premature GC at exit - - _hsa_properties = { - 'version_major': (enums.HSA_SYSTEM_INFO_VERSION_MAJOR, ctypes.c_uint16), - 'version_minor': (enums.HSA_SYSTEM_INFO_VERSION_MINOR, ctypes.c_uint16), - 'timestamp': (enums.HSA_SYSTEM_INFO_TIMESTAMP, ctypes.c_uint64), - 'timestamp_frequency': (enums.HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, ctypes.c_uint16), - 'signal_max_wait': (enums.HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT, ctypes.c_uint64), - } - - def __new__(cls): - obj = cls._singleton - if obj is not None: - return obj - else: - obj = object.__new__(cls) - cls._singleton = obj - return obj - - def __init__(self): - try: - if config.DISABLE_HSA: - raise HsaSupportError("HSA disabled by user") - self.lib = _find_driver() - self.is_initialized = False - self.initialization_error = None - except HsaSupportError as e: - self.is_initialized = True - self.initialization_error = e - - self._agent_map = None - self._programs = {} - self._recycler = Recycler() - self._active_streams = weakref.WeakSet() - - def _initialize_api(self): - if self.is_initialized: - return - - self.is_initialized = True - try: - self.hsa_init() - except HsaApiError as e: - self.initialization_error = e - raise HsaDriverError("Error at driver init: \n%s:" % e) - else: - @atexit.register - def shutdown(): - try: - for agent in self.agents: - agent.release() - except AttributeError: - # this is because no agents initialised - # so self.agents isn't present - pass - else: - self._recycler.drain() - - def _initialize_agents(self): - if self._agent_map is not None: - return - - self._initialize_api() - - agent_ids = [] - - def on_agent(agent_id, ctxt): - agent_ids.append(agent_id) - return enums.HSA_STATUS_SUCCESS - - callback = drvapi.HSA_ITER_AGENT_CALLBACK_FUNC(on_agent) - self.hsa_iterate_agents(callback, None) - - agent_map = dict((agent_id, Agent(agent_id)) for agent_id in agent_ids) - self._agent_map = agent_map - 
- @property - def is_available(self): - self._initialize_api() - return self.initialization_error is None - - @property - def agents(self): - self._initialize_agents() - return self._agent_map.values() - - def create_program(self, model=enums.HSA_MACHINE_MODEL_LARGE, - profile=enums.HSA_PROFILE_FULL, - rounding_mode=enums.HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, - options=None): - program = drvapi.hsa_ext_program_t() - assert options is None - self.hsa_ext_program_create(model, profile, rounding_mode, - options, ctypes.byref(program)) - return Program(program) - - def create_signal(self, initial_value, consumers=None): - if consumers is None: - consumers = tuple(self.agents) - - consumers_len = len(consumers) - consumers_type = drvapi.hsa_agent_t * consumers_len - consumers = consumers_type(*[c._id for c in consumers]) - - result = drvapi.hsa_signal_t() - self.hsa_signal_create(initial_value, consumers_len, consumers, - ctypes.byref(result)) - return Signal(result.value) - - def __getattr__(self, fname): - # Initialize driver - self._initialize_api() - - # First try if it is an hsa property - try: - enum, typ = self._hsa_properties[fname] - result = typ() - self.hsa_system_get_info(enum, ctypes.byref(result)) - return result.value - except KeyError: - pass - - # if not a property... 
try if it is an api call - try: - proto = self._api_prototypes[fname] - except KeyError: - raise AttributeError(fname) - - if self.initialization_error is not None: - raise HsaSupportError("Error at driver init: \n%s:" % - self.initialization_error) - - # Find function in driver library - libfn = self._find_api(fname) - - for key, val in proto.items(): - setattr(libfn, key, val) - - def driver_wrapper(fn): - def wrapped(*args, **kwargs): - _logger.debug('call driver api: %s', fname) - return fn(*args, **kwargs) - return wrapped - - retval = driver_wrapper(libfn) - setattr(self, fname, retval) - return retval - - def _find_api(self, fname): - # Try regular - try: - return getattr(self.lib, fname) - except AttributeError: - pass - - # Not found. - # Delay missing function error to use - def absent_function(*args, **kws): - raise HsaDriverError(MISSING_FUNCTION_ERRMSG % fname) - - setattr(self, fname, absent_function) - return absent_function - - @property - def components(self): - """Returns a ordered list of components - - The first device should be picked first - """ - return list(filter(lambda a: a.is_component, reversed(sorted( - self.agents)))) - - def create_stream(self): - st = Stream() - self._active_streams.add(st) - return st - - def implicit_sync(self): - """ - Implicit synchronization for all asynchronous streams - across all devices. 
- """ - _logger.info("implicit sync") - for st in self._active_streams: - st.synchronize() - - -hsa = Driver() - -class HsaWrapper(object): - def __getattr__(self, fname): - try: - enum, typ = self._hsa_properties[fname] - except KeyError: - raise AttributeError( - "%r object has no attribute %r" % (self.__class__, fname)) - - func = getattr(hsa, self._hsa_info_function) - result = typ() - is_array_type = hasattr(typ, '_length_') - # if the result is not ctypes array, get a reference) - result_buff = result if is_array_type else ctypes.byref(result) - func(self._id, enum, result_buff) - - if not is_array_type or typ._type_ == ctypes.c_char: - return result.value - else: - return list(result) - - def __dir__(self): - return sorted(set(dir(type(self)) + - self.__dict__.keys() + - self._hsa_properties.keys())) - -@total_ordering -class Agent(HsaWrapper): - """Abstracts a HSA compute agent. - - This will wrap and provide an OO interface for hsa_agent_t C-API elements - """ - - # Note this will be handled in a rather unconventional way. When agents get - # initialized by the driver, a set of instances for all the available agents - # will be created. After that creation, the __new__ and __init__ methods will - # be replaced, and the constructor will act as a mapping from an agent_id to - # the equivalent Agent object. Any attempt to create an Agent with a non - # existing agent_id will result in an error. 
- # - # the logic for this resides in Driver._initialize_agents - - _hsa_info_function = 'hsa_agent_get_info' - _hsa_properties = { - 'name': (enums.HSA_AGENT_INFO_NAME, ctypes.c_char * 64), - 'vendor_name': (enums.HSA_AGENT_INFO_VENDOR_NAME, ctypes.c_char * 64), - 'feature': (enums.HSA_AGENT_INFO_FEATURE, drvapi.hsa_agent_feature_t), - 'wavefront_size': ( - enums.HSA_AGENT_INFO_WAVEFRONT_SIZE, ctypes.c_uint32), - 'workgroup_max_dim': ( - enums.HSA_AGENT_INFO_WORKGROUP_MAX_DIM, ctypes.c_uint16 * 3), - 'grid_max_dim': (enums.HSA_AGENT_INFO_GRID_MAX_DIM, drvapi.hsa_dim3_t), - 'grid_max_size': (enums.HSA_AGENT_INFO_GRID_MAX_SIZE, ctypes.c_uint32), - 'fbarrier_max_size': ( - enums.HSA_AGENT_INFO_FBARRIER_MAX_SIZE, ctypes.c_uint32), - 'queues_max': (enums.HSA_AGENT_INFO_QUEUES_MAX, ctypes.c_uint32), - 'queue_max_size': ( - enums.HSA_AGENT_INFO_QUEUE_MAX_SIZE, ctypes.c_uint32), - 'queue_type': ( - enums.HSA_AGENT_INFO_QUEUE_TYPE, drvapi.hsa_queue_type_t), - 'node': (enums.HSA_AGENT_INFO_NODE, ctypes.c_uint32), - '_device': (enums.HSA_AGENT_INFO_DEVICE, drvapi.hsa_device_type_t), - 'cache_size': (enums.HSA_AGENT_INFO_CACHE_SIZE, ctypes.c_uint32 * 4), - 'isa': (enums.HSA_AGENT_INFO_ISA, drvapi.hsa_isa_t), - } - - def __init__(self, agent_id): - # This init will only happen when initializing the agents. After - # the agent initialization the instances of this class are considered - # initialized and locked, so this method will be removed. 
- self._id = agent_id - self._recycler = hsa._recycler - self._queues = set() - self._initialize_regions() - self._initialize_mempools() - - @property - def device(self): - return _device_type_to_string(self._device) - - @property - def is_component(self): - return (self.feature & enums.HSA_AGENT_FEATURE_KERNEL_DISPATCH) != 0 - - @property - def regions(self): - return self._regions - - @property - def mempools(self): - return self._mempools - - def _initialize_regions(self): - region_ids = [] - - def on_region(region_id, ctxt): - region_ids.append(region_id) - return enums.HSA_STATUS_SUCCESS - - callback = drvapi.HSA_AGENT_ITERATE_REGIONS_CALLBACK_FUNC(on_region) - hsa.hsa_agent_iterate_regions(self._id, callback, None) - self._regions = _RegionList([MemRegion.instance_for(self, region_id) - for region_id in region_ids]) - - def _initialize_mempools(self): - mempool_ids = [] - - def on_region(_id, ctxt=None): - mempool_ids.append(_id) - return enums.HSA_STATUS_SUCCESS - - callback = drvapi.HSA_AMD_AGENT_ITERATE_MEMORY_POOLS_CALLBACK(on_region) - hsa.hsa_amd_agent_iterate_memory_pools(self._id, callback, None) - self._mempools = _RegionList([MemPool.instance_for(self, mempool_id) - for mempool_id in mempool_ids]) - - def _create_queue(self, size, callback=None, data=None, - private_segment_size=None, group_segment_size=None, - queue_type=None): - assert queue_type is not None - assert size <= self.queue_max_size - - cb_typ = drvapi.HSA_QUEUE_CALLBACK_FUNC - cb = ctypes.cast(None, cb_typ) if callback is None else cb_typ(callback) - result = ctypes.POINTER(drvapi.hsa_queue_t)() - private_segment_size = (ctypes.c_uint32(-1) - if private_segment_size is None - else private_segment_size) - group_segment_size = (ctypes.c_uint32(-1) - if group_segment_size is None - else group_segment_size) - hsa.hsa_queue_create(self._id, size, queue_type, cb, data, - private_segment_size, group_segment_size, - ctypes.byref(result)) - - q = Queue(self, result) - self._queues.add(q) - 
return weakref.proxy(q) - - def create_queue_single(self, *args, **kwargs): - kwargs['queue_type'] = enums.HSA_QUEUE_TYPE_SINGLE - return self._create_queue(*args, **kwargs) - - def create_queue_multi(self, *args, **kwargs): - kwargs['queue_type'] = enums.HSA_QUEUE_TYPE_MULTI - return self._create_queue(*args, **kwargs) - - def release(self): - """ - Release all resources - - Called at system teardown - """ - for q in list(self._queues): - q.release() - - def release_queue(self, queue): - self._queues.remove(queue) - self._recycler.free(queue) - - def __repr__(self): - return "".format(self._id, - self.device, - self.vendor_name, - self.name, - " (component)" if self.is_component else "") - - def _rank(self): - return (self.is_component, self.grid_max_size, self._device) - - def __lt__(self, other): - if isinstance(self, Agent): - return self._rank() < other._rank() - else: - return NotImplemented - - def __eq__(self, other): - if isinstance(self, Agent): - return self._rank() == other._rank() - else: - return NotImplemented - - def __hash__(self): - return hash(self._rank()) - - def create_context(self): - return Context(self) - - -class _RegionList(Sequence): - __slots__ = '_all', 'globals', 'readonlys', 'privates', 'groups' - - def __init__(self, lst): - self._all = tuple(lst) - self.globals = tuple(x for x in lst if x.kind == 'global') - self.readonlys = tuple(x for x in lst if x.kind == 'readonly') - self.privates = tuple(x for x in lst if x.kind == 'private') - self.groups = tuple(x for x in lst if x.kind == 'group') - - def __len__(self): - return len(self._all) - - def __contains__(self, item): - return item in self._all - - def __reversed__(self): - return reversed(self._all) - - def __getitem__(self, idx): - return self._all[idx] - - -class MemPool(HsaWrapper): - """Abstracts a HSA mem pool. 
- - This will wrap and provide an OO interface for hsa_amd_memory_pool_t - C-API elements - """ - _hsa_info_function = 'hsa_amd_memory_pool_get_info' - - _hsa_properties = { - 'segment': ( - enums_ext.HSA_AMD_MEMORY_POOL_INFO_SEGMENT, - drvapi.hsa_amd_segment_t - ), - '_flags': ( - enums_ext.HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, - ctypes.c_uint32 - ), - 'size': (enums_ext.HSA_AMD_MEMORY_POOL_INFO_SIZE, - ctypes.c_size_t), - 'alloc_allowed': (enums_ext.HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, - ctypes.c_bool), - 'alloc_granule': (enums_ext.HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, - ctypes.c_size_t), - 'alloc_alignment': (enums_ext.HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, - ctypes.c_size_t), - 'accessible_by_all': (enums_ext.HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, - ctypes.c_bool), - } - - _segment_name_map = { - enums_ext.HSA_AMD_SEGMENT_GLOBAL: 'global', - enums_ext.HSA_AMD_SEGMENT_READONLY: 'readonly', - enums_ext.HSA_AMD_SEGMENT_PRIVATE: 'private', - enums_ext.HSA_AMD_SEGMENT_GROUP: 'group', - } - - def __init__(self, agent, pool): - """Do not instantiate MemPool objects directly, use the factory class - method 'instance_for' to ensure MemPool identity""" - self._id = pool - self._owner_agent = agent - self._as_parameter_ = self._id - - @property - def kind(self): - return self._segment_name_map[self.segment] - - @property - def agent(self): - return self._owner_agent - - def supports(self, check_flag): - """ - Determines if a given feature is supported by this MemRegion. - Feature flags are found in "./enums_exp.py" under: - * hsa_amd_memory_pool_global_flag_t - Params: - check_flag: Feature flag to test - """ - if self.kind == 'global': - return self._flags & check_flag - else: - return False - - def allocate(self, nbytes): - assert self.alloc_allowed - assert nbytes >= 0 - buff = ctypes.c_void_p() - flags = ctypes.c_uint32(0) # From API docs "Must be 0"! 
- hsa.hsa_amd_memory_pool_allocate(self._id, nbytes, flags, ctypes.byref(buff)) - if buff.value is None: - raise HsaDriverError("Failed to allocate from {}".format(self)) - return buff - - _instance_dict = {} - - @classmethod - def instance_for(cls, owner, _id): - try: - return cls._instance_dict[_id] - except KeyError: - new_instance = cls(owner, _id) - cls._instance_dict[_id] = new_instance - return new_instance - - -class MemRegion(HsaWrapper): - """Abstracts a HSA memory region. - - This will wrap and provide an OO interface for hsa_region_t C-API elements - """ - _hsa_info_function = 'hsa_region_get_info' - _hsa_properties = { - 'segment': ( - enums.HSA_REGION_INFO_SEGMENT, - drvapi.hsa_region_segment_t - ), - '_flags': ( - enums.HSA_REGION_INFO_GLOBAL_FLAGS, - drvapi.hsa_region_global_flag_t - ), - 'host_accessible': (enums_ext.HSA_AMD_REGION_INFO_HOST_ACCESSIBLE, - ctypes.c_bool), - 'size': (enums.HSA_REGION_INFO_SIZE, - ctypes.c_size_t), - 'alloc_max_size': (enums.HSA_REGION_INFO_ALLOC_MAX_SIZE, - ctypes.c_size_t), - 'alloc_alignment': (enums.HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT, - ctypes.c_size_t), - 'alloc_granule': (enums.HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE, - ctypes.c_size_t), - 'alloc_allowed': (enums.HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED, - ctypes.c_bool), - } - - _segment_name_map = { - enums.HSA_REGION_SEGMENT_GLOBAL: 'global', - enums.HSA_REGION_SEGMENT_READONLY: 'readonly', - enums.HSA_REGION_SEGMENT_PRIVATE: 'private', - enums.HSA_REGION_SEGMENT_GROUP: 'group', - } - - def __init__(self, agent, region_id): - """Do not instantiate MemRegion objects directly, use the factory class - method 'instance_for' to ensure MemRegion identity""" - self._id = region_id - self._owner_agent = agent - self._as_parameter_ = self._id - - @property - def kind(self): - return self._segment_name_map[self.segment] - - @property - def agent(self): - return self._owner_agent - - def supports(self, check_flag): - """ - Determines if a given feature is supported by 
this MemRegion. - Feature flags are found in "./enums.py" under: - * hsa_region_global_flag_t - Params: - check_flag: Feature flag to test - """ - if self.kind == 'global': - return self._flags & check_flag - else: - return False - - def allocate(self, nbytes): - assert self.alloc_allowed - assert nbytes <= self.alloc_max_size - assert nbytes >= 0 - buff = ctypes.c_void_p() - hsa.hsa_memory_allocate(self._id, nbytes, ctypes.byref(buff)) - return buff - - def free(self, ptr): - hsa.hsa_memory_free(ptr) - - _instance_dict = {} - - @classmethod - def instance_for(cls, owner, _id): - try: - return cls._instance_dict[_id] - except KeyError: - new_instance = cls(owner, _id) - cls._instance_dict[_id] = new_instance - return new_instance - - -class Queue(object): - def __init__(self, agent, queue_ptr): - """The id in a queue is a pointer to the queue object returned by hsa_queue_create. - The Queue object has ownership on that queue object""" - self._agent = weakref.proxy(agent) - self._id = queue_ptr - self._as_parameter_ = self._id - self._finalizer = hsa.hsa_queue_destroy - - def release(self): - self._agent.release_queue(self) - - def __getattr__(self, fname): - return getattr(self._id.contents, fname) - - @contextmanager - def _get_packet(self, packet_type): - # Write AQL packet at the calculated queue index address - queue_struct = self._id.contents - queue_mask = queue_struct.size - 1 - assert (ctypes.sizeof(packet_type) == - ctypes.sizeof(drvapi.hsa_kernel_dispatch_packet_t)) - packet_array_t = (packet_type * queue_struct.size) - - # Obtain the current queue write index - index = hsa.hsa_queue_add_write_index_acq_rel(self._id, 1) - - while True: - read_offset = hsa.hsa_queue_load_read_index_acquire(self._id) - if read_offset <= index < read_offset + queue_struct.size: - break - - queue_offset = index & queue_mask - queue = packet_array_t.from_address(queue_struct.base_address) - packet = queue[queue_offset] - - # zero init - ctypes.memset(ctypes.addressof(packet), 
0, ctypes.sizeof(packet_type)) - yield packet - # Increment write index - # Ring the doorbell - hsa.hsa_signal_store_release(self._id.contents.doorbell_signal, index) - - def insert_barrier(self, dep_signal): - with self._get_packet(drvapi.hsa_barrier_and_packet_t) as packet: - # Populate packet - packet.dep_signal0 = dep_signal._id - - header = 0 - header |= enums.HSA_FENCE_SCOPE_SYSTEM << enums.HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE - header |= enums.HSA_FENCE_SCOPE_SYSTEM << enums.HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE - header |= enums.HSA_PACKET_TYPE_BARRIER_AND << enums.HSA_PACKET_HEADER_TYPE - header |= 1 << enums.HSA_PACKET_HEADER_BARRIER - - # Original example calls for an atomic store. - # Since we are on x86, store of aligned 16 bit is atomic. - # The C code is - # __atomic_store_n((uint16_t*)(&dispatch_packet->header), header, __ATOMIC_RELEASE); - packet.header = header - - def dispatch(self, symbol, kernargs, - workgroup_size=None, - grid_size=None, - signal=None): - _logger.info("dispatch %s", symbol.name) - dims = len(workgroup_size) - assert dims == len(grid_size) - assert 0 < dims <= 3 - assert grid_size >= workgroup_size - if workgroup_size > tuple(self._agent.workgroup_max_dim)[:dims]: - msg = "workgroupsize is too big {0} > {1}" - raise HsaDriverError(msg.format(workgroup_size, - tuple(self._agent.workgroup_max_dim)[:dims])) - s = signal if signal is not None else hsa.create_signal(1) - - # Note: following vector_copy.c - with self._get_packet(drvapi.hsa_kernel_dispatch_packet_t) as packet: - - # Populate packet - packet.setup |= dims << enums.HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS - - packet.workgroup_size_x = workgroup_size[0] - packet.workgroup_size_y = workgroup_size[1] if dims > 1 else 1 - packet.workgroup_size_z = workgroup_size[2] if dims > 2 else 1 - - packet.grid_size_x = grid_size[0] - packet.grid_size_y = grid_size[1] if dims > 1 else 1 - packet.grid_size_z = grid_size[2] if dims > 2 else 1 - - packet.completion_signal = s._id - - 
packet.kernel_object = symbol.kernel_object - - packet.kernarg_address = (0 if kernargs is None - else kernargs.value) - - packet.private_segment_size = symbol.private_segment_size - packet.group_segment_size = symbol.group_segment_size - - header = 0 - header |= enums.HSA_FENCE_SCOPE_SYSTEM << enums.HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE - header |= enums.HSA_FENCE_SCOPE_SYSTEM << enums.HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE - header |= enums.HSA_PACKET_TYPE_KERNEL_DISPATCH << enums.HSA_PACKET_HEADER_TYPE - - # Original example calls for an atomic store. - # Since we are on x86, store of aligned 16 bit is atomic. - # The C code is - # __atomic_store_n((uint16_t*)(&dispatch_packet->header), header, __ATOMIC_RELEASE); - packet.header = header - - # Wait on the dispatch completion signal - - # synchronous if no signal was provided - if signal is None: - _logger.info('wait for sychronous kernel to complete') - timeout = 10 - if not s.wait_until_ne_one(timeout=timeout): - msg = "Kernel timed out after {timeout} second" - raise HsaKernelTimedOut(msg.format(timeout=timeout)) - - def __dir__(self): - return sorted(set(dir(self._id.contents) + - self.__dict__.keys())) - - def owned(self): - return ManagedQueueProxy(self) - - -class ManagedQueueProxy(object): - def __init__(self, queue): - self._queue = weakref.ref(queue) - - def __getattr__(self, item): - return getattr(self._queue(), item) - - -class Signal(object): - """The id for the signal is going to be the hsa_signal_t returned by create_signal. - Lifetime of the underlying signal will be tied with this object". 
- Note that it is likely signals will have lifetime issues.""" - - def __init__(self, signal_id): - self._id = signal_id - self._as_parameter_ = self._id - utils.finalize(self, hsa.hsa_signal_destroy, self._id) - - def load_relaxed(self): - return hsa.hsa_signal_load_relaxed(self._id) - - def load_acquire(self): - return hsa.hsa_signal_load_acquire(self._id) - - def wait_until_ne_one(self, timeout=None): - """ - Returns a boolean to indicate whether the wait has timeout - """ - one = 1 - mhz = 10 ** 6 - if timeout is None: - # Infinite - expire = -1 # UINT_MAX - else: - # timeout as seconds - expire = timeout * hsa.timestamp_frequency * mhz - - # XXX: use active wait instead of blocked seem to avoid hang in docker - hsa.hsa_signal_wait_acquire(self._id, enums.HSA_SIGNAL_CONDITION_NE, - one, expire, - enums.HSA_WAIT_STATE_ACTIVE) - return self.load_relaxed() != one - - -class BrigModule(object): - def __init__(self, brig_buffer): - """ - Take a byte buffer of a Brig module - """ - buf = ctypes.create_string_buffer(brig_buffer) - self._buffer = buf - self._id = ctypes.cast(ctypes.addressof(buf), - drvapi.hsa_ext_module_t) - - @classmethod - def from_file(cls, file_name): - with open(file_name, 'rb') as fin: - buf = fin.read() - - return BrigModule(buf) - - def __len__(self): - return len(self._buffer) - - def __repr__(self): - return "".format(hex(id(self)), - len(self)) - - -class Program(object): - def __init__(self, model=enums.HSA_MACHINE_MODEL_LARGE, - profile=enums.HSA_PROFILE_FULL, - rounding_mode=enums.HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, - options=None, version_major=1, version_minor=0): - self._id = drvapi.hsa_ext_program_t() - assert options is None - - def check_fptr_return(hsa_status): - if hsa_status is not enums.HSA_STATUS_SUCCESS: - msg = ctypes.c_char_p() - hsa.hsa_status_string(hsa_status, ctypes.byref(msg)) - _logger.info(msg.value.decode("utf-8")) - exit(-hsa_status) - - support = ctypes.c_bool(0) - 
hsa.hsa_system_extension_supported(enums.HSA_EXTENSION_FINALIZER, - version_major, - version_minor, - ctypes.byref(support)) - - assert support.value, ('HSA system extension %s.%s not supported' % - (version_major, version_minor)) - - # struct of function pointers - self._ftabl = drvapi.hsa_ext_finalizer_1_00_pfn_t() - - # populate struct - hsa.hsa_system_get_extension_table(enums.HSA_EXTENSION_FINALIZER, - version_major, - version_minor, - ctypes.byref(self._ftabl)) - - ret = self._ftabl.hsa_ext_program_create(model, profile, - rounding_mode, options, - ctypes.byref(self._id)) - - check_fptr_return(ret) - - self._as_parameter_ = self._id - utils.finalize(self, self._ftabl.hsa_ext_program_destroy, - self._id) - - def add_module(self, module): - self._ftabl.hsa_ext_program_add_module(self._id, module._id) - - def finalize(self, isa, callconv=0, options=None): - """ - The program object is safe to be deleted after ``finalize``. - """ - code_object = drvapi.hsa_code_object_t() - control_directives = drvapi.hsa_ext_control_directives_t() - ctypes.memset(ctypes.byref(control_directives), 0, - ctypes.sizeof(control_directives)) - self._ftabl.hsa_ext_program_finalize(self._id, - isa, - callconv, - control_directives, - options, - enums.HSA_CODE_OBJECT_TYPE_PROGRAM, - ctypes.byref(code_object)) - return CodeObject(code_object) - - -class CodeObject(object): - def __init__(self, code_object): - self._id = code_object - self._as_parameter_ = self._id - utils.finalize(self, hsa.hsa_code_object_destroy, self._id) - - -class Executable(object): - def __init__(self): - ex = drvapi.hsa_executable_t() - hsa.hsa_executable_create(enums.HSA_PROFILE_FULL, - enums.HSA_EXECUTABLE_STATE_UNFROZEN, - None, - ctypes.byref(ex)) - self._id = ex - self._as_parameter_ = self._id - utils.finalize(self, hsa.hsa_executable_destroy, self._id) - - def load(self, agent, code_object): - hsa.hsa_executable_load_code_object(self._id, agent._id, - code_object._id, None) - - def freeze(self): - """Freeze 
executable before we can query for symbol""" - hsa.hsa_executable_freeze(self._id, None) - - def get_symbol(self, agent, name): - symbol = drvapi.hsa_executable_symbol_t() - hsa.hsa_executable_get_symbol(self._id, None, - ctypes.create_string_buffer( - name.encode('ascii')), - agent._id, 0, - ctypes.byref(symbol)) - return Symbol(name, symbol) - - -class Symbol(HsaWrapper): - _hsa_info_function = 'hsa_executable_symbol_get_info' - _hsa_properties = { - 'kernel_object': ( - enums.HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, - ctypes.c_uint64, - ), - 'kernarg_segment_size': ( - enums.HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, - ctypes.c_uint32, - ), - 'group_segment_size': ( - enums.HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, - ctypes.c_uint32, - ), - 'private_segment_size': ( - enums.HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, - ctypes.c_uint32, - ), - } - - def __init__(self, name, symbol_id): - self._id = symbol_id - self.name = name - - -class MemoryPointer(object): - __hsa_memory__ = True - - def __init__(self, context, pointer, size, finalizer=None): - assert isinstance(context, Context) - self.context = context - self.device_pointer = pointer - self.size = size - self._hsa_memsize_ = size - self.finalizer = finalizer - self.is_managed = finalizer is not None - self.is_alive = True - self.refct = 0 - - def __del__(self): - try: - if self.is_managed and self.is_alive: - self.finalizer() - except: - traceback.print_exc() - - def own(self): - return OwnedPointer(weakref.proxy(self)) - - def free(self): - """ - Forces the device memory to the trash. 
- """ - if self.is_managed: - if not self.is_alive: - raise RuntimeError("Freeing dead memory") - self.finalizer() - self.is_alive = False - - def view(self): - pointer = self.device_pointer.value - view = MemoryPointer(self.context, pointer, self.size) - return OwnedPointer(weakref.proxy(self), view) - - @property - def device_ctypes_pointer(self): - return self.device_pointer - - def allow_access_to(self, *agents): - """ - Grant access to given *agents*. - Upon return, only the listed-agents and the owner agent have direct - access to this pointer. - """ - ct = len(agents) - if ct == 0: - return - agent_array = (ct * drvapi.hsa_agent_t)(*[a._id for a in agents]) - hsa.hsa_amd_agents_allow_access(ct, agent_array, None, - self.device_pointer) - - -class HostMemory(mviewbuf.MemAlloc): - def __init__(self, context, owner, pointer, size): - self.context = context - self.owned = owner - self.size = size - self.host_pointer = pointer - self.handle = self.host_pointer - - # For buffer interface - self._buflen_ = self.size - self._bufptr_ = self.host_pointer.value - - def own(self): - return self - - -class OwnedPointer(object): - def __init__(self, memptr, view=None): - self._mem = memptr - self._mem.refct += 1 - if view is None: - self._view = self._mem - else: - assert not view.is_managed - self._view = view - - def __del__(self): - try: - self._mem.refct -= 1 - assert self._mem.refct >= 0 - if self._mem.refct == 0: - self._mem.free() - except ReferenceError: - pass - except: - traceback.print_exc() - - def __getattr__(self, fname): - """Proxy MemoryPointer methods - """ - return getattr(self._view, fname) - - -class Context(object): - """ - A context is associated with a component - """ - - """ - Parameters: - agent the agent, and instance of the class Agent - """ - - # a weak set of active Stream objects - _active_streams = weakref.WeakSet() - - def __init__(self, agent): - self._agent = weakref.proxy(agent) - - if self._agent.is_component: # only components have 
queues - qs = agent.queue_max_size - defq = self._agent.create_queue_multi(qs, callback=self._callback) - self._defaultqueue = defq.owned() - - self.allocations = utils.UniqueDict() - # get pools - coarse_flag = enums_ext.HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED - fine_flag = enums_ext.HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED - alloc_mps = [mp for mp in agent.mempools.globals if mp.alloc_allowed] - self._coarsegrain_mempool = None - self._finegrain_mempool = None - for mp in alloc_mps: - if mp.supports(coarse_flag): - self._coarsegrain_mempool = mp - if mp.supports(fine_flag): - self._finegrain_mempool = mp - - def _callback(self, status, queue): - drvapi._check_error(status, queue) - sys.exit(1) - - @property - def unproxy(self): - # This is a trick to help handle weakproxy comparison with actual - # instance. - # See https://stackoverflow.com/a/49319989 for inspiration and the - # whole page for more general discussion. - return self - - @property - def default_queue(self): - return self._defaultqueue - - @property - def agent(self): - return self._agent - - @property - def coarsegrain_mempool(self): - if self._coarsegrain_mempool is None: - msg = 'coarsegrain mempool is not available in {}'.format(self._agent) - raise ValueError(msg) - return self._coarsegrain_mempool - - @property - def finegrain_mempool(self): - if self._finegrain_mempool is None: - msg = 'finegrain mempool is not available in {}'.format(self._agent) - raise ValueError(msg) - return self._finegrain_mempool - - def memalloc(self, nbytes, memTypeFlags=None, hostAccessible=True): - """ - Allocates memory. - Parameters: - nbytes the number of bytes to allocate. - memTypeFlags the flags for which the memory region must have support,\ - due to the inherent rawness of the underlying call, the\ - validity of the flag is not checked, cf. C language. 
- hostAccessible boolean as to whether the region in which the\ - allocation takes place should be host accessible - """ - hw = self._agent.device - all_reg = self._agent.regions - flag_ok_r = list() # regions which pass the memTypeFlags test - regions = list() - - # don't support DSP - if hw == "GPU" or hw == "CPU": - # check user requested flags - if memTypeFlags is not None: - for r in all_reg: - count = 0 - for flags in memTypeFlags: - if r.supports(flags): - count += 1 - if count == len(memTypeFlags): - flag_ok_r.append(r) - else: - flag_ok_r = all_reg - - # check system required flags for allocation - for r in flag_ok_r: - # check the mem region is coarse grained if dGPU present - # TODO: this probably ought to explicitly check for a dGPU. - if (hw == "GPU" and - not r.supports(enums.HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED)): - continue - # check accessibility criteria - if hostAccessible: - if r.host_accessible: - regions.append(r) - else: - if not r.host_accessible: - regions.append(r) - - else: - raise RuntimeError("Unknown device type string \"%s\"" % hw) - - assert len(regions) > 0, "No suitable memory regions found." - - # walk though valid regions trying to malloc until there's none left - mem = None - for region_id in regions: - try: - mem = MemRegion.instance_for(self._agent, region_id)\ - .allocate(nbytes) - except HsaApiError: # try next memory region if an allocation fails - pass - else: # allocation succeeded, stop looking for memory - break - - if mem is None: - raise RuntimeError("Memory allocation failed. No agent/region \ - combination could meet allocation restraints \ - (hardware = %s, size = %s, flags = %s)." 
% \ - ( hw, nbytes, memTypeFlags)) - - fin = _make_mem_finalizer(hsa.hsa_memory_free) - ret = MemoryPointer(weakref.proxy(self), mem, nbytes, - finalizer=fin(self, mem)) - if mem.value is None: - raise RuntimeError("MemoryPointer has no value") - self.allocations[mem.value] = ret - return ret.own() - - def mempoolalloc(self, nbytes, allow_access_to=(), finegrain=False): - """ - Allocates memory in a memory pool. - Parameters: - *nbytes* the number of bytes to allocate. - *allow_acces_to* - *finegrain* - """ - mempool = (self.finegrain_mempool - if finegrain - else self.coarsegrain_mempool) - - buff = mempool.allocate(nbytes) - fin = _make_mem_finalizer(hsa.hsa_amd_memory_pool_free) - mp = MemoryPointer(weakref.proxy(self), buff, nbytes, - finalizer=fin(self, buff)) - mp.allow_access_to(*allow_access_to) - self.allocations[buff.value] = mp - return mp.own() - - def memhostalloc(self, size, finegrain, allow_access_to): - mem = self.mempoolalloc(size, allow_access_to=allow_access_to, - finegrain=finegrain) - return HostMemory(weakref.proxy(self), owner=mem, - pointer=mem.device_pointer, size=mem.size) - - -class Stream(object): - """ - An asynchronous stream for async API - """ - def __init__(self): - self._signals = deque() - self._callbacks = defaultdict(list) - - def _add_signal(self, signal): - """ - Add a signal that corresponds to an async task. - """ - # XXX: too many pending signals seem to cause async copy to hang - if len(self._signals) > 100: - self._sync(50) - self._signals.append(signal) - - def _add_callback(self, callback): - assert callable(callback) - self._callbacks[self._get_last_signal()].append(callback) - - def _get_last_signal(self): - """ - Get the last signal. - """ - return self._signals[-1] if self._signals else None - - def synchronize(self): - """ - Synchronize the stream. 
- """ - self._sync(len(self._signals)) - - def _sync(self, limit): - ct = 0 - while self._signals: - if ct >= limit: - break - sig = self._signals.popleft() - if sig.load_relaxed() == 1: - sig.wait_until_ne_one() - for cb in self._callbacks[sig]: - cb() - del self._callbacks[sig] - ct += 1 - - -def _make_mem_finalizer(dtor): - """ - finalises memory - Parameters: - dtor a function that will delete/free held memory from a reference - - Returns: - Finalising function - """ - def mem_finalize(context, handle): - allocations = context.allocations - sync = hsa.implicit_sync - - def core(): - _logger.info("Current allocations: %s", allocations) - if allocations: - _logger.info("Attempting delete on %s" % handle.value) - del allocations[handle.value] - sync() # implicit sync - dtor(handle) - return core - - return mem_finalize - -def device_pointer(obj): - "Get the device pointer as an integer" - return device_ctypes_pointer(obj).value - - -def device_ctypes_pointer(obj): - "Get the ctypes object for the device pointer" - if obj is None: - return c_void_p(0) - require_device_memory(obj) - return obj.device_ctypes_pointer - - -def is_device_memory(obj): - """All HSA dGPU memory object is recognized as an instance with the - attribute "__hsa_memory__" defined and its value evaluated to True. - - All HSA memory object should also define an attribute named - "device_pointer" which value is an int(or long) object carrying the pointer - value of the device memory address. This is not tested in this method. - """ - return getattr(obj, '__hsa_memory__', False) - - -def require_device_memory(obj): - """A sentry for methods that accept HSA memory object. - """ - if not is_device_memory(obj): - raise Exception("Not a HSA memory object.") - - -def host_pointer(obj): - """ - NOTE: The underlying data pointer from the host data buffer is used and - it should not be changed until the operation which can be asynchronous - completes. 
- """ - if isinstance(obj, (int, long)): - return obj - - forcewritable = isinstance(obj, np.void) - return mviewbuf.memoryview_get_buffer(obj, forcewritable) - - -def host_to_dGPU(context, dst, src, size): - """ - Copy data from a host memory region to a dGPU. - Parameters: - context the dGPU context - dst a pointer to the destination location in dGPU memory - src a pointer to the source location in host memory - size the size (in bytes) of data to transfer - """ - _logger.info("CPU->dGPU") - if size < 0: - raise ValueError("Invalid size given: %s" % size) - - hsa.hsa_memory_copy(device_pointer(dst), host_pointer(src), size) - - -def dGPU_to_host(context, dst, src, size): - """ - Copy data from a host memory region to a dGPU. - Parameters: - context the dGPU context - dst a pointer to the destination location in dGPU memory - src a pointer to the source location in host memory - size the size (in bytes) of data to transfer - """ - _logger.info("dGPU->CPU") - if size < 0: - raise ValueError("Invalid size given: %s" % size) - - hsa.hsa_memory_copy(host_pointer(dst), device_pointer(src), size) - - -def dGPU_to_dGPU(context, dst, src, size): - _logger.info("dGPU->dGPU") - if size < 0: - raise ValueError("Invalid size given: %s" % size) - - hsa.hsa_memory_copy(device_pointer(dst), device_pointer(src), size) - - -def async_host_to_dGPU(dst_ctx, src_ctx, dst, src, size, stream): - _logger.info("Async CPU->dGPU") - async_copy_dgpu(dst_ctx=dst_ctx, src_ctx=src_ctx, - src=host_pointer(src), dst=device_pointer(dst), - size=size, stream=stream) - - -def async_dGPU_to_host(dst_ctx, src_ctx, dst, src, size, stream): - _logger.info("Async dGPU->CPU") - async_copy_dgpu(dst_ctx=dst_ctx, src_ctx=src_ctx, - dst=host_pointer(dst), src=device_pointer(src), - size=size, stream=stream) - - -def async_dGPU_to_dGPU(dst_ctx, src_ctx, dst, src, size, stream): - _logger.info("Async dGPU->dGPU") - async_copy_dgpu(dst_ctx=dst_ctx, src_ctx=src_ctx, - dst=device_pointer(dst), 
src=device_pointer(src), - size=size, stream=stream) - - -def async_copy_dgpu(dst_ctx, src_ctx, dst, src, size, stream): - if size < 0: - raise ValueError("Invalid size given: %s" % size) - - completion_signal = hsa.create_signal(1) - dependent_signal = stream._get_last_signal() - - if dependent_signal is not None: - dsignal = drvapi.hsa_signal_t(dependent_signal._id) - signals = (1, ctypes.byref(dsignal), completion_signal) - else: - signals = (0, None, completion_signal) - - hsa.hsa_amd_memory_async_copy(dst, dst_ctx._agent._id, - src, src_ctx._agent._id, - size, *signals) - - stream._add_signal(completion_signal) - - -def dgpu_count(): - """ - Returns the number of discrete GPUs present on the current machine. - """ - ngpus = 0 - try: - for a in hsa.agents: - if a.is_component and a.device == 'GPU': - ngpus += 1 - except: - pass - return ngpus - -""" -True if a dGPU is present in the current machine. -""" -dgpu_present = dgpu_count() > 0 - diff --git a/numba/numba/roc/hsadrv/drvapi.py b/numba/numba/roc/hsadrv/drvapi.py deleted file mode 100644 index 8ffcdc8d8..000000000 --- a/numba/numba/roc/hsadrv/drvapi.py +++ /dev/null @@ -1,1738 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import ctypes -import warnings - -from ... import utils - -from . 
import enums -from .error import HsaApiError, HsaWarning - -_PTR = ctypes.POINTER - -# This deals with types which are defined as -# typedef struct { uint64_t handle;}; -handle_struct = ctypes.c_uint64 - -#------------------------------------------------------------------------------ -# HSA types from hsa.h, ordered as per header file - -hsa_status_t = ctypes.c_int # enum -class hsa_dim3_t(ctypes.Structure): - _fields_ = [ - ('x', ctypes.c_uint32), - ('y', ctypes.c_uint32), - ('z', ctypes.c_uint32) - ] -hsa_access_permission_t = ctypes.c_int # enum -hsa_endianness_t = ctypes.c_int # enum -hsa_machine_model_t = ctypes.c_int # enum -hsa_profile_t = ctypes.c_int # enum -hsa_system_info_t = ctypes.c_int # enum -hsa_extension_t = ctypes.c_int # enum -hsa_agent_t = handle_struct -hsa_agent_feature_t = ctypes.c_int # enum -hsa_device_type_t = ctypes.c_int # enum -hsa_default_float_rounding_mode_t = ctypes.c_int # enum -hsa_agent_info_t = ctypes.c_int # enum -hsa_exception_policy_t = ctypes.c_int # enum -hsa_signal_t = handle_struct -hsa_signal_value_t = ctypes.c_uint64 if enums.HSA_LARGE_MODEL else ctypes.c_uint32 -hsa_signal_condition_t = ctypes.c_int # enum -hsa_wait_state_t = ctypes.c_int # enum -hsa_region_t = handle_struct -hsa_queue_type_t = ctypes.c_int # enum -hsa_queue_feature_t = ctypes.c_int # enum -class hsa_queue_t(ctypes.Structure): - """In theory, this should be aligned to 64 bytes. 
In any case, allocation - of this structure is done by the hsa library""" - _fields_ = [ - ('type', hsa_queue_type_t), - ('features', ctypes.c_uint32), - ('base_address', ctypes.c_void_p), # if LARGE MODEL - ('doorbell_signal', hsa_signal_t), - ('size', ctypes.c_uint32), - ('reserved1', ctypes.c_uint32), - ('id', ctypes.c_uint32), - ] -hsa_packet_type_t = ctypes.c_int # enum -hsa_fence_scope_t = ctypes.c_int # enum -hsa_packet_header_t = ctypes.c_int # enum -hsa_packet_header_width_t = ctypes.c_int # enum -hsa_kernel_dispatch_packet_setup_t = ctypes.c_int # enum -hsa_kernel_dispatch_packet_setup_width_t = ctypes.c_int # enum -class hsa_kernel_dispatch_packet_t(ctypes.Structure): - _fields_ = [ - ('header', ctypes.c_uint16), - ('setup', ctypes.c_uint16), - ('workgroup_size_x', ctypes.c_uint16), - ('workgroup_size_y', ctypes.c_uint16), - ('workgroup_size_z', ctypes.c_uint16), - ('reserved0', ctypes.c_uint16), # Must be zero - ('grid_size_x', ctypes.c_uint32), - ('grid_size_y', ctypes.c_uint32), - ('grid_size_z', ctypes.c_uint32), - ('private_segment_size', ctypes.c_uint32), - ('group_segment_size', ctypes.c_uint32), - ('kernel_object', ctypes.c_uint64), - # NOTE: Small model not dealt with properly...! - # ifdef HSA_LARGE_MODEL - ('kernarg_address', ctypes.c_uint64), - # SMALL Machine has a reserved uint32 - ('reserved2', ctypes.c_uint64), # Must be zero - ('completion_signal', hsa_signal_t), - ] -class hsa_agent_dispatch_packet_t(ctypes.Structure): - """This should be aligned to HSA_PACKET_ALIGN_BYTES (64)""" - _fields_ = [ - ('header', ctypes.c_uint16), - ('type', ctypes.c_uint16), - ('reserved0', ctypes.c_uint32), - # NOTE: Small model not dealt with properly...! 
- ('return_address', ctypes.c_void_p), - ('arg', ctypes.c_uint64 * 4), - ('reserved2', ctypes.c_uint64), - ('completion_signal', hsa_signal_t), - ] -class hsa_barrier_and_packet_t(ctypes.Structure): - _fields_ = [ - ('header', ctypes.c_uint16), - ('reserved0', ctypes.c_uint16), - ('reserved1', ctypes.c_uint32), - ('dep_signal0', hsa_signal_t), - ('dep_signal1', hsa_signal_t), - ('dep_signal2', hsa_signal_t), - ('dep_signal3', hsa_signal_t), - ('dep_signal4', hsa_signal_t), - ('reserved2', ctypes.c_uint64), - ('completion_signal', hsa_signal_t), - ] - -hsa_barrier_or_packet_t = hsa_barrier_and_packet_t - -hsa_region_segment_t = ctypes.c_int # enum -hsa_region_global_flag_t = ctypes.c_int # enum -hsa_region_info_t = ctypes.c_int # enum -hsa_symbol_kind_t = ctypes.c_int # enum -hsa_variable_allocation_t = ctypes.c_int # enum -hsa_symbol_linkage_t = ctypes.c_int # enum -hsa_variable_segment_t = ctypes.c_int # enum -hsa_isa_t = handle_struct -hsa_isa_info_t = ctypes.c_int # enum -hsa_code_object_t = handle_struct -hsa_callback_data_t = handle_struct -hsa_code_object_type_t = ctypes.c_int # enum -hsa_code_object_info_t = ctypes.c_int # enum -hsa_code_symbol_t = handle_struct -hsa_code_symbol_info_t = ctypes.c_int # enum -hsa_executable_t = handle_struct -hsa_executable_state_t = ctypes.c_int # enum -hsa_executable_info_t = ctypes.c_int # enum -hsa_executable_symbol_t = handle_struct -hsa_executable_symbol_info_t = ctypes.c_int # enum -#------------------------------------------------------------------------------ - - -#------------------------------------------------------------------------------ -# HSA types from Brig.h, ordered as per header file -# NOTE: not all of the definitions are needed -BrigVersion32_t = ctypes.c_uint32 -MODULE_IDENTIFICATION_LENGTH=8 -class BrigModuleHeader(ctypes.Structure): - _fields_ = [ - ('identification', ctypes.c_char*MODULE_IDENTIFICATION_LENGTH), - ('brigMajor', BrigVersion32_t), - ('brigMinor', BrigVersion32_t), - ('byteCount', 
ctypes.c_uint64), - ('hash', ctypes.c_uint8*64), - ('reserved', ctypes.c_uint32), - ('sectionCount', ctypes.c_uint32), - ('sectionIndex', ctypes.c_uint64), - ] - -BrigModule_t = _PTR(BrigModuleHeader) - -#------------------------------------------------------------------------------ - - -#------------------------------------------------------------------------------ -# HSA types from hsa_ext_amd.h, ordered as per header file -hsa_amd_agent_info_t = ctypes.c_int # enum -hsa_amd_region_info_t = ctypes.c_int # enum -hsa_amd_coherency_type_t = ctypes.c_int # enum -class hsa_amd_profiling_dispatch_time_t(ctypes.Structure): - _fields_ = [ - ('start', ctypes.c_uint64), - ('end', ctypes.c_uint64), - ] - -# typedef bool (*hsa_amd_signal_handler)(hsa_signal_value_t value, void* arg); -hsa_amd_signal_handler = _PTR( - ctypes.CFUNCTYPE(ctypes.c_bool, - hsa_signal_value_t, - ctypes.c_void_p) - ) - -hsa_amd_segment_t = ctypes.c_int # enum -hsa_amd_memory_pool_t = handle_struct -hsa_amd_memory_pool_global_flag_t = ctypes.c_int # enum -hsa_amd_memory_pool_info_t = ctypes.c_int # enum -hsa_amd_memory_pool_access_t = ctypes.c_int # enum -hsa_amd_link_info_type_t = ctypes.c_int # enum -hsa_amd_memory_pool_link_info_t = ctypes.c_int # enum -hsa_amd_agent_memory_pool_info_t = ctypes.c_int # enum -class hsa_amd_image_descriptor_t(ctypes.Structure): - _fields_ = [ - ('version', ctypes.c_uint32), - ('deviceID', ctypes.c_uint32), - ('data', ctypes.c_uint32*1), - ] -#------------------------------------------------------------------------------ - - -#------------------------------------------------------------------------------ -# HSA types from hsa_ext_finalize.h, ordered as per header file -hsa_ext_module_t = BrigModule_t - -hsa_ext_program_t = handle_struct -hsa_ext_program_info_t = ctypes.c_int # enum -hsa_ext_finalizer_call_convention_t = ctypes.c_int # enum -class hsa_ext_control_directives_t(ctypes.Structure): - _fields_ = [ - ('control_directives_mask', ctypes.c_uint64), - 
('break_exceptions_mask', ctypes.c_uint16), - ('detect_exceptions_mask', ctypes.c_uint16), - ('max_dynamic_group_size', ctypes.c_uint32), - ('max_flat_grid_size', ctypes.c_uint64), - ('max_flat_workgroup_size', ctypes.c_uint32), - ('reserved1', ctypes.c_uint32), - ('required_grid_size', ctypes.c_uint64*3), - ('required_workgroup_size', hsa_dim3_t), - ('required_dim', ctypes.c_uint8), - ('reserved2', ctypes.c_uint8*75), - ] - -# function pointers, that are used in the -# "hsa_ext_finalizer_1_00_pfn_t" struct of pointers -HSA_EXT_PROGRAM_CREATE_FPTR = ctypes.CFUNCTYPE( - hsa_status_t, # return value - hsa_machine_model_t, # machine_model - hsa_profile_t, # profile - hsa_default_float_rounding_mode_t, # default_float_rounding_mode - ctypes.c_char_p, # options - _PTR(hsa_ext_program_t)) # program - -HSA_EXT_PROGRAM_DESTROY_FPTR = ctypes.CFUNCTYPE( - hsa_status_t, # return value - hsa_ext_program_t) # program - -HSA_EXT_PROGRAM_ADD_MODULE_FPTR = ctypes.CFUNCTYPE( - hsa_status_t, # return value - hsa_ext_program_t, # program - hsa_ext_module_t) # module - -HSA_EXT_PROGRAM_ITERATE_MODULES_CALLBACK_FUNC = ctypes.CFUNCTYPE( - hsa_status_t, # return - hsa_ext_program_t, # program - hsa_ext_module_t, # module - ctypes.c_void_p) # data - -HSA_EXT_PROGRAM_ITERATE_MODULES_FPTR = ctypes.CFUNCTYPE( - hsa_status_t, # return value - hsa_ext_program_t, # program - HSA_EXT_PROGRAM_ITERATE_MODULES_CALLBACK_FUNC, # callback - ctypes.c_void_p) # data - -HSA_EXT_PROGRAM_GET_INFO_FPTR = ctypes.CFUNCTYPE( - hsa_status_t, # return value - hsa_ext_program_t, # program - hsa_ext_program_info_t, # attribute - ctypes.c_void_p) # value - -HSA_EXT_PROGRAM_FINALIZE_FPTR = ctypes.CFUNCTYPE( - hsa_status_t, # return value - hsa_ext_program_t, # program - hsa_isa_t, # isa - ctypes.c_int32, # call_convention - hsa_ext_control_directives_t, # control_directives - ctypes.c_char_p, #options - hsa_code_object_type_t, #code_object_type - _PTR(hsa_code_object_t)) # code_object - -# this struct holds function 
pointers -class hsa_ext_finalizer_1_00_pfn_t(ctypes.Structure): - _fields_ = [ - ('hsa_ext_program_create', HSA_EXT_PROGRAM_CREATE_FPTR), - ('hsa_ext_program_destroy', HSA_EXT_PROGRAM_DESTROY_FPTR), - ('hsa_ext_program_add_module', HSA_EXT_PROGRAM_ADD_MODULE_FPTR), - ('hsa_ext_program_iterate_modules', - HSA_EXT_PROGRAM_ITERATE_MODULES_FPTR), - ('hsa_ext_program_get_info', HSA_EXT_PROGRAM_GET_INFO_FPTR), - ('hsa_ext_program_finalize', HSA_EXT_PROGRAM_FINALIZE_FPTR) - ] - -#------------------------------------------------------------------------------ - - - -#------------------------------------------------------------------------------ -# HSA types from hsa_ext_image.h (NOTE: support incomplete) - -hsa_ext_image_t = handle_struct -hsa_ext_image_geometry_t = ctypes.c_int # enum -hsa_ext_image_channel_type_t = ctypes.c_int # enum -hsa_ext_image_channel_order_t = ctypes.c_int # enum - -class hsa_ext_image_format_t(ctypes.Structure): - _fields_ = [ - ("channel_type", hsa_ext_image_channel_type_t), - ("channel_order", hsa_ext_image_channel_order_t) - ] - -class hsa_ext_image_descriptor_t(ctypes.Structure): - _fields_ = [ - ("geometry", hsa_ext_image_geometry_t), - ("width", ctypes.c_size_t), - ("height", ctypes.c_size_t), - ("depth", ctypes.c_size_t), - ("array_size", ctypes.c_size_t), - ("format", hsa_ext_image_format_t) - ] - -hsa_ext_image_capability_t = ctypes.c_int # enum - -class hsa_ext_image_data_info_t(ctypes.Structure): - _fields_ = [ - ("size", ctypes.c_size_t), - ("alignment", ctypes.c_size_t), - ] - -class hsa_ext_image_region_t(ctypes.Structure): - _fields_ = [ - ("offset", hsa_dim3_t), - ("offset", hsa_dim3_t), - ] - -hsa_ext_sampler_t = handle_struct -hsa_ext_sampler_addressing_mode_t = ctypes.c_int # enum -hsa_ext_sampler_coordinate_mode_t = ctypes.c_int # enum -hsa_ext_sampler_filter_mode_t = ctypes.c_int # enum - -class hsa_ext_sampler_descriptor_t(ctypes.Structure): - _fields_ = [ - ("coordinate_mode", hsa_ext_sampler_coordinate_mode_t), - 
("filter_mode", hsa_ext_sampler_filter_mode_t), - ("address_mode", hsa_ext_sampler_addressing_mode_t) - ] - -#NOTE: Not implemented yet: hsa_ext_images_1_00_pfn_t -#------------------------------------------------------------------------------ - -#------------------------------------------------------------------------------ -# callbacks that have no related typedef in the hsa include files - -HSA_ITER_AGENT_CALLBACK_FUNC = ctypes.CFUNCTYPE( - hsa_status_t, # return value - hsa_agent_t, # agent - ctypes.py_object) # this is a c_void_p used to wrap a python object - -HSA_QUEUE_CALLBACK_FUNC = ctypes.CFUNCTYPE( - None, # return value - hsa_status_t, - _PTR(hsa_queue_t), - ctypes.py_object) # this is a c_void_p used to wrap a python object - -HSA_AGENT_ITERATE_REGIONS_CALLBACK_FUNC = ctypes.CFUNCTYPE( - hsa_status_t, # return value - hsa_region_t, # region - ctypes.py_object) # this is a c_void_p used to wrap a python object - -# hsa_status_t (*callback)(hsa_code_object_t code_object, hsa_code_symbol_t symbol, void* data), -HSA_CODE_OBJECT_ITERATE_SYMBOLS_CALLBACK = ctypes.CFUNCTYPE( - hsa_status_t, # return value - hsa_code_object_t, - hsa_code_symbol_t, - ctypes.py_object) # this is a c_void_p used to wrap a python object - -# hsa_status_t (*alloc_callback)(size_t size, hsa_callback_data_t data, void **address), -HSA_ALLOC_CALLBACK_FUNCTION = ctypes.CFUNCTYPE( - hsa_status_t, # return value - ctypes.c_size_t, - hsa_callback_data_t, - _PTR(ctypes.c_void_p) # this might need to be a ptr to a py_object - ) - -void_fn_ptr = ctypes.CFUNCTYPE( - None, - ctypes.c_void_p) # this might need to be a ptr to a py_object - -# hsa_status_t (*callback)(hsa_amd_memory_pool_t memory_pool, void* data) -HSA_AMD_AGENT_ITERATE_MEMORY_POOLS_CALLBACK = ctypes.CFUNCTYPE( - hsa_status_t, - hsa_amd_memory_pool_t, - ctypes.c_void_p) # this is a c_void_p used to wrap a python object - - -#------------------------------------------------------------------------------ - -# Functions used by API 
calls returning hsa_status_t to check for errors ###### - -def _build_reverse_error_warn_maps(): - err_map = utils.UniqueDict() - warn_map = utils.UniqueDict() - - for name in [name for name in dir(enums) if name.startswith('HSA_')]: - code = getattr(enums, name) - if 'STATUS_ERROR' in name: - err_map[code] = name - elif 'STATUS_INFO' in name: - warn_map[code] = name - else: - pass # should we warn here? - return err_map, warn_map - -ERROR_MAP, WARN_MAP = _build_reverse_error_warn_maps() - - -def _check_error(result, func, arguments): - if result != enums.HSA_STATUS_SUCCESS: - if result >= enums.HSA_STATUS_ERROR: - errname = ERROR_MAP.get(result, "UNKNOWN_HSA_ERROR") - msg = "Call to {0} returned {1}".format(func.__name__, errname) - raise HsaApiError(result, msg) - else: - warnname = WARN_MAP.get(result, "UNKNOWN_HSA_INFO") - msg = "Call to {0} returned {1}".format(func.__name__, warnname) - warnings.warn(msg, HsaWarning) - - -# The API prototypes -# These are order based on header files. -API_PROTOTYPES = { - -#------------------------------------------------------------------------------ -# HSA functions from hsa.h, ordered as per header file. 
- - # hsa_status_t hsa_status_string( - # hsa_status_t status, - # const char **status_string); - 'hsa_status_string': { - 'restype': hsa_status_t, - 'argtypes': [hsa_status_t, _PTR(ctypes.c_char_p)], - 'errcheck': _check_error - }, - - # hsa_status_t hsa_init(void) - 'hsa_init': { - 'restype': hsa_status_t, - 'argtypes': [], - 'errcheck': _check_error - }, - - # hsa_status_t hsa_shut_down(void) - 'hsa_shut_down': { - 'restype': hsa_status_t, - 'argtypes': [], - 'errcheck': _check_error - }, - - # hsa_status_t hsa_system_get_info(hsa_system_info_t, void*) - 'hsa_system_get_info': { - 'restype': hsa_status_t, - 'argtypes': [hsa_system_info_t, ctypes.c_void_p], - 'errcheck': _check_error - }, - - # hsa_status_t HSA_API hsa_system_extension_supported(uint16_t, uint16_t, - # uint16_t, bool *); - 'hsa_system_extension_supported': { - 'restype': hsa_status_t, - 'argtypes': [ctypes.c_uint16, # extension - ctypes.c_uint16, # version_major - ctypes.c_uint16, # version_minor - _PTR(ctypes.c_bool)], # result - 'errcheck': _check_error - }, - - # hsa_status_t hsa_system_get_extension_table(uint16_t, uint16_t, - # uint16_t, void *); - 'hsa_system_get_extension_table': { - 'restype': hsa_status_t, - 'argtypes': [ctypes.c_uint16, # extension - ctypes.c_uint16, # version_major - ctypes.c_uint16, # version_minor - ctypes.c_void_p], # result - 'errcheck': _check_error - }, - - # hsa_status_t hsa_agent_get_info(hsa_agent_t, hsa_agent_info_t, void*) - 'hsa_agent_get_info': { - 'restype': hsa_status_t, - 'argtypes': [hsa_agent_t, hsa_agent_info_t, ctypes.c_void_p], - 'errcheck': _check_error - }, - - # hsa_status_t hsa_iterate_agents(hsa_status_t(*)(hsa_agent_t, void*), - # void*) - 'hsa_iterate_agents': { - 'restype': hsa_status_t, - 'argtypes': [HSA_ITER_AGENT_CALLBACK_FUNC, ctypes.py_object], - 'errcheck': _check_error - }, - - # hsa_status_t hsa_agent_get_exception_policies(hsa_agent_t agent, - # hsa_profile_t profile, - # uint16_t *mask); - 'hsa_agent_get_exception_policies': { - 
'restype': hsa_status_t, - 'argtypes': [hsa_agent_t, hsa_profile_t, _PTR(ctypes.c_uint16)], - 'errcheck': _check_error - }, - - # hsa_status_t hsa_agent_extension_supported(uint16_t extension, hsa_agent_t agent, - # uint16_t version_major, - # uint16_t version_minor, bool *result); - 'hsa_agent_extension_supported': { - 'restype': hsa_status_t, - 'argtypes': [ctypes.c_uint16, hsa_agent_t, ctypes.c_uint16, ctypes.c_uint16, - _PTR(ctypes.c_bool)], - 'errcheck': _check_error - }, - - #-------------------------------------------------------------------------- - # Signals - #-------------------------------------------------------------------------- - - # hsa_status_t hsa_signal_create( - # hsa_signal_value_t initial_value, - # uint32_t agent_count, - # const hsa_agent_t *agents, - # hsa_signal_t *signal) - 'hsa_signal_create': { - 'restype': hsa_status_t, - 'argtypes': [hsa_signal_value_t, - ctypes.c_uint32, - _PTR(hsa_agent_t), - _PTR(hsa_signal_t)], - 'errcheck': _check_error - }, - - # hsa_status_t hsa_signal_destroy( - # hsa_signal_t signal) - 'hsa_signal_destroy': { - 'restype': hsa_status_t, - 'argtypes': [hsa_signal_t], - 'errcheck': _check_error - }, - - # hsa_signal_value_t hsa_signal_load_acquire( - # hsa_signal_t signal); - 'hsa_signal_load_acquire': { - 'restype': hsa_signal_value_t, - 'argtypes': [hsa_signal_t], - }, - - # hsa_signal_value_t hsa_signal_load_relaxed( - # hsa_signal_t signal); - 'hsa_signal_load_relaxed': { - 'restype': hsa_signal_value_t, - 'argtypes': [hsa_signal_t], - }, - - # void hsa_signal_store_relaxed( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_store_relaxed': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_store_release( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_store_release': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t], - }, - - # hsa_signal_value_t hsa_signal_exchange_acq_rel( - # hsa_signal_t signal, - 
# hsa_signal_value_t value); - 'hsa_signal_exchange_acq_rel': { - 'restype': hsa_signal_value_t, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # hsa_signal_value_t hsa_signal_exchange_acquire( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_exchange_acquire': { - 'restype': hsa_signal_value_t, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # hsa_signal_value_t hsa_signal_exchange_relaxed( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_exchange_relaxed': { - 'restype': hsa_signal_value_t, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # hsa_signal_value_t hsa_signal_exchange_release( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_exchange_release': { - 'restype': hsa_signal_value_t, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # hsa_signal_value_t hsa_signal_cas_acq_rel( - # hsa_signal_t signal, - # hsa_signal_value_t expected, - # hsa_signal_value_t value); - 'hsa_signal_cas_acq_rel': { - 'restype': hsa_signal_value_t, - 'argtypes': [hsa_signal_t, hsa_signal_value_t, hsa_signal_value_t] - }, - - # hsa_signal_value_t hsa_signal_cas_acquire( - # hsa_signal_t signal, - # hsa_signal_value_t expected, - # hsa_signal_value_t value); - 'hsa_signal_cas_acquire': { - 'restype': hsa_signal_value_t, - 'argtypes': [hsa_signal_t, hsa_signal_value_t, hsa_signal_value_t] - }, - - # hsa_signal_value_t hsa_signal_cas_relaxed( - # hsa_signal_t signal, - # hsa_signal_value_t expected, - # hsa_signal_value_t value); - 'hsa_signal_cas_relaxed': { - 'restype': hsa_signal_value_t, - 'argtypes': [hsa_signal_t, hsa_signal_value_t, hsa_signal_value_t] - }, - - # hsa_signal_value_t hsa_signal_cas_release( - # hsa_signal_t signal, - # hsa_signal_value_t expected, - # hsa_signal_value_t value); - 'hsa_signal_cas_release': { - 'restype': hsa_signal_value_t, - 'argtypes': [hsa_signal_t, hsa_signal_value_t, hsa_signal_value_t] - }, - - # void hsa_signal_add_acq_rel( - # hsa_signal_t 
signal, - # hsa_signal_value_t value); - 'hsa_signal_add_acq_rel': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_add_acquire( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_add_acquire': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_add_relaxed( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_add_relaxed': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_add_release( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_add_release': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_subtract_acq_rel( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_subtract_acq_rel': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_subtract_acquire( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_subtract_acquire': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_subtract_relaxed( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_subtract_relaxed': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_subtract_release( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_subtract_release': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_and_acq_rel( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_and_acq_rel': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_and_acquire( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_and_acquire': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_and_relaxed( - # hsa_signal_t signal, - # 
hsa_signal_value_t value); - 'hsa_signal_and_relaxed': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_and_release( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_and_release': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_or_acq_rel( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_or_acq_rel': { - 'restype': None, - 'argtypes': [hsa_signal_t, - hsa_signal_value_t] - }, - - # void hsa_signal_or_acquire( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_or_acquire': { - 'restype': None, - 'argtypes': [hsa_signal_t, - hsa_signal_value_t] - }, - - # void hsa_signal_or_relaxed( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_or_relaxed': { - 'restype': None, - 'argtypes': [hsa_signal_t, - hsa_signal_value_t] - }, - - # void hsa_signal_or_release( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_or_release': { - 'restype': None, - 'argtypes': [hsa_signal_t, - hsa_signal_value_t] - }, - - # void hsa_signal_xor_acq_rel( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_xor_acq_rel': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_xor_acquire( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_xor_acquire': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_xor_relaxed( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_xor_relaxed': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # void hsa_signal_xor_release( - # hsa_signal_t signal, - # hsa_signal_value_t value); - 'hsa_signal_xor_release': { - 'restype': None, - 'argtypes': [hsa_signal_t, hsa_signal_value_t] - }, - - # hsa_signal_value_t HSA_API - # hsa_signal_wait_acquire(hsa_signal_t signal, - # hsa_signal_condition_t condition, - # 
hsa_signal_value_t compare_value, - # uint64_t timeout_hint, - # hsa_wait_state_t wait_state_hint); - 'hsa_signal_wait_acquire': { - 'restype': hsa_signal_value_t, - 'argtypes': [hsa_signal_t, - hsa_signal_condition_t, - hsa_signal_value_t, - ctypes.c_uint64, - hsa_wait_state_t] - }, - - # hsa_signal_value_t hsa_signal_wait_relaxed( - # hsa_signal_t signal, - # hsa_signal_condition_t condition, - # hsa_signal_value_t compare_value, - # uint64_t timeout_hint, - # hsa_wait_state_t wait_state_hint); - 'hsa_signal_wait_relaxed': { - 'restype': hsa_signal_value_t, - 'argtypes': [hsa_signal_t, - hsa_signal_condition_t, - hsa_signal_value_t, - ctypes.c_uint64, - hsa_wait_state_t], - }, - - #-------------------------------------------------------------------------- - # Queues - #-------------------------------------------------------------------------- - - # hsa_status_t HSA_API - # hsa_queue_create(hsa_agent_t agent, uint32_t size, hsa_queue_type_t type, - # void (*callback)(hsa_status_t status, hsa_queue_t *source, - # void *data), - # void *data, uint32_t private_segment_size, - # uint32_t group_segment_size, hsa_queue_t **queue); - 'hsa_queue_create': { - 'restype': hsa_status_t, - 'argtypes': [hsa_agent_t, - ctypes.c_uint32, - hsa_queue_type_t, - HSA_QUEUE_CALLBACK_FUNC, - ctypes.c_void_p, # data - ctypes.c_uint32, # private segment size - ctypes.c_uint32, # group segment size - _PTR(_PTR(hsa_queue_t))], - 'errcheck': _check_error - }, - - # hsa_status_t - # hsa_soft_queue_create(hsa_region_t region, uint32_t size, - # hsa_queue_type_t type, uint32_t features, - # hsa_signal_t doorbell_signal, hsa_queue_t **queue); - 'hsa_soft_queue_create': { - 'restype': hsa_status_t, - 'argtypes': [hsa_region_t, - ctypes.c_uint32, - hsa_queue_type_t, - ctypes.c_uint32, - hsa_signal_t, - _PTR(_PTR(hsa_queue_t))], - 'errcheck': _check_error - }, - - # hsa_status_t hsa_queue_destroy( - # hsa_queue_t *queue) - 'hsa_queue_destroy': { - 'restype': hsa_status_t, - 'argtypes': 
[_PTR(hsa_queue_t)], - 'errcheck': _check_error - }, - - # hsa_status_t hsa_queue_inactivate(hsa_queue_t *queue); - 'hsa_queue_inactivate': { - 'restype': hsa_status_t, - 'argtypes': [_PTR(hsa_queue_t)], - 'errcheck': _check_error - }, - - # uint64_t hsa_queue_load_read_index_acquire(hsa_queue_t *queue); - 'hsa_queue_load_read_index_acquire': { - 'restype': ctypes.c_uint64, - 'argtypes': [_PTR(hsa_queue_t)] - }, - - # uint64_t hsa_queue_load_read_index_relaxed(hsa_queue_t *queue); - 'hsa_queue_load_read_index_relaxed': { - 'restype': ctypes.c_uint64, - 'argtypes': [_PTR(hsa_queue_t)] - }, - - # uint64_t hsa_queue_load_write_index_acquire(hsa_queue_t *queue); - 'hsa_queue_load_write_index_acquire': { - 'restype': ctypes.c_uint64, - 'argtypes': [_PTR(hsa_queue_t)] - }, - - # uint64_t hsa_queue_load_write_index_relaxed(hsa_queue_t *queue); - 'hsa_queue_load_write_index_relaxed': { - 'restype': ctypes.c_uint64, - 'argtypes': [_PTR(hsa_queue_t)] - }, - - # void hsa_queue_store_write_index_relaxed(hsa_queue_t *queue, uint64_t value); - 'hsa_queue_store_write_index_relaxed': { - 'restype': None, - 'argtypes': [_PTR(hsa_queue_t), ctypes.c_uint64] - }, - - # void hsa_queue_store_write_index_release(hsa_queue_t *queue, uint64_t value); - 'hsa_queue_store_write_index_release': { - 'restype': None, - 'argtypes': [_PTR(hsa_queue_t), ctypes.c_uint64] - }, - - # uint64_t hsa_queue_cas_write_index_acq_rel( - # hsa_queue_t *queue, - # uint64_t expected, - # uint64_t value); - 'hsa_queue_cas_write_index_acq_rel': { - 'restype': ctypes.c_uint64, - 'argtypes': [_PTR(hsa_queue_t), ctypes.c_uint64, ctypes.c_uint64] - }, - - # uint64_t hsa_queue_cas_write_index_acquire( - # hsa_queue_t *queue, - # uint64_t expected, - # uint64_t value); - 'hsa_queue_cas_write_index_acquire': { - 'restype': ctypes.c_uint64, - 'argtypes': [_PTR(hsa_queue_t), ctypes.c_uint64, ctypes.c_uint64] - }, - - # uint64_t hsa_queue_cas_write_index_relaxed( - # hsa_queue_t *queue, - # uint64_t expected, - # uint64_t 
value); - 'hsa_queue_cas_write_index_relaxed': { - 'restype': ctypes.c_uint64, - 'argtypes': [_PTR(hsa_queue_t), ctypes.c_uint64, ctypes.c_uint64] - }, - - # uint64_t hsa_queue_cas_write_index_release( - # hsa_queue_t *queue, - # uint64_t expected, - # uint64_t value); - 'hsa_queue_cas_write_index_release': { - 'restype': ctypes.c_uint64, - 'argtypes': [_PTR(hsa_queue_t), ctypes.c_uint64, ctypes.c_uint64] - }, - - # uint64_t hsa_queue_add_write_index_acq_rel( - # hsa_queue_t *queue, - # uint64_t value); - 'hsa_queue_add_write_index_acq_rel': { - 'restype': ctypes.c_uint64, - 'argtypes': [_PTR(hsa_queue_t), ctypes.c_uint64] - }, - - # uint64_t hsa_queue_add_write_index_acquire( - # hsa_queue_t *queue, - # uint64_t value); - 'hsa_queue_add_write_index_acquire': { - 'restype': ctypes.c_uint64, - 'argtypes': [_PTR(hsa_queue_t), ctypes.c_uint64] - }, - - # uint64_t hsa_queue_add_write_index_relaxed( - # hsa_queue_t *queue, - # uint64_t value); - 'hsa_queue_add_write_index_relaxed': { - 'restype': ctypes.c_uint64, - 'argtypes': [_PTR(hsa_queue_t), ctypes.c_uint64] - }, - - # uint64_t hsa_queue_add_write_index_release( - # hsa_queue_t *queue, - # uint64_t value); - 'hsa_queue_add_write_index_release': { - 'restype': ctypes.c_uint64, - 'argtypes': [_PTR(hsa_queue_t), ctypes.c_uint64] - }, - - # void hsa_queue_store_read_index_relaxed( - # hsa_queue_t *queue, - # uint64_t value); - 'hsa_queue_store_read_index_relaxed': { - 'restype': None, - 'argtypes': [_PTR(hsa_queue_t), ctypes.c_uint64] - }, - - # void hsa_queue_store_read_index_release( - # hsa_queue_t *queue, - # uint64_t value); - 'hsa_queue_store_read_index_release': { - 'restype': None, - 'argtypes': [_PTR(hsa_queue_t), ctypes.c_uint64] - }, - - #-------------------------------------------------------------------------- - # Memory - #-------------------------------------------------------------------------- - - # hsa_status_t hsa_region_get_info( - # hsa_region_t region, - # hsa_region_info_t attribute, - # void 
*value); - 'hsa_region_get_info': { - 'restype': hsa_status_t, - 'argtypes': [hsa_region_t, hsa_region_info_t, ctypes.c_void_p], - 'errcheck': _check_error, - }, - - # hsa_status_t hsa_agent_iterate_regions( - # hsa_agent_t agent, - # hsa_status_t (*callback)(hsa_region_t region, void *data), - # void *data); - 'hsa_agent_iterate_regions': { - 'restype': hsa_status_t, - 'argtypes': [hsa_agent_t, - HSA_AGENT_ITERATE_REGIONS_CALLBACK_FUNC, - ctypes.py_object], - 'errcheck': _check_error - }, - - # hsa_status_t hsa_memory_allocate( - # hsa_region_t region, - # size_t size, - # void **ptr); - 'hsa_memory_allocate': { - 'restype': hsa_status_t, - 'argtypes': [hsa_region_t, ctypes.c_size_t, _PTR(ctypes.c_void_p)], - 'errcheck': _check_error - }, - - # hsa_status_t hsa_memory_free( - # void *ptr); - 'hsa_memory_free': { - 'restype': hsa_status_t, - 'argtypes': [ctypes.c_void_p], - 'errcheck': _check_error - }, - - # hsa_status_t HSA_API hsa_memory_copy( - # void * dst, - # const void * src, - # size_t size); - 'hsa_memory_copy': { - 'restype': hsa_status_t, - 'argtypes': [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t], - 'errcheck': _check_error - }, - - # hsa_status_t HSA_API hsa_memory_assign_agent(void *ptr, - # hsa_agent_t agent, - # hsa_access_permission_t access); - 'hsa_memory_assign_agent': { - 'restype': hsa_status_t, - 'argtypes': [ctypes.c_void_p, hsa_agent_t, hsa_access_permission_t], - 'errcheck': _check_error - }, - - # hsa_status_t hsa_memory_register( - # void *address, - # size_t size); - 'hsa_memory_register': { - 'restype': hsa_status_t, - 'argtypes': [ctypes.c_void_p, ctypes.c_size_t], - 'errcheck': _check_error - }, - - # hsa_status_t hsa_memory_deregister( - # void *address, - # size_t size); - 'hsa_memory_deregister': { - 'restype': hsa_status_t, - 'argtypes': [ctypes.c_void_p, ctypes.c_size_t], - 'errcheck': _check_error - }, - - #-------------------------------------------------------------------------- - # Code Object functions - 
#-------------------------------------------------------------------------- - - # hsa_status_t HSA_API hsa_isa_from_name(const char* name, - # hsa_isa_t* isa); - 'hsa_isa_from_name': { - 'restype': hsa_status_t, - 'argtypes': [ctypes.c_char_p, _PTR(hsa_isa_t)], - 'errcheck': _check_error - }, - - # hsa_status_t HSA_API hsa_isa_get_info(hsa_isa_t isa, - # hsa_isa_info_t attribute, - # uint32_t index, - # void* value); - 'hsa_isa_get_info': { - 'restype': hsa_status_t, - 'argtypes': [hsa_isa_t, hsa_isa_info_t, ctypes.c_void_p], - 'errcheck': _check_error - }, - - # hsa_status_t HSA_API hsa_isa_compatible(hsa_isa_t code_object_isa, - # hsa_isa_t agent_isa, - # bool* result); - 'hsa_isa_compatible': { - 'restype': hsa_status_t, - 'argtypes': [hsa_isa_t, hsa_isa_t, _PTR(ctypes.c_bool)], - 'errcheck': _check_error - }, - - # hsa_status_t HSA_API hsa_code_object_serialize( - # hsa_code_object_t code_object, - # hsa_status_t (*alloc_callback)(size_t size, - # hsa_callback_data_t data, void **address), - # hsa_callback_data_t callback_data, - # const char *options, - # void **serialized_code_object, - # size_t *serialized_code_object_size); - 'hsa_code_object_serialize': { - 'restype': hsa_status_t, - 'argtypes': [HSA_ALLOC_CALLBACK_FUNCTION, - hsa_callback_data_t, - _PTR(ctypes.c_void_p), - hsa_callback_data_t, - ctypes.c_char_p, - _PTR(ctypes.c_void_p), - _PTR(ctypes.c_size_t)], - 'errcheck': _check_error - }, - - # hsa_status_t HSA_API hsa_code_object_deserialize( - # void *serialized_code_object, - # size_t serialized_code_object_size, - # const char *options, - # hsa_code_object_t *code_object); - 'hsa_code_object_deserialize': { - 'restype': hsa_status_t, - 'argtypes': [ctypes.c_void_p, - ctypes.c_size_t, - ctypes.c_char_p, - _PTR(hsa_code_object_t)], - 'errcheck': _check_error - }, - - # hsa_status_t HSA_API hsa_code_object_destroy( - # hsa_code_object_t code_object); - 'hsa_code_object_destroy': { - 'restype': hsa_status_t, - 'argtypes': [hsa_code_object_t], - 
'errcheck': _check_error - }, - - # hsa_status_t HSA_API hsa_code_object_get_info( - # hsa_code_object_t code_object, - # hsa_code_object_info_t attribute, - # void *value); - 'hsa_code_object_get_info': { - 'restype': hsa_status_t, - 'argtypes': [hsa_code_object_t, - hsa_code_object_info_t, - ctypes.c_void_p - ], - 'errcheck': _check_error - }, - - # hsa_status_t HSA_API hsa_code_object_get_symbol( - # hsa_code_object_t code_object, - # const char *symbol_name, - # hsa_code_symbol_t *symbol); - 'hsa_code_object_get_symbol': { - 'restype': hsa_status_t, - 'argtypes': [hsa_code_object_t, - ctypes.c_char_p, - _PTR(hsa_code_symbol_t) - ], - 'errcheck': _check_error - }, - - # hsa_status_t HSA_API hsa_code_symbol_get_info( - # hsa_code_symbol_t code_symbol, - # hsa_code_symbol_info_t attribute, - # void *value); - 'hsa_code_symbol_get_info': { - 'restype': hsa_status_t, - 'argtypes': [hsa_code_symbol_t, - hsa_code_symbol_info_t, - ctypes.c_void_p - ], - 'errcheck': _check_error - }, - - # hsa_status_t HSA_API hsa_code_object_iterate_symbols( - # hsa_code_object_t code_object, - # hsa_status_t (*callback)(hsa_code_object_t code_object, hsa_code_symbol_t symbol, void* data), - # void* data); - 'hsa_code_object_iterate_symbols': { - 'restype': hsa_status_t, - 'argtypes': [hsa_code_object_t, - HSA_CODE_OBJECT_ITERATE_SYMBOLS_CALLBACK, - ctypes.c_void_p - ], - 'errcheck': _check_error - }, - - #-------------------------------------------------------------------------- - # Executable functions - #-------------------------------------------------------------------------- - - # hsa_status_t HSA_API hsa_executable_create( - # hsa_profile_t profile, - # hsa_executable_state_t executable_state, - # const char *options, - # hsa_executable_t *executable); - - "hsa_executable_create": { - 'restype': hsa_status_t, - 'argtypes': [hsa_profile_t, - hsa_executable_state_t, - ctypes.c_char_p, - ctypes.POINTER(hsa_executable_t)], - 'errcheck': _check_error, - }, - - # hsa_status_t HSA_API 
hsa_executable_destroy( - # hsa_executable_t executable); - - "hsa_executable_destroy": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_executable_t, - ], - }, - - # hsa_status_t HSA_API hsa_executable_load_code_object( - # hsa_executable_t executable, - # hsa_agent_t agent, - # hsa_code_object_t code_object, - # const char *options); - - "hsa_executable_load_code_object": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_executable_t, - hsa_agent_t, - hsa_code_object_t, - ctypes.c_char_p, - ], - }, - - # hsa_status_t HSA_API hsa_executable_freeze( - # hsa_executable_t executable, - # const char *options); - - "hsa_executable_freeze": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_executable_t, - ctypes.c_char_p, - ], - }, - - # hsa_status_t HSA_API hsa_executable_get_info( - # hsa_executable_t executable, - # hsa_executable_info_t attribute, - # void *value); - "hsa_executable_get_info": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_executable_t, - hsa_executable_info_t, - ctypes.c_void_p - ], - }, - - # hsa_status_t HSA_API hsa_executable_global_variable_define( - # hsa_executable_t executable, - # const char *variable_name, - # void *address); - "hsa_executable_global_variable_define": { - 'restype': hsa_status_t, - 'argtypes': [hsa_executable_t, - ctypes.c_char_p, - ctypes.c_void_p], - 'errcheck': _check_error, - }, - - # hsa_status_t HSA_API hsa_executable_agent_global_variable_define( - # hsa_executable_t executable, - # hsa_agent_t agent, - # const char *variable_name, - # void *address); - "hsa_executable_agent_global_variable_define": { - 'restype': hsa_status_t, - 'argtypes': [hsa_executable_t, - hsa_agent_t, - ctypes.c_char_p, - ctypes.c_void_p], - 'errcheck': _check_error, - }, - - # hsa_status_t HSA_API hsa_executable_readonly_variable_define( - # hsa_executable_t executable, - # hsa_agent_t agent, - # const char *variable_name, 
- # void *address); - "hsa_executable_readonly_variable_define": { - 'restype': hsa_status_t, - 'argtypes': [hsa_executable_t, - hsa_agent_t, - ctypes.c_char_p, - ctypes.c_void_p], - 'errcheck': _check_error, - }, - - # hsa_status_t HSA_API hsa_executable_validate( - # hsa_executable_t executable, - # uint32_t* result); - "hsa_executable_validate": { - 'restype': hsa_status_t, - 'argtypes': [hsa_executable_t, - _PTR(ctypes.c_uint32)], - 'errcheck': _check_error, - }, - - # hsa_status_t HSA_API hsa_executable_get_symbol( - # hsa_executable_t executable, - # const char *module_name, - # const char *symbol_name, - # hsa_agent_t agent, - # int32_t call_convention, - # hsa_executable_symbol_t *symbol); - "hsa_executable_get_symbol": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_executable_t, - ctypes.c_char_p, # module_name (must be NULL for program linkage) - ctypes.c_char_p, # symbol_name - hsa_agent_t, - ctypes.c_int32, - ctypes.POINTER(hsa_executable_symbol_t), - ], - }, - - # hsa_status_t HSA_API hsa_executable_symbol_get_info( - # hsa_executable_symbol_t executable_symbol, - # hsa_executable_symbol_info_t attribute, - # void *value); - "hsa_executable_symbol_get_info": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_executable_symbol_t, - hsa_executable_symbol_info_t, - ctypes.c_void_p, - ], - }, - - - #hsa_status_t HSA_API hsa_executable_iterate_symbols( - # hsa_executable_t executable, - # hsa_status_t (*callback)(hsa_executable_t executable, hsa_executable_symbol_t symbol, void* data), - # void* data); - "hsa_executable_iterate_symbols": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_executable_symbol_t, - hsa_executable_symbol_info_t, - ctypes.c_void_p, - ], - }, - - - #-------------------------------------------------------------------------- - # AMD extensions from hsa_ext_amd.h - #-------------------------------------------------------------------------- - - # 
hsa_status_t HSA_API hsa_amd_coherency_get_type(hsa_agent_t agent, - # hsa_amd_coherency_type_t* type); - - "hsa_amd_coherency_get_type": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_agent_t, - _PTR(hsa_amd_coherency_type_t), - ], - }, - - # hsa_status_t HSA_API hsa_amd_coherency_set_type(hsa_agent_t agent, - # hsa_amd_coherency_type_t type); - "hsa_amd_coherency_get_type": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_agent_t, - hsa_amd_coherency_type_t, - ], - }, - - # hsa_status_t HSA_API - # hsa_amd_profiling_set_profiler_enabled(hsa_queue_t* queue, int enable); - "hsa_amd_profiling_set_profiler_enabled": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - _PTR(hsa_queue_t), - ctypes.c_int, - ], - }, - - # hsa_status_t HSA_API hsa_amd_profiling_get_dispatch_time( - # hsa_agent_t agent, hsa_signal_t signal, - # hsa_amd_profiling_dispatch_time_t* time); - "hsa_amd_profiling_get_dispatch_time": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_agent_t, - hsa_signal_t, - _PTR(hsa_amd_profiling_dispatch_time_t) - ], - }, - - # hsa_status_t HSA_API - # hsa_amd_profiling_convert_tick_to_system_domain(hsa_agent_t agent, - # uint64_t agent_tick, - # uint64_t* system_tick); - "hsa_amd_profiling_convert_tick_to_system_domain": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - ctypes.c_uint64, - _PTR(ctypes.c_uint64) - ], - }, - - # hsa_status_t HSA_API - # hsa_amd_signal_async_handler(hsa_signal_t signal, - # hsa_signal_condition_t cond, - # hsa_signal_value_t value, - # hsa_amd_signal_handler handler, void* arg); - "hsa_amd_signal_async_handler": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_signal_t, - hsa_signal_condition_t, - hsa_signal_value_t, - hsa_amd_signal_handler, - ctypes.c_void_p, - ], - }, - - #hsa_amd_async_function(void (*callback)(void* arg), void* arg); - "hsa_amd_async_function": 
{ - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - ctypes.POINTER(void_fn_ptr), - ctypes.c_void_p, - ], - }, - - #uint32_t HSA_API - #hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* signals, - # hsa_signal_condition_t* conds, - # hsa_signal_value_t* values, uint64_t timeout_hint, - # hsa_wait_state_t wait_hint, - # hsa_signal_value_t* satisfying_value); - "hsa_amd_signal_wait_any": { - 'errcheck': _check_error, - 'restype': ctypes.c_uint32, - 'argtypes': [ - ctypes.c_uint32, - _PTR(hsa_signal_t), - _PTR(hsa_signal_condition_t), - _PTR(hsa_signal_value_t), - ctypes.c_uint64, - hsa_wait_state_t, - _PTR(hsa_signal_value_t), - ], - }, - - # hsa_status_t HSA_API hsa_amd_image_get_info_max_dim(hsa_agent_t agent, - # hsa_agent_info_t attribute, - # void* value); - "hsa_amd_image_get_info_max_dim": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_agent_t, - hsa_agent_info_t, - ctypes.c_void_p, - ], - }, - - # hsa_status_t HSA_API hsa_amd_queue_cu_set_mask(const hsa_queue_t* queue, - # uint32_t num_cu_mask_count, - # const uint32_t* cu_mask); - "hsa_amd_queue_cu_set_mask": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - _PTR(hsa_queue_t), - ctypes.c_uint32, - _PTR(ctypes.c_uint32) - ], - }, - - # hsa_status_t HSA_API - # hsa_amd_memory_pool_get_info(hsa_amd_memory_pool_t memory_pool, - # hsa_amd_memory_pool_info_t attribute, - # void* value); - "hsa_amd_memory_pool_get_info": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_amd_memory_pool_t, - hsa_amd_memory_pool_info_t, - ctypes.c_void_p - ], - }, - - # hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools( - # hsa_agent_t agent, - # hsa_status_t (*callback)(hsa_amd_memory_pool_t memory_pool, void* data), - # void* data); - "hsa_amd_agent_iterate_memory_pools": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_agent_t, - HSA_AMD_AGENT_ITERATE_MEMORY_POOLS_CALLBACK, - 
ctypes.c_void_p - ], - }, - - # hsa_status_t HSA_API hsa_amd_memory_pool_allocate - # (hsa_amd_memory_pool_t memory_pool, size_t size, - # uint32_t flags, void** ptr); - "hsa_amd_memory_pool_allocate": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_amd_memory_pool_t, - ctypes.c_size_t, - ctypes.c_uint32, - _PTR(ctypes.c_void_p) - ], - }, - - # hsa_status_t HSA_API hsa_amd_memory_pool_free(void* ptr); - "hsa_amd_memory_pool_free": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - ctypes.c_void_p - ], - }, - - # hsa_status_t HSA_API hsa_amd_memory_async_copy(void* dst, - # hsa_agent_t dst_agent, const void* src, - # hsa_agent_t src_agent, size_t size, - # uint32_t num_dep_signals, - # const hsa_signal_t* dep_signals, - # hsa_signal_t completion_signal); - "hsa_amd_memory_async_copy": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - ctypes.c_void_p, - hsa_agent_t, - ctypes.c_void_p, - hsa_agent_t, - ctypes.c_size_t, - ctypes.c_uint32, - _PTR(hsa_signal_t), - hsa_signal_t - ], - }, - - # hsa_status_t HSA_API hsa_amd_agent_memory_pool_get_info( - # hsa_agent_t agent, hsa_amd_memory_pool_t memory_pool, - # hsa_amd_agent_memory_pool_info_t attribute, void* value); - "hsa_amd_agent_memory_pool_get_info": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_agent_t, - hsa_amd_memory_pool_t, - hsa_amd_agent_memory_pool_info_t, - ctypes.c_void_p - ], - }, - - - # hsa_status_t HSA_API - # hsa_amd_agents_allow_access(uint32_t num_agents, const hsa_agent_t* agents, - # const uint32_t* flags, const void* ptr); - "hsa_amd_agents_allow_access": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - ctypes.c_uint32, - _PTR(hsa_agent_t), - _PTR(ctypes.c_uint32), - ctypes.c_void_p - ], - }, - - - # hsa_status_t HSA_API - # hsa_amd_memory_pool_can_migrate(hsa_amd_memory_pool_t src_memory_pool, - # hsa_amd_memory_pool_t dst_memory_pool, - # bool* result); - 
"hsa_amd_memory_pool_can_migrate": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_amd_memory_pool_t, - hsa_amd_memory_pool_t, - _PTR(ctypes.c_bool) - ], - }, - - - # hsa_status_t HSA_API hsa_amd_memory_migrate(const void* ptr, - # hsa_amd_memory_pool_t memory_pool, - # uint32_t flags); - "hsa_amd_memory_migrate": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - ctypes.c_void_p, - hsa_amd_memory_pool_t, - ctypes.c_uint32 - ], - }, - - - # hsa_status_t HSA_API hsa_amd_memory_lock(void* host_ptr, size_t size, - # hsa_agent_t* agents, int num_agent, - # void** agent_ptr); - "hsa_amd_memory_lock": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - ctypes.c_void_p, - ctypes.c_size_t, - _PTR(hsa_agent_t), - ctypes.c_int, - _PTR(ctypes.c_void_p) - ], - }, - - - # hsa_status_t HSA_API hsa_amd_memory_unlock(void* host_ptr); - "hsa_amd_memory_unlock": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - ctypes.c_void_p - ], - }, - - - # hsa_status_t HSA_API - # hsa_amd_memory_fill(void* ptr, uint32_t value, size_t count); - "hsa_amd_memory_unlock": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - ctypes.c_void_p - ], - }, - - # hsa_status_t HSA_API hsa_amd_interop_map_buffer(uint32_t num_agents, - # hsa_agent_t* agents, - # int interop_handle, - # uint32_t flags, - # size_t* size, - # void** ptr, - # size_t* metadata_size, - # const void** metadata); - "hsa_amd_interop_map_buffer": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - ctypes.c_uint32, - _PTR(hsa_agent_t), - ctypes.c_int, - ctypes.c_uint32, - _PTR(ctypes.c_size_t), - _PTR(ctypes.c_void_p), - _PTR(ctypes.c_size_t), - _PTR(ctypes.c_void_p), - ], - }, - - - # hsa_status_t HSA_API hsa_amd_interop_unmap_buffer(void* ptr); - "hsa_amd_interop_map_buffer": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - _PTR(ctypes.c_void_p), - ], - }, - - - # 
hsa_status_t HSA_API hsa_amd_image_create( - # hsa_agent_t agent, - # const hsa_ext_image_descriptor_t *image_descriptor, - # const hsa_amd_image_descriptor_t *image_layout, - # const void *image_data, - # hsa_access_permission_t access_permission, - # hsa_ext_image_t *image - # ); - "hsa_amd_image_create": { - 'errcheck': _check_error, - 'restype': hsa_status_t, - 'argtypes': [ - hsa_agent_t, - _PTR(hsa_ext_image_descriptor_t), - _PTR(hsa_amd_image_descriptor_t), - ctypes.c_void_p, - hsa_access_permission_t, - hsa_ext_image_t - ], - }, - - #-------------------------------------------------------------------------- - # Functions from hsa_ext_finalize.h - # NOTE: To access these functions use the hsa_ext_finalizer_1_00_pfn_t - # struct. - #-------------------------------------------------------------------------- - -} diff --git a/numba/numba/roc/hsadrv/enums.py b/numba/numba/roc/hsadrv/enums.py deleted file mode 100644 index da48c3999..000000000 --- a/numba/numba/roc/hsadrv/enums.py +++ /dev/null @@ -1,482 +0,0 @@ -"""Enum values for HSA - -Note that Python namespacing could be used to avoid the C-like -prefixing, but we choose to keep the same names as found in the C -enums, in order to match the documentation. -""" - -import ctypes - -HSA_LARGE_MODEL = ctypes.sizeof(ctypes.c_void_p) == 8 - -# hsa_status_t - -# The function has been executed successfully. -HSA_STATUS_SUCCESS = 0x0 -# A traversal over a list of elements has been interrupted by the -# application before completing. -HSA_STATUS_INFO_BREAK = 0x1 -# A generic error has occurred. -HSA_STATUS_ERROR = 0x1000 -# One of the actual arguments does not meet a precondition stated in the -# documentation of the corresponding formal argument. -HSA_STATUS_ERROR_INVALID_ARGUMENT = 0x1001 -# The requested queue creation is not valid. -HSA_STATUS_ERROR_INVALID_QUEUE_CREATION = 0x1002 -# The requested allocation is not valid. -HSA_STATUS_ERROR_INVALID_ALLOCATION = 0x1003 -# The agent is invalid. 
-HSA_STATUS_ERROR_INVALID_AGENT = 0x1004 -# The memory region is invalid. -HSA_STATUS_ERROR_INVALID_REGION = 0x1005 -# The signal is invalid. -HSA_STATUS_ERROR_INVALID_SIGNAL = 0x1006 -# The queue is invalid. -HSA_STATUS_ERROR_INVALID_QUEUE = 0x1007 -# The HSA runtime failed to allocate the necessary resources. This error -# may also occur when the HSA runtime needs to spawn threads or create -# internal OS-specific events. -HSA_STATUS_ERROR_OUT_OF_RESOURCES = 0x1008 -# The AQL packet is malformed. -HSA_STATUS_ERROR_INVALID_PACKET_FORMAT = 0x1009 -# An error has been detected while releasing a resource. -HSA_STATUS_ERROR_RESOURCE_FREE = 0x100A -# An API other than ::hsa_init has been invoked while the reference count -# of the HSA runtime is 0. -HSA_STATUS_ERROR_NOT_INITIALIZED = 0x100B -# The maximum reference count for the object has been reached. -HSA_STATUS_ERROR_REFCOUNT_OVERFLOW = 0x100C -# The arguments passed to a functions are not compatible. -HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS = 0x100D -# The index is invalid.\ -HSA_STATUS_ERROR_INVALID_INDEX = 0x100E -# The instruction set architecture is invalid. -HSA_STATUS_ERROR_INVALID_ISA = 0x100F, -# The instruction set architecture name is invalid. -HSA_STATUS_ERROR_INVALID_ISA_NAME = 0x1017 -# The code object is invalid. -HSA_STATUS_ERROR_INVALID_CODE_OBJECT = 0x1010 -# The executable is invalid. -HSA_STATUS_ERROR_INVALID_EXECUTABLE = 0x1011 -# The executable is frozen. -HSA_STATUS_ERROR_FROZEN_EXECUTABLE = 0x1012 -# There is no symbol with the given name. -HSA_STATUS_ERROR_INVALID_SYMBOL_NAME = 0x1013 -# The variable is already defined. -HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED = 0x1014 -# The variable is undefined. -HSA_STATUS_ERROR_VARIABLE_UNDEFINED = 0x1015 -# An HSAIL operation resulted on a hardware exception. 
-HSA_STATUS_ERROR_EXCEPTION = 0x1016 - -# hsa_packet_type_t -HSA_PACKET_TYPE_VENDOR_SPECIFIC = 0 -# The packet has been processed in the past, but has not been reassigned to -# the packet processor. A packet processor must not process a packet of this -# type. All queues support this packet type. -HSA_PACKET_TYPE_INVALID = 1 -# Packet used by agents for dispatching jobs to kernel agents. Not all -# queues support packets of this type (see ::hsa_queue_feature_t). -HSA_PACKET_TYPE_KERNEL_DISPATCH = 2 -# Packet used by agents to delay processing of subsequent packets, and to -# express complex dependencies between multiple packets. All queues support -# this packet type. -HSA_PACKET_TYPE_BARRIER_AND = 3 -# Packet used by agents for dispatching jobs to agents. Not all -# queues support packets of this type (see ::hsa_queue_feature_t). -HSA_PACKET_TYPE_AGENT_DISPATCH = 4 -# Packet used by agents to delay processing of subsequent packets, and to -# express complex dependencies between multiple packets. All queues support -# this packet type. -HSA_PACKET_TYPE_BARRIER_OR = 5 - -# hsa_queue_type_t -HSA_QUEUE_TYPE_MULTI = 0 -HSA_QUEUE_TYPE_SINGLE = 1 - -# hsa_queue_feature_t -HSA_QUEUE_FEATURE_KERNEL_DISPATCH = 1 -HSA_QUEUE_FEATURE_AGENT_DISPATCH = 2 - -# hsa_fence_scope_t -HSA_FENCE_SCOPE_NONE = 0 -HSA_FENCE_SCOPE_AGENT = 1 -HSA_FENCE_SCOPE_SYSTEM = 2 - -# hsa_wait_state_t -# The application thread may be rescheduled while waiting on the signal. -HSA_WAIT_STATE_BLOCKED = 0 -# The application thread stays active while waiting on a signal. 
-HSA_WAIT_STATE_ACTIVE = 1 - -# hsa_signal_condition_t -HSA_SIGNAL_CONDITION_EQ = 0 -HSA_SIGNAL_CONDITION_NE = 1 -HSA_SIGNAL_CONDITION_LT = 2 -HSA_SIGNAL_CONDITION_GTE = 3 - -# # hsa_dim_t -# HSA_DIM_X = 0 -# HSA_DIM_Y = 1 -# HSA_DIM_Z = 2 - -# hsa_extension_t -HSA_EXTENSION_FINALIZER = 0 -HSA_EXTENSION_IMAGES = 1 -HSA_EXTENSION_AMD_PROFILER = 2 - -# hsa_agent_feature_t -HSA_AGENT_FEATURE_KERNEL_DISPATCH = 1 -HSA_AGENT_FEATURE_AGENT_DISPATCH = 2 - -# hsa_device_type_t -HSA_DEVICE_TYPE_CPU = 0 -HSA_DEVICE_TYPE_GPU = 1 -HSA_DEVICE_TYPE_DSP = 2 - -# hsa_system_info_t -HSA_SYSTEM_INFO_VERSION_MAJOR = 0 -HSA_SYSTEM_INFO_VERSION_MINOR = 1 -HSA_SYSTEM_INFO_TIMESTAMP = 2 -HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY = 3 -HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT = 4 -HSA_SYSTEM_INFO_ENDIANNESS = 5 -HSA_SYSTEM_INFO_MACHINE_MODEL = 6 -HSA_SYSTEM_INFO_EXTENSIONS = 7 - -# hsa_agent_info_t - -# Agent name. The type of this attribute is a NUL-terminated char[64]. If -# the name of the agent uses less than 63 characters, the rest of the -# array must be filled with NULs. -HSA_AGENT_INFO_NAME = 0 -# Name of vendor. The type of this attribute is a NUL-terminated char[64]. If -# the name of the vendor uses less than 63 characters, the rest of the array -# must be filled with NULs. -HSA_AGENT_INFO_VENDOR_NAME = 1 -# Agent capability. The type of this attribute is ::hsa_agent_feature_t. -HSA_AGENT_INFO_FEATURE = 2 -# Machine model supported by the agent. The type of this attribute is -# ::hsa_machine_model_t. -HSA_AGENT_INFO_MACHINE_MODEL = 3 -# Profile supported by the agent. The type of this attribute is -# ::hsa_profile_t. -HSA_AGENT_INFO_PROFILE = 4 -# Default floating-point rounding mode. The type of this attribute is -# ::hsa_default_float_rounding_mode_t, but the value -# ::HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT is not allowed. -HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5 -# Default floating-point rounding modes supported by the agent in the Base -# profile. 
The type of this attribute is a mask of -# ::hsa_default_float_rounding_mode_t. The default floating-point rounding -# mode (::HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE) bit must not be set. -HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES = 23 -# Flag indicating that the f16 HSAIL operation is at least as fast as the -# f32 operation in the current agent. The value of this attribute is -# undefined if the agent is not a kernel agent. The type of this -# attribute is bool. -HSA_AGENT_INFO_FAST_F16_OPERATION = 24 -# Number of work-items in a wavefront. Must be a power of 2 in the range -# [1,256]. The value of this attribute is undefined if the agent is not -# a kernel agent. The type of this attribute is uint32_t. -HSA_AGENT_INFO_WAVEFRONT_SIZE = 6 -# Maximum number of work-items of each dimension of a work-group. Each -# maximum must be greater than 0. No maximum can exceed the value of -# ::HSA_AGENT_INFO_WORKGROUP_MAX_SIZE. The value of this attribute is -# undefined if the agent is not a kernel agent. The type of this -# attribute is uint16_t[3]. -HSA_AGENT_INFO_WORKGROUP_MAX_DIM = 7 -# Maximum total number of work-items in a work-group. The value of this -# attribute is undefined if the agent is not a kernel agent. The type -# of this attribute is uint32_t. -HSA_AGENT_INFO_WORKGROUP_MAX_SIZE = 8 -# Maximum number of work-items of each dimension of a grid. Each maximum must -# be greater than 0, and must not be smaller than the corresponding value in -# ::HSA_AGENT_INFO_WORKGROUP_MAX_DIM. No maximum can exceed the value of -# ::HSA_AGENT_INFO_GRID_MAX_SIZE. The value of this attribute is undefined if -# the agent is not a kernel agent. The type of this attribute is -# ::hsa_dim3_t. -HSA_AGENT_INFO_GRID_MAX_DIM = 9 -# Maximum total number of work-items in a grid. The value of this attribute -# is undefined if the agent is not a kernel agent. The type of this -# attribute is uint32_t. 
-HSA_AGENT_INFO_GRID_MAX_SIZE = 10 -# Maximum number of fbarriers per work-group. Must be at least 32. The value -# of this attribute is undefined if the agent is not a kernel agent. The -# type of this attribute is uint32_t. -HSA_AGENT_INFO_FBARRIER_MAX_SIZE = 11 -# Maximum number of queues that can be active (created but not destroyed) at -# one time in the agent. The type of this attribute is uint32_t. -HSA_AGENT_INFO_QUEUES_MAX = 12 -# Minimum number of packets that a queue created in the agent -# can hold. Must be a power of 2 greater than 0. Must not exceed -# the value of ::HSA_AGENT_INFO_QUEUE_MAX_SIZE. The type of this -# attribute is uint32_t. -HSA_AGENT_INFO_QUEUE_MIN_SIZE = 13 -# Maximum number of packets that a queue created in the agent can -# hold. Must be a power of 2 greater than 0. The type of this attribute -# is uint32_t. -HSA_AGENT_INFO_QUEUE_MAX_SIZE = 14 -# Type of a queue created in the agent. The type of this attribute is -# ::hsa_queue_type_t. -HSA_AGENT_INFO_QUEUE_TYPE = 15 -# Identifier of the NUMA node associated with the agent. The type of this -# attribute is uint32_t. -HSA_AGENT_INFO_NODE = 16 -# Type of hardware device associated with the agent. The type of this -# attribute is ::hsa_device_type_t. -HSA_AGENT_INFO_DEVICE = 17 -# Array of data cache sizes (L1..L4). Each size is expressed in bytes. A size -# of 0 for a particular level indicates that there is no cache information -# for that level. The type of this attribute is uint32_t[4]. -HSA_AGENT_INFO_CACHE_SIZE = 18 -# Instruction set architecture of the agent. The type of this attribute -# is ::hsa_isa_t. -HSA_AGENT_INFO_ISA = 19 -# Bit-mask indicating which extensions are supported by the agent. An -# extension with an ID of @p i is supported if the bit at position @p i is -# set. The type of this attribute is uint8_t[128]. -HSA_AGENT_INFO_EXTENSIONS = 20 -# Major version of the HSA runtime specification supported by the -# agent. The type of this attribute is uint16_t. 
-HSA_AGENT_INFO_VERSION_MAJOR = 21 -# Minor version of the HSA runtime specification supported by the -# agent. The type of this attribute is uint16_t. -HSA_AGENT_INFO_VERSION_MINOR = 22 - -# hsa_region_segment_t -# Global segment. Used to hold data that is shared by all agents. -HSA_REGION_SEGMENT_GLOBAL = 0 -# Read-only segment. Used to hold data that remains constant during the -# execution of a kernel. -HSA_REGION_SEGMENT_READONLY = 1 -# Private segment. Used to hold data that is local to a single work-item. -HSA_REGION_SEGMENT_PRIVATE = 2 -# Group segment. Used to hold data that is shared by the work-items of a -# work-group. -HSA_REGION_SEGMENT_GROUP = 3 - -# hsa_region_global_flag_t -# The application can use memory in the region to store kernel arguments, and -# provide the values for the kernarg segment of a kernel dispatch. If this -# flag is set, then ::HSA_REGION_GLOBAL_FLAG_FINE_GRAINED must be set. -HSA_REGION_GLOBAL_FLAG_KERNARG = 1 -# Updates to memory in this region are immediately visible to all the -# agents under the terms of the HSA memory model. If this -# flag is set, then ::HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED must not be set. -HSA_REGION_GLOBAL_FLAG_FINE_GRAINED = 2 -# Updates to memory in this region can be performed by a single agent at -# a time. If a different agent in the system is allowed to access the -# region, the application must explicitely invoke ::hsa_memory_assign_agent -# in order to transfer ownership to that agent for a particular buffer. -HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED = 4 - -# hsa_region_info_t - -# Segment where memory in the region can be used. The type of this -# attribute is ::hsa_region_segment_t. -HSA_REGION_INFO_SEGMENT = 0 -# Flag mask. The value of this attribute is undefined if the value of -# ::HSA_REGION_INFO_SEGMENT is not ::HSA_REGION_SEGMENT_GLOBAL. The type of -# this attribute is uint32_t, a bit-field of ::hsa_region_global_flag_t -# values. 
-HSA_REGION_INFO_GLOBAL_FLAGS = 1 -# Size of this region, in bytes. The type of this attribute is size_t. -HSA_REGION_INFO_SIZE = 2 -# Maximum allocation size in this region, in bytes. Must not exceed the value -# of ::HSA_REGION_INFO_SIZE. The type of this attribute is size_t. -# -# If the region is in the global or readonly segments, this is the maximum -# size that the application can pass to ::hsa_memory_allocate. If the region -# is in the group segment, this is the maximum size (per work-group) that can -# be requested for a given kernel dispatch. If the region is in the private -# segment, this is the maximum size (per work-item) that can be request for a -# specific kernel dispatch. -HSA_REGION_INFO_ALLOC_MAX_SIZE = 4 -# Indicates whether memory in this region can be allocated using -# ::hsa_memory_allocate. The type of this attribute is bool. -# -# The value of this flag is always false for regions in the group and private -# segments. -HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED = 5 -# Allocation granularity of buffers allocated by ::hsa_memory_allocate in -# this region. The size of a buffer allocated in this region is a multiple of -# the value of this attribute. The value of this attribute is only defined if -# ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region. The type -# of this attribute is size_t. -HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE = 6 -# Alignment of buffers allocated by ::hsa_memory_allocate in this region. The -# value of this attribute is only defined if -# ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region, and must -# be a power of 2. The type of this attribute is size_t. -HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT = 7 - - -# hsa_profile_t -HSA_PROFILE_BASE = 0 -HSA_PROFILE_FULL = 1 - -# hsa_machine_model_t -HSA_MACHINE_MODEL_SMALL = 0 -HSA_MACHINE_MODEL_LARGE = 1 - - -# hsa_executable_symbol_info_t - - -# The kind of the symbol. The type of this attribute is ::hsa_symbol_kind_t. 
-HSA_EXECUTABLE_SYMBOL_INFO_TYPE = 0 -# The length of the symbol name. The type of this attribute is uint32_t. -HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH = 1 -# The name of the symbol. The type of this attribute is character array with -# the length equal to the value of ::HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH -# attribute -HSA_EXECUTABLE_SYMBOL_INFO_NAME = 2 -# The length of the module name to which this symbol belongs if this symbol -# has module linkage, otherwise 0 is returned. The type of this attribute is -# uint32_t. -HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3 -# The module name to which this symbol belongs if this symbol has module -# linkage, otherwise empty string is returned. The type of this attribute is -# character array with the length equal to the value of -# ::HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute. -HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME = 4 -# Agent associated with this symbol. If the symbol is a variable, the -# value of this attribute is only defined if -# ::HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION is -# ::HSA_VARIABLE_ALLOCATION_AGENT. The type of this attribute is hsa_agent_t. -HSA_EXECUTABLE_SYMBOL_INFO_AGENT = 20 -# The address of the variable. The value of this attribute is undefined if -# the symbol is not a variable. The type of this attribute is uint64_t. -# If executable's state is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0 is -# returned. -HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS = 21 -# The linkage kind of the symbol. The type of this attribute is -# ::hsa_symbol_linkage_t. -HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE = 5 -# Indicates whether the symbol corresponds to a definition. The type of this -# attribute is bool. -HSA_EXECUTABLE_SYMBOL_INFO_IS_DEFINITION = 17 -# The allocation kind of the variable. The value of this attribute is -# undefined if the symbol is not a variable. The type of this attribute is -# ::hsa_variable_allocation_t. 
-HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6 -# The segment kind of the variable. The value of this attribute is undefined -# if the symbol is not a variable. The type of this attribute is -# ::hsa_variable_segment_t. -HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT = 7 -# Alignment of the variable. The value of this attribute is undefined if -# the symbol is not a variable. The type of this attribute is uint32_t. -HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8 -# Size of the variable. The value of this attribute is undefined if -# the symbol is not a variable. The type of this attribute is uint32_t. -# -# A value of 0 is returned if the variable is an external variable and has an -# unknown dimension. -HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE = 9 -# Indicates whether the variable is constant. The value of this attribute is -# undefined if the symbol is not a variable. The type of this attribute is -# bool. -HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST = 10 - -# Kernel object handle, used in the kernel dispatch packet. The value of this -# attribute is undefined if the symbol is not a kernel. The type of this -# attribute is uint64_t. -# -# If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0 -# is returned. -HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT = 22 -# Size of kernarg segment memory that is required to hold the values of the -# kernel arguments, in bytes. The value of this attribute is undefined if the -# symbol is not a kernel. The type of this attribute is uint32_t. -HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11 -# Alignment (in bytes) of the buffer used to pass arguments to the kernel, -# which is the maximum of 16 and the maximum alignment of any of the kernel -# arguments. The value of this attribute is undefined if the symbol is not a -# kernel. The type of this attribute is uint32_t. 
-HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12 -# Size of static group segment memory required by the kernel (per -# work-group), in bytes. The value of this attribute is undefined -# if the symbol is not a kernel. The type of this attribute is uint32_t. -# -# The reported amount does not include any dynamically allocated group -# segment memory that may be requested by the application when a kernel is -# dispatched. -HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13 -# Size of static private, spill, and arg segment memory required by -# this kernel (per work-item), in bytes. The value of this attribute is -# undefined if the symbol is not a kernel. The type of this attribute is -# uint32_t. -# -# If the value of ::HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is -# true, the kernel may use more private memory than the reported value, and -# the application must add the dynamic call stack usage to @a -# private_segment_size when populating a kernel dispatch packet. -HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14 -# Dynamic callstack flag. The value of this attribute is undefined if the -# symbol is not a kernel. The type of this attribute is bool. -# -# If this flag is set (the value is true), the kernel uses a dynamically -# sized call stack. This can happen if recursive calls, calls to indirect -# functions, or the HSAIL alloca instruction are present in the kernel. -HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15 -# Indirect function object handle. The value of this attribute is undefined -# if the symbol is not an indirect function, or the associated agent does -# not support the Full Profile. The type of this attribute depends on the -# machine model: if machine model is small, then the type is uint32_t, if -# machine model is large, then the type is uint64_t. -# -# If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0 -# is returned. 
-HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT = 23 -# Call convention of the indirect function. The value of this attribute is -# undefined if the symbol is not an indirect function, or the associated -# agent does not support the Full Profile. The type of this attribute is -# uint32_t. -HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16 - - -# hsa_default_float_rounding_mode_t - -# Use a default floating-point rounding mode specified elsewhere. -HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT = 0 -# Operations that specify the default floating-point mode are rounded to zero -# by default. -HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO = 1 -# Operations that specify the default floating-point mode are rounded to the -# nearest representable number and that ties should be broken by selecting -# the value with an even least significant bit. -HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR = 2 - -# hsa_code_object_type_t -HSA_CODE_OBJECT_TYPE_PROGRAM = 0 - - -# hsa_executable_state_t - -# Executable state, which allows the user to load code objects and define -# external variables. Variable addresses, kernel code handles, and -# indirect function code handles are not available in query operations until -# the executable is frozen (zero always returned). - -HSA_EXECUTABLE_STATE_UNFROZEN = 0 - -# Executable state, which allows the user to query variable addresses, -# kernel code handles, and indirect function code handles using query -# operation. Loading new code objects, as well as defining external variables -# is not allowed in this state. 
- -HSA_EXECUTABLE_STATE_FROZEN = 1 - - -# hsa_kernel_dispatch_packet_setup_t -HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS = 0 - - - -# hsa_packet_header_t -HSA_PACKET_HEADER_TYPE = 0 -HSA_PACKET_HEADER_BARRIER = 8 -HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE = 9 -HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE = 11 - diff --git a/numba/numba/roc/hsadrv/enums_ext.py b/numba/numba/roc/hsadrv/enums_ext.py deleted file mode 100644 index 8ab4588a7..000000000 --- a/numba/numba/roc/hsadrv/enums_ext.py +++ /dev/null @@ -1,254 +0,0 @@ -"""Enum values for HSA from the HSA extension header - -Note that Python namespacing could be used to avoid the C-like -prefixing, but we choose to keep the same names as found in the C -enums, in order to match the documentation. -""" - -# These enums are a direct translation of those found in: -# hsa_ext_amd.h from the ROCR-Runtime. For example: -# https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/master/src/inc/hsa_ext_amd.h -# Comments relating to the values are largely wholesale copied. - -import ctypes - - -#------------------------------------------------------------------------------ -# -# Anonymous enum expressing that a memory pool is invalid -# -HSA_STATUS_ERROR_INVALID_MEMORY_POOL = 40 -#------------------------------------------------------------------------------ - - -#------------------------------------------------------------------------------ -# -# Agent attributes -# -# Enums of the type hsa_amd_agent_info_t - -# Chip identifier. The type of this attribute is uint32_t. -HSA_AMD_AGENT_INFO_CHIP_ID = 0xA000 - -# Size of a cacheline in bytes. The type of this attribute is uint32_t. -HSA_AMD_AGENT_INFO_CACHELINE_SIZE = 0xA001 - -# The number of compute unit available in the agent. The type of this -# attribute is uint32_t. -HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT = 0xA002 - -# The maximum clock frequency of the agent in MHz. The type of this -# attribute is uint32_t. 
-HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY = 0xA003 - -# Internay driver node identifier. The type of this attribute is uint32_t. -HSA_AMD_AGENT_INFO_DRIVER_NODE_ID = 0xA004 - -# Max number of watch points on memory address ranges to generate exception -# events when the watched addresses are accessed. -HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS = 0xA005 -#------------------------------------------------------------------------------ - - -#------------------------------------------------------------------------------ -# -# Region attributes -# -# Enums of the type hsa_amd_region_info_t - -# Determine if host can access the region. The type of this attribute is bool. -HSA_AMD_REGION_INFO_HOST_ACCESSIBLE = 0xA000 - -# Base address of the region in flat address space. -HSA_AMD_REGION_INFO_BASE = 0xA001 - -# Memory Interface width, the return value type is uint32_t. -# This attribute is deprecated. Use HSA_AMD_AGENT_INFO_MEMORY_WIDTH. -HSA_AMD_REGION_INFO_BUS_WIDTH = 0xA002 - -# Max Memory Clock, the return value type is uint32_t. -# This attribute is deprecated. Use HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY. -HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY = 0xA003 -#------------------------------------------------------------------------------ - - -#------------------------------------------------------------------------------ -# -# Coherency attributes of a fine grained region -# -# Enums of the type hsa_amd_coherency_type_t - -# Coherent region. -HSA_AMD_COHERENCY_TYPE_COHERENT = 0 - -# Non coherent region. -HSA_AMD_COHERENCY_TYPE_NONCOHERENT = 1 -#------------------------------------------------------------------------------ - - -#------------------------------------------------------------------------------ -# -# Memory segments associated with a memory pool. -# -# Enums of the type hsa_amd_segment_t - -# Global segment. Used to hold data that is shared by all agents. -HSA_AMD_SEGMENT_GLOBAL = 0 - -# Read-only segment. 
Used to hold data that remains constant during the -# execution of a kernel. -HSA_AMD_SEGMENT_READONLY = 1 - -# Private segment. Used to hold data that is local to a single work-item. -HSA_AMD_SEGMENT_PRIVATE = 2 - -# Group segment. Used to hold data that is shared by the work-items of a -# work-group. -HSA_AMD_SEGMENT_GROUP = 3 -#------------------------------------------------------------------------------ - - -#------------------------------------------------------------------------------ -# -# Memory pool global flags. -# -# Enums of the type hsa_amd_memory_pool_global_flag_t. - -# The application can use allocations in the memory pool to store kernel -# arguments, and provide the values for the kernarg segment of -# a kernel dispatch. -HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT = 1 - -# Updates to memory in this pool conform to HSA memory consistency model. -# If this flag is set, then HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED -# must not be set. -HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED = 2 - -# Writes to memory in this pool can be performed by a single agent at a time. -HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED = 4 -#------------------------------------------------------------------------------ - - -#------------------------------------------------------------------------------ -# -# Memory pool features flags. -# -# Enums of the type hsa_amd_memory_pool_info_t. - -# Segment where the memory pool resides. The type of this attribute is -# hsa_amd_segment_t. -HSA_AMD_MEMORY_POOL_INFO_SEGMENT = 0 - -# Flag mask. The value of this attribute is undefined if the value of -# HSA_AMD_MEMORY_POOL_INFO_SEGMENT is not HSA_AMD_SEGMENT_GLOBAL. The type -# of this attribute is uint32_t, a bit-field of -# hsa_amd_memory_pool_global_flag_t values. -HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS = 1 - -# Size of this pool, in bytes. The type of this attribute is size_t. 
-HSA_AMD_MEMORY_POOL_INFO_SIZE = 2 - -# Indicates whether memory in this pool can be allocated using -# hsa_amd_memory_pool_allocate. The type of this attribute is bool. -# The value of this flag is always false for memory pools in the group and -# private segments. -HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED = 5 - -# Allocation granularity of buffers allocated by hsa_amd_memory_pool_allocate -# in this memory pool. The size of a buffer allocated in this pool is a -# multiple of the value of this attribute. The value of this attribute is -# only defined if HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED is true for -# this pool. The type of this attribute is size_t. -HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE = 6 - -# Alignment of buffers allocated by hsa_amd_memory_pool_allocate in this -# pool. The value of this attribute is only defined if -# HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED is true for this pool, and -# must be a power of 2. The type of this attribute is size_t. -HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT = 7 - -# This memory_pool can be made directly accessible by all the agents in the -# system (hsa_amd_agent_memory_pool_get_info returns -# HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT for all agents). The type of -# this attribute is bool. -HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL = 15 -#------------------------------------------------------------------------------ - - -#------------------------------------------------------------------------------ -# -# Type of accesses to a memory pool from a given agent. -# -# Enums of the type hsa_amd_memory_pool_access_t - -# The agent cannot directly access any buffer in the memory pool. -HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED = 0 - -# The agent can directly access a buffer located in the pool; the application -# does not need to invoke hsa_amd_agents_allow_access. 
-HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT = 1 - -# The agent can directly access a buffer located in the pool, but only if the -# application has previously requested access to that buffer using -# hsa_amd_agents_allow_access. -HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT = 2 -#------------------------------------------------------------------------------ - - -#------------------------------------------------------------------------------ -# -# Properties of the relationship between an agent a memory pool. -# -# Enums of the type hsa_amd_link_info_type_t - -# Hyper-transport bus type. -HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0 - -# QPI bus type. -HSA_AMD_LINK_INFO_TYPE_QPI = 1 - -# PCIe bus type. -HSA_AMD_LINK_INFO_TYPE_PCIE = 2 - -# Infiniband bus type. -HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3 -#------------------------------------------------------------------------------ - - -#------------------------------------------------------------------------------ -# -# Access to buffers located in the memory pool. The type of this attribute -# is hsa_amd_memory_pool_access_t. -# -# Enums of type hsa_amd_agent_memory_pool_info_t. - -# An agent can always directly access buffers currently located in a memory -# pool that is associated (the memory_pool is one of the values returned by -# hsa_amd_agent_iterate_memory_pools on the agent) with that agent. If the -# buffer is currently located in a memory pool that is not associated with -# the agent, and the value returned by this function for the given -# combination of agent and memory pool is not -# HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED, the application still needs to -# invoke hsa_amd_agents_allow_access in order to gain direct access to the -# buffer. - -# If the given agent can directly access buffers the pool, the result is not -# HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. 
If the memory pool is associated -# with the agent, or it is of fined-grained type, the result must not be -# HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. If the memory pool is not -# associated with the agent, and does not reside in the global segment, the -# result must be HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. -HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS = 0 - -# Number of links to hop when accessing the memory pool from the specified -# agent. The type of this attribute is uint32_t. -HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS = 1 - -# Details of each link hop when accessing the memory pool starting from the -# specified agent. The type of this attribute is an array size of -# HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS with each element containing -# hsa_amd_memory_pool_link_info_t. -HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO = 2 -#------------------------------------------------------------------------------ - - diff --git a/numba/numba/roc/hsadrv/error.py b/numba/numba/roc/hsadrv/error.py deleted file mode 100644 index c3c6ffae9..000000000 --- a/numba/numba/roc/hsadrv/error.py +++ /dev/null @@ -1,33 +0,0 @@ -from __future__ import print_function, absolute_import, division - - -class HsaDriverError(Exception): - pass - - -class HsaSupportError(ImportError): - pass - - -class HsaApiError(HsaDriverError): - def __init__(self, code, msg): - self.code = code - super(HsaApiError, self).__init__(msg) - - -class HsaWarning(UserWarning): - pass - - -class HsaKernelLaunchError(HsaDriverError): - pass - - -class HsaContextMismatchError(HsaDriverError): - def __init__(self, expect, got): - fmt = ("device array is associated with a different " - "context: expect {0} but got {1}") - msg = fmt.format(expect, got) - super(HsaContextMismatchError, self).__init__(msg) - - diff --git a/numba/numba/roc/hsaimpl.py b/numba/numba/roc/hsaimpl.py deleted file mode 100644 index e05809520..000000000 --- a/numba/numba/roc/hsaimpl.py +++ /dev/null @@ -1,301 +0,0 @@ -from __future__ import 
print_function, absolute_import, division - -import operator -from functools import reduce - -from llvmlite.llvmpy.core import Type -import llvmlite.llvmpy.core as lc -import llvmlite.binding as ll -from llvmlite import ir - -from numba.targets.imputils import Registry -from numba import cgutils -from numba import types -from numba.itanium_mangler import mangle_c, mangle, mangle_type -from . import target -from . import stubs -from . import hlc -from . import enums - -registry = Registry() -lower = registry.lower - -_void_value = lc.Constant.null(lc.Type.pointer(lc.Type.int(8))) - -# ----------------------------------------------------------------------------- - - -def _declare_function(context, builder, name, sig, cargs, - mangler=mangle_c): - """Insert declaration for a opencl builtin function. - Uses the Itanium mangler. - - Args - ---- - context: target context - - builder: llvm builder - - name: str - symbol name - - sig: signature - function signature of the symbol being declared - - cargs: sequence of str - C type names for the arguments - - mangler: a mangler function - function to use to mangle the symbol - - """ - mod = builder.module - if sig.return_type == types.void: - llretty = lc.Type.void() - else: - llretty = context.get_value_type(sig.return_type) - llargs = [context.get_value_type(t) for t in sig.args] - fnty = Type.function(llretty, llargs) - mangled = mangler(name, cargs) - fn = mod.get_or_insert_function(fnty, mangled) - fn.calling_convention = target.CC_SPIR_FUNC - return fn - - -@lower(stubs.get_global_id, types.uint32) -def get_global_id_impl(context, builder, sig, args): - [dim] = args - get_global_id = _declare_function(context, builder, 'get_global_id', sig, - ['unsigned int']) - res = builder.call(get_global_id, [dim]) - return context.cast(builder, res, types.uintp, types.intp) - - -@lower(stubs.get_local_id, types.uint32) -def get_local_id_impl(context, builder, sig, args): - [dim] = args - get_local_id = _declare_function(context, 
builder, 'get_local_id', sig, - ['unsigned int']) - res = builder.call(get_local_id, [dim]) - return context.cast(builder, res, types.uintp, types.intp) - - -@lower(stubs.get_group_id, types.uint32) -def get_group_id_impl(context, builder, sig, args): - [dim] = args - get_group_id = _declare_function(context, builder, 'get_group_id', sig, - ['unsigned int']) - res = builder.call(get_group_id, [dim]) - return context.cast(builder, res, types.uintp, types.intp) - - -@lower(stubs.get_num_groups, types.uint32) -def get_num_groups_impl(context, builder, sig, args): - [dim] = args - get_num_groups = _declare_function(context, builder, 'get_num_groups', sig, - ['unsigned int']) - res = builder.call(get_num_groups, [dim]) - return context.cast(builder, res, types.uintp, types.intp) - - -@lower(stubs.get_work_dim) -def get_work_dim_impl(context, builder, sig, args): - get_work_dim = _declare_function(context, builder, 'get_work_dim', sig, - ["void"]) - res = builder.call(get_work_dim, []) - return res - - -@lower(stubs.get_global_size, types.uint32) -def get_global_size_impl(context, builder, sig, args): - [dim] = args - get_global_size = _declare_function(context, builder, 'get_global_size', - sig, ['unsigned int']) - res = builder.call(get_global_size, [dim]) - return context.cast(builder, res, types.uintp, types.intp) - - -@lower(stubs.get_local_size, types.uint32) -def get_local_size_impl(context, builder, sig, args): - [dim] = args - get_local_size = _declare_function(context, builder, 'get_local_size', - sig, ['unsigned int']) - res = builder.call(get_local_size, [dim]) - return context.cast(builder, res, types.uintp, types.intp) - - -@lower(stubs.barrier, types.uint32) -def barrier_one_arg_impl(context, builder, sig, args): - [flags] = args - barrier = _declare_function(context, builder, 'barrier', sig, - ['unsigned int']) - builder.call(barrier, [flags]) - return _void_value - -@lower(stubs.barrier) -def barrier_no_arg_impl(context, builder, sig, args): - assert not 
args - sig = types.void(types.uint32) - barrier = _declare_function(context, builder, 'barrier', sig, - ['unsigned int']) - flags = context.get_constant(types.uint32, enums.CLK_GLOBAL_MEM_FENCE) - builder.call(barrier, [flags]) - return _void_value - - -@lower(stubs.mem_fence, types.uint32) -def mem_fence_impl(context, builder, sig, args): - [flags] = args - mem_fence = _declare_function(context, builder, 'mem_fence', sig, - ['unsigned int']) - builder.call(mem_fence, [flags]) - return _void_value - - -@lower(stubs.wavebarrier) -def wavebarrier_impl(context, builder, sig, args): - assert not args - fnty = Type.function(Type.void(), []) - fn = builder.module.declare_intrinsic('llvm.amdgcn.wave.barrier', fnty=fnty) - builder.call(fn, []) - return _void_value - -@lower(stubs.activelanepermute_wavewidth, - types.Any, types.uint32, types.Any, types.bool_) -def activelanepermute_wavewidth_impl(context, builder, sig, args): - [src, laneid, identity, use_ident] = args - assert sig.args[0] == sig.args[2] - elem_type = sig.args[0] - bitwidth = elem_type.bitwidth - intbitwidth = Type.int(bitwidth) - i32 = Type.int(32) - i1 = Type.int(1) - name = "__hsail_activelanepermute_wavewidth_b{0}".format(bitwidth) - - fnty = Type.function(intbitwidth, [intbitwidth, i32, intbitwidth, i1]) - fn = builder.module.get_or_insert_function(fnty, name=name) - fn.calling_convention = target.CC_SPIR_FUNC - - def cast(val): - return builder.bitcast(val, intbitwidth) - - result = builder.call(fn, [cast(src), laneid, cast(identity), use_ident]) - return builder.bitcast(result, context.get_value_type(elem_type)) - -def _gen_ds_permute(intrinsic_name): - def _impl(context, builder, sig, args): - """ - args are (index, src) - """ - assert sig.args[0] == sig.args[1] - idx, src = args - i32 = Type.int(32) - fnty = Type.function(i32, [i32, i32]) - fn = builder.module.declare_intrinsic(intrinsic_name, fnty=fnty) - # the args are byte addressable, VGPRs are 4 wide so mul idx by 4 - four = 
lc.Constant.int(i32, 4) - idx = builder.mul(idx, four) - return builder.call(fn, (idx, src)) - return _impl - -lower(stubs.ds_permute, types.int32, types.int32)(_gen_ds_permute('llvm.amdgcn.ds.permute')) -lower(stubs.ds_bpermute, types.int32, types.int32)(_gen_ds_permute('llvm.amdgcn.ds.bpermute')) - -@lower(stubs.atomic.add, types.Array, types.intp, types.Any) -@lower(stubs.atomic.add, types.Array, - types.UniTuple, types.Any) -@lower(stubs.atomic.add, types.Array, types.Tuple, - types.Any) -def hsail_atomic_add_tuple(context, builder, sig, args): - aryty, indty, valty = sig.args - ary, inds, val = args - dtype = aryty.dtype - - if indty == types.intp: - indices = [inds] # just a single integer - indty = [indty] - else: - indices = cgutils.unpack_tuple(builder, inds, count=len(indty)) - indices = [context.cast(builder, i, t, types.intp) - for t, i in zip(indty, indices)] - - if dtype != valty: - raise TypeError("expecting %s but got %s" % (dtype, valty)) - - if aryty.ndim != len(indty): - raise TypeError("indexing %d-D array with %d-D index" % - (aryty.ndim, len(indty))) - - lary = context.make_array(aryty)(context, builder, ary) - ptr = cgutils.get_item_pointer(builder, aryty, lary, indices) - - return builder.atomic_rmw("add", ptr, val, ordering='monotonic') - - -@lower('hsail.smem.alloc', types.UniTuple, types.Any) -def hsail_smem_alloc_array(context, builder, sig, args): - shape, dtype = args - return _generic_array(context, builder, shape=shape, dtype=dtype, - symbol_name='_hsapy_smem', - addrspace=target.SPIR_LOCAL_ADDRSPACE) - - -def _generic_array(context, builder, shape, dtype, symbol_name, addrspace): - elemcount = reduce(operator.mul, shape) - lldtype = context.get_data_type(dtype) - laryty = Type.array(lldtype, elemcount) - - if addrspace == target.SPIR_LOCAL_ADDRSPACE: - lmod = builder.module - - # Create global variable in the requested address-space - gvmem = lmod.add_global_variable(laryty, symbol_name, addrspace) - - if elemcount <= 0: - raise 
ValueError("array length <= 0") - else: - gvmem.linkage = lc.LINKAGE_INTERNAL - - if dtype not in types.number_domain: - raise TypeError("unsupported type: %s" % dtype) - - # Convert to generic address-space - dataptr = context.addrspacecast(builder, gvmem, - target.SPIR_GENERIC_ADDRSPACE) - - else: - raise NotImplementedError("addrspace {addrspace}".format(**locals())) - - return _make_array(context, builder, dataptr, dtype, shape) - - -def _make_array(context, builder, dataptr, dtype, shape, layout='C'): - ndim = len(shape) - # Create array object - aryty = types.Array(dtype=dtype, ndim=ndim, layout='C') - ary = context.make_array(aryty)(context, builder) - - targetdata = _get_target_data(context) - lldtype = context.get_data_type(dtype) - itemsize = lldtype.get_abi_size(targetdata) - # Compute strides - rstrides = [itemsize] - for i, lastsize in enumerate(reversed(shape[1:])): - rstrides.append(lastsize * rstrides[-1]) - strides = [s for s in reversed(rstrides)] - - kshape = [context.get_constant(types.intp, s) for s in shape] - kstrides = [context.get_constant(types.intp, s) for s in strides] - - context.populate_array(ary, - data=builder.bitcast(dataptr, ary.data.type), - shape=cgutils.pack_array(builder, kshape), - strides=cgutils.pack_array(builder, kstrides), - itemsize=context.get_constant(types.intp, itemsize), - meminfo=None) - - return ary._getvalue() - - -def _get_target_data(context): - return ll.create_target_data(hlc.DATALAYOUT[context.address_size]) diff --git a/numba/numba/roc/initialize.py b/numba/numba/roc/initialize.py deleted file mode 100644 index 589ca0a4a..000000000 --- a/numba/numba/roc/initialize.py +++ /dev/null @@ -1,27 +0,0 @@ -#### Additional initialization code ###### - - -def _initialize_ufunc(): - from numba.npyufunc import Vectorize - - def init_vectorize(): - from numba.roc.vectorizers import HsaVectorize - - return HsaVectorize - - Vectorize.target_registry.ondemand['roc'] = init_vectorize - - -def _initialize_gufunc(): - from 
numba.npyufunc import GUVectorize - - def init_guvectorize(): - from numba.roc.vectorizers import HsaGUFuncVectorize - - return HsaGUFuncVectorize - - GUVectorize.target_registry.ondemand['roc'] = init_guvectorize - - -_initialize_ufunc() -_initialize_gufunc() diff --git a/numba/numba/roc/mathdecl.py b/numba/numba/roc/mathdecl.py deleted file mode 100644 index 382496977..000000000 --- a/numba/numba/roc/mathdecl.py +++ /dev/null @@ -1,346 +0,0 @@ -from __future__ import print_function, absolute_import, division -import math -from numba import types, utils -from numba.typing.templates import (AttributeTemplate, ConcreteTemplate, - signature, Registry) - -registry = Registry() -builtin_attr = registry.register_attr -infer_global = registry.register_global - - -@builtin_attr -class MathModuleAttribute(AttributeTemplate): - key = types.Module(math) - - def resolve_fabs(self, mod): - return types.Function(Math_fabs) - - def resolve_exp(self, mod): - return types.Function(Math_exp) - - def resolve_expm1(self, mod): - return types.Function(Math_expm1) - - def resolve_sqrt(self, mod): - return types.Function(Math_sqrt) - - def resolve_log(self, mod): - return types.Function(Math_log) - - def resolve_log1p(self, mod): - return types.Function(Math_log1p) - - def resolve_log10(self, mod): - return types.Function(Math_log10) - - def resolve_sin(self, mod): - return types.Function(Math_sin) - - def resolve_cos(self, mod): - return types.Function(Math_cos) - - def resolve_tan(self, mod): - return types.Function(Math_tan) - - def resolve_sinh(self, mod): - return types.Function(Math_sinh) - - def resolve_cosh(self, mod): - return types.Function(Math_cosh) - - def resolve_tanh(self, mod): - return types.Function(Math_tanh) - - def resolve_asin(self, mod): - return types.Function(Math_asin) - - def resolve_acos(self, mod): - return types.Function(Math_acos) - - def resolve_atan(self, mod): - return types.Function(Math_atan) - - def resolve_atan2(self, mod): - return 
types.Function(Math_atan2) - - def resolve_asinh(self, mod): - return types.Function(Math_asinh) - - def resolve_acosh(self, mod): - return types.Function(Math_acosh) - - def resolve_atanh(self, mod): - return types.Function(Math_atanh) - - def resolve_pi(self, mod): - return types.float64 - - def resolve_e(self, mod): - return types.float64 - - def resolve_floor(self, mod): - return types.Function(Math_floor) - - def resolve_ceil(self, mod): - return types.Function(Math_ceil) - - def resolve_trunc(self, mod): - return types.Function(Math_trunc) - - def resolve_isnan(self, mod): - return types.Function(Math_isnan) - - def resolve_isinf(self, mod): - return types.Function(Math_isinf) - - def resolve_degrees(self, mod): - return types.Function(Math_degrees) - - def resolve_radians(self, mod): - return types.Function(Math_radians) - - # def resolve_hypot(self, mod): - # return types.Function(Math_hypot) - - def resolve_copysign(self, mod): - return types.Function(Math_copysign) - - def resolve_fmod(self, mod): - return types.Function(Math_fmod) - - def resolve_pow(self, mod): - return types.Function(Math_pow) - - def resolve_erf(self, mod): - return types.Function(Math_erf) - - def resolve_erfc(self, mod): - return types.Function(Math_erfc) - - def resolve_gamma(self, mod): - return types.Function(Math_gamma) - - def resolve_lgamma(self, mod): - return types.Function(Math_lgamma) - - -class Math_unary(ConcreteTemplate): - cases = [ - signature(types.float64, types.int64), - signature(types.float64, types.uint64), - signature(types.float32, types.float32), - signature(types.float64, types.float64), - ] - - -class Math_fabs(Math_unary): - key = math.fabs - - -class Math_exp(Math_unary): - key = math.exp - - -class Math_expm1(Math_unary): - key = math.expm1 - - -class Math_sqrt(Math_unary): - key = math.sqrt - - -class Math_log(Math_unary): - key = math.log - - -class Math_log1p(Math_unary): - key = math.log1p - - -class Math_log10(Math_unary): - key = math.log10 - - 
-class Math_sin(Math_unary): - key = math.sin - - -class Math_cos(Math_unary): - key = math.cos - - -class Math_tan(Math_unary): - key = math.tan - - -class Math_sinh(Math_unary): - key = math.sinh - - -class Math_cosh(Math_unary): - key = math.cosh - - -class Math_tanh(Math_unary): - key = math.tanh - - -class Math_asin(Math_unary): - key = math.asin - - -class Math_acos(Math_unary): - key = math.acos - - -class Math_atan(Math_unary): - key = math.atan - - -class Math_atan2(ConcreteTemplate): - key = math.atan2 - cases = [ - signature(types.float64, types.int64, types.int64), - signature(types.float64, types.uint64, types.uint64), - signature(types.float32, types.float32, types.float32), - signature(types.float64, types.float64, types.float64), - ] - - -class Math_asinh(Math_unary): - key = math.asinh - - -class Math_acosh(Math_unary): - key = math.acosh - - -class Math_atanh(Math_unary): - key = math.atanh - - -class Math_floor(Math_unary): - key = math.floor - - -class Math_ceil(Math_unary): - key = math.ceil - - -class Math_trunc(Math_unary): - key = math.trunc - - -class Math_radians(Math_unary): - key = math.radians - - -class Math_degrees(Math_unary): - key = math.degrees - - -# class Math_hypot(ConcreteTemplate): -# key = math.hypot -# cases = [ -# signature(types.float64, types.int64, types.int64), -# signature(types.float64, types.uint64, types.uint64), -# signature(types.float32, types.float32, types.float32), -# signature(types.float64, types.float64, types.float64), -# ] - - -class Math_erf(Math_unary): - key = math.erf - -class Math_erfc(Math_unary): - key = math.erfc - -class Math_gamma(Math_unary): - key = math.gamma - -class Math_lgamma(Math_unary): - key = math.lgamma - - -class Math_binary(ConcreteTemplate): - cases = [ - signature(types.float32, types.float32, types.float32), - signature(types.float64, types.float64, types.float64), - ] - - -class Math_copysign(Math_binary): - key = math.copysign - - -class Math_fmod(Math_binary): - key = 
math.fmod - - -class Math_pow(ConcreteTemplate): - key = math.pow - cases = [ - signature(types.float32, types.float32, types.float32), - signature(types.float64, types.float64, types.float64), - signature(types.float32, types.float32, types.int32), - signature(types.float64, types.float64, types.int32), - ] - - -class Math_isnan(ConcreteTemplate): - key = math.isnan - cases = [ - signature(types.boolean, types.int64), - signature(types.boolean, types.uint64), - signature(types.boolean, types.float32), - signature(types.boolean, types.float64), - ] - - -class Math_isinf(ConcreteTemplate): - key = math.isinf - cases = [ - signature(types.boolean, types.int64), - signature(types.boolean, types.uint64), - signature(types.boolean, types.float32), - signature(types.boolean, types.float64), - ] - - -infer_global(math, types.Module(math)) -infer_global(math.fabs, types.Function(Math_fabs)) -infer_global(math.exp, types.Function(Math_exp)) -infer_global(math.expm1, types.Function(Math_expm1)) -infer_global(math.sqrt, types.Function(Math_sqrt)) -infer_global(math.log, types.Function(Math_log)) -infer_global(math.log1p, types.Function(Math_log1p)) -infer_global(math.log10, types.Function(Math_log10)) -infer_global(math.sin, types.Function(Math_sin)) -infer_global(math.cos, types.Function(Math_cos)) -infer_global(math.tan, types.Function(Math_tan)) -infer_global(math.sinh, types.Function(Math_sinh)) -infer_global(math.cosh, types.Function(Math_cosh)) -infer_global(math.tanh, types.Function(Math_tanh)) -infer_global(math.asin, types.Function(Math_asin)) -infer_global(math.acos, types.Function(Math_acos)) -infer_global(math.atan, types.Function(Math_atan)) -infer_global(math.atan2, types.Function(Math_atan2)) -infer_global(math.asinh, types.Function(Math_asinh)) -infer_global(math.acosh, types.Function(Math_acosh)) -infer_global(math.atanh, types.Function(Math_atanh)) -# infer_global(math.hypot, types.Function(Math_hypot)) -infer_global(math.floor, types.Function(Math_floor)) 
-infer_global(math.ceil, types.Function(Math_ceil)) -infer_global(math.trunc, types.Function(Math_trunc)) -infer_global(math.isnan, types.Function(Math_isnan)) -infer_global(math.isinf, types.Function(Math_isinf)) -infer_global(math.degrees, types.Function(Math_degrees)) -infer_global(math.radians, types.Function(Math_radians)) -infer_global(math.copysign, types.Function(Math_copysign)) -infer_global(math.fmod, types.Function(Math_fmod)) -infer_global(math.pow, types.Function(Math_pow)) -infer_global(math.erf, types.Function(Math_erf)) -infer_global(math.erfc, types.Function(Math_erfc)) -infer_global(math.gamma, types.Function(Math_gamma)) -infer_global(math.lgamma, types.Function(Math_lgamma)) diff --git a/numba/numba/roc/mathimpl.py b/numba/numba/roc/mathimpl.py deleted file mode 100644 index 5d7f96c0c..000000000 --- a/numba/numba/roc/mathimpl.py +++ /dev/null @@ -1,107 +0,0 @@ -from __future__ import print_function, absolute_import, division -import math -import warnings - -from numba.targets.imputils import Registry -from numba import types -from numba.itanium_mangler import mangle -from .hsaimpl import _declare_function - -registry = Registry() -lower = registry.lower - -# ----------------------------------------------------------------------------- - -_unary_b_f = types.int32(types.float32) -_unary_b_d = types.int32(types.float64) -_unary_f_f = types.float32(types.float32) -_unary_d_d = types.float64(types.float64) -_binary_f_ff = types.float32(types.float32, types.float32) -_binary_d_dd = types.float64(types.float64, types.float64) - -function_descriptors = { - 'isnan': (_unary_b_f, _unary_b_d), - 'isinf': (_unary_b_f, _unary_b_d), - - 'ceil': (_unary_f_f, _unary_d_d), - 'floor': (_unary_f_f, _unary_d_d), - - 'fabs': (_unary_f_f, _unary_d_d), - - 'sqrt': (_unary_f_f, _unary_d_d), - 'exp': (_unary_f_f, _unary_d_d), - 'expm1': (_unary_f_f, _unary_d_d), - 'log': (_unary_f_f, _unary_d_d), - 'log10': (_unary_f_f, _unary_d_d), - 'log1p': (_unary_f_f, _unary_d_d), 
- - 'sin': (_unary_f_f, _unary_d_d), - 'cos': (_unary_f_f, _unary_d_d), - 'tan': (_unary_f_f, _unary_d_d), - 'asin': (_unary_f_f, _unary_d_d), - 'acos': (_unary_f_f, _unary_d_d), - 'atan': (_unary_f_f, _unary_d_d), - 'sinh': (_unary_f_f, _unary_d_d), - 'cosh': (_unary_f_f, _unary_d_d), - 'tanh': (_unary_f_f, _unary_d_d), - 'asinh': (_unary_f_f, _unary_d_d), - 'acosh': (_unary_f_f, _unary_d_d), - 'atanh': (_unary_f_f, _unary_d_d), - - 'copysign': (_binary_f_ff, _binary_d_dd), - 'atan2': (_binary_f_ff, _binary_d_dd), - 'pow': (_binary_f_ff, _binary_d_dd), - 'fmod': (_binary_f_ff, _binary_d_dd), - - 'erf': (_unary_f_f, _unary_d_d), - 'erfc': (_unary_f_f, _unary_d_d), - 'gamma': (_unary_f_f, _unary_d_d), - 'lgamma': (_unary_f_f, _unary_d_d), - - # unsupported functions listed in the math module documentation: - # frexp, ldexp, trunc, modf, factorial, fsum -} - - -# some functions may be named differently by the underlying math -# library as oposed to the Python name. -_lib_counterpart = { - 'gamma': 'tgamma' -} - - -def _mk_fn_decl(name, decl_sig): - sym = _lib_counterpart.get(name, name) - - def core(context, builder, sig, args): - fn = _declare_function(context, builder, sym, decl_sig, decl_sig.args, - mangler=mangle) - res = builder.call(fn, args) - return context.cast(builder, res, decl_sig.return_type, sig.return_type) - - core.__name__ = name - return core - - -_supported = ['sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'atan2', 'sinh', - 'cosh', 'tanh', 'asinh', 'acosh', 'atanh', 'isnan', 'isinf', - 'ceil', 'floor', 'fabs', 'sqrt', 'exp', 'expm1', 'log', - 'log10', 'log1p', 'copysign', 'pow', 'fmod', 'erf', 'erfc', - 'gamma', 'lgamma', - ] - -for name in _supported: - sigs = function_descriptors.get(name) - if sigs is None: - warnings.warn("HSA - failed to register '{0}'".format(name)) - continue - - try: - # only symbols present in the math module - key = getattr(math, name) - except AttributeError: - continue - - for sig in sigs: - fn = _mk_fn_decl(name, sig) - 
lower(key, *sig.args)(fn) diff --git a/numba/numba/roc/stubs.py b/numba/numba/roc/stubs.py deleted file mode 100644 index b28d8a37a..000000000 --- a/numba/numba/roc/stubs.py +++ /dev/null @@ -1,168 +0,0 @@ -from __future__ import print_function, absolute_import -from numba import types, ir, typing, macro - - -_stub_error = NotImplementedError("This is a stub.") - - -def get_global_id(*args, **kargs): - """ - OpenCL get_global_id() - """ - raise _stub_error - - -def get_local_id(*args, **kargs): - """ - OpenCL get_local_id() - """ - raise _stub_error - - -def get_global_size(*args, **kargs): - """ - OpenCL get_global_size() - """ - raise _stub_error - - -def get_local_size(*args, **kargs): - """ - OpenCL get_local_size() - """ - raise _stub_error - - -def get_group_id(*args, **kargs): - """ - OpenCL get_group_id() - """ - raise _stub_error - - -def get_num_groups(*args, **kargs): - """ - OpenCL get_num_groups() - """ - raise _stub_error - - -def get_work_dim(*args, **kargs): - """ - OpenCL get_work_dim() - """ - raise _stub_error - - -def barrier(*args, **kargs): - """ - OpenCL barrier() - - Example: - - # workgroup barrier + local memory fence - hsa.barrier(hsa.CLK_LOCAL_MEM_FENCE) - # workgroup barrier + global memory fence - hsa.barrier(hsa.CLK_GLOBAL_MEM_FENCE) - # workgroup barrier + global memory fence - hsa.barrier() - - """ - raise _stub_error - - -def mem_fence(*args, **kargs): - """ - OpenCL mem_fence() - - Example: - - # local memory fence - hsa.mem_fence(hsa.CLK_LOCAL_MEM_FENCE) - # global memory fence - hsa.mem_fence(hsa.CLK_GLOBAL_MEM_FENCE) - """ - raise _stub_error - - -def wavebarrier(): - """ - HSAIL wavebarrier - """ - raise _stub_error - - -def activelanepermute_wavewidth(src, laneid, identity, useidentity): - """ - HSAIL activelanepermute_wavewidth_* - """ - raise _stub_error - - -def ds_permute(src_lane, dest_lane): - """ - AMDGCN Data Share intrinsic forwards permute (push semantics) - """ - raise _stub_error - - -def ds_bpermute(src_lane, 
dest_lane): - """ - AMDGCN Data Share intrinsic backwards permute (pull semantics) - """ - raise _stub_error - - -class Stub(object): - """A stub object to represent special objects which is meaningless - outside the context of HSA-python. - """ - _description_ = '' - __slots__ = () # don't allocate __dict__ - - def __new__(cls): - raise NotImplementedError("%s is not instantiable" % cls) - - def __repr__(self): - return self._description_ - - -def shared_array(shape, dtype): - shape = _legalize_shape(shape) - ndim = len(shape) - fname = "hsail.smem.alloc" - restype = types.Array(dtype, ndim, 'C') - sig = typing.signature(restype, types.UniTuple(types.intp, ndim), types.Any) - return ir.Intrinsic(fname, sig, args=(shape, dtype)) - - -class shared(Stub): - """shared namespace - """ - _description_ = '' - - array = macro.Macro('shared.array', shared_array, callable=True, - argnames=['shape', 'dtype']) - - -def _legalize_shape(shape): - if isinstance(shape, tuple): - return shape - elif isinstance(shape, int): - return (shape,) - else: - raise TypeError("invalid type for shape; got {0}".format(type(shape))) - -#------------------------------------------------------------------------------- -# atomic - -class atomic(Stub): - """atomic namespace - """ - _description_ = '' - - class add(Stub): - """add(ary, idx, val) - - Perform atomic ary[idx] += val - """ diff --git a/numba/numba/roc/target.py b/numba/numba/roc/target.py deleted file mode 100644 index 3d12957d2..000000000 --- a/numba/numba/roc/target.py +++ /dev/null @@ -1,330 +0,0 @@ -from __future__ import print_function, absolute_import - -import re - -from llvmlite.llvmpy import core as lc -from llvmlite import ir as llvmir -from llvmlite import binding as ll - -from numba import typing, types, utils, cgutils -from numba.utils import cached_property -from numba import datamodel -from numba.targets.base import BaseContext -from numba.targets.callconv import MinimalCallConv -from . 
import codegen -from .hlc import DATALAYOUT - -CC_SPIR_KERNEL = "spir_kernel" -CC_SPIR_FUNC = "" - - -# ----------------------------------------------------------------------------- -# Typing - - -class HSATypingContext(typing.BaseContext): - def load_additional_registries(self): - from . import hsadecl, mathdecl - - self.install_registry(hsadecl.registry) - self.install_registry(mathdecl.registry) - - -# ----------------------------------------------------------------------------- -# Implementation - -VALID_CHARS = re.compile(r'[^a-z0-9]', re.I) - - -# Address spaces -SPIR_GENERIC_ADDRSPACE = 0 -SPIR_GLOBAL_ADDRSPACE = 1 -SPIR_REGION_ADDRSPACE = 2 -SPIR_CONSTANT_ADDRSPACE = 4 -SPIR_LOCAL_ADDRSPACE = 3 -SPIR_PRIVATE_ADDRSPACE = 5 -SPIR_CONSTANT_32BIT_ADDRSPACE = 6 - -SPIR_VERSION = (2, 0) - - -class GenericPointerModel(datamodel.PrimitiveModel): - def __init__(self, dmm, fe_type): - adrsp = SPIR_GENERIC_ADDRSPACE - be_type = dmm.lookup(fe_type.dtype).get_data_type().as_pointer(adrsp) - super(GenericPointerModel, self).__init__(dmm, fe_type, be_type) - - -def _init_data_model_manager(): - dmm = datamodel.default_manager.copy() - dmm.register(types.CPointer, GenericPointerModel) - return dmm - - -hsa_data_model_manager = _init_data_model_manager() - - -class HSATargetContext(BaseContext): - implement_powi_as_math_call = True - generic_addrspace = SPIR_GENERIC_ADDRSPACE - - def init(self): - self._internal_codegen = codegen.JITHSACodegen("numba.hsa.jit") - self._target_data = ll.create_target_data(DATALAYOUT[utils.MACHINE_BITS]) - # Override data model manager - self.data_model_manager = hsa_data_model_manager - - def load_additional_registries(self): - from . 
import hsaimpl, mathimpl - - self.insert_func_defn(hsaimpl.registry.functions) - self.insert_func_defn(mathimpl.registry.functions) - - @cached_property - def call_conv(self): - return HSACallConv(self) - - def codegen(self): - return self._internal_codegen - - @property - def target_data(self): - return self._target_data - - def mangler(self, name, argtypes): - def repl(m): - ch = m.group(0) - return "_%X_" % ord(ch) - - qualified = name + '.' + '.'.join(str(a) for a in argtypes) - mangled = VALID_CHARS.sub(repl, qualified) - return 'hsapy_devfn_' + mangled - - def prepare_hsa_kernel(self, func, argtypes): - module = func.module - func.linkage = 'linkonce_odr' - - module.data_layout = DATALAYOUT[self.address_size] - wrapper = self.generate_kernel_wrapper(func, argtypes) - - return wrapper - - def mark_hsa_device(self, func): - # Adapt to SPIR - # module = func.module - func.calling_convention = CC_SPIR_FUNC - func.linkage = 'linkonce_odr' - return func - - def generate_kernel_wrapper(self, func, argtypes): - module = func.module - arginfo = self.get_arg_packer(argtypes) - - def sub_gen_with_global(lty): - if isinstance(lty, llvmir.PointerType): - return (lty.pointee.as_pointer(SPIR_GLOBAL_ADDRSPACE), - lty.addrspace) - return lty, None - - if len(arginfo.argument_types) > 0: - llargtys, changed = zip(*map(sub_gen_with_global, - arginfo.argument_types)) - else: - llargtys = changed = () - wrapperfnty = lc.Type.function(lc.Type.void(), llargtys) - - wrapper_module = self.create_module("hsa.kernel.wrapper") - wrappername = 'hsaPy_{name}'.format(name=func.name) - - argtys = list(arginfo.argument_types) - fnty = lc.Type.function(lc.Type.int(), - [self.call_conv.get_return_type( - types.pyobject)] + argtys) - - func = wrapper_module.add_function(fnty, name=func.name) - func.calling_convention = CC_SPIR_FUNC - - wrapper = wrapper_module.add_function(wrapperfnty, name=wrappername) - - builder = lc.Builder(wrapper.append_basic_block('')) - - # Adjust address space of each 
kernel argument - fixed_args = [] - for av, adrsp in zip(wrapper.args, changed): - if adrsp is not None: - casted = self.addrspacecast(builder, av, adrsp) - fixed_args.append(casted) - else: - fixed_args.append(av) - - callargs = arginfo.from_arguments(builder, fixed_args) - - # XXX handle error status - status, _ = self.call_conv.call_function(builder, func, types.void, - argtypes, callargs) - builder.ret_void() - - set_hsa_kernel(wrapper) - - # Link - module.link_in(ll.parse_assembly(str(wrapper_module))) - # To enable inlining which is essential because addrspacecast 1->0 is - # illegal. Inlining will optimize the addrspacecast out. - func.linkage = 'internal' - wrapper = module.get_function(wrapper.name) - module.get_function(func.name).linkage = 'internal' - return wrapper - - def declare_function(self, module, fndesc): - ret = super(HSATargetContext, self).declare_function(module, fndesc) - # XXX: Refactor fndesc instead of this special case - if fndesc.llvm_func_name.startswith('hsapy_devfn'): - ret.calling_convention = CC_SPIR_FUNC - return ret - - def make_constant_array(self, builder, typ, ary): - """ - Return dummy value. - """ - # - # a = self.make_array(typ)(self, builder) - # return a._getvalue() - raise NotImplementedError - - def addrspacecast(self, builder, src, addrspace): - """ - Handle addrspacecast - """ - ptras = llvmir.PointerType(src.type.pointee, addrspace=addrspace) - return builder.addrspacecast(src, ptras) - - -def set_hsa_kernel(fn): - """ - Ensure `fn` is usable as a SPIR kernel. 
- - Fix calling convention - - Add metadata - """ - mod = fn.module - - # Set nounwind - # fn.add_attribute(lc.ATTR_NO_UNWIND) - - # Set SPIR kernel calling convention - fn.calling_convention = CC_SPIR_KERNEL - - # Mark kernels - ocl_kernels = mod.get_or_insert_named_metadata("opencl.kernels") - ocl_kernels.add(lc.MetaData.get(mod, [fn, - gen_arg_addrspace_md(fn), - gen_arg_access_qual_md(fn), - gen_arg_type(fn), - gen_arg_type_qual(fn), - gen_arg_base_type(fn)])) - - # SPIR version 2.0 - make_constant = lambda x: lc.Constant.int(lc.Type.int(), x) - spir_version_constant = [make_constant(x) for x in SPIR_VERSION] - - spir_version = mod.get_or_insert_named_metadata("opencl.spir.version") - if not spir_version.operands: - spir_version.add(lc.MetaData.get(mod, spir_version_constant)) - - ocl_version = mod.get_or_insert_named_metadata("opencl.ocl.version") - if not ocl_version.operands: - ocl_version.add(lc.MetaData.get(mod, spir_version_constant)) - - ## The following metadata does not seem to be necessary - # Other metadata - # empty_md = lc.MetaData.get(mod, ()) - # others = ["opencl.used.extensions", - # "opencl.used.optional.core.features", - # "opencl.compiler.options"]cat - # - # for name in others: - # nmd = mod.get_or_insert_named_metadata(name) - # if not nmd.operands: - # nmd.add(empty_md) - - -def gen_arg_addrspace_md(fn): - """ - Generate kernel_arg_addr_space metadata - """ - mod = fn.module - fnty = fn.type.pointee - codes = [] - - for a in fnty.args: - if cgutils.is_pointer(a): - codes.append(SPIR_GLOBAL_ADDRSPACE) - else: - codes.append(SPIR_PRIVATE_ADDRSPACE) - - consts = [lc.Constant.int(lc.Type.int(), x) for x in codes] - name = lc.MetaDataString.get(mod, "kernel_arg_addr_space") - return lc.MetaData.get(mod, [name] + consts) - - -def gen_arg_access_qual_md(fn): - """ - Generate kernel_arg_access_qual metadata - """ - mod = fn.module - consts = [lc.MetaDataString.get(mod, "none")] * len(fn.args) - name = lc.MetaDataString.get(mod, 
"kernel_arg_access_qual") - return lc.MetaData.get(mod, [name] + consts) - - -def gen_arg_type(fn): - """ - Generate kernel_arg_type metadata - """ - mod = fn.module - fnty = fn.type.pointee - consts = [lc.MetaDataString.get(mod, str(a)) for a in fnty.args] - name = lc.MetaDataString.get(mod, "kernel_arg_type") - return lc.MetaData.get(mod, [name] + consts) - - -def gen_arg_type_qual(fn): - """ - Generate kernel_arg_type_qual metadata - """ - mod = fn.module - fnty = fn.type.pointee - consts = [lc.MetaDataString.get(mod, "") for _ in fnty.args] - name = lc.MetaDataString.get(mod, "kernel_arg_type_qual") - return lc.MetaData.get(mod, [name] + consts) - - -def gen_arg_base_type(fn): - """ - Generate kernel_arg_base_type metadata - """ - mod = fn.module - fnty = fn.type.pointee - consts = [lc.MetaDataString.get(mod, str(a)) for a in fnty.args] - name = lc.MetaDataString.get(mod, "kernel_arg_base_type") - return lc.MetaData.get(mod, [name] + consts) - - -class HSACallConv(MinimalCallConv): - def call_function(self, builder, callee, resty, argtys, args, env=None): - """ - Call the Numba-compiled *callee*. 
- """ - assert env is None - retty = callee.args[0].type.pointee - retvaltmp = cgutils.alloca_once(builder, retty) - # initialize return value - builder.store(cgutils.get_null_value(retty), retvaltmp) - - arginfo = self.context.get_arg_packer(argtys) - args = arginfo.as_arguments(builder, args) - realargs = [retvaltmp] + list(args) - code = builder.call(callee, realargs) - status = self._get_return_status(builder, code) - retval = builder.load(retvaltmp) - out = self.context.get_returned_value(builder, resty, retval) - return status, out diff --git a/numba/numba/roc/tests/__init__.py b/numba/numba/roc/tests/__init__.py deleted file mode 100644 index 436be4b5e..000000000 --- a/numba/numba/roc/tests/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -from numba.testing import SerialSuite -from numba.testing import load_testsuite -from numba import roc -from os.path import dirname, join - -def load_tests(loader, tests, pattern): - - suite = SerialSuite() - this_dir = dirname(__file__) - - if roc.is_available(): - suite.addTests(load_testsuite(loader, join(this_dir, 'hsadrv'))) - suite.addTests(load_testsuite(loader, join(this_dir, 'hsapy'))) - - else: - print("skipped HSA tests") - return suite diff --git a/numba/numba/roc/tests/hsadrv/__init__.py b/numba/numba/roc/tests/hsadrv/__init__.py deleted file mode 100644 index cff5a36cc..000000000 --- a/numba/numba/roc/tests/hsadrv/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from numba.testing import SerialSuite -from numba.testing import load_testsuite -import os - -def load_tests(loader, tests, pattern): - return SerialSuite(load_testsuite(loader, os.path.dirname(__file__))) diff --git a/numba/numba/roc/tests/hsadrv/test_async.py b/numba/numba/roc/tests/hsadrv/test_async.py deleted file mode 100644 index 78a86ec65..000000000 --- a/numba/numba/roc/tests/hsadrv/test_async.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import print_function, absolute_import - -import numpy as np - -from numba import roc -import 
numba.unittest_support as unittest -from numba.roc.hsadrv.driver import dgpu_present - - -@unittest.skipUnless(dgpu_present, 'test only on dGPU system') -class TestAsync(unittest.TestCase): - - def test_coarsegrain_array(self): - arr = roc.coarsegrain_array(shape=1024, dtype=np.float32) - self.assertEqual(arr.size, 1024) - arr[:] = expect = np.arange(arr.size) - np.testing.assert_allclose(arr, expect) - - def test_async_copy_to_device(self): - arr = np.arange(1024) - - devarr = roc.to_device(arr) - - # allocate pinned array equivalent - hostarr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype) - hostarr[:] = arr + 100 - - stream = roc.stream() - ct = len(stream._signals) - devarr.copy_to_device(hostarr, stream=stream) - self.assertEqual(ct + 1, len(stream._signals), - "no new async signal") - # implicit synchronization - got = devarr.copy_to_host() - self.assertEqual(0, len(stream._signals), - "missing implicit synchronization") - np.testing.assert_equal(hostarr, got) - - def test_async_copy_to_device_and_back(self): - arr = np.arange(1024) - hostarr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype) - gotarr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype) - stream = roc.stream() - ct = len(stream._signals) - devarr = roc.to_device(hostarr, stream=stream) - self.assertEqual(ct + 1, len(stream._signals)) - devarr.copy_to_host(gotarr, stream=stream) - self.assertEqual(ct + 2, len(stream._signals)) - stream.synchronize() - self.assertEqual(0, len(stream._signals)) - np.testing.assert_equal(hostarr, gotarr) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/roc/tests/hsadrv/test_driver.py b/numba/numba/roc/tests/hsadrv/test_driver.py deleted file mode 100644 index 7fd9fbc2f..000000000 --- a/numba/numba/roc/tests/hsadrv/test_driver.py +++ /dev/null @@ -1,638 +0,0 @@ -from __future__ import print_function, absolute_import -import ctypes -import os -import threading -try: - import queue -except ImportError: - import Queue 
as queue - -import numpy as np - -import numba.unittest_support as unittest -from numba.roc.hsadrv.driver import hsa, Queue, Program, Executable,\ - BrigModule, Context, dgpu_present - -from numba.roc.hsadrv.driver import hsa as roc -import numba.roc.api as hsaapi -from numba import float32, float64, vectorize - -from numba.roc.hsadrv import drvapi -from numba.roc.hsadrv import enums -from numba.roc.hsadrv import enums_ext - -from numba import config - -class TestLowLevelApi(unittest.TestCase): - """This test checks that all the functions defined in drvapi - bind properly using ctypes.""" - - def test_functions_available(self): - missing_functions = [] - for fname in drvapi.API_PROTOTYPES.keys(): - try: - getattr(hsa, fname) - except Exception as e: - missing_functions.append("'{0}': {1}".format(fname, str(e))) - - self.assertEqual(len(missing_functions), 0, - msg='\n'.join(missing_functions)) - - -class TestAgents(unittest.TestCase): - def test_agents_init(self): - self.assertGreater(len(roc.agents), 0) - - def test_agents_create_queue_single(self): - for agent in roc.agents: - if agent.is_component: - queue = agent.create_queue_single(2 ** 5) - self.assertIsInstance(queue, Queue) - - def test_agents_create_queue_multi(self): - for agent in roc.agents: - if agent.is_component: - queue = agent.create_queue_multi(2 ** 5) - self.assertIsInstance(queue, Queue) - - -class _TestBase(unittest.TestCase): - def setUp(self): - self.gpu = [a for a in roc.agents if a.is_component][0] - self.cpu = [a for a in roc.agents if not a.is_component][0] - self.queue = self.gpu.create_queue_multi(self.gpu.queue_max_size) - - def tearDown(self): - del self.queue - del self.gpu - del self.cpu - - -def get_brig_file(): - path = os.path.join('/opt/rocm/hsa/sample/vector_copy_full.brig') - assert os.path.isfile(path) - return path - -def _check_example_file(): - try: - get_brig_file() - except BaseException: - return False - return True - -has_brig_example = _check_example_file() - - 
-@unittest.skipUnless(has_brig_example, "Brig example not found") -class TestBrigModule(unittest.TestCase): - def test_from_file(self): - brig_file = get_brig_file() - brig_module = BrigModule.from_file(brig_file) - self.assertGreater(len(brig_module), 0) - - -@unittest.skipUnless(has_brig_example, "Brig example not found") -class TestProgram(_TestBase): - def test_create_program(self): - brig_file = get_brig_file() - symbol = '&__vector_copy_kernel' - brig_module = BrigModule.from_file(brig_file) - program = Program() - program.add_module(brig_module) - code = program.finalize(self.gpu.isa) - - ex = Executable() - ex.load(self.gpu, code) - ex.freeze() - - sym = ex.get_symbol(self.gpu, symbol) - self.assertGreater(sym.kernarg_segment_size, 0) - - -class TestMemory(_TestBase): - def test_region_list(self): - self.assertGreater(len(self.gpu.regions.globals), 0) - self.assertGreater(len(self.gpu.regions.groups), 0) - # The following maybe empty - # print(self.gpu.regions.privates) - # print(self.gpu.regions.readonlys) - - def test_register(self): - src = np.random.random(1024).astype(np.float32) - roc.hsa_memory_register(src.ctypes.data, src.nbytes) - roc.hsa_memory_deregister(src.ctypes.data, src.nbytes) - - def test_allocate(self): - regions = self.gpu.regions - # More than one region - self.assertGreater(len(regions), 0) - # Find kernel argument regions - kernarg_regions = list() - for r in regions: - if r.supports(enums.HSA_REGION_GLOBAL_FLAG_KERNARG): - kernarg_regions.append(r) - - self.assertGreater(len(kernarg_regions), 0) - # Test allocating at the kernel argument region - kernarg_region = kernarg_regions[0] - nelem = 10 - ptr = kernarg_region.allocate(ctypes.sizeof(ctypes.c_float) * nelem) - self.assertNotEqual(ctypes.addressof(ptr), 0, - "pointer must not be NULL") - # Test writing to it - src = np.random.random(nelem).astype(np.float32) - ctypes.memmove(ptr, src.ctypes.data, src.nbytes) - - ref = (ctypes.c_float * nelem).from_address(ptr.value) - for i in 
range(src.size): - self.assertEqual(ref[i], src[i]) - roc.hsa_memory_free(ptr) - - @unittest.skipUnless(dgpu_present, "dGPU only") - def test_coarse_grained_allocate(self): - """ - Tests the coarse grained allocation works on a dGPU. - It performs a data copying round trip via: - memory - | - HSA cpu memory - | - HSA dGPU host accessible memory <---| - | | - HSA dGPU memory --------------------| - """ - gpu_regions = self.gpu.regions - gpu_only_coarse_regions = list() - gpu_host_accessible_coarse_regions = list() - for r in gpu_regions: - if r.supports(enums.HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED): - if r.host_accessible: - gpu_host_accessible_coarse_regions.append(r) - else: - gpu_only_coarse_regions.append(r) - - # check we have 1+ coarse gpu region(s) of each type - self.assertGreater(len(gpu_only_coarse_regions), 0) - self.assertGreater(len(gpu_host_accessible_coarse_regions), 0) - - cpu_regions = self.cpu.regions - cpu_coarse_regions = list() - for r in cpu_regions: - if r.supports(enums.HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED): - cpu_coarse_regions.append(r) - # check we have 1+ coarse cpu region(s) - self.assertGreater(len(cpu_coarse_regions), 0) - - # ten elements of data used - nelem = 10 - - # allocation - cpu_region = cpu_coarse_regions[0] - cpu_ptr = cpu_region.allocate(ctypes.sizeof(ctypes.c_float) * nelem) - self.assertNotEqual(ctypes.addressof(cpu_ptr), 0, - "pointer must not be NULL") - - gpu_only_region = gpu_only_coarse_regions[0] - gpu_only_ptr = gpu_only_region.allocate(ctypes.sizeof(ctypes.c_float) * - nelem) - self.assertNotEqual(ctypes.addressof(gpu_only_ptr), 0, - "pointer must not be NULL") - - gpu_host_accessible_region = gpu_host_accessible_coarse_regions[0] - gpu_host_accessible_ptr = gpu_host_accessible_region.allocate( - ctypes.sizeof(ctypes.c_float) * nelem) - self.assertNotEqual(ctypes.addressof(gpu_host_accessible_ptr), 0, - "pointer must not be NULL") - - # Test writing to allocated area - src = 
np.random.random(nelem).astype(np.float32) - roc.hsa_memory_copy(cpu_ptr, src.ctypes.data, src.nbytes) - roc.hsa_memory_copy(gpu_host_accessible_ptr, cpu_ptr, src.nbytes) - roc.hsa_memory_copy(gpu_only_ptr, gpu_host_accessible_ptr, src.nbytes) - - # check write is correct - cpu_ref = (ctypes.c_float * nelem).from_address(cpu_ptr.value) - for i in range(src.size): - self.assertEqual(cpu_ref[i], src[i]) - - gpu_ha_ref = (ctypes.c_float * nelem).\ - from_address(gpu_host_accessible_ptr.value) - for i in range(src.size): - self.assertEqual(gpu_ha_ref[i], src[i]) - - # zero out host accessible GPU memory and CPU memory - z0 = np.zeros(nelem).astype(np.float32) - roc.hsa_memory_copy(cpu_ptr, z0.ctypes.data, z0.nbytes) - roc.hsa_memory_copy(gpu_host_accessible_ptr, cpu_ptr, z0.nbytes) - - # check zeroing is correct - for i in range(z0.size): - self.assertEqual(cpu_ref[i], z0[i]) - - for i in range(z0.size): - self.assertEqual(gpu_ha_ref[i], z0[i]) - - # copy back the data from the GPU - roc.hsa_memory_copy(gpu_host_accessible_ptr, gpu_only_ptr, src.nbytes) - - # check the copy back is ok - for i in range(src.size): - self.assertEqual(gpu_ha_ref[i], src[i]) - - # free - roc.hsa_memory_free(cpu_ptr) - roc.hsa_memory_free(gpu_only_ptr) - roc.hsa_memory_free(gpu_host_accessible_ptr) - - @unittest.skipUnless(has_brig_example, "Brig example not found") - @unittest.skipUnless(dgpu_present, "dGPU only") - @unittest.skip("Permanently skip? HSA spec violation causes corruption") - def test_coarse_grained_kernel_execution(self): - """ - This tests the execution of a kernel on a dGPU using coarse memory - regions for the buffers. - NOTE: the code violates the HSA spec in that it uses a coarse region - for kernargs, this is a performance hack. 
- """ - - from numba.roc.hsadrv.driver import BrigModule, Program, hsa,\ - Executable - - # get a brig file - brig_file = get_brig_file() - brig_module = BrigModule.from_file(brig_file) - self.assertGreater(len(brig_module), 0) - - # use existing GPU regions for computation space - gpu_regions = self.gpu.regions - gpu_only_coarse_regions = list() - gpu_host_accessible_coarse_regions = list() - for r in gpu_regions: - if r.supports(enums.HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED): - if r.host_accessible: - gpu_host_accessible_coarse_regions.append(r) - else: - gpu_only_coarse_regions.append(r) - - # check we have 1+ coarse gpu region(s) of each type - self.assertGreater(len(gpu_only_coarse_regions), 0) - self.assertGreater(len(gpu_host_accessible_coarse_regions), 0) - - # Compilation phase: - - # FIXME: this is dubious, assume launching agent is indexed first - agent = roc.components[0] - - prog = Program() - prog.add_module(brig_module) - - # get kernel and load - code = prog.finalize(agent.isa) - - ex = Executable() - ex.load(agent, code) - ex.freeze() - - # extract symbols - sym = ex.get_symbol(agent, "&__vector_copy_kernel") - self.assertNotEqual(sym.kernel_object, 0) - self.assertGreater(sym.kernarg_segment_size, 0) - - # attempt kernel excution - import ctypes - import numpy as np - - # Do memory allocations - - # allocate and initialise memory - nelem = 1024 * 1024 - - src = np.random.random(nelem).astype(np.float32) - z0 = np.zeros_like(src) - - # alloc host accessible memory - nbytes = ctypes.sizeof(ctypes.c_float) * nelem - gpu_host_accessible_region = gpu_host_accessible_coarse_regions[0] - host_in_ptr = gpu_host_accessible_region.allocate(nbytes) - self.assertNotEqual(host_in_ptr.value, None, - "pointer must not be NULL") - host_out_ptr = gpu_host_accessible_region.allocate(nbytes) - self.assertNotEqual(host_out_ptr.value, None, - "pointer must not be NULL") - - # init mem with data - roc.hsa_memory_copy(host_in_ptr, src.ctypes.data, src.nbytes) - 
roc.hsa_memory_copy(host_out_ptr, z0.ctypes.data, z0.nbytes) - - # alloc gpu only memory - gpu_only_region = gpu_only_coarse_regions[0] - gpu_in_ptr = gpu_only_region.allocate(nbytes) - self.assertNotEqual(gpu_in_ptr.value, None, "pointer must not be NULL") - gpu_out_ptr = gpu_only_region.allocate(nbytes) - self.assertNotEqual(gpu_out_ptr.value, None, - "pointer must not be NULL") - - # copy memory from host accessible location to gpu only - roc.hsa_memory_copy(gpu_in_ptr, host_in_ptr, src.nbytes) - - # Do kernargs - - # Find a coarse region (for better performance on dGPU) in which - # to place kernargs. NOTE: This violates the HSA spec - kernarg_regions = list() - for r in gpu_host_accessible_coarse_regions: - # NOTE: VIOLATION - if r.supports(enums.HSA_REGION_GLOBAL_FLAG_KERNARG): - kernarg_regions.append(r) - self.assertGreater(len(kernarg_regions), 0) - - # use first region for args - kernarg_region = kernarg_regions[0] - - kernarg_ptr = kernarg_region.allocate( - 2 * ctypes.sizeof(ctypes.c_void_p)) - - self.assertNotEqual(kernarg_ptr, None, "pointer must not be NULL") - - # wire in gpu memory - argref = (2 * ctypes.c_size_t).from_address(kernarg_ptr.value) - argref[0] = gpu_in_ptr.value - argref[1] = gpu_out_ptr.value - - # signal - sig = roc.create_signal(1) - - # create queue and dispatch job - - queue = agent.create_queue_single(32) - queue.dispatch(sym, kernarg_ptr, workgroup_size=(256, 1, 1), - grid_size=(nelem, 1, 1),signal=None) - - # copy result back to host accessible memory to check - roc.hsa_memory_copy(host_out_ptr, gpu_out_ptr, src.nbytes) - - # check the data is recovered - ref = (nelem * ctypes.c_float).from_address(host_out_ptr.value) - np.testing.assert_equal(ref, src) - - # free - roc.hsa_memory_free(host_in_ptr) - roc.hsa_memory_free(host_out_ptr) - roc.hsa_memory_free(gpu_in_ptr) - roc.hsa_memory_free(gpu_out_ptr) - - -class TestContext(_TestBase): - """Tests the Context class behaviour is correct.""" - - def test_memalloc(self): - """ - 
Tests Context.memalloc() for a given, in the parlance of HSA,\ - `component`. Testing includes specialisations for the supported - components of dGPUs and APUs. - """ - n = 10 # things to alloc - nbytes = ctypes.sizeof(ctypes.c_double) * n - - # run if a dGPU is present - if dgpu_present: - # find a host accessible region - dGPU_agent = self.gpu - CPU_agent = self.cpu - gpu_ctx = Context(dGPU_agent) - gpu_only_mem = gpu_ctx.memalloc(nbytes, hostAccessible=False) - ha_mem = gpu_ctx.memalloc(nbytes, hostAccessible=True) - - # on dGPU systems, all host mem is host accessible - cpu_ctx = Context(CPU_agent) - cpu_mem = cpu_ctx.memalloc(nbytes, hostAccessible=True) - - # Test writing to allocated area - src = np.random.random(n).astype(np.float64) - roc.hsa_memory_copy(cpu_mem.device_pointer, src.ctypes.data, src.nbytes) - roc.hsa_memory_copy(ha_mem.device_pointer, cpu_mem.device_pointer, src.nbytes) - roc.hsa_memory_copy(gpu_only_mem.device_pointer, ha_mem.device_pointer, src.nbytes) - - # clear - z0 = np.zeros_like(src) - roc.hsa_memory_copy(ha_mem.device_pointer, z0.ctypes.data, z0.nbytes) - ref = (n * ctypes.c_double).from_address(ha_mem.device_pointer.value) - for k in range(n): - self.assertEqual(ref[k], 0) - - # copy back from dGPU - roc.hsa_memory_copy(ha_mem.device_pointer, gpu_only_mem.device_pointer, src.nbytes) - for k in range(n): - self.assertEqual(ref[k], src[k]) - - else: #TODO: write APU variant - pass - - def check_mempools(self, agent, has_fine_grain=True): - # get allocation-allowed pools - mp_alloc_list = [mp for mp in agent.mempools if mp.alloc_allowed] - mpdct = {'global': [], 'readonly': [], 'private': [], 'group': []} - - for mp in mp_alloc_list: - mpdct[mp.kind].append(mp) - - # only globals are allocation-allowed - if has_fine_grain: - self.assertEqual(len(mpdct['global']), 2) - else: - self.assertEqual(len(mpdct['global']), 1) - self.assertEqual(len(mpdct['readonly']), 0) - self.assertEqual(len(mpdct['private']), 0) - 
self.assertEqual(len(mpdct['group']), 0) - - self.assertEqual(len(agent.mempools.globals), len(mpdct['global'])) - - # the global-pools are coarse-grain and fine-grain pools - glbs = mpdct['global'] - coarsegrain = None - finegrain = None - for gmp in glbs: - if gmp.supports(enums_ext.HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED): - coarsegrain = gmp - if gmp.supports(enums_ext.HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED): - finegrain = gmp - - self.assertIsNotNone(coarsegrain) - if has_fine_grain: - self.assertIsNotNone(finegrain) - else: - self.assertIsNone(finegrain) - self.assertIsNot(coarsegrain, finegrain) - - def test_cpu_mempool_property(self): - self.check_mempools(self.cpu) - - @unittest.skipUnless(dgpu_present, "dGPU only") - def test_gpu_mempool_property(self): - self.check_mempools(self.gpu, has_fine_grain=False) - - @unittest.skipUnless(dgpu_present, "dGPU only") - def test_mempool(self): - n = 10 # things to alloc - nbytes = ctypes.sizeof(ctypes.c_double) * n - - dGPU_agent = self.gpu - CPU_agent = self.cpu - - # allocate a GPU memory pool - gpu_ctx = Context(dGPU_agent) - gpu_only_mem = gpu_ctx.mempoolalloc(nbytes) - - # allocate a CPU memory pool, allow the GPU access to it - cpu_ctx = Context(CPU_agent) - cpu_mem = cpu_ctx.mempoolalloc(nbytes, allow_access_to=[gpu_ctx.agent]) - - ## Test writing to allocated area - src = np.random.random(n).astype(np.float64) - roc.hsa_memory_copy(cpu_mem.device_pointer, src.ctypes.data, src.nbytes) - roc.hsa_memory_copy(gpu_only_mem.device_pointer, cpu_mem.device_pointer, src.nbytes) - - - # clear - z0 = np.zeros_like(src) - roc.hsa_memory_copy(cpu_mem.device_pointer, z0.ctypes.data, z0.nbytes) - ref = (n * ctypes.c_double).from_address(cpu_mem.device_pointer.value) - for k in range(n): - self.assertEqual(ref[k], 0) - - # copy back from dGPU - roc.hsa_memory_copy(cpu_mem.device_pointer, gpu_only_mem.device_pointer, src.nbytes) - for k in range(n): - self.assertEqual(ref[k], src[k]) - - def 
check_mempool_with_flags(self, finegrain): - dGPU_agent = self.gpu - gpu_ctx = Context(dGPU_agent) - - CPU_agent = self.cpu - cpu_ctx = Context(CPU_agent) - - # get mempool with specific flags - cpu_ctx.mempoolalloc(1024, allow_access_to=[gpu_ctx._agent]) - - @unittest.skipUnless(dgpu_present, 'dGPU only') - def test_mempool_finegrained(self): - self.check_mempool_with_flags(finegrain=True) - - @unittest.skipUnless(dgpu_present, 'dGPU only') - def test_mempool_coarsegrained(self): - self.check_mempool_with_flags(finegrain=False) - - @unittest.skipUnless(dgpu_present, 'dGPU only') - def test_mempool_amd_example(self): - dGPU_agent = self.gpu - gpu_ctx = Context(dGPU_agent) - CPU_agent = self.cpu - cpu_ctx = Context(CPU_agent) - - kNumInt = 1024 - kSize = kNumInt * ctypes.sizeof(ctypes.c_int) - - dependent_signal = roc.create_signal(0) - completion_signal = roc.create_signal(0) - - ## allocate host src and dst, allow gpu access - flags = dict(allow_access_to=[gpu_ctx.agent], finegrain=False) - host_src = cpu_ctx.mempoolalloc(kSize, **flags) - host_dst = cpu_ctx.mempoolalloc(kSize, **flags) - - # there's a loop in `i` here over GPU hardware - i = 0 - - # get gpu local pool - local_memory = gpu_ctx.mempoolalloc(kSize) - - host_src_view = (kNumInt * ctypes.c_int).from_address(host_src.device_pointer.value) - host_dst_view = (kNumInt * ctypes.c_int).from_address(host_dst.device_pointer.value) - - host_src_view[:] = i + 2016 + np.arange(0, kNumInt, dtype=np.int32) - host_dst_view[:] = np.zeros(kNumInt, dtype=np.int32) - - # print("GPU: %s"%gpu_ctx._agent.name) - # print("CPU: %s"%cpu_ctx._agent.name) - - roc.hsa_signal_store_relaxed(completion_signal, 1); - - q = queue.Queue() - - class validatorThread(threading.Thread): - def run(self): - val = roc.hsa_signal_wait_acquire( - completion_signal, - enums.HSA_SIGNAL_CONDITION_EQ, - 0, - ctypes.c_uint64(-1), - enums.HSA_WAIT_STATE_ACTIVE) - - q.put(val) # wait_res - - # this could be a call on the signal itself 
dependent_signal.store_relaxed(1) - roc.hsa_signal_store_relaxed(dependent_signal, 1); - - h2l_start = threading.Semaphore(value=0) - - class l2hThread(threading.Thread): - def run(self): - dep_signal = drvapi.hsa_signal_t(dependent_signal._id) - roc.hsa_amd_memory_async_copy(host_dst.device_pointer.value, - cpu_ctx._agent._id, - local_memory.device_pointer.value, - gpu_ctx._agent._id, kSize, 1, - ctypes.byref(dep_signal), - completion_signal) - h2l_start.release() # signal h2l to start - - class h2lThread(threading.Thread): - def run(self): - h2l_start.acquire() # to wait until l2h thread has started - roc.hsa_amd_memory_async_copy(local_memory.device_pointer.value, - gpu_ctx._agent._id, - host_src.device_pointer.value, - cpu_ctx._agent._id, kSize, 0, - None, - dependent_signal) - - timeout = 10 # 10 seconds timeout - # # init thread instances - validator = validatorThread() - l2h = l2hThread() - h2l = h2lThread() - # run them - validator.start() - l2h.start() - h2l.start() - # join - l2h.join(timeout) - h2l.join(timeout) - validator.join(timeout) - # verify - wait_res = q.get() - self.assertEqual(wait_res, 0) - np.testing.assert_allclose(host_dst_view, host_src_view) - - @unittest.skipUnless(dgpu_present, "dGPU only") - def test_to_device_to_host(self): - """ - Tests .to_device() and .copy_to_host() - """ - n = 10 - data = np.zeros(n) - output = np.zeros(n) - @vectorize("float64(float64)", target='roc') - def func(x): - return x + 1 - - hsaapi.to_device(data) - out_device = hsaapi.to_device(output) - func(data, out=out_device) - host_output = out_device.copy_to_host() - np.testing.assert_equal(np.ones(n), host_output) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/roc/tests/hsapy/__init__.py b/numba/numba/roc/tests/hsapy/__init__.py deleted file mode 100644 index cff5a36cc..000000000 --- a/numba/numba/roc/tests/hsapy/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from numba.testing import SerialSuite -from numba.testing import 
load_testsuite -import os - -def load_tests(loader, tests, pattern): - return SerialSuite(load_testsuite(loader, os.path.dirname(__file__))) diff --git a/numba/numba/roc/tests/hsapy/run_far_branch.py b/numba/numba/roc/tests/hsapy/run_far_branch.py deleted file mode 100644 index 81f0193b9..000000000 --- a/numba/numba/roc/tests/hsapy/run_far_branch.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import print_function, absolute_import - -import math - -import numpy as np - -import numba - - -def run_far_jump(): - - gt_as_str = 'float32' - R_EARTH = 6371.0 # km - - @numba.roc.jit(device=True) - def deg2rad(deg): - return math.pi * deg / 180.0 - - sig = '%s(%s, %s, %s, %s)' % ((gt_as_str,) * 5) - - @numba.vectorize(sig, target='roc') - def gpu_great_circle_distance(lat1, lng1, lat2, lng2): - '''Return the great-circle distance in km between (lat1, lng1) and (lat2, lng2) - on the surface of the Earth.''' - lat1, lng1 = deg2rad(lat1), deg2rad(lng1) - lat2, lng2 = deg2rad(lat2), deg2rad(lng2) - - sin_lat1, cos_lat1 = math.sin(lat1), math.cos(lat1) - sin_lat2, cos_lat2 = math.sin(lat2), math.cos(lat2) - - delta = lng1 - lng2 - sin_delta, cos_delta = math.sin(delta), math.cos(delta) - - numerator = math.sqrt((cos_lat1 * sin_delta)**2 + - (cos_lat1 * sin_lat2 - sin_lat1 * cos_lat2 * cos_delta)**2) - denominator = sin_lat1 * sin_lat2 + cos_lat1 * cos_lat2 * cos_delta - return R_EARTH * math.atan2(numerator, denominator) - - arr = np.random.random(10).astype(np.float32) - - gpu_great_circle_distance(arr, arr, arr, arr) - - - -if __name__ == '__main__': - run_far_jump() diff --git a/numba/numba/roc/tests/hsapy/test_async_kernel.py b/numba/numba/roc/tests/hsapy/test_async_kernel.py deleted file mode 100644 index 055269b6b..000000000 --- a/numba/numba/roc/tests/hsapy/test_async_kernel.py +++ /dev/null @@ -1,68 +0,0 @@ -""" -Test async kernel copy -""" - -import logging - -import numpy as np - -from numba import roc -import numba.unittest_support as unittest -from 
numba.roc.hsadrv.driver import dgpu_present - -logger = logging.getLogger() - - -@unittest.skipUnless(dgpu_present, 'test only on dGPU system') -class TestAsyncKernel(unittest.TestCase): - def test_1(self): - logger.info('context info: %s', roc.get_context().agent) - - @roc.jit("int32[:], int32[:]") - def add1_kernel(dst, src): - i = roc.get_global_id(0) - if i < dst.size: - dst[i] = src[i] + 1 - - blksz = 256 - gridsz = 10**5 - nitems = blksz * gridsz - ntimes = 500 - - arr = np.arange(nitems, dtype=np.int32) - - logger.info('make coarse_arr') - coarse_arr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype) - coarse_arr[:] = arr - - logger.info('make coarse_res_arr') - coarse_res_arr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype) - coarse_res_arr[:] = 0 - - logger.info("make stream") - stream = roc.stream() - - logger.info('make gpu_res_arr') - gpu_res_arr = roc.device_array_like(coarse_arr) - - logger.info('make gpu_arr') - gpu_arr = roc.to_device(coarse_arr, stream=stream) - - for i in range(ntimes): - logger.info('launch kernel: %d', i) - add1_kernel[gridsz, blksz, stream](gpu_res_arr, gpu_arr) - gpu_arr.copy_to_device(gpu_res_arr, stream=stream) - - logger.info('get kernel result') - gpu_res_arr.copy_to_host(coarse_res_arr, stream=stream) - - logger.info("synchronize") - stream.synchronize() - - logger.info("compare result") - np.testing.assert_equal(coarse_res_arr, coarse_arr + ntimes) - - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - unittest.main() diff --git a/numba/numba/roc/tests/hsapy/test_atomics.py b/numba/numba/roc/tests/hsapy/test_atomics.py deleted file mode 100644 index 71b7b8a4b..000000000 --- a/numba/numba/roc/tests/hsapy/test_atomics.py +++ /dev/null @@ -1,71 +0,0 @@ -from __future__ import print_function, division, absolute_import -import numpy as np - -import numba -from numba import roc -import numba.unittest_support as unittest - - -def atomic_add(ary): - tid = roc.get_local_id(0) - sm = 
roc.shared.array(32, numba.uint32) - sm[tid] = 0 - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - bin = ary[tid] % 32 - roc.atomic.add(sm, bin, 1) - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - ary[tid] = sm[tid] - - -def atomic_add2(ary): - tx = roc.get_local_id(0) - ty = roc.get_local_id(1) - sm = roc.shared.array((4, 8), numba.uint32) - sm[tx, ty] = ary[tx, ty] - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - roc.atomic.add(sm, (tx, ty), 1) - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - ary[tx, ty] = sm[tx, ty] - - -def atomic_add3(ary): - tx = roc.get_local_id(0) - ty = roc.get_local_id(1) - sm = roc.shared.array((4, 8), numba.uint32) - sm[tx, ty] = ary[tx, ty] - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - roc.atomic.add(sm, (tx, numba.uint64(ty)), 1) - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - ary[tx, ty] = sm[tx, ty] - - -class TestHsaAtomics(unittest.TestCase): - def test_atomic_add(self): - ary = np.random.randint(0, 32, size=32).astype(np.uint32) - orig = ary.copy() - hsa_atomic_add = roc.jit('void(uint32[:])')(atomic_add) - hsa_atomic_add[1, 32](ary) - - gold = np.zeros(32, dtype=np.uint32) - for i in range(orig.size): - gold[orig[i]] += 1 - - self.assertTrue(np.all(ary == gold)) - - def test_atomic_add2(self): - ary = np.random.randint(0, 32, size=32).astype(np.uint32).reshape(4, 8) - orig = ary.copy() - hsa_atomic_add2 = roc.jit('void(uint32[:,:])')(atomic_add2) - hsa_atomic_add2[1, (4, 8)](ary) - self.assertTrue(np.all(ary == orig + 1)) - - def test_atomic_add3(self): - ary = np.random.randint(0, 32, size=32).astype(np.uint32).reshape(4, 8) - orig = ary.copy() - hsa_atomic_add3 = roc.jit('void(uint32[:,:])')(atomic_add3) - hsa_atomic_add3[1, (4, 8)](ary) - - self.assertTrue(np.all(ary == orig + 1)) - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/roc/tests/hsapy/test_autojit.py b/numba/numba/roc/tests/hsapy/test_autojit.py deleted file mode 100644 index a31bfc0c7..000000000 --- a/numba/numba/roc/tests/hsapy/test_autojit.py +++ /dev/null @@ -1,45 +0,0 @@ 
-from __future__ import print_function, absolute_import - -import numpy as np - -import numba.unittest_support as unittest -from numba import roc - - -def copy_kernel(out, inp): - i = roc.get_global_id(0) - if i < out.size: - out[i] = inp[i] - - -class TestAutoJit(unittest.TestCase): - def test_autojit_kernel(self): - kernel = roc.jit(copy_kernel) - inp = np.arange(10) - out = np.zeros_like(inp) - kernel.forall(out.size)(out, inp) - np.testing.assert_equal(inp, out) - - def test_autojit_device(self): - @roc.jit(device=True) - def inner(a, b): - return a + b - - @roc.jit - def outer(A, B): - i = roc.get_global_id(0) - if i < A.size: - A[i] = inner(A[i], B[i]) - - A = np.arange(10) - Aorig = A.copy() - B = np.arange(10) - - outer.forall(A.size)(A, B) - self.assertFalse(np.all(Aorig == A)) - np.testing.assert_equal(Aorig + B, A) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/roc/tests/hsapy/test_barrier.py b/numba/numba/roc/tests/hsapy/test_barrier.py deleted file mode 100644 index 9f339d319..000000000 --- a/numba/numba/roc/tests/hsapy/test_barrier.py +++ /dev/null @@ -1,75 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import numpy as np - -from numba import unittest_support as unittest -from numba import roc, float32 - - -class TestBarrier(unittest.TestCase): - def test_proper_lowering(self): - @roc.jit("void(float32[::1])") - def twice(A): - i = roc.get_global_id(0) - d = A[i] - roc.barrier(roc.CLK_LOCAL_MEM_FENCE) # local mem fence - A[i] = d * 2 - - N = 256 - arr = np.random.random(N).astype(np.float32) - orig = arr.copy() - - twice[2, 128](arr) - - # Assembly contains barrier instruction? - self.assertIn("s_barrier", twice.assembly) - # The computation is correct? 
- np.testing.assert_allclose(orig * 2, arr) - - def test_no_arg_barrier_support(self): - @roc.jit("void(float32[::1])") - def twice(A): - i = roc.get_global_id(0) - d = A[i] - # no argument defaults to global mem fence - # which is the same for local in hsail - roc.barrier() - A[i] = d * 2 - - N = 256 - arr = np.random.random(N).astype(np.float32) - orig = arr.copy() - - twice[2, 128](arr) - - # Assembly contains barrier instruction? - self.assertIn("s_barrier", twice.assembly) - # The computation is correct? - np.testing.assert_allclose(orig * 2, arr) - - def test_local_memory(self): - blocksize = 10 - - @roc.jit("void(float32[::1])") - def reverse_array(A): - sm = roc.shared.array(shape=blocksize, dtype=float32) - i = roc.get_global_id(0) - - # preload - sm[i] = A[i] - # barrier - roc.barrier(roc.CLK_LOCAL_MEM_FENCE) # local mem fence - # write - A[i] += sm[blocksize - 1 - i] - - arr = np.arange(blocksize).astype(np.float32) - orig = arr.copy() - - reverse_array[1, blocksize](arr) - - expected = orig[::-1] + orig - np.testing.assert_allclose(expected, arr) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/roc/tests/hsapy/test_compiler.py b/numba/numba/roc/tests/hsapy/test_compiler.py deleted file mode 100644 index 45de88603..000000000 --- a/numba/numba/roc/tests/hsapy/test_compiler.py +++ /dev/null @@ -1,128 +0,0 @@ -from __future__ import print_function, absolute_import - -import tempfile -import os -import numpy as np - -import numba.unittest_support as unittest -from numba import roc -from numba import types -from numba.roc import compiler -from numba.roc.hsadrv.driver import hsa as hsart -from numba.roc.hsadrv.driver import BrigModule, Executable, Program - - -def copy_kernel(out, inp): - out[0] = inp[0] - - -def copy_kernel_1d(out, inp): - i = roc.get_global_id(0) - if i < out.size: - out[i] = inp[i] - - -def assign_value(out, inp): - i = roc.get_global_id(0) - if i < out.size: - out[i] = inp - - -class 
TestCodeGeneration(unittest.TestCase): - def test_copy_kernel(self): - arytype = types.float32[:] - kernel = compiler.compile_kernel(copy_kernel, [arytype] * 2) - self.assertIn(".globl\t{0}".format(kernel.entry_name), - kernel.assembly) - - def test_copy_kernel_1d(self): - arytype = types.float32[:] - kernel = compiler.compile_kernel(copy_kernel_1d, [arytype] * 2) - self.assertIn(".globl\t{0}".format(kernel.entry_name), - kernel.assembly) - - -class _TestBase(unittest.TestCase): - def setUp(self): - self.gpu = [a for a in hsart.agents if a.is_component][0] - self.cpu = [a for a in hsart.agents if not a.is_component][0] - self.queue = self.gpu.create_queue_multi(self.gpu.queue_max_size) - - def tearDown(self): - del self.queue - del self.gpu - del self.cpu - - -class TestExecution(unittest.TestCase): - def test_hsa_kernel(self): - src = np.arange(1024, dtype=np.float32) - dst = np.zeros_like(src) - - # Compiler kernel - arytype = types.float32[::1] - kernel = compiler.compile_kernel(copy_kernel_1d, [arytype] * 2) - - # Run kernel - kernel[src.size // 256, 256](dst, src) - - np.testing.assert_equal(src, dst) - - -class TestKernelArgument(unittest.TestCase): - def _test_template(self, nbtype, src): - dtype = np.dtype(str(nbtype)) - dst = np.zeros(1, dtype=dtype) - src = dtype.type(src) - arytype = nbtype[::1] - kernel = compiler.compile_kernel(assign_value, [arytype, nbtype]) - kernel[1, 1](dst, src) - self.assertEqual(dst[0], src) - - def test_float64(self): - self._test_template(nbtype=types.float64, src=1. / 3.) - - def test_float32(self): - self._test_template(nbtype=types.float32, src=1. / 3.) 
- - def test_int32(self): - self._test_template(nbtype=types.int32, src=123) - - def test_int16(self): - self._test_template(nbtype=types.int16, src=123) - - def test_complex64(self): - self._test_template(nbtype=types.complex64, src=12 + 34j) - - def test_complex128(self): - self._test_template(nbtype=types.complex128, src=12 + 34j) - - -def udt_devfunc(a, i): - return a[i] - - -class TestDeviceFunction(unittest.TestCase): - def test_device_function(self): - src = np.arange(10, dtype=np.int32) - dst = np.zeros_like(src) - - arytype = types.int32[::1] - devfn = compiler.compile_device(udt_devfunc, arytype.dtype, - [arytype, types.intp]) - - def udt_devfunc_caller(dst, src): - i = roc.get_global_id(0) - if i < dst.size: - dst[i] = devfn(src, i) - - kernel = compiler.compile_kernel(udt_devfunc_caller, - [arytype, arytype]) - - kernel[src.size, 1](dst, src) - np.testing.assert_equal(dst, src) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/roc/tests/hsapy/test_decorator.py b/numba/numba/roc/tests/hsapy/test_decorator.py deleted file mode 100644 index 80392fb22..000000000 --- a/numba/numba/roc/tests/hsapy/test_decorator.py +++ /dev/null @@ -1,53 +0,0 @@ -from __future__ import print_function, absolute_import - -import numpy as np - -import numba.unittest_support as unittest -from numba import roc - - -class TestDecorators(unittest.TestCase): - def test_kernel_jit(self): - @roc.jit("(float32[:], float32[:])") - def copy_vector(dst, src): - tid = roc.get_global_id(0) - if tid < dst.size: - dst[tid] = src[tid] - - src = np.arange(10, dtype=np.uint32) - dst = np.zeros_like(src) - copy_vector[10, 1](dst, src) - np.testing.assert_equal(dst, src) - - def test_device_jit(self): - @roc.jit("float32(float32[:], intp)", device=True) - def inner(src, idx): - return src[idx] - - @roc.jit("(float32[:], float32[:])") - def outer(dst, src): - tid = roc.get_global_id(0) - if tid < dst.size: - dst[tid] = inner(src, tid) - - src = np.arange(10, 
dtype=np.uint32) - dst = np.zeros_like(src) - outer[10, 1](dst, src) - np.testing.assert_equal(dst, src) - - def test_autojit_kernel(self): - @roc.jit - def copy_vector(dst, src): - tid = roc.get_global_id(0) - if tid < dst.size: - dst[tid] = src[tid] - - for dtype in [np.uint32, np.float32]: - src = np.arange(10, dtype=dtype) - dst = np.zeros_like(src) - copy_vector[10, 1](dst, src) - np.testing.assert_equal(dst, src) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/roc/tests/hsapy/test_gufuncbuilding.py b/numba/numba/roc/tests/hsapy/test_gufuncbuilding.py deleted file mode 100644 index da17d4ee0..000000000 --- a/numba/numba/roc/tests/hsapy/test_gufuncbuilding.py +++ /dev/null @@ -1,167 +0,0 @@ -from __future__ import print_function, absolute_import - -import numpy as np - -from numba import unittest_support as unittest -from numba.roc.vectorizers import HsaGUFuncVectorize -from numba.roc.dispatch import HSAGenerializedUFunc -from numba import guvectorize - - -def ufunc_add_core(a, b, c): - for i in range(c.size): - c[i] = a[i] + b[i] - - -class TestGUFuncBuilding(unittest.TestCase): - def test_gufunc_building(self): - ufbldr = HsaGUFuncVectorize(ufunc_add_core, "(x),(x)->(x)") - ufbldr.add("(float32[:], float32[:], float32[:])") - ufbldr.add("(intp[:], intp[:], intp[:])") - ufunc = ufbldr.build_ufunc() - self.assertIsInstance(ufunc, HSAGenerializedUFunc) - - # Test integer version - A = np.arange(100, dtype=np.intp) - B = np.arange(100, dtype=np.intp) + 1 - expected = A + B - got = ufunc(A, B) - - np.testing.assert_equal(expected, got) - self.assertEqual(expected.dtype, got.dtype) - self.assertEqual(np.dtype(np.intp), got.dtype) - - # Test integer version with 2D inputs - A = A.reshape(50, 2) - B = B.reshape(50, 2) - expected = A + B - got = ufunc(A, B) - - np.testing.assert_equal(expected, got) - self.assertEqual(expected.dtype, got.dtype) - self.assertEqual(np.dtype(np.intp), got.dtype) - - # Test integer version with 3D inputs - A = 
A.reshape(5, 10, 2) - B = B.reshape(5, 10, 2) - expected = A + B - got = ufunc(A, B) - - np.testing.assert_equal(expected, got) - self.assertEqual(expected.dtype, got.dtype) - self.assertEqual(np.dtype(np.intp), got.dtype) - - # Test real version - A = np.arange(100, dtype=np.float32) - B = np.arange(100, dtype=np.float32) + 1 - expected = A + B - got = ufunc(A, B) - - np.testing.assert_allclose(expected, got) - self.assertEqual(expected.dtype, got.dtype) - self.assertEqual(np.dtype(np.float32), got.dtype) - - # Test real version with 2D inputs - A = A.reshape(50, 2) - B = B.reshape(50, 2) - expected = A + B - got = ufunc(A, B) - - np.testing.assert_allclose(expected, got) - self.assertEqual(expected.dtype, got.dtype) - self.assertEqual(np.dtype(np.float32), got.dtype) - - def test_gufunc_building_scalar_output(self): - def sum_row(inp, out): - tmp = 0. - for i in range(inp.shape[0]): - tmp += inp[i] - out[0] = tmp - - ufbldr = HsaGUFuncVectorize(sum_row, "(n)->()") - ufbldr.add("void(int32[:], int32[:])") - ufunc = ufbldr.build_ufunc() - - inp = np.arange(300, dtype=np.int32).reshape(100, 3) - out = ufunc(inp) - - for i in range(inp.shape[0]): - np.testing.assert_equal(inp[i].sum(), out[i]) - - def test_gufunc_scalar_input_saxpy(self): - def axpy(a, x, y, out): - for i in range(out.shape[0]): - out[i] = a * x[i] + y[i] - - ufbldr = HsaGUFuncVectorize(axpy, '(),(t),(t)->(t)') - ufbldr.add("void(float32, float32[:], float32[:], float32[:])") - saxpy = ufbldr.build_ufunc() - - A = np.float32(2) - X = np.arange(10, dtype=np.float32).reshape(5, 2) - Y = np.arange(10, dtype=np.float32).reshape(5, 2) - out = saxpy(A, X, Y) - - for j in range(5): - for i in range(2): - exp = A * X[j, i] + Y[j, i] - self.assertTrue(exp == out[j, i]) - - X = np.arange(10, dtype=np.float32) - Y = np.arange(10, dtype=np.float32) - out = saxpy(A, X, Y) - - for j in range(10): - exp = A * X[j] + Y[j] - self.assertTrue(exp == out[j], (exp, out[j])) - - A = np.arange(5, dtype=np.float32) - X = 
np.arange(10, dtype=np.float32).reshape(5, 2) - Y = np.arange(10, dtype=np.float32).reshape(5, 2) - out = saxpy(A, X, Y) - - for j in range(5): - for i in range(2): - exp = A[j] * X[j, i] + Y[j, i] - self.assertTrue(exp == out[j, i], (exp, out[j, i])) - - -class TestGUFuncDecor(unittest.TestCase): - def test_gufunc_decorator(self): - @guvectorize(["void(float32, float32[:], float32[:], float32[:])"], - '(),(t),(t)->(t)', target='roc') - def saxpy(a, x, y, out): - for i in range(out.shape[0]): - out[i] = a * x[i] + y[i] - - A = np.float32(2) - X = np.arange(10, dtype=np.float32).reshape(5, 2) - Y = np.arange(10, dtype=np.float32).reshape(5, 2) - out = saxpy(A, X, Y) - - for j in range(5): - for i in range(2): - exp = A * X[j, i] + Y[j, i] - self.assertTrue(exp == out[j, i]) - - X = np.arange(10, dtype=np.float32) - Y = np.arange(10, dtype=np.float32) - out = saxpy(A, X, Y) - - for j in range(10): - exp = A * X[j] + Y[j] - self.assertTrue(exp == out[j], (exp, out[j])) - - A = np.arange(5, dtype=np.float32) - X = np.arange(10, dtype=np.float32).reshape(5, 2) - Y = np.arange(10, dtype=np.float32).reshape(5, 2) - out = saxpy(A, X, Y) - - for j in range(5): - for i in range(2): - exp = A[j] * X[j, i] + Y[j, i] - self.assertTrue(exp == out[j, i], (exp, out[j, i])) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/roc/tests/hsapy/test_large_code.py b/numba/numba/roc/tests/hsapy/test_large_code.py deleted file mode 100644 index d3cef00b9..000000000 --- a/numba/numba/roc/tests/hsapy/test_large_code.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import print_function, absolute_import - -import sys -import os -import os.path -import subprocess -import math - -import numba -import numba.unittest_support as unittest - -class TestLargeCode(unittest.TestCase): - - def test_far_jump(self): - from . 
import run_far_branch - - pyinterp = sys.executable - numba_dir = os.path.abspath(os.path.join(os.path.dirname(numba.__file__), os.pardir)) - script, ext = os.path.splitext(os.path.relpath(run_far_branch.__file__, numba_dir)) - script = script.replace(os.path.sep, '.') - args = [pyinterp, script] - cmd = '{} -m {}'.format(*args) - - oldpp = os.environ.get('PYTHONPATH') - os.environ['PYTHONPATH'] = numba_dir - - try: - subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True) - finally: - if oldpp is None: - del os.environ['PYTHONPATH'] - else: - os.environ['PYTHONPATH'] = oldpp - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/roc/tests/hsapy/test_linkage.py b/numba/numba/roc/tests/hsapy/test_linkage.py deleted file mode 100644 index eb6b6e935..000000000 --- a/numba/numba/roc/tests/hsapy/test_linkage.py +++ /dev/null @@ -1,30 +0,0 @@ -from __future__ import print_function, absolute_import - -import numba.unittest_support as unittest -from numba import roc - - -class TestLinkage(unittest.TestCase): - def test_indirection(self): - @roc.jit(device=True) - def base(): - pass - - @roc.jit(device=True) - def layer1(): - base() - - @roc.jit(device=True) - def layer2(): - layer1() - base() - - @roc.jit - def kernel(a): - layer2() - - kernel[1, 1](1) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/roc/tests/hsapy/test_math.py b/numba/numba/roc/tests/hsapy/test_math.py deleted file mode 100644 index 7d373aa41..000000000 --- a/numba/numba/roc/tests/hsapy/test_math.py +++ /dev/null @@ -1,182 +0,0 @@ -from __future__ import print_function, absolute_import - -import numpy as np -import math - -import numba.unittest_support as unittest -from numba import roc -from numba import utils - - -class TestMath(unittest.TestCase): - def _get_tol(self, math_fn, ty): - """gets the tolerance for functions when the input is of type 'ty'""" - - low_res = { - (math.gamma, np.float64): 1e-14, - (math.lgamma, np.float64): 1e-13, - 
(math.asin, np.float64): 1e-9, - (math.acos, np.float64): 4e-9, - (math.sqrt, np.float64): 2e-8, - } - default = 1e-15 if ty == np.float64 else 1e-6 - return low_res.get((math_fn, ty), default) - - def _generic_test_unary(self, math_fn, npy_fn, - cases=None, - span=(-1., 1.), count=128, - types=(np.float32, np.float64)): - - @roc.jit - def fn(dst, src): - i = roc.get_global_id(0) - if i < dst.size: - dst[i] = math_fn(src[i]) - - for dtype in types: - if cases is None: - src = np.linspace(span[0], span[1], count).astype(dtype) - else: - src = np.array(cases, dtype=dtype) - - dst = np.zeros_like(src) - fn[src.size, 1](dst, src) - np.testing.assert_allclose(dst, npy_fn(src), - rtol=self._get_tol(math_fn, dtype), - err_msg='{0} ({1})'.format( - math_fn.__name__, - dtype.__name__)) - - def _generic_test_binary(self, math_fn, npy_fn, - cases=None, - span=(-1., 1., 1., -1.), count=128, - types=(np.float32, np.float64)): - - @roc.jit - def fn(dst, src1, src2): - i = roc.get_global_id(0) - if i < dst.size: - dst[i] = math_fn(src1[i], src2[i]) - - for dtype in types: - if cases is None: - src1 = np.linspace(span[0], span[1], count).astype(dtype) - src2 = np.linspace(span[2], span[3], count).astype(dtype) - else: - src1 = np.array(cases[0], dtype=dtype) - src2 = np.array(cases[1], dtype=dtype) - - dst = np.zeros_like(src1) - fn[dst.size, 1](dst, src1, src2) - np.testing.assert_allclose(dst, npy_fn(src1, src2), - rtol=self._get_tol(math_fn, dtype), - err_msg='{0} ({1})'.format( - math_fn.__name__, - dtype.__name__)) - - def test_trig(self): - funcs = [math.sin, math.cos, math.tan] - - for fn in funcs: - self._generic_test_unary(fn, getattr(np, fn.__name__), - span=(-np.pi, np.pi)) - - def test_trig_inv(self): - funcs = [(math.asin, np.arcsin), - (math.acos, np.arccos), - (math.atan, np.arctan)] - - for fn, np_fn in funcs: - self._generic_test_unary(fn, np_fn) - - def test_trigh(self): - funcs = [math.sinh, math.cosh, math.tanh] - for fn in funcs: - self._generic_test_unary(fn, 
getattr(np, fn.__name__), - span=(-4.0, 4.0)) - - def test_trigh_inv(self): - funcs = [(math.asinh, np.arcsinh, (-4, 4)), - (math.acosh, np.arccosh, (1, 9)), - (math.atanh, np.arctanh, (-0.9, 0.9))] - - for fn, np_fn, span in funcs: - self._generic_test_unary(fn, np_fn, span=span) - - def test_classify(self): - funcs = [math.isnan, math.isinf] - cases = (float('nan'), float('inf'), float('-inf'), float('-nan'), - 0, 3, -2) - for fn in funcs: - self._generic_test_unary(fn, getattr(np, fn.__name__), - cases=cases) - - def test_floor_ceil(self): - funcs = [math.ceil, math.floor] - - for fn in funcs: - # cases with varied decimals - self._generic_test_unary(fn, getattr(np, fn.__name__), - span=(-1013.14, 843.21)) - # cases that include "exact" integers - self._generic_test_unary(fn, getattr(np, fn.__name__), - span=(-16, 16), count=129) - - def test_fabs(self): - funcs = [math.fabs] - for fn in funcs: - self._generic_test_unary(fn, getattr(np, fn.__name__), - span=(-63.3, 63.3)) - - def test_unary_exp(self): - funcs = [math.exp] - for fn in funcs: - self._generic_test_unary(fn, getattr(np, fn.__name__), - span=(-30, 30)) - - def test_unary_expm1(self): - funcs = [math.expm1] - for fn in funcs: - self._generic_test_unary(fn, getattr(np, fn.__name__), - span=(-30, 30)) - - def test_sqrt(self): - funcs = [math.sqrt] - for fn in funcs: - self._generic_test_unary(fn, getattr(np, fn.__name__), - span=(0, 1000)) - - def test_log(self): - funcs = [math.log, math.log10, math.log1p] - for fn in funcs: - self._generic_test_unary(fn, getattr(np, fn.__name__), - span=(0.1, 2500)) - - def test_binaries(self): - funcs = [math.copysign, math.fmod] - for fn in funcs: - self._generic_test_binary(fn, getattr(np, fn.__name__)) - - def test_pow(self): - funcs = [(math.pow, np.power)] - for fn, npy_fn in funcs: - self._generic_test_binary(fn, npy_fn) - - def test_atan2(self): - funcs = [(math.atan2, np.arctan2)] - for fn, npy_fn in funcs: - self._generic_test_binary(fn, npy_fn) - - def 
test_erf(self): - funcs = [math.erf, math.erfc] - for fn in funcs: - self._generic_test_unary(fn, np.vectorize(fn)) - - def test_gamma(self): - funcs = [math.gamma, math.lgamma] - for fn in funcs: - self._generic_test_unary(fn, np.vectorize(fn), span=(1e-4, 4.0)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/roc/tests/hsapy/test_matmul.py b/numba/numba/roc/tests/hsapy/test_matmul.py deleted file mode 100644 index 643f50bbd..000000000 --- a/numba/numba/roc/tests/hsapy/test_matmul.py +++ /dev/null @@ -1,118 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from timeit import default_timer as timer -import numpy as np - -from numba import unittest_support as unittest -from numba import roc, float32 -from numba.roc.hsadrv.error import HsaKernelLaunchError - - -class TestMatMul(unittest.TestCase): - def test_matmul_naive(self): - @roc.jit - def matmul(A, B, C): - i = roc.get_global_id(0) - j = roc.get_global_id(1) - - if i >= C.shape[0] or j >= C.shape[1]: - return - - tmp = 0 - - for k in range(A.shape[1]): - tmp += A[i, k] * B[k, j] - - C[i, j] = tmp - - N = 256 - A = np.random.random((N, N)).astype(np.float32) - B = np.random.random((N, N)).astype(np.float32) - C = np.zeros_like(A) - - with roc.register(A, B, C): - ts = timer() - matmul[(N // 16, N // 16), (16, 16)](A, B, C) - te = timer() - print("1st GPU time:", te - ts) - - with roc.register(A, B, C): - ts = timer() - matmul[(N // 16, N // 16), (16, 16)](A, B, C) - te = timer() - print("2nd GPU time:", te - ts) - - ts = timer() - ans = np.dot(A, B) - te = timer() - print("CPU time:", te - ts) - np.testing.assert_allclose(ans, C, rtol=1e-5) - - def check_matmul_fast(self, gridsize, blocksize): - - @roc.jit - def matmulfast(A, B, C): - x = roc.get_global_id(0) - y = roc.get_global_id(1) - - tx = roc.get_local_id(0) - ty = roc.get_local_id(1) - - sA = roc.shared.array(shape=(blocksize, blocksize), dtype=float32) - sB = roc.shared.array(shape=(blocksize, 
blocksize), dtype=float32) - - if x >= C.shape[0] or y >= C.shape[1]: - return - - tmp = 0 - - for i in range(gridsize): - # preload - sA[tx, ty] = A[x, ty + i * blocksize] - sB[tx, ty] = B[tx + i * blocksize, y] - # wait for preload to end - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - # compute loop - for j in range(blocksize): - tmp += sA[tx, j] * sB[j, ty] - # wait for compute to end - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - - C[x, y] = tmp - - N = gridsize * blocksize - A = np.random.random((N, N)).astype(np.float32) - B = np.random.random((N, N)).astype(np.float32) - C = np.zeros_like(A) - - griddim = gridsize, gridsize - blockdim = blocksize, blocksize - - with roc.register(A, B, C): - ts = timer() - matmulfast[griddim, blockdim](A, B, C) - te = timer() - print("1st GPU time:", te - ts) - - with roc.register(A, B, C): - ts = timer() - matmulfast[griddim, blockdim](A, B, C) - te = timer() - print("2nd GPU time:", te - ts) - - ts = timer() - ans = np.dot(A, B) - te = timer() - print("CPU time:", te - ts) - np.testing.assert_allclose(ans, C, rtol=1e-5) - - def test_matmul_fast(self): - self.check_matmul_fast(gridsize=8, blocksize=8) - - def test_matmul_fast_insufficient_resources(self): - with self.assertRaises(HsaKernelLaunchError): - self.check_matmul_fast(gridsize=8, blocksize=20) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/roc/tests/hsapy/test_memory.py b/numba/numba/roc/tests/hsapy/test_memory.py deleted file mode 100644 index 8102da54c..000000000 --- a/numba/numba/roc/tests/hsapy/test_memory.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -Test arrays backed by different memory -""" - -import logging - -import numpy as np - -from numba import roc -import numba.unittest_support as unittest -from numba.roc.hsadrv.driver import dgpu_present - -logger = logging.getLogger() - - -@roc.jit -def copy_kernel(dst, src): - i = roc.get_global_id(0) - if i < dst.size: - dst[i] = src[i] - - -@unittest.skipUnless(dgpu_present, 'test only on dGPU 
system') -class TestMemory(unittest.TestCase): - def test_auto_device(self): - blkct = 4 - blksz = 128 - nelem = blkct * blksz - expect = np.arange(nelem) + 1 - got = np.zeros_like(expect) - copy_kernel[blkct, blksz](got, expect.copy()) - np.testing.assert_equal(got, expect) - - def test_device_array(self): - blkct = 4 - blksz = 128 - nelem = blkct * blksz - expect = np.arange(nelem) + 1 - logger.info('device array like') - darr = roc.device_array_like(expect) - logger.info('pre launch') - copy_kernel[blkct, blksz](darr, roc.to_device(expect)) - logger.info('post launch') - got = darr.copy_to_host() - np.testing.assert_equal(got, expect) - - def test_coarsegrain_array(self): - blkct = 4 - blksz = 128 - nelem = blkct * blksz - expect = np.arange(nelem) + 1 - logger.info('coarsegrain array') - got = roc.coarsegrain_array(shape=expect.shape, dtype=expect.dtype) - got.fill(0) - logger.info('pre launch') - copy_kernel[blkct, blksz](got, expect.copy()) - logger.info('post launch') - np.testing.assert_equal(got, expect) - - def test_finegrain_array(self): - blkct = 4 - blksz = 128 - nelem = blkct * blksz - expect = np.arange(nelem) + 1 - logger.info('finegrain array') - got = roc.finegrain_array(shape=expect.shape, dtype=expect.dtype) - got.fill(0) - logger.info('pre launch') - copy_kernel[blkct, blksz](got, expect.copy()) - logger.info('post launch') - np.testing.assert_equal(got, expect) - -@unittest.skipUnless(dgpu_present, 'test only on dGPU system') -class TestDeviceMemorye(unittest.TestCase): - def test_device_device_transfer(self): - # This has to be run in isolation and before the above - # TODO: investigate why?! 
- nelem = 1000 - expect = np.arange(nelem, dtype=np.int32) + 1 - logger.info('device array like') - darr = roc.device_array_like(expect) - self.assertTrue(np.all(expect != darr.copy_to_host())) - logger.info('to_device') - stage = roc.to_device(expect) - logger.info('device -> device') - darr.copy_to_device(stage) - np.testing.assert_equal(expect, darr.copy_to_host()) - - -if __name__ == '__main__': - logging.basicConfig(level=logging.DEBUG) - unittest.main() diff --git a/numba/numba/roc/tests/hsapy/test_occupancy.py b/numba/numba/roc/tests/hsapy/test_occupancy.py deleted file mode 100644 index 7f0fd743b..000000000 --- a/numba/numba/roc/tests/hsapy/test_occupancy.py +++ /dev/null @@ -1,110 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from numba import unittest_support as unittest -from numba.roc.gcn_occupancy import get_limiting_factors - - -class TestOccupancy(unittest.TestCase): - def check_limits(self, inputs, expected_outputs): - outputs = get_limiting_factors(**inputs) - for k, expect in expected_outputs.items(): - got = getattr(outputs, k) - if k == 'occupancy': - self.assertAlmostEqual(got, expect, msg=k) - else: - self.assertEqual(got, expect, k) - - def test_limits_1(self): - inputs = dict(group_size=400, - vgpr_per_workitem=139, - sgpr_per_wave=49) - outputs = dict( - allowed_wave_due_to_sgpr=10, - allowed_wave_due_to_vgpr=1, - allowed_wave=1, - allowed_vgpr_per_workitem=128, - occupancy=0, - reasons=set(['allowed_wave_due_to_vgpr', - 'allowed_wave', - 'group_size']), - ) - self.check_limits(inputs, outputs) - - def test_limits_2(self): - inputs = dict(group_size=256, - vgpr_per_workitem=139, - sgpr_per_wave=49) - outputs = dict( - allowed_wave_due_to_sgpr=10, - allowed_wave_due_to_vgpr=1, - allowed_wave=1, - allowed_vgpr_per_workitem=256, - occupancy=.10, - reasons=set(), - ) - self.check_limits(inputs, outputs) - - def test_limits_3(self): - inputs = dict(group_size=2048, - vgpr_per_workitem=16, - sgpr_per_wave=70) - 
outputs = dict( - allowed_wave_due_to_sgpr=7, - allowed_wave_due_to_vgpr=16, - allowed_wave=7, - allowed_vgpr_per_workitem=32, - occupancy=0, - reasons=set(['allowed_wave_due_to_sgpr', - 'allowed_wave', - 'group_size']), - ) - self.check_limits(inputs, outputs) - - def test_limits_4(self): - inputs = dict(group_size=2048, - vgpr_per_workitem=32, - sgpr_per_wave=50) - outputs = dict( - allowed_wave_due_to_sgpr=10, - allowed_wave_due_to_vgpr=8, - allowed_wave=8, - allowed_vgpr_per_workitem=32, - occupancy=0, - reasons=set(['group_size']), - ) - self.check_limits(inputs, outputs) - - def test_limits_5(self): - inputs = dict(group_size=4, - vgpr_per_workitem=128, - sgpr_per_wave=10) - outputs = dict( - allowed_wave_due_to_sgpr=51, - allowed_wave_due_to_vgpr=2, - allowed_wave=2, - allowed_vgpr_per_workitem=256, - occupancy=.1, - reasons=set(), - ) - self.check_limits(inputs, outputs) - - def test_limits_6(self): - inputs = dict(group_size=4, - vgpr_per_workitem=257, - sgpr_per_wave=3) - outputs = dict( - allowed_wave_due_to_sgpr=170, - allowed_wave_due_to_vgpr=0, - allowed_wave=0, - allowed_vgpr_per_workitem=256, - occupancy=0, - reasons=set(['allowed_wave_due_to_vgpr', - 'allowed_wave']), - ) - self.check_limits(inputs, outputs) - - -if __name__ == '__main__': - unittest.main() - - diff --git a/numba/numba/roc/tests/hsapy/test_positioning.py b/numba/numba/roc/tests/hsapy/test_positioning.py deleted file mode 100644 index c9db57c76..000000000 --- a/numba/numba/roc/tests/hsapy/test_positioning.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import print_function, absolute_import - -import numpy as np -from numba import roc -import numba.unittest_support as unittest - - -class TestPositioning(unittest.TestCase): - - def test_kernel_jit(self): - @roc.jit - def udt(output): - global_id = roc.get_global_id(0) - global_size = roc.get_global_size(0) - local_id = roc.get_local_id(0) - group_id = roc.get_group_id(0) - num_groups = roc.get_num_groups(0) - workdim = 
roc.get_work_dim() - local_size = roc.get_local_size(0) - - output[0, group_id, local_id] = global_id - output[1, group_id, local_id] = global_size - output[2, group_id, local_id] = local_id - output[3, group_id, local_id] = local_size - output[4, group_id, local_id] = group_id - output[5, group_id, local_id] = num_groups - output[6, group_id, local_id] = workdim - - out = np.zeros((7, 2, 3), dtype=np.intp) - udt[2, 3](out) - - np.testing.assert_equal([[0, 1, 2], [3, 4, 5]], out[0]) - np.testing.assert_equal(6, out[1]) - np.testing.assert_equal([[0, 1, 2]] * 2, out[2]) - np.testing.assert_equal(3, out[3]) - np.testing.assert_equal([[0, 0, 0], [1, 1, 1]], out[4]) - np.testing.assert_equal(2, out[5]) - np.testing.assert_equal(1, out[6]) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/roc/tests/hsapy/test_reduction.py b/numba/numba/roc/tests/hsapy/test_reduction.py deleted file mode 100644 index 6a9b4b8ab..000000000 --- a/numba/numba/roc/tests/hsapy/test_reduction.py +++ /dev/null @@ -1,78 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import numpy as np - -from numba import unittest_support as unittest -from numba import roc, intp - -WAVESIZE = 64 - -@roc.jit(device=True) -def wave_reduce(val): - tid = roc.get_local_id(0) - laneid = tid % WAVESIZE - - width = WAVESIZE // 2 - while width: - if laneid < width: - val[laneid] += val[laneid + width] - val[laneid + width] = -1 # debug - roc.wavebarrier() - width = width // 2 - - # First thread has the result - roc.wavebarrier() - return val[0] - -@roc.jit -def kernel_warp_reduce(inp, out): - idx = roc.get_group_id(0) - val = inp[idx] - out[idx] = wave_reduce(val) - -@roc.jit -def kernel_flat_reduce(inp, out): - out[0] = wave_reduce(inp) - -class TestReduction(unittest.TestCase): - - def template_wave_reduce_int(self, dtype): - numblk = 2 - inp = np.arange(numblk * WAVESIZE, dtype=dtype).reshape(numblk, WAVESIZE) - inp_cpy = np.copy(inp) - out = np.zeros((numblk,)) 
- kernel_warp_reduce[numblk, WAVESIZE](inp, out) - - np.testing.assert_equal(out, inp_cpy.sum(axis=1)) - - def test_wave_reduce_intp(self): - self.template_wave_reduce_int(np.intp) - - def test_wave_reduce_int32(self): - self.template_wave_reduce_int(np.int32) - - def template_wave_reduce_real(self, dtype): - numblk = 2 - inp = np.linspace(0, 1, numblk * WAVESIZE).astype(dtype) - inp = inp.reshape(numblk, WAVESIZE) - inp_cpy = np.copy(inp) - out = np.zeros((numblk,)) - kernel_warp_reduce[numblk, WAVESIZE](inp, out) - - np.testing.assert_allclose(out, inp_cpy.sum(axis=1)) - - def test_wave_reduce_float64(self): - self.template_wave_reduce_real(np.float64) - - def test_wave_reduce_float32(self): - self.template_wave_reduce_real(np.float32) - - def test_flat_reduce(self): - inp = np.arange(WAVESIZE) # destroyed in kernel - out = np.zeros((1,)) - kernel_flat_reduce[1, WAVESIZE](inp, out) - np.testing.assert_allclose(out[0], np.arange(WAVESIZE).sum()) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/roc/tests/hsapy/test_scan.py b/numba/numba/roc/tests/hsapy/test_scan.py deleted file mode 100644 index 6656d1ec9..000000000 --- a/numba/numba/roc/tests/hsapy/test_scan.py +++ /dev/null @@ -1,449 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import numpy as np - -from numba import unittest_support as unittest -from numba import roc, intp - - -@roc.jit(device=True) -def device_scan_generic(tid, data): - """Inclusive prefix sum within a single block - - Requires tid should have range [0, data.size) and data.size must be - power of 2. 
- """ - n = data.size - - # Upsweep - offset = 1 - d = n // 2 - while d > 0: - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - if tid < d: - ai = offset * (2 * tid + 1) - 1 - bi = offset * (2 * tid + 2) - 1 - data[bi] += data[ai] - - offset *= 2 - d //= 2 - - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - prefixsum = data[n - 1] - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - if tid == 0: - data[n - 1] = 0 - - # Downsweep - d = 1 - offset = n - while d < n: - offset //= 2 - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - if tid < d: - ai = offset * (2 * tid + 1) - 1 - bi = offset * (2 * tid + 2) - 1 - - tmp = data[ai] - data[ai] = data[bi] - data[bi] += tmp - - d *= 2 - - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - return prefixsum - - -_WARPSIZE = 64 - - -@roc.jit(device=True) -def warp_scan(tid, temp, inclusive): - """Intra-warp scan - - Note - ---- - Assume all threads are in lockstep - """ - roc.wavebarrier() - lane = tid & (_WARPSIZE - 1) - if lane >= 1: - temp[tid] += temp[tid - 1] - - roc.wavebarrier() - if lane >= 2: - temp[tid] += temp[tid - 2] - - roc.wavebarrier() - if lane >= 4: - temp[tid] += temp[tid - 4] - - roc.wavebarrier() - if lane >= 8: - temp[tid] += temp[tid - 8] - - roc.wavebarrier() - if lane >= 16: - temp[tid] += temp[tid - 16] - - roc.wavebarrier() - if lane >= 32: - temp[tid] += temp[tid - 32] - - roc.wavebarrier() - if inclusive: - return temp[tid] - else: - return temp[tid - 1] if lane > 0 else 0 - - -@roc.jit(device=True) -def device_scan(tid, data, temp, inclusive): - """ - Args - ---- - tid: - thread id - data: scalar - input for tid - temp: shared memory for temporary work - """ - lane = tid & (_WARPSIZE - 1) - warpid = tid >> 6 - - # Preload - temp[tid] = data - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - - # Scan warps in parallel - warp_scan_res = warp_scan(tid, temp, inclusive) - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - - # Get parital result - if lane == (_WARPSIZE - 1): - temp[warpid] = temp[tid] - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - - # Scan the partial 
results - if warpid == 0: - warp_scan(tid, temp, True) - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - - # Accumlate scanned partial results - if warpid > 0: - warp_scan_res += temp[warpid - 1] - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - - # Output - if tid == temp.size - 1: - # Last thread computes prefix sum - if inclusive: - temp[0] = warp_scan_res - else: - temp[0] = warp_scan_res + data - - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - - # Load prefixsum - prefixsum = temp[0] - roc.barrier(roc.CLK_GLOBAL_MEM_FENCE) - - return warp_scan_res, prefixsum - -@roc.jit(device=True) -def shuffle_up(val, width): - tid = roc.get_local_id(0) - roc.wavebarrier() - idx = (tid + width) % _WARPSIZE - res = roc.ds_permute(idx, val) - return res - -@roc.jit(device=True) -def shuf_wave_inclusive_scan(val): - tid = roc.get_local_id(0) - lane = tid & (_WARPSIZE - 1) - - roc.wavebarrier() - shuf = shuffle_up(val, 1) - if lane >= 1: - val += shuf - - roc.wavebarrier() - shuf = shuffle_up(val, 2) - if lane >= 2: - val += shuf - - roc.wavebarrier() - shuf = shuffle_up(val, 4) - if lane >= 4: - val += shuf - - roc.wavebarrier() - shuf = shuffle_up(val, 8) - if lane >= 8: - val += shuf - - roc.wavebarrier() - shuf = shuffle_up(val, 16) - if lane >= 16: - val += shuf - - roc.wavebarrier() - shuf = shuffle_up(val, 32) - if lane >= 32: - val += shuf - - roc.wavebarrier() - return val - - -@roc.jit(device=True) -def shuf_device_inclusive_scan(data, temp): - """ - Args - ---- - data: scalar - input for tid - temp: shared memory for temporary work, requires at least - threadcount/wavesize storage - """ - tid = roc.get_local_id(0) - lane = tid & (_WARPSIZE - 1) - warpid = tid >> 6 - - # Scan warps in parallel - warp_scan_res = shuf_wave_inclusive_scan(data) - - roc.barrier() - - # Store partial sum into shared memory - if lane == (_WARPSIZE - 1): - temp[warpid] = warp_scan_res - - roc.barrier() - - # Scan the partial sum by first wave - if warpid == 0: - shuf_wave_inclusive_scan(temp[lane]) - - 
roc.barrier() - - # Get block sum for each wave - blocksum = 0 # first wave is 0 - if warpid > 0: - blocksum = temp[warpid - 1] - - return warp_scan_res + blocksum - - -class TestScan(unittest.TestCase): - def test_single_block(self): - - @roc.jit - def scan_block(data, sums): - sm_data = roc.shared.array(64, dtype=intp) - tid = roc.get_local_id(0) - gid = roc.get_global_id(0) - blkid = roc.get_group_id(0) - sm_data[tid] = data[gid] - prefixsum = device_scan_generic(tid, sm_data) - data[gid] = sm_data[tid] - if tid == 0: - sums[blkid] = prefixsum - - data = np.random.randint(0, 4, size=64).astype(np.intp) - expected = data.cumsum() - sums = np.zeros(1, dtype=np.intp) - scan_block[1, 64](data, sums) - np.testing.assert_equal(expected[:-1], data[1:]) - self.assertEqual(expected[-1], sums[0]) - self.assertEqual(0, data[0]) - - def test_multi_block(self): - - @roc.jit - def scan_block(data, sums): - sm_data = roc.shared.array(64, dtype=intp) - tid = roc.get_local_id(0) - gid = roc.get_global_id(0) - blkid = roc.get_group_id(0) - sm_data[tid] = data[gid] - prefixsum = device_scan_generic(tid, sm_data) - data[gid] = sm_data[tid] - if tid == 0: - sums[blkid] = prefixsum - - nd_data = np.random.randint(0, 4, size=3 * 64).astype( - np.intp).reshape(3, 64) - nd_expected = nd_data.cumsum(axis=1) - sums = np.zeros(3, dtype=np.intp) - scan_block[3, 64](nd_data.ravel(), sums) - - for nd in range(nd_expected.shape[0]): - expected = nd_expected[nd] - data = nd_data[nd] - np.testing.assert_equal(expected[:-1], data[1:]) - self.assertEqual(expected[-1], sums[nd]) - self.assertEqual(0, data[0]) - - def test_multi_large_block(self): - @roc.jit - def scan_block(data, sums): - sm_data = roc.shared.array(128, dtype=intp) - tid = roc.get_local_id(0) - gid = roc.get_global_id(0) - blkid = roc.get_group_id(0) - sm_data[tid] = data[gid] - prefixsum = device_scan_generic(tid, sm_data) - data[gid] = sm_data[tid] - sums[blkid, tid] = prefixsum - - nd_data = np.random.randint(0, 4, size=3 * 
128).astype( - np.intp).reshape(3, 128) - nd_expected = nd_data.cumsum(axis=1) - sums = np.zeros((3, 128), dtype=np.intp) - scan_block[3, 128](nd_data.ravel(), sums) - - for nd in range(nd_expected.shape[0]): - expected = nd_expected[nd] - data = nd_data[nd] - np.testing.assert_equal(expected[:-1], data[1:]) - np.testing.assert_equal(expected[-1], sums[nd]) - self.assertEqual(0, data[0]) - - -class TestFasterScan(unittest.TestCase): - - def test_single_block(self): - @roc.jit - def scan_block(data, sums): - sm_data = roc.shared.array(64, dtype=intp) - tid = roc.get_local_id(0) - gid = roc.get_global_id(0) - blkid = roc.get_group_id(0) - - scanval, prefixsum = device_scan(tid, data[gid], sm_data, - False) - - data[gid] = scanval - if tid == 0: - sums[blkid] = prefixsum - - data = np.random.randint(0, 4, size=64).astype(np.intp) - expected = data.cumsum() - sums = np.zeros(1, dtype=np.intp) - scan_block[1, 64](data, sums) - np.testing.assert_equal(expected[:-1], data[1:]) - self.assertEqual(expected[-1], sums[0]) - self.assertEqual(0, data[0]) - - def test_single_larger_block(self): - @roc.jit - def scan_block(data, sums): - sm_data = roc.shared.array(256, dtype=intp) - tid = roc.get_local_id(0) - gid = roc.get_global_id(0) - blkid = roc.get_group_id(0) - - scanval, prefixsum = device_scan(tid, data[gid], sm_data, - False) - data[gid] = scanval - if tid == 0: - sums[blkid] = prefixsum - - data = np.random.randint(0, 4, size=256).astype(np.intp) - expected = data.cumsum() - sums = np.zeros(1, dtype=np.intp) - scan_block[1, 256](data, sums) - np.testing.assert_equal(expected[:-1], data[1:]) - print(data) - print(sums) - self.assertEqual(expected[-1], sums[0]) - self.assertEqual(0, data[0]) - - def test_multi_large_block(self): - @roc.jit - def scan_block(data, sums): - sm_data = roc.shared.array(128, dtype=intp) - tid = roc.get_local_id(0) - gid = roc.get_global_id(0) - blkid = roc.get_group_id(0) - - scanval, prefixsum = device_scan(tid, data[gid], sm_data, - False) - 
- data[gid] = scanval - sums[blkid, tid] = prefixsum - - nd_data = np.random.randint(0, 4, size=3 * 128).astype( - np.intp).reshape(3, 128) - nd_expected = nd_data.cumsum(axis=1) - sums = np.zeros((3, 128), dtype=np.intp) - scan_block[3, 128](nd_data.ravel(), sums) - - for nd in range(nd_expected.shape[0]): - expected = nd_expected[nd] - data = nd_data[nd] - np.testing.assert_equal(expected[:-1], data[1:]) - np.testing.assert_equal(expected[-1], sums[nd]) - self.assertEqual(0, data[0]) - -class TestShuffleScan(unittest.TestCase): - - def test_shuffle_ds_permute(self): - @roc.jit - def foo(inp, mask, out): - tid = roc.get_local_id(0) - out[tid] = roc.ds_permute(inp[tid], mask[tid]) - - inp = np.arange(64, dtype=np.intp) - np.random.seed(0) - for i in range(10): - mask = np.random.randint(0, inp.size, inp.size).astype(np.uint32) - out = np.zeros_like(inp) - foo[1, 64](inp, mask, out) - np.testing.assert_equal(inp[mask], out) - - def test_shuffle_up(self): - @roc.jit - def foo(inp, out): - gid = roc.get_global_id(0) - out[gid] = shuffle_up(inp[gid], 1) - - inp = np.arange(128, dtype=np.intp) - out = np.zeros_like(inp) - foo[1, 128](inp, out) - - inp = inp.reshape(2, 64) - out = out.reshape(inp.shape) - - for i in range(out.shape[0]): - np.testing.assert_equal(inp[0, :-1], out[0, 1:]) - np.testing.assert_equal(inp[0, -1], out[0, 0]) - - def test_shuf_wave_inclusive_scan(self): - @roc.jit - def foo(inp, out): - gid = roc.get_global_id(0) - out[gid] = shuf_wave_inclusive_scan(inp[gid]) - - inp = np.arange(64, dtype=np.intp) - out = np.zeros_like(inp) - foo[1, 64](inp, out) - np.testing.assert_equal(inp.cumsum(), out) - - def test_shuf_device_inclusive_scan(self): - @roc.jit - def foo(inp, out): - gid = roc.get_global_id(0) - temp = roc.shared.array(2, dtype=intp) - out[gid] = shuf_device_inclusive_scan(inp[gid], temp) - - inp = np.arange(128, dtype=np.intp) - out = np.zeros_like(inp) - - foo[1, inp.size](inp, out) - np.testing.assert_equal(np.cumsum(inp), out) - -if 
__name__ == '__main__': - unittest.main() diff --git a/numba/numba/roc/tests/hsapy/test_simple.py b/numba/numba/roc/tests/hsapy/test_simple.py deleted file mode 100644 index 13e15c951..000000000 --- a/numba/numba/roc/tests/hsapy/test_simple.py +++ /dev/null @@ -1,143 +0,0 @@ -from __future__ import print_function, absolute_import - -import numpy as np -from numba import roc -from numba.roc.hsadrv.error import HsaKernelLaunchError -import numba.unittest_support as unittest - - -class TestSimple(unittest.TestCase): - - def test_array_access(self): - magic_token = 123 - - @roc.jit - def udt(output): - output[0] = magic_token - - out = np.zeros(1, dtype=np.intp) - udt[1, 1](out) - - self.assertEqual(out[0], magic_token) - - def test_array_access_2d(self): - magic_token = 123 - - @roc.jit - def udt(output): - for i in range(output.shape[0]): - for j in range(output.shape[1]): - output[i, j] = magic_token - - out = np.zeros((10, 10), dtype=np.intp) - udt[1, 1](out) - np.testing.assert_equal(out, magic_token) - - def test_array_access_3d(self): - magic_token = 123 - - @roc.jit - def udt(output): - for i in range(output.shape[0]): - for j in range(output.shape[1]): - for k in range(output.shape[2]): - output[i, j, k] = magic_token - - out = np.zeros((10, 10, 10), dtype=np.intp) - udt[1, 1](out) - np.testing.assert_equal(out, magic_token) - - def test_global_id(self): - @roc.jit - def udt(output): - global_id = roc.get_global_id(0) - output[global_id] = global_id - - # Allocate extra space to track bad indexing - out = np.zeros(100 + 2, dtype=np.intp) - udt[10, 10](out[1:-1]) - - np.testing.assert_equal(out[1:-1], np.arange(100)) - - self.assertEqual(out[0], 0) - self.assertEqual(out[-1], 0) - - def test_local_id(self): - @roc.jit - def udt(output): - global_id = roc.get_global_id(0) - local_id = roc.get_local_id(0) - output[global_id] = local_id - - # Allocate extra space to track bad indexing - out = np.zeros(100 + 2, dtype=np.intp) - udt[10, 10](out[1:-1]) - - subarr = 
out[1:-1] - - for parted in np.split(subarr, 10): - np.testing.assert_equal(parted, np.arange(10)) - - self.assertEqual(out[0], 0) - self.assertEqual(out[-1], 0) - - def test_group_id(self): - @roc.jit - def udt(output): - global_id = roc.get_global_id(0) - group_id = roc.get_group_id(0) - output[global_id] = group_id + 1 - - # Allocate extra space to track bad indexing - out = np.zeros(100 + 2, dtype=np.intp) - udt[10, 10](out[1:-1]) - - subarr = out[1:-1] - - for i, parted in enumerate(np.split(subarr, 10), start=1): - np.testing.assert_equal(parted, i) - - self.assertEqual(out[0], 0) - self.assertEqual(out[-1], 0) - - - def test_workdim(self): - @roc.jit - def udt(output): - global_id = roc.get_global_id(0) - workdim = roc.get_work_dim() - output[global_id] = workdim - - out = np.zeros(10, dtype=np.intp) - udt[1, 10](out) - np.testing.assert_equal(out, 1) - - @roc.jit - def udt2(output): - g0 = roc.get_global_id(0) - g1 = roc.get_global_id(1) - output[g0, g1] = roc.get_work_dim() - - out = np.zeros((2, 5), dtype=np.intp) - udt2[(1, 1), (2, 5)](out) - np.testing.assert_equal(out, 2) - - def test_empty_kernel(self): - @roc.jit - def udt(): - pass - - udt[1, 1]() - - def test_workgroup_oversize(self): - @roc.jit - def udt(): - pass - - with self.assertRaises(HsaKernelLaunchError) as raises: - udt[1, 2**30]() - self.assertIn("Try reducing group-size", str(raises.exception)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/roc/tests/hsapy/test_ufuncbuilding.py b/numba/numba/roc/tests/hsapy/test_ufuncbuilding.py deleted file mode 100644 index 1e8fb8d30..000000000 --- a/numba/numba/roc/tests/hsapy/test_ufuncbuilding.py +++ /dev/null @@ -1,110 +0,0 @@ -from __future__ import print_function, absolute_import - -import numpy as np - -from numba import unittest_support as unittest -from numba import vectorize -from numba.roc.vectorizers import HsaVectorize -from numba.roc.dispatch import HsaUFuncDispatcher - - -def ufunc_add_core(a, b): - return a 
+ b - - -class TestUFuncBuilding(unittest.TestCase): - def test_ufunc_building(self): - ufbldr = HsaVectorize(ufunc_add_core) - ufbldr.add("float32(float32, float32)") - ufbldr.add("intp(intp, intp)") - ufunc = ufbldr.build_ufunc() - self.assertIsInstance(ufunc, HsaUFuncDispatcher) - - # Test integer version - A = np.arange(100, dtype=np.intp) - B = np.arange(100, dtype=np.intp) + 1 - expected = A + B - got = ufunc(A, B) - - np.testing.assert_equal(expected, got) - self.assertEqual(expected.dtype, got.dtype) - self.assertEqual(np.dtype(np.intp), got.dtype) - - # Test real version - A = np.arange(100, dtype=np.float32) - B = np.arange(100, dtype=np.float32) + 1 - expected = A + B - got = ufunc(A, B) - - np.testing.assert_allclose(expected, got) - self.assertEqual(expected.dtype, got.dtype) - self.assertEqual(np.dtype(np.float32), got.dtype) - - -class TestVectorizeDecor(unittest.TestCase): - def test_vectorize_decor(self): - @vectorize(["float32(float32, float32, float32)", - "intp(intp, intp, intp)"], - target='roc') - def axpy(a, x, y): - return a * x + y - - - self.assertIsInstance(axpy, HsaUFuncDispatcher) - # Test integer version - A = np.arange(100, dtype=np.intp) - X = np.arange(100, dtype=np.intp) + 1 - Y = np.arange(100, dtype=np.intp) + 2 - expected = A * X + Y - got = axpy(A, X, Y) - - np.testing.assert_equal(expected, got) - self.assertEqual(expected.dtype, got.dtype) - self.assertEqual(np.dtype(np.intp), got.dtype) - - # Test real version - A = np.arange(100, dtype=np.float32) - X = np.arange(100, dtype=np.float32) + 1 - Y = np.arange(100, dtype=np.float32) + 2 - expected = A * X + Y - got = axpy(A, X, Y) - - np.testing.assert_allclose(expected, got) - self.assertEqual(expected.dtype, got.dtype) - self.assertEqual(np.dtype(np.float32), got.dtype) - - -class TestVectorizeScalar(unittest.TestCase): - def test_scalar_input(self): - @vectorize(["float32(float32, float32, float32)", - "intp(intp, intp, intp)"], - target='roc') - def axpy(a, x, y): - return a 
* x + y - - self.assertIsInstance(axpy, HsaUFuncDispatcher) - # Test integer version - A = 2 - X = np.arange(100, dtype=np.intp) + 1 - Y = np.arange(100, dtype=np.intp) + 2 - expected = A * X + Y - got = axpy(A, X, Y) - - np.testing.assert_equal(expected, got) - self.assertEqual(expected.dtype, got.dtype) - self.assertEqual(np.dtype(np.intp), got.dtype) - - # Test real version - A = 2.3 - X = np.arange(100, dtype=np.float32) + 1 - Y = np.arange(100, dtype=np.float32) + 2 - expected = A * X + Y - got = axpy(A, X, Y) - - np.testing.assert_allclose(expected, got) - self.assertEqual(expected.dtype, got.dtype) - self.assertEqual(np.dtype(np.float32), got.dtype) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/roc/vectorizers.py b/numba/numba/roc/vectorizers.py deleted file mode 100644 index 29385968f..000000000 --- a/numba/numba/roc/vectorizers.py +++ /dev/null @@ -1,149 +0,0 @@ -from __future__ import print_function, absolute_import - -from numba import roc -from numba.npyufunc import deviceufunc - -from numba.roc import dispatch - -vectorizer_stager_source = ''' -def __vectorized_{name}({args}, __out__): - - __tid__ = __hsa__.get_local_id(0) - __blksz__ = __hsa__.get_local_size(0) - __blkid__ = __hsa__.get_group_id(0) - - __tid0__ = __tid__ + __blksz__ * (4 * __blkid__) - __tid1__ = __tid__ + __blksz__ * (4 * __blkid__ + 1) - __tid2__ = __tid__ + __blksz__ * (4 * __blkid__ + 2) - __tid3__ = __tid__ + __blksz__ * (4 * __blkid__ + 3) - - __ilp0__ = __tid0__ < __out__.shape[0] - if not __ilp0__: - # Early escape - return - __ilp1__ = __tid1__ < __out__.shape[0] - __ilp2__ = __tid2__ < __out__.shape[0] - __ilp3__ = __tid3__ < __out__.shape[0] - - if __ilp3__: - __args0__ = {argitems_0} - __args1__ = {argitems_1} - __args2__ = {argitems_2} - __args3__ = {argitems_3} - - __r0__ = __core__(*__args0__) - __r1__ = __core__(*__args1__) - __r2__ = __core__(*__args2__) - __r3__ = __core__(*__args3__) - - __out__[__tid0__] = __r0__ - __out__[__tid1__] = 
__r1__ - __out__[__tid2__] = __r2__ - __out__[__tid3__] = __r3__ - - elif __ilp2__: - __args0__ = {argitems_0} - __args1__ = {argitems_1} - __args2__ = {argitems_2} - - __r0__ = __core__(*__args0__) - __r1__ = __core__(*__args1__) - __r2__ = __core__(*__args2__) - - __out__[__tid0__] = __r0__ - __out__[__tid1__] = __r1__ - __out__[__tid2__] = __r2__ - - elif __ilp1__: - __args0__ = {argitems_0} - __args1__ = {argitems_1} - - __r0__ = __core__(*__args0__) - __r1__ = __core__(*__args1__) - - __out__[__tid0__] = __r0__ - __out__[__tid1__] = __r1__ - - else: - __args0__ = {argitems_0} - __r0__ = __core__(*__args0__) - __out__[__tid0__] = __r0__ - -''' - - -class HsaVectorize(deviceufunc.DeviceVectorize): - def _compile_core(self, sig): - hsadevfn = roc.jit(sig, device=True)(self.pyfunc) - return hsadevfn, hsadevfn.cres.signature.return_type - - def _get_globals(self, corefn): - glbl = self.pyfunc.__globals__ - glbl.update({'__hsa__': roc, - '__core__': corefn}) - return glbl - - def _compile_kernel(self, fnobj, sig): - return roc.jit(sig)(fnobj) - - def _get_kernel_source(self, template, sig, funcname): - args = ['a%d' % i for i in range(len(sig.args))] - - def make_argitems(n): - out = ', '.join('%s[__tid%d__]' % (i, n) for i in args) - if len(args) < 2: - # Less than two arguments. - # We need to wrap the argument in a tuple because - # we use stararg later. 
- return "({0},)".format(out) - else: - return out - - fmts = dict(name=funcname, - args=', '.join(args), - argitems_0=make_argitems(n=0), - argitems_1=make_argitems(n=1), - argitems_2=make_argitems(n=2), - argitems_3=make_argitems(n=3)) - src = template.format(**fmts) - return src - - def build_ufunc(self): - return dispatch.HsaUFuncDispatcher(self.kernelmap) - - @property - def _kernel_template(self): - return vectorizer_stager_source - - -# ------------------------------------------------------------------------------ -# Generalized HSA ufuncs - -_gufunc_stager_source = ''' -def __gufunc_{name}({args}): - __tid__ = __hsa__.get_global_id(0) - if __tid__ < {checkedarg}: - __core__({argitems}) -''' - - -class HsaGUFuncVectorize(deviceufunc.DeviceGUFuncVectorize): - def build_ufunc(self): - engine = deviceufunc.GUFuncEngine(self.inputsig, self.outputsig) - return dispatch.HSAGenerializedUFunc(kernelmap=self.kernelmap, - engine=engine) - - def _compile_kernel(self, fnobj, sig): - return roc.jit(sig)(fnobj) - - @property - def _kernel_template(self): - return _gufunc_stager_source - - def _get_globals(self, sig): - corefn = roc.jit(sig, device=True)(self.pyfunc) - glbls = self.py_func.__globals__.copy() - glbls.update({'__hsa__': roc, - '__core__': corefn}) - return glbls - diff --git a/numba/numba/runtests.py b/numba/numba/runtests.py deleted file mode 100644 index 25d70c72b..000000000 --- a/numba/numba/runtests.py +++ /dev/null @@ -1,116 +0,0 @@ -from __future__ import print_function - -import json -import re - - -def _main(argv, **kwds): - from numba.testing import run_tests - # This helper function assumes the first element of argv - # is the name of the calling program. - # The 'main' API function is invoked in-process, and thus - # will synthesize that name. 
- - if '--failed-first' in argv: - # Failed first - argv.remove('--failed-first') - return _FailedFirstRunner().main(argv, kwds) - elif '--last-failed' in argv: - argv.remove('--last-failed') - return _FailedFirstRunner(last_failed=True).main(argv, kwds) - else: - return run_tests(argv, defaultTest='numba.tests', - **kwds).wasSuccessful() - - -def main(*argv, **kwds): - """keyword arguments are accepted for backward compatiblity only. - See `numba.testing.run_tests()` documentation for details.""" - return _main(['
'] + list(argv), **kwds) - - -class _FailedFirstRunner(object): - """ - Test Runner to handle the failed-first (--failed-first) option. - """ - cache_filename = '.runtests_lastfailed' - - def __init__(self, last_failed=False): - self.last_failed = last_failed - - def main(self, argv, kwds): - from numba.testing import run_tests - prog = argv[0] - argv = argv[1:] - flags = [a for a in argv if a.startswith('-')] - - all_tests, failed_tests = self.find_last_failed(argv) - # Prepare tests to run - if failed_tests: - ft = "There were {} previously failed tests" - print(ft.format(len(failed_tests))) - remaing_tests = [t for t in all_tests - if t not in failed_tests] - if self.last_failed: - tests = list(failed_tests) - else: - tests = failed_tests + remaing_tests - else: - if self.last_failed: - tests = [] - else: - tests = list(all_tests) - - if not tests: - print("No tests to run") - return True - # Run the testsuite - print("Running {} tests".format(len(tests))) - print('Flags', flags) - result = run_tests([prog] + flags + tests, **kwds) - # Save failed - self.save_failed_tests(result, all_tests) - return result.wasSuccessful() - - def save_failed_tests(self, result, all_tests): - cache = [] - # Find failed tests - failed = set() - for case in result.errors + result.failures: - failed.add(case[0].id()) - # Build cache - for t in all_tests: - if t in failed: - cache.append(t) - # Write cache - with open(self.cache_filename, 'w') as fout: - json.dump(cache, fout) - - def find_last_failed(self, argv): - from numba.tests.support import captured_output - - # Find all tests - listargv = ['-l'] + [a for a in argv if not a.startswith('-')] - with captured_output("stdout") as stream: - main(*listargv) - - pat = re.compile(r"^(\w+\.)+\w+$") - lines = stream.getvalue().splitlines() - all_tests = [x for x in lines if pat.match(x) is not None] - - try: - fobj = open(self.cache_filename) - except IOError: - failed_tests = [] - else: - with fobj as fin: - failed_tests = 
json.load(fin) - return all_tests, failed_tests - - -if __name__ == '__main__': - import sys - # For parallel testing under Windows - from multiprocessing import freeze_support - freeze_support() - sys.exit(0 if _main(sys.argv) else 1) diff --git a/numba/numba/runtime/__init__.py b/numba/numba/runtime/__init__.py deleted file mode 100644 index 5653f954d..000000000 --- a/numba/numba/runtime/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from __future__ import absolute_import - -from .nrt import rtsys diff --git a/numba/numba/runtime/_nrt_python.c b/numba/numba/runtime/_nrt_python.c deleted file mode 100644 index e48641b6d..000000000 --- a/numba/numba/runtime/_nrt_python.c +++ /dev/null @@ -1,416 +0,0 @@ -/* - * Definition of NRT functions for marshalling from / to Python objects. - * This module is included by _nrt_pythonmod.c and by pycc-compiled modules. - */ - -#include "../_pymodule.h" - -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#include -#include - -#include "../_arraystruct.h" -#include "nrt.h" - - -/* - * Create a NRT MemInfo for data owned by a PyObject. - */ - -static void -pyobject_dtor(void *ptr, size_t size, void* info) { - PyGILState_STATE gstate; - PyObject *ownerobj = info; - - gstate = PyGILState_Ensure(); /* ensure the GIL */ - Py_DECREF(ownerobj); /* release the python object */ - PyGILState_Release(gstate); /* release the GIL */ -} - -static NRT_MemInfo * -meminfo_new_from_pyobject(void *data, PyObject *ownerobj) { - size_t dummy_size = 0; - Py_INCREF(ownerobj); - return NRT_MemInfo_new(data, dummy_size, pyobject_dtor, ownerobj); -} - -/* - * A Python object wrapping a NRT meminfo. 
- */ - -typedef struct { - PyObject_HEAD - NRT_MemInfo *meminfo; -} MemInfoObject; - -static -int MemInfo_init(MemInfoObject *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = {"ptr", NULL}; - PyObject *raw_ptr_obj; - void *raw_ptr; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", keywords, &raw_ptr_obj)) { - return -1; - } - raw_ptr = PyLong_AsVoidPtr(raw_ptr_obj); - if(PyErr_Occurred()) return -1; - self->meminfo = (NRT_MemInfo *)raw_ptr; - assert (NRT_MemInfo_refcount(self->meminfo) > 0 && "0 refcount"); - return 0; -} - - -#if PY_MAJOR_VERSION < 3 -static Py_ssize_t -MemInfo_rdwrbufferproc(PyObject *self, Py_ssize_t segment, void **ptrptr) -{ - MemInfoObject *mio = (MemInfoObject *)self; - NRT_MemInfo *mi = mio->meminfo; - if (segment != 0) { - PyErr_SetString(PyExc_TypeError, "MemInfo only has 1 segment"); - return -1; - } - *ptrptr = NRT_MemInfo_data(mi); - return NRT_MemInfo_size(mi); -} - -static Py_ssize_t -MemInfo_segcountproc(PyObject *self, Py_ssize_t *lenp) { - MemInfoObject *mio = (MemInfoObject *)self; - NRT_MemInfo *mi = mio->meminfo; - if (lenp) { - *lenp = NRT_MemInfo_size(mi); - } - return 1; -} - -#else /* PY_MAJOR_VERSION < 3 */ - -static int -MemInfo_getbuffer(PyObject *exporter, Py_buffer *view, int flags) { - Py_ssize_t len; - void *buf; - int readonly = 0; - - MemInfoObject *miobj = (MemInfoObject*)exporter; - NRT_MemInfo *mi = miobj->meminfo; - - buf = NRT_MemInfo_data(mi); - len = NRT_MemInfo_size(mi); - return PyBuffer_FillInfo(view, exporter, buf, len, readonly, flags); -} -#endif /* PY_MAJOR_VERSION < 3 */ - -#if PY_MAJOR_VERSION < 3 -static PyBufferProcs MemInfo_bufferProcs = {MemInfo_rdwrbufferproc, - MemInfo_rdwrbufferproc, - MemInfo_segcountproc, - NULL}; -#else -static PyBufferProcs MemInfo_bufferProcs = {MemInfo_getbuffer, NULL}; -#endif - -static -PyObject* -MemInfo_acquire(MemInfoObject *self) { - NRT_MemInfo_acquire(self->meminfo); - Py_RETURN_NONE; -} - -static -PyObject* -MemInfo_release(MemInfoObject 
*self) { - NRT_MemInfo_release(self->meminfo); - Py_RETURN_NONE; -} - -static -PyObject* -MemInfo_get_data(MemInfoObject *self, void *closure) { - return PyLong_FromVoidPtr(NRT_MemInfo_data(self->meminfo)); -} - -static -PyObject* -MemInfo_get_refcount(MemInfoObject *self, void *closure) { - size_t refct = NRT_MemInfo_refcount(self->meminfo); - if ( refct == (size_t)-1 ) { - PyErr_SetString(PyExc_ValueError, "invalid MemInfo"); - return NULL; - } - return PyLong_FromSize_t(refct); -} - -static void -MemInfo_dealloc(MemInfoObject *self) -{ - NRT_MemInfo_release(self->meminfo); - Py_TYPE(self)->tp_free((PyObject*)self); -} - -static PyMethodDef MemInfo_methods[] = { - {"acquire", (PyCFunction)MemInfo_acquire, METH_NOARGS, - "Increment the reference count" - }, - {"release", (PyCFunction)MemInfo_release, METH_NOARGS, - "Decrement the reference count" - }, - {NULL} /* Sentinel */ -}; - - -static PyGetSetDef MemInfo_getsets[] = { - {"data", - (getter)MemInfo_get_data, NULL, - "Get the data pointer as an integer", - NULL}, - {"refcount", - (getter)MemInfo_get_refcount, NULL, - "Get the refcount", - NULL}, - {NULL} /* Sentinel */ -}; - - -static PyTypeObject MemInfoType = { -#if (PY_MAJOR_VERSION < 3) - PyObject_HEAD_INIT(NULL) - 0, /* ob_size*/ -#else - PyVarObject_HEAD_INIT(NULL, 0) -#endif - "_nrt_python._MemInfo", /* tp_name*/ - sizeof(MemInfoObject), /* tp_basicsize*/ - 0, /* tp_itemsize*/ - (destructor)MemInfo_dealloc, /* tp_dealloc*/ - 0, /* tp_print*/ - 0, /* tp_getattr*/ - 0, /* tp_setattr*/ - 0, /* tp_compare*/ - 0, /* tp_repr*/ - 0, /* tp_as_number*/ - 0, /* tp_as_sequence*/ - 0, /* tp_as_mapping*/ - 0, /* tp_hash */ - 0, /* tp_call*/ - 0, /* tp_str*/ - 0, /* tp_getattro*/ - 0, /* tp_setattro*/ - &MemInfo_bufferProcs, /* tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags*/ - 0, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 
MemInfo_methods, /* tp_methods */ - 0, /* tp_members */ - MemInfo_getsets, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)MemInfo_init, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ -}; - - -/* - * Array adaptor code - */ - -NUMBA_EXPORT_FUNC(int) -NRT_adapt_ndarray_from_python(PyObject *obj, arystruct_t* arystruct) { - PyArrayObject *ndary; - int i, ndim; - npy_intp *p; - void *data; - - if (!PyArray_Check(obj)) { - return -1; - } - - ndary = (PyArrayObject*)obj; - ndim = PyArray_NDIM(ndary); - data = PyArray_DATA(ndary); - - arystruct->meminfo = meminfo_new_from_pyobject((void*)data, obj); - arystruct->data = data; - arystruct->nitems = PyArray_SIZE(ndary); - arystruct->itemsize = PyArray_ITEMSIZE(ndary); - arystruct->parent = obj; - p = arystruct->shape_and_strides; - for (i = 0; i < ndim; i++, p++) { - *p = PyArray_DIM(ndary, i); - } - for (i = 0; i < ndim; i++, p++) { - *p = PyArray_STRIDE(ndary, i); - } - - NRT_Debug(nrt_debug_print("NRT_adapt_ndarray_from_python %p\n", - arystruct->meminfo)); - return 0; -} - -static -PyObject* try_to_return_parent(arystruct_t *arystruct, int ndim, - PyArray_Descr *descr) -{ - int i; - PyArrayObject *array = (PyArrayObject *)arystruct->parent; - - if (!PyArray_Check(arystruct->parent)) - /* Parent is a generic buffer-providing object */ - goto RETURN_ARRAY_COPY; - - if (PyArray_DATA(array) != arystruct->data) - goto RETURN_ARRAY_COPY; - - if (PyArray_NDIM(array) != ndim) - goto RETURN_ARRAY_COPY; - - if (PyObject_RichCompareBool((PyObject *) PyArray_DESCR(array), - (PyObject *) descr, Py_EQ) <= 0) - goto RETURN_ARRAY_COPY; - - for(i = 0; i < ndim; ++i) { - if (PyArray_DIMS(array)[i] != arystruct->shape_and_strides[i]) - goto RETURN_ARRAY_COPY; - if (PyArray_STRIDES(array)[i] != arystruct->shape_and_strides[ndim + i]) - goto RETURN_ARRAY_COPY; - } - - /* Yes, it is the same array - Return new reference */ - Py_INCREF((PyObject 
*)array); - return (PyObject *)array; - -RETURN_ARRAY_COPY: - return NULL; -} - -NUMBA_EXPORT_FUNC(PyObject *) -NRT_adapt_ndarray_to_python(arystruct_t* arystruct, int ndim, - int writeable, PyArray_Descr *descr) -{ - PyArrayObject *array; - MemInfoObject *miobj = NULL; - PyObject *args; - npy_intp *shape, *strides; - int flags = 0; - - if (!PyArray_DescrCheck(descr)) { - PyErr_Format(PyExc_TypeError, - "expected dtype object, got '%.200s'", - Py_TYPE(descr)->tp_name); - return NULL; - } - - if (arystruct->parent) { - PyObject *obj = try_to_return_parent(arystruct, ndim, descr); - if (obj) { - /* Release NRT reference to the numpy array */ - if (arystruct->meminfo) - NRT_MemInfo_release(arystruct->meminfo); - return obj; - } - } - - if (arystruct->meminfo) { - /* wrap into MemInfoObject */ - miobj = PyObject_New(MemInfoObject, &MemInfoType); - args = PyTuple_New(1); - /* SETITEM steals reference */ - PyTuple_SET_ITEM(args, 0, PyLong_FromVoidPtr(arystruct->meminfo)); - /* Note: MemInfo_init() does not incref. This function steals the - * NRT reference. 
- */ - if (MemInfo_init(miobj, args, NULL)) { - return NULL; - } - Py_DECREF(args); - } - - shape = arystruct->shape_and_strides; - strides = shape + ndim; - Py_INCREF((PyObject *) descr); - array = (PyArrayObject *) PyArray_NewFromDescr(&PyArray_Type, descr, ndim, - shape, strides, arystruct->data, - flags, (PyObject *) miobj); - - if (array == NULL) - return NULL; - - /* Set writable */ -#if NPY_API_VERSION >= 0x00000007 - if (writeable) { - PyArray_ENABLEFLAGS(array, NPY_ARRAY_WRITEABLE); - } - else { - PyArray_CLEARFLAGS(array, NPY_ARRAY_WRITEABLE); - } -#else - if (writeable) { - array->flags |= NPY_WRITEABLE; - } - else { - array->flags &= ~NPY_WRITEABLE; - } -#endif - - if (miobj) { - /* Set the MemInfoObject as the base object */ -#if NPY_API_VERSION >= 0x00000007 - if (-1 == PyArray_SetBaseObject(array, - (PyObject *) miobj)) - { - Py_DECREF(array); - Py_DECREF(miobj); - return NULL; - } -#else - PyArray_BASE(array) = (PyObject *) miobj; -#endif - - } - return (PyObject *) array; -} - -NUMBA_EXPORT_FUNC(void) -NRT_adapt_buffer_from_python(Py_buffer *buf, arystruct_t *arystruct) -{ - int i; - npy_intp *p; - - if (buf->obj) { - /* Allocate new MemInfo only if the buffer has a parent */ - arystruct->meminfo = meminfo_new_from_pyobject((void*)buf->buf, buf->obj); - } - arystruct->data = buf->buf; - arystruct->itemsize = buf->itemsize; - arystruct->parent = buf->obj; - arystruct->nitems = 1; - p = arystruct->shape_and_strides; - for (i = 0; i < buf->ndim; i++, p++) { - *p = buf->shape[i]; - arystruct->nitems *= buf->shape[i]; - } - for (i = 0; i < buf->ndim; i++, p++) { - *p = buf->strides[i]; - } -} - - -/* Initialization subroutines for modules including this source file */ - -static int -init_nrt_python_module(PyObject *module) -{ - MemInfoType.tp_new = PyType_GenericNew; - if (PyType_Ready(&MemInfoType)) - return -1; - return 0; -} diff --git a/numba/numba/runtime/_nrt_pythonmod.c b/numba/numba/runtime/_nrt_pythonmod.c deleted file mode 100644 index 
dfcc50288..000000000 --- a/numba/numba/runtime/_nrt_pythonmod.c +++ /dev/null @@ -1,198 +0,0 @@ -#define NUMBA_EXPORT_FUNC(_rettype) static _rettype -#define NUMBA_EXPORT_DATA(_vartype) static _vartype - -#include "_nrt_python.c" - -static PyObject * -memsys_shutdown(PyObject *self, PyObject *args) { - NRT_MemSys_shutdown(); - Py_RETURN_NONE; -} - -static PyObject * -memsys_use_cpython_allocator(PyObject *self, PyObject *args) { - NRT_MemSys_set_allocator(PyMem_RawMalloc, - PyMem_RawRealloc, - PyMem_RawFree); - Py_RETURN_NONE; -} - -static PyObject * -memsys_set_atomic_inc_dec(PyObject *self, PyObject *args) { - PyObject *addr_inc_obj, *addr_dec_obj; - void *addr_inc, *addr_dec; - if (!PyArg_ParseTuple(args, "OO", &addr_inc_obj, &addr_dec_obj)) { - return NULL; - } - addr_inc = PyLong_AsVoidPtr(addr_inc_obj); - if(PyErr_Occurred()) return NULL; - addr_dec = PyLong_AsVoidPtr(addr_dec_obj); - if(PyErr_Occurred()) return NULL; - NRT_MemSys_set_atomic_inc_dec(addr_inc, addr_dec); - Py_RETURN_NONE; -} - -static PyObject * -memsys_set_atomic_cas(PyObject *self, PyObject *args) { - PyObject *addr_cas_obj; - void *addr_cas; - if (!PyArg_ParseTuple(args, "O", &addr_cas_obj)) { - return NULL; - } - addr_cas = PyLong_AsVoidPtr(addr_cas_obj); - if(PyErr_Occurred()) return NULL; - NRT_MemSys_set_atomic_cas(addr_cas); - Py_RETURN_NONE; -} - -static PyObject * -memsys_get_stats_alloc(PyObject *self, PyObject *args) { - return PyLong_FromSize_t(NRT_MemSys_get_stats_alloc()); -} - -static PyObject * -memsys_get_stats_free(PyObject *self, PyObject *args) { - return PyLong_FromSize_t(NRT_MemSys_get_stats_free()); -} - -static PyObject * -memsys_get_stats_mi_alloc(PyObject *self, PyObject *args) { - return PyLong_FromSize_t(NRT_MemSys_get_stats_mi_alloc()); -} - -static PyObject * -memsys_get_stats_mi_free(PyObject *self, PyObject *args) { - return PyLong_FromSize_t(NRT_MemSys_get_stats_mi_free()); -} - - -/* - * Create a new MemInfo with a owner PyObject - */ -static PyObject * 
-meminfo_new(PyObject *self, PyObject *args) { - PyObject *addr_data_obj; - void *addr_data; - PyObject *ownerobj; - NRT_MemInfo *mi; - if (!PyArg_ParseTuple(args, "OO", &addr_data_obj, &ownerobj)) { - return NULL; - } - addr_data = PyLong_AsVoidPtr(addr_data_obj); - if (PyErr_Occurred()) - return NULL; - mi = meminfo_new_from_pyobject(addr_data, ownerobj); - return PyLong_FromVoidPtr(mi); -} - -/* - * Create a new MemInfo with a new NRT allocation - */ -static PyObject * -meminfo_alloc(PyObject *self, PyObject *args) { - NRT_MemInfo *mi; - Py_ssize_t size; - if (!PyArg_ParseTuple(args, "n", &size)) { - return NULL; - } - mi = NRT_MemInfo_alloc(size); - return PyLong_FromVoidPtr(mi); -} - -/* - * Like meminfo_alloc but set memory to zero after allocation and before - * deallocation. - */ -static PyObject * -meminfo_alloc_safe(PyObject *self, PyObject *args) { - NRT_MemInfo *mi; - Py_ssize_t size; - if (!PyArg_ParseTuple(args, "n", &size)) { - return NULL; - } - mi = NRT_MemInfo_alloc_safe(size); - return PyLong_FromVoidPtr(mi); -} - -static PyMethodDef ext_methods[] = { -#define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL } -#define declmethod_noargs(func) { #func , ( PyCFunction )func , METH_NOARGS, NULL } - declmethod_noargs(memsys_use_cpython_allocator), - declmethod_noargs(memsys_shutdown), - declmethod(memsys_set_atomic_inc_dec), - declmethod(memsys_set_atomic_cas), - declmethod_noargs(memsys_get_stats_alloc), - declmethod_noargs(memsys_get_stats_free), - declmethod_noargs(memsys_get_stats_mi_alloc), - declmethod_noargs(memsys_get_stats_mi_free), - declmethod(meminfo_new), - declmethod(meminfo_alloc), - declmethod(meminfo_alloc_safe), - { NULL }, -#undef declmethod -}; - - - -static PyObject * -build_c_helpers_dict(void) -{ - PyObject *dct = PyDict_New(); - if (dct == NULL) - goto error; - -#define _declpointer(name, value) do { \ - PyObject *o = PyLong_FromVoidPtr(value); \ - if (o == NULL) goto error; \ - if 
(PyDict_SetItemString(dct, name, o)) { \ - Py_DECREF(o); \ - goto error; \ - } \ - Py_DECREF(o); \ -} while (0) - -#define declmethod(func) _declpointer(#func, &NRT_##func) - -declmethod(adapt_ndarray_from_python); -declmethod(adapt_ndarray_to_python); -declmethod(adapt_buffer_from_python); -declmethod(MemInfo_alloc); -declmethod(MemInfo_alloc_safe); -declmethod(MemInfo_alloc_aligned); -declmethod(MemInfo_alloc_safe_aligned); -declmethod(MemInfo_alloc_dtor_safe); -declmethod(MemInfo_call_dtor); -declmethod(MemInfo_new_varsize); -declmethod(MemInfo_new_varsize_dtor); -declmethod(MemInfo_varsize_alloc); -declmethod(MemInfo_varsize_free); -declmethod(MemInfo_varsize_realloc); -declmethod(MemInfo_release); -declmethod(Allocate); -declmethod(Free); - - -#undef declmethod - return dct; -error: - Py_XDECREF(dct); - return NULL; -} - -MOD_INIT(_nrt_python) { - PyObject *m; - MOD_DEF(m, "_nrt_python", "No docs", ext_methods) - if (m == NULL) - return MOD_ERROR_VAL; - import_array(); - NRT_MemSys_init(); - if (init_nrt_python_module(m)) - return MOD_ERROR_VAL; - - Py_INCREF(&MemInfoType); - PyModule_AddObject(m, "_MemInfo", (PyObject *) (&MemInfoType)); - - PyModule_AddObject(m, "c_helpers", build_c_helpers_dict()); - - return MOD_SUCCESS_VAL(m); -} diff --git a/numba/numba/runtime/context.py b/numba/numba/runtime/context.py deleted file mode 100644 index f44fb8ca7..000000000 --- a/numba/numba/runtime/context.py +++ /dev/null @@ -1,222 +0,0 @@ -from __future__ import print_function, absolute_import, division - -from llvmlite import ir - -from numba import cgutils, types - - -class NRTContext(object): - """ - An object providing access to NRT APIs in the lowering pass. - """ - - def __init__(self, context, enabled): - self._context = context - self._enabled = enabled - - def _require_nrt(self): - if not self._enabled: - raise RuntimeError("NRT required but not enabled") - - def allocate(self, builder, size): - """ - Low-level allocate a new memory area of `size` bytes. 
- """ - self._require_nrt() - - mod = builder.module - fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t]) - fn = mod.get_or_insert_function(fnty, name="NRT_Allocate") - fn.return_value.add_attribute("noalias") - return builder.call(fn, [size]) - - def free(self, builder, ptr): - """ - Low-level free a memory area allocated with allocate(). - """ - self._require_nrt() - - mod = builder.module - fnty = ir.FunctionType(ir.VoidType(), [cgutils.voidptr_t]) - fn = mod.get_or_insert_function(fnty, name="NRT_Free") - return builder.call(fn, [ptr]) - - def meminfo_alloc(self, builder, size): - """ - Allocate a new MemInfo with a data payload of `size` bytes. - - A pointer to the MemInfo is returned. - """ - self._require_nrt() - - mod = builder.module - fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t]) - fn = mod.get_or_insert_function(fnty, name="NRT_MemInfo_alloc_safe") - fn.return_value.add_attribute("noalias") - return builder.call(fn, [size]) - - def meminfo_alloc_dtor(self, builder, size, dtor): - self._require_nrt() - - mod = builder.module - fnty = ir.FunctionType(cgutils.voidptr_t, - [cgutils.intp_t, cgutils.voidptr_t]) - fn = mod.get_or_insert_function(fnty, - name="NRT_MemInfo_alloc_dtor_safe") - fn.return_value.add_attribute("noalias") - return builder.call(fn, [size, - builder.bitcast(dtor, cgutils.voidptr_t)]) - - def meminfo_alloc_aligned(self, builder, size, align): - """ - Allocate a new MemInfo with an aligned data payload of `size` bytes. - The data pointer is aligned to `align` bytes. `align` can be either - a Python int or a LLVM uint32 value. - - A pointer to the MemInfo is returned. 
- """ - self._require_nrt() - - mod = builder.module - u32 = ir.IntType(32) - fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, u32]) - fn = mod.get_or_insert_function(fnty, - name="NRT_MemInfo_alloc_safe_aligned") - fn.return_value.add_attribute("noalias") - if isinstance(align, int): - align = self._context.get_constant(types.uint32, align) - else: - assert align.type == u32, "align must be a uint32" - return builder.call(fn, [size, align]) - - def meminfo_new_varsize(self, builder, size): - """ - Allocate a MemInfo pointing to a variable-sized data area. The area - is separately allocated (i.e. two allocations are made) so that - re-allocating it doesn't change the MemInfo's address. - - A pointer to the MemInfo is returned. - """ - self._require_nrt() - - mod = builder.module - fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t]) - fn = mod.get_or_insert_function(fnty, name="NRT_MemInfo_new_varsize") - fn.return_value.add_attribute("noalias") - return builder.call(fn, [size]) - - def meminfo_new_varsize_dtor(self, builder, size, dtor): - """ - Like meminfo_new_varsize() but also set the destructor for - cleaning up references to objects inside the allocation. - """ - self._require_nrt() - - mod = builder.module - fnty = ir.FunctionType(cgutils.voidptr_t, - [cgutils.intp_t, cgutils.voidptr_t]) - fn = mod.get_or_insert_function( - fnty, name="NRT_MemInfo_new_varsize_dtor") - return builder.call(fn, [size, dtor]) - - def meminfo_varsize_alloc(self, builder, meminfo, size): - """ - Allocate a new data area for a MemInfo created by meminfo_new_varsize(). - The new data pointer is returned, for convenience. - - Contrary to realloc(), this always allocates a new area and doesn't - copy the old data. This is useful if resizing a container needs - more than simply copying the data area (e.g. for hash tables). - - The old pointer will have to be freed with meminfo_varsize_free(). 
- """ - return self._call_varsize_alloc(builder, meminfo, size, - "NRT_MemInfo_varsize_alloc") - - def meminfo_varsize_realloc(self, builder, meminfo, size): - """ - Reallocate a data area allocated by meminfo_new_varsize(). - The new data pointer is returned, for convenience. - """ - return self._call_varsize_alloc(builder, meminfo, size, - "NRT_MemInfo_varsize_realloc") - - def meminfo_varsize_free(self, builder, meminfo, ptr): - """ - Free a memory area allocated for a NRT varsize object. - Note this does *not* free the NRT object itself! - """ - self._require_nrt() - - mod = builder.module - fnty = ir.FunctionType(ir.VoidType(), - [cgutils.voidptr_t, cgutils.voidptr_t]) - fn = mod.get_or_insert_function(fnty, name="NRT_MemInfo_varsize_free") - return builder.call(fn, (meminfo, ptr)) - - def _call_varsize_alloc(self, builder, meminfo, size, funcname): - self._require_nrt() - - mod = builder.module - fnty = ir.FunctionType(cgutils.voidptr_t, - [cgutils.voidptr_t, cgutils.intp_t]) - fn = mod.get_or_insert_function(fnty, name=funcname) - fn.return_value.add_attribute("noalias") - return builder.call(fn, [meminfo, size]) - - def meminfo_data(self, builder, meminfo): - """ - Given a MemInfo pointer, return a pointer to the allocated data - managed by it. This works for MemInfos allocated with all the - above methods. 
- """ - self._require_nrt() - - from numba.runtime.nrtdynmod import meminfo_data_ty - - mod = builder.module - fn = mod.get_or_insert_function(meminfo_data_ty, - name="NRT_MemInfo_data_fast") - return builder.call(fn, [meminfo]) - - def _call_incref_decref(self, builder, root_type, typ, value, - funcname, getters=()): - self._require_nrt() - - from numba.runtime.nrtdynmod import incref_decref_ty - - data_model = self._context.data_model_manager[typ] - - members = data_model.traverse(builder) - for mtyp, getter in members: - self._call_incref_decref(builder, root_type, mtyp, value, - funcname, getters + (getter,)) - - if data_model.has_nrt_meminfo(): - # Call the chain of getters to compute the member value - for getter in getters: - value = getter(value) - try: - meminfo = data_model.get_nrt_meminfo(builder, value) - except NotImplementedError as e: - raise NotImplementedError("%s: %s" % (root_type, str(e))) - assert meminfo is not None # since has_nrt_meminfo() - mod = builder.module - fn = mod.get_or_insert_function(incref_decref_ty, name=funcname) - # XXX "nonnull" causes a crash in test_dyn_array: can this - # function be called with a NULL pointer? - fn.args[0].add_attribute("noalias") - fn.args[0].add_attribute("nocapture") - builder.call(fn, [meminfo]) - - def incref(self, builder, typ, value): - """ - Recursively incref the given *value* and its members. - """ - self._call_incref_decref(builder, typ, typ, value, "NRT_incref") - - def decref(self, builder, typ, value): - """ - Recursively decref the given *value* and its members. - """ - self._call_incref_decref(builder, typ, typ, value, "NRT_decref") diff --git a/numba/numba/runtime/nrt.c b/numba/numba/runtime/nrt.c deleted file mode 100644 index 2dda3026e..000000000 --- a/numba/numba/runtime/nrt.c +++ /dev/null @@ -1,445 +0,0 @@ -#include -#include /* for memset */ -#include "nrt.h" -#include "assert.h" - -#if !defined MIN -#define MIN(a, b) ((a) < (b)) ? 
(a) : (b) -#endif - - -typedef int (*atomic_meminfo_cas_func)(void **ptr, void *cmp, - void *repl, void **oldptr); - - -/* NOTE: if changing the layout, please update numba.runtime.atomicops */ -struct MemInfo { - size_t refct; - NRT_dtor_function dtor; - void *dtor_info; - void *data; - size_t size; /* only used for NRT allocated memory */ -}; - - -/* - * Misc helpers. - */ - -static void nrt_fatal_error(const char *msg) -{ - fprintf(stderr, "Fatal Numba error: %s\n", msg); - fflush(stderr); /* it helps in Windows debug build */ - -#if defined(MS_WINDOWS) && defined(_DEBUG) - DebugBreak(); -#endif - abort(); -} - -/* - * Global resources. - */ - -struct MemSys { - /* Atomic increment and decrement function */ - NRT_atomic_inc_dec_func atomic_inc, atomic_dec; - /* Atomic CAS */ - atomic_meminfo_cas_func atomic_cas; - /* Shutdown flag */ - int shutting; - /* Stats */ - size_t stats_alloc, stats_free, stats_mi_alloc, stats_mi_free; - /* System allocation functions */ - struct { - NRT_malloc_func malloc; - NRT_realloc_func realloc; - NRT_free_func free; - } allocator; -}; - -/* The Memory System object */ -static NRT_MemSys TheMSys; - - -void NRT_MemSys_init(void) { - memset(&TheMSys, 0, sizeof(NRT_MemSys)); - /* Bind to libc allocator */ - TheMSys.allocator.malloc = malloc; - TheMSys.allocator.realloc = realloc; - TheMSys.allocator.free = free; -} - -void NRT_MemSys_shutdown(void) { - TheMSys.shutting = 1; - /* Revert to use our non-atomic stub for all atomic operations - because the JIT-ed version will be removed. - Since we are at interpreter shutdown, - it cannot be running multiple threads anymore. 
*/ - NRT_MemSys_set_atomic_inc_dec_stub(); - NRT_MemSys_set_atomic_cas_stub(); -} - -void NRT_MemSys_set_allocator(NRT_malloc_func malloc_func, - NRT_realloc_func realloc_func, - NRT_free_func free_func) -{ - if ((malloc_func != TheMSys.allocator.malloc || - realloc_func != TheMSys.allocator.realloc || - free_func != TheMSys.allocator.free) && - (TheMSys.stats_alloc != TheMSys.stats_free || - TheMSys.stats_mi_alloc != TheMSys.stats_mi_free)) { - nrt_fatal_error("cannot change allocator while blocks are allocated"); - } - TheMSys.allocator.malloc = malloc_func; - TheMSys.allocator.realloc = realloc_func; - TheMSys.allocator.free = free_func; -} - -void NRT_MemSys_set_atomic_inc_dec(NRT_atomic_inc_dec_func inc, - NRT_atomic_inc_dec_func dec) -{ - TheMSys.atomic_inc = inc; - TheMSys.atomic_dec = dec; -} - -void NRT_MemSys_set_atomic_cas(NRT_atomic_cas_func cas) { - TheMSys.atomic_cas = (atomic_meminfo_cas_func) cas; -} - -size_t NRT_MemSys_get_stats_alloc() { - return TheMSys.stats_alloc; -} - -size_t NRT_MemSys_get_stats_free() { - return TheMSys.stats_free; -} - -size_t NRT_MemSys_get_stats_mi_alloc() { - return TheMSys.stats_mi_alloc; -} - -size_t NRT_MemSys_get_stats_mi_free() { - return TheMSys.stats_mi_free; -} - -static -size_t nrt_testing_atomic_inc(size_t *ptr){ - /* non atomic */ - size_t out = *ptr; - out += 1; - *ptr = out; - return out; -} - -static -size_t nrt_testing_atomic_dec(size_t *ptr){ - /* non atomic */ - size_t out = *ptr; - out -= 1; - *ptr = out; - return out; -} - -static -int nrt_testing_atomic_cas(void* volatile *ptr, void *cmp, void *val, - void * *oldptr){ - /* non atomic */ - void *old = *ptr; - *oldptr = old; - if (old == cmp) { - *ptr = val; - return 1; - } - return 0; - -} - -void NRT_MemSys_set_atomic_inc_dec_stub(void){ - NRT_MemSys_set_atomic_inc_dec(nrt_testing_atomic_inc, - nrt_testing_atomic_dec); -} - -void NRT_MemSys_set_atomic_cas_stub(void) { - NRT_MemSys_set_atomic_cas(nrt_testing_atomic_cas); -} - - -/* - * The MemInfo 
structure. - */ - -void NRT_MemInfo_init(NRT_MemInfo *mi,void *data, size_t size, - NRT_dtor_function dtor, void *dtor_info) -{ - mi->refct = 1; /* starts with 1 refct */ - mi->dtor = dtor; - mi->dtor_info = dtor_info; - mi->data = data; - mi->size = size; - /* Update stats */ - TheMSys.atomic_inc(&TheMSys.stats_mi_alloc); -} - -NRT_MemInfo *NRT_MemInfo_new(void *data, size_t size, - NRT_dtor_function dtor, void *dtor_info) -{ - NRT_MemInfo *mi = NRT_Allocate(sizeof(NRT_MemInfo)); - NRT_MemInfo_init(mi, data, size, dtor, dtor_info); - return mi; -} - -size_t NRT_MemInfo_refcount(NRT_MemInfo *mi) { - /* Should never returns 0 for a valid MemInfo */ - if (mi && mi->data) - return mi->refct; - else{ - return (size_t)-1; - } -} - -static -void nrt_internal_dtor_safe(void *ptr, size_t size, void *info) { - NRT_Debug(nrt_debug_print("nrt_internal_dtor_safe %p, %p\n", ptr, info)); - /* See NRT_MemInfo_alloc_safe() */ - memset(ptr, 0xDE, MIN(size, 256)); -} - -static -void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out) { - NRT_MemInfo *mi; - char *base = NRT_Allocate(sizeof(NRT_MemInfo) + size); - mi = (NRT_MemInfo *) base; - *mi_out = mi; - return base + sizeof(NRT_MemInfo); -} - - -static -void nrt_internal_custom_dtor_safe(void *ptr, size_t size, void *info) { - NRT_dtor_function dtor = info; - NRT_Debug(nrt_debug_print("nrt_internal_custom_dtor_safe %p, %p\n", - ptr, info)); - if (dtor) { - dtor(ptr, size, NULL); - } - - nrt_internal_dtor_safe(ptr, size, NULL); -} - - -NRT_MemInfo *NRT_MemInfo_alloc(size_t size) { - NRT_MemInfo *mi; - void *data = nrt_allocate_meminfo_and_data(size, &mi); - NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc %p\n", data)); - NRT_MemInfo_init(mi, data, size, NULL, NULL); - return mi; -} - -NRT_MemInfo *NRT_MemInfo_alloc_safe(size_t size) { - return NRT_MemInfo_alloc_dtor_safe(size, NULL); -} - -NRT_MemInfo* NRT_MemInfo_alloc_dtor_safe(size_t size, NRT_dtor_function dtor) { - NRT_MemInfo *mi; - void *data = 
nrt_allocate_meminfo_and_data(size, &mi); - /* Only fill up a couple cachelines with debug markers, to minimize - overhead. */ - memset(data, 0xCB, MIN(size, 256)); - NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_dtor_safe %p %zu\n", data, size)); - NRT_MemInfo_init(mi, data, size, nrt_internal_custom_dtor_safe, dtor); - return mi; -} - - -static -void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align, - NRT_MemInfo **mi) -{ - size_t offset, intptr, remainder; - char *base = nrt_allocate_meminfo_and_data(size + 2 * align, mi); - intptr = (size_t) base; - /* See if we are aligned */ - remainder = intptr % align; - if (remainder == 0){ /* Yes */ - offset = 0; - } else { /* No, move forward `offset` bytes */ - offset = align - remainder; - } - return base + offset; -} - -NRT_MemInfo *NRT_MemInfo_alloc_aligned(size_t size, unsigned align) { - NRT_MemInfo *mi; - void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi); - NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_aligned %p\n", data)); - NRT_MemInfo_init(mi, data, size, NULL, NULL); - return mi; -} - -NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned(size_t size, unsigned align) { - NRT_MemInfo *mi; - void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi); - /* Only fill up a couple cachelines with debug markers, to minimize - overhead. 
*/ - memset(data, 0xCB, MIN(size, 256)); - NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_safe_aligned %p %zu\n", - data, size)); - NRT_MemInfo_init(mi, data, size, nrt_internal_dtor_safe, (void*)size); - return mi; -} - -void NRT_MemInfo_destroy(NRT_MemInfo *mi) { - NRT_Free(mi); - TheMSys.atomic_inc(&TheMSys.stats_mi_free); -} - -void NRT_MemInfo_acquire(NRT_MemInfo *mi) { - NRT_Debug(nrt_debug_print("NRT_acquire %p refct=%zu\n", mi, - mi->refct)); - assert(mi->refct > 0 && "RefCt cannot be zero"); - TheMSys.atomic_inc(&mi->refct); -} - -void NRT_MemInfo_call_dtor(NRT_MemInfo *mi) { - NRT_Debug(nrt_debug_print("nrt_meminfo_call_dtor %p\n", mi)); - if (mi->dtor && !TheMSys.shutting) - /* We have a destructor and the system is not shutting down */ - mi->dtor(mi->data, mi->size, mi->dtor_info); - /* Clear and release MemInfo */ - NRT_MemInfo_destroy(mi); -} - -void NRT_MemInfo_release(NRT_MemInfo *mi) { - NRT_Debug(nrt_debug_print("NRT_release %p refct=%zu\n", mi, - mi->refct)); - assert (mi->refct > 0 && "RefCt cannot be 0"); - /* RefCt drop to zero */ - if (TheMSys.atomic_dec(&mi->refct) == 0) { - NRT_MemInfo_call_dtor(mi); - } -} - -void* NRT_MemInfo_data(NRT_MemInfo* mi) { - return mi->data; -} - -size_t NRT_MemInfo_size(NRT_MemInfo* mi) { - return mi->size; -} - - -void NRT_MemInfo_dump(NRT_MemInfo *mi, FILE *out) { - fprintf(out, "MemInfo %p refcount %zu\n", mi, mi->refct); -} - -/* - * Resizable buffer API. 
- */ - -static void -nrt_varsize_dtor(void *ptr, size_t size, void *info) { - NRT_Debug(nrt_debug_print("nrt_buffer_dtor %p\n", ptr)); - if (info) { - /* call element dtor */ - typedef void dtor_fn_t(void *ptr); - dtor_fn_t *dtor = info; - dtor(ptr); - } - NRT_Free(ptr); -} - -NRT_MemInfo *NRT_MemInfo_new_varsize(size_t size) -{ - NRT_MemInfo *mi; - void *data = NRT_Allocate(size); - if (data == NULL) - return NULL; - - mi = NRT_MemInfo_new(data, size, nrt_varsize_dtor, NULL); - NRT_Debug(nrt_debug_print("NRT_MemInfo_varsize_alloc size=%zu " - "-> meminfo=%p, data=%p\n", size, mi, data)); - return mi; -} - -NRT_MemInfo *NRT_MemInfo_new_varsize_dtor(size_t size, NRT_dtor_function dtor) { - NRT_MemInfo *mi = NRT_MemInfo_new_varsize(size); - if (mi) { - mi->dtor_info = dtor; - } - return mi; -} - -void *NRT_MemInfo_varsize_alloc(NRT_MemInfo *mi, size_t size) -{ - if (mi->dtor != nrt_varsize_dtor) { - nrt_fatal_error("ERROR: NRT_MemInfo_varsize_alloc called " - "with a non varsize-allocated meminfo"); - return NULL; /* unreachable */ - } - mi->data = NRT_Allocate(size); - if (mi->data == NULL) - return NULL; - mi->size = size; - NRT_Debug(nrt_debug_print("NRT_MemInfo_varsize_alloc %p size=%zu " - "-> data=%p\n", mi, size, mi->data)); - return mi->data; -} - -void *NRT_MemInfo_varsize_realloc(NRT_MemInfo *mi, size_t size) -{ - if (mi->dtor != nrt_varsize_dtor) { - nrt_fatal_error("ERROR: NRT_MemInfo_varsize_realloc called " - "with a non varsize-allocated meminfo"); - return NULL; /* unreachable */ - } - mi->data = NRT_Reallocate(mi->data, size); - if (mi->data == NULL) - return NULL; - mi->size = size; - NRT_Debug(nrt_debug_print("NRT_MemInfo_varsize_realloc %p size=%zu " - "-> data=%p\n", mi, size, mi->data)); - return mi->data; -} - -void NRT_MemInfo_varsize_free(NRT_MemInfo *mi, void *ptr) -{ - NRT_Free(ptr); - if (ptr == mi->data) - mi->data = NULL; -} - -/* - * Low-level allocation wrappers. 
- */ - -void* NRT_Allocate(size_t size) { - void *ptr = TheMSys.allocator.malloc(size); - NRT_Debug(nrt_debug_print("NRT_Allocate bytes=%zu ptr=%p\n", size, ptr)); - TheMSys.atomic_inc(&TheMSys.stats_alloc); - return ptr; -} - -void *NRT_Reallocate(void *ptr, size_t size) { - void *new_ptr = TheMSys.allocator.realloc(ptr, size); - NRT_Debug(nrt_debug_print("NRT_Reallocate bytes=%zu ptr=%p -> %p\n", - size, ptr, new_ptr)); - return new_ptr; -} - -void NRT_Free(void *ptr) { - NRT_Debug(nrt_debug_print("NRT_Free %p\n", ptr)); - TheMSys.allocator.free(ptr); - TheMSys.atomic_inc(&TheMSys.stats_free); -} - -/* - * Debugging printf function used internally - */ -void nrt_debug_print(char *fmt, ...) { - va_list args; - - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); -} diff --git a/numba/numba/runtime/nrt.h b/numba/numba/runtime/nrt.h deleted file mode 100644 index e85c47c1b..000000000 --- a/numba/numba/runtime/nrt.h +++ /dev/null @@ -1,226 +0,0 @@ -/* -All functions described here are threadsafe. 
-*/ - -#ifndef NUMBA_NRT_H_ -#define NUMBA_NRT_H_ - - -#include -#include -#include "../_numba_common.h" - -/* Debugging facilities - enabled at compile-time */ -/* #undef NDEBUG */ -#if 0 -# define NRT_Debug(X) X -#else -# define NRT_Debug(X) if (0) { X; } -#endif - -/* TypeDefs */ -typedef void (*NRT_dtor_function)(void *ptr, size_t size, void *info); -typedef size_t (*NRT_atomic_inc_dec_func)(size_t *ptr); -typedef int (*NRT_atomic_cas_func)(void * volatile *ptr, void *cmp, void *repl, - void **oldptr); - -typedef struct MemInfo NRT_MemInfo; -typedef struct MemSys NRT_MemSys; - -typedef void *(*NRT_malloc_func)(size_t size); -typedef void *(*NRT_realloc_func)(void *ptr, size_t new_size); -typedef void (*NRT_free_func)(void *ptr); - - -/* Memory System API */ - -/* Initialize the memory system */ -VISIBILITY_HIDDEN -void NRT_MemSys_init(void); - -/* Shutdown the memory system */ -VISIBILITY_HIDDEN -void NRT_MemSys_shutdown(void); - -/* - * Register the system allocation functions - */ -VISIBILITY_HIDDEN -void NRT_MemSys_set_allocator(NRT_malloc_func, NRT_realloc_func, NRT_free_func); - -/* - * Register the atomic increment and decrement functions - */ -VISIBILITY_HIDDEN -void NRT_MemSys_set_atomic_inc_dec(NRT_atomic_inc_dec_func inc, - NRT_atomic_inc_dec_func dec); - - -/* - * Register the atomic compare and swap function - */ -VISIBILITY_HIDDEN -void NRT_MemSys_set_atomic_cas(NRT_atomic_cas_func cas); - -/* - * Register a non-atomic STUB for increment and decrement - */ -VISIBILITY_HIDDEN -void NRT_MemSys_set_atomic_inc_dec_stub(void); - -/* - * Register a non-atomic STUB for compare and swap - */ -VISIBILITY_HIDDEN -void NRT_MemSys_set_atomic_cas_stub(void); - -/* - * The following functions get internal statistics of the memory subsystem. 
- */ -VISIBILITY_HIDDEN -size_t NRT_MemSys_get_stats_alloc(void); -VISIBILITY_HIDDEN -size_t NRT_MemSys_get_stats_free(void); -VISIBILITY_HIDDEN -size_t NRT_MemSys_get_stats_mi_alloc(void); -VISIBILITY_HIDDEN -size_t NRT_MemSys_get_stats_mi_free(void); - -/* Memory Info API */ - -/* Create a new MemInfo for external memory - * - * data: data pointer being tracked - * dtor: destructor to execute - * dtor_info: additional information to pass to the destructor - */ -VISIBILITY_HIDDEN -NRT_MemInfo* NRT_MemInfo_new(void *data, size_t size, - NRT_dtor_function dtor, void *dtor_info); - -VISIBILITY_HIDDEN -void NRT_MemInfo_init(NRT_MemInfo *mi, void *data, size_t size, - NRT_dtor_function dtor, void *dtor_info); - -/* - * Returns the refcount of a MemInfo or (size_t)-1 if error. - */ -VISIBILITY_HIDDEN -size_t NRT_MemInfo_refcount(NRT_MemInfo *mi); - -/* - * Allocate memory of `size` bytes and return a pointer to a MemInfo structure - * that describes the allocation - */ -VISIBILITY_HIDDEN -NRT_MemInfo *NRT_MemInfo_alloc(size_t size); - -/* - * The "safe" NRT_MemInfo_alloc performs additional steps to help debug - * memory errors. - * It is guaranteed to: - * - zero-fill to the memory region after allocation and before deallocation. - * - may do more in the future - */ -VISIBILITY_HIDDEN -NRT_MemInfo *NRT_MemInfo_alloc_safe(size_t size); - -/* - * Similar to NRT_MemInfo_alloc_safe but with a custom dtor. - */ -VISIBILITY_HIDDEN -NRT_MemInfo* NRT_MemInfo_alloc_dtor_safe(size_t size, NRT_dtor_function dtor); - -/* - * Aligned versions of the NRT_MemInfo_alloc and NRT_MemInfo_alloc_safe. - * These take an additional argument `align` for number of bytes to align to. - */ -VISIBILITY_HIDDEN -NRT_MemInfo *NRT_MemInfo_alloc_aligned(size_t size, unsigned align); -VISIBILITY_HIDDEN -NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned(size_t size, unsigned align); - -/* - * Internal API. - * Release a MemInfo. Calls NRT_MemSys_insert_meminfo. 
- */ -VISIBILITY_HIDDEN -void NRT_MemInfo_destroy(NRT_MemInfo *mi); - -/* - * Acquire a reference to a MemInfo - */ -VISIBILITY_HIDDEN -void NRT_MemInfo_acquire(NRT_MemInfo* mi); - -/* - * Release a reference to a MemInfo - */ -VISIBILITY_HIDDEN -void NRT_MemInfo_release(NRT_MemInfo* mi); - -/* - * Internal/Compiler API. - * Invoke the registered destructor of a MemInfo. - */ -VISIBILITY_HIDDEN -void NRT_MemInfo_call_dtor(NRT_MemInfo *mi); - -/* - * Returns the data pointer - */ -VISIBILITY_HIDDEN -void* NRT_MemInfo_data(NRT_MemInfo* mi); - -/* - * Returns the allocated size - */ -VISIBILITY_HIDDEN -size_t NRT_MemInfo_size(NRT_MemInfo* mi); - - -/* - * NRT API for resizable buffers. - */ -VISIBILITY_HIDDEN -NRT_MemInfo *NRT_MemInfo_new_varsize(size_t size); -VISIBILITY_HIDDEN -NRT_MemInfo *NRT_MemInfo_new_varsize_dtor(size_t size, NRT_dtor_function dtor); -VISIBILITY_HIDDEN -void *NRT_MemInfo_varsize_alloc(NRT_MemInfo *mi, size_t size); -VISIBILITY_HIDDEN -void *NRT_MemInfo_varsize_realloc(NRT_MemInfo *mi, size_t size); -VISIBILITY_HIDDEN -void NRT_MemInfo_varsize_free(NRT_MemInfo *mi, void *ptr); - -/* - * Print debug info to FILE - */ -VISIBILITY_HIDDEN -void NRT_MemInfo_dump(NRT_MemInfo *mi, FILE *out); - - -/* Low-level allocation wrappers. */ - -/* - * Allocate memory of `size` bytes. - */ -VISIBILITY_HIDDEN void* NRT_Allocate(size_t size); - -/* - * Deallocate memory pointed by `ptr`. - */ -VISIBILITY_HIDDEN void NRT_Free(void *ptr); - -/* - * Reallocate memory at `ptr`. 
- */ -VISIBILITY_HIDDEN void *NRT_Reallocate(void *ptr, size_t size); - -/* - * Debugging printf function used internally - */ -VISIBILITY_HIDDEN void nrt_debug_print(char *fmt, ...); - - -#endif /* NUMBA_NRT_H_ */ diff --git a/numba/numba/runtime/nrt.py b/numba/numba/runtime/nrt.py deleted file mode 100644 index e113ac456..000000000 --- a/numba/numba/runtime/nrt.py +++ /dev/null @@ -1,121 +0,0 @@ -from __future__ import print_function, absolute_import, division - -from collections import namedtuple - -from . import nrtdynmod -from llvmlite import binding as ll - -from numba.utils import finalize as _finalize -from . import _nrt_python as _nrt - -_nrt_mstats = namedtuple("nrt_mstats", ["alloc", "free", "mi_alloc", "mi_free"]) - - -class _Runtime(object): - def __init__(self): - self._init = False - - def initialize(self, ctx): - """Initializes the NRT - - Must be called before any actual call to the NRT API. - Safe to be called multiple times. - """ - from numba.compiler import lock_compiler - - with lock_compiler: - if self._init: - # Already initialized - return - - # Register globals into the system - for py_name in _nrt.c_helpers: - c_name = "NRT_" + py_name - c_address = _nrt.c_helpers[py_name] - ll.add_symbol(c_name, c_address) - - # Compile atomic operations - self._library = nrtdynmod.compile_nrt_functions(ctx) - - self._ptr_inc = self._library.get_pointer_to_function("nrt_atomic_add") - self._ptr_dec = self._library.get_pointer_to_function("nrt_atomic_sub") - self._ptr_cas = self._library.get_pointer_to_function("nrt_atomic_cas") - - # Install atomic ops to NRT - _nrt.memsys_set_atomic_inc_dec(self._ptr_inc, self._ptr_dec) - _nrt.memsys_set_atomic_cas(self._ptr_cas) - - self._init = True - - def _init_guard(self): - if not self._init: - msg = "Runtime must be initialized before use." 
- raise RuntimeError(msg) - - @staticmethod - def shutdown(): - """ - Shutdown the NRT - Safe to be called without calling Runtime.initialize first - """ - _nrt.memsys_shutdown() - - @property - def library(self): - """ - Return the Library object containing the various NRT functions. - """ - self._init_guard() - return self._library - - def meminfo_new(self, data, pyobj): - """ - Returns a MemInfo object that tracks memory at `data` owned by `pyobj`. - MemInfo will acquire a reference on `pyobj`. - The release of MemInfo will release a reference on `pyobj`. - """ - self._init_guard() - mi = _nrt.meminfo_new(data, pyobj) - return MemInfo(mi) - - def meminfo_alloc(self, size, safe=False): - """ - Allocate a new memory of `size` bytes and returns a MemInfo object - that tracks the allocation. When there is no more reference to the - MemInfo object, the underlying memory will be deallocated. - - If `safe` flag is True, the memory is allocated using the `safe` scheme. - This is used for debugging and testing purposes. - See `NRT_MemInfo_alloc_safe()` in "nrt.h" for details. - """ - self._init_guard() - if safe: - mi = _nrt.meminfo_alloc_safe(size) - else: - mi = _nrt.meminfo_alloc(size) - return MemInfo(mi) - - def get_allocation_stats(self): - """ - Returns a namedtuple of (alloc, free, mi_alloc, mi_free) for count of - each memory operations. 
- """ - # No init guard needed to access stats members - return _nrt_mstats(alloc=_nrt.memsys_get_stats_alloc(), - free=_nrt.memsys_get_stats_free(), - mi_alloc=_nrt.memsys_get_stats_mi_alloc(), - mi_free=_nrt.memsys_get_stats_mi_free()) - - -# Alias to _nrt_python._MemInfo -MemInfo = _nrt._MemInfo - -# Create runtime -_nrt.memsys_use_cpython_allocator() -rtsys = _Runtime() - -# Install finalizer -_finalize(rtsys, _Runtime.shutdown) - -# Avoid future use of the class -del _Runtime diff --git a/numba/numba/runtime/nrtdynmod.py b/numba/numba/runtime/nrtdynmod.py deleted file mode 100644 index c736352aa..000000000 --- a/numba/numba/runtime/nrtdynmod.py +++ /dev/null @@ -1,213 +0,0 @@ -""" -Dynamically generate the NRT module -""" - -from __future__ import print_function, absolute_import, division - -from numba.config import MACHINE_BITS -from numba import cgutils, types -from llvmlite import ir, binding - -# Flag to enable debug print in NRT_incref and NRT_decref -_debug_print = False - -_word_type = ir.IntType(MACHINE_BITS) -_pointer_type = ir.PointerType(ir.IntType(8)) - -_meminfo_struct_type = ir.LiteralStructType([ - _word_type, # size_t refct - _pointer_type, # dtor_function dtor - _pointer_type, # void *dtor_info - _pointer_type, # void *data - _word_type, # size_t size - ]) - - -incref_decref_ty = ir.FunctionType(ir.VoidType(), [_pointer_type]) -meminfo_data_ty = ir.FunctionType(_pointer_type, [_pointer_type]) - - -def _define_nrt_meminfo_data(module): - """ - Implement NRT_MemInfo_data_fast in the module. This allows LLVM - to inline lookup of the data pointer. 
- """ - fn = module.get_or_insert_function(meminfo_data_ty, - name="NRT_MemInfo_data_fast") - builder = ir.IRBuilder(fn.append_basic_block()) - [ptr] = fn.args - struct_ptr = builder.bitcast(ptr, _meminfo_struct_type.as_pointer()) - data_ptr = builder.load(cgutils.gep(builder, struct_ptr, 0, 3)) - builder.ret(data_ptr) - - -def _define_nrt_incref(module, atomic_incr): - """ - Implement NRT_incref in the module - """ - fn_incref = module.get_or_insert_function(incref_decref_ty, - name="NRT_incref") - # Cannot inline this for refcount pruning to work - fn_incref.attributes.add('noinline') - builder = ir.IRBuilder(fn_incref.append_basic_block()) - [ptr] = fn_incref.args - is_null = builder.icmp_unsigned("==", ptr, cgutils.get_null_value(ptr.type)) - with cgutils.if_unlikely(builder, is_null): - builder.ret_void() - - if _debug_print: - cgutils.printf(builder, "*** NRT_Incref %zu [%p]\n", builder.load(ptr), - ptr) - builder.call(atomic_incr, [builder.bitcast(ptr, atomic_incr.args[0].type)]) - builder.ret_void() - - -def _define_nrt_decref(module, atomic_decr): - """ - Implement NRT_decref in the module - """ - fn_decref = module.get_or_insert_function(incref_decref_ty, - name="NRT_decref") - # Cannot inline this for refcount pruning to work - fn_decref.attributes.add('noinline') - calldtor = module.add_function(ir.FunctionType(ir.VoidType(), [_pointer_type]), - name="NRT_MemInfo_call_dtor") - - builder = ir.IRBuilder(fn_decref.append_basic_block()) - [ptr] = fn_decref.args - is_null = builder.icmp_unsigned("==", ptr, cgutils.get_null_value(ptr.type)) - with cgutils.if_unlikely(builder, is_null): - builder.ret_void() - - if _debug_print: - cgutils.printf(builder, "*** NRT_Decref %zu [%p]\n", builder.load(ptr), - ptr) - - # For memory fence usage, see https://llvm.org/docs/Atomics.html - - # A release fence is used before the relevant write operation. - # No-op on x86. On POWER, it lowers to lwsync. 
- builder.fence("release") - newrefct = builder.call(atomic_decr, - [builder.bitcast(ptr, atomic_decr.args[0].type)]) - - refct_eq_0 = builder.icmp_unsigned("==", newrefct, - ir.Constant(newrefct.type, 0)) - with cgutils.if_unlikely(builder, refct_eq_0): - # An acquire fence is used after the relevant read operation. - # No-op on x86. On POWER, it lowers to lwsync. - builder.fence("acquire") - builder.call(calldtor, [ptr]) - builder.ret_void() - - -# Set this to True to measure the overhead of atomic refcounts compared -# to non-atomic. -_disable_atomicity = 0 - - -def _define_atomic_inc_dec(module, op, ordering): - """Define a llvm function for atomic increment/decrement to the given module - Argument ``op`` is the operation "add"/"sub". Argument ``ordering`` is - the memory ordering. The generated function returns the new value. - """ - ftype = ir.FunctionType(_word_type, [_word_type.as_pointer()]) - fn_atomic = ir.Function(module, ftype, name="nrt_atomic_{0}".format(op)) - - [ptr] = fn_atomic.args - bb = fn_atomic.append_basic_block() - builder = ir.IRBuilder(bb) - ONE = ir.Constant(_word_type, 1) - if not _disable_atomicity: - oldval = builder.atomic_rmw(op, ptr, ONE, ordering=ordering) - # Perform the operation on the old value so that we can pretend returning - # the "new" value. - res = getattr(builder, op)(oldval, ONE) - builder.ret(res) - else: - oldval = builder.load(ptr) - newval = getattr(builder, op)(oldval, ONE) - builder.store(newval, ptr) - builder.ret(oldval) - - return fn_atomic - - -def _define_atomic_cas(module, ordering): - """Define a llvm function for atomic compare-and-swap. - The generated function is a direct wrapper of the LLVM cmpxchg with the - difference that the a int indicate success (1) or failure (0) is returned - and the last argument is a output pointer for storing the old value. - - Note - ---- - On failure, the generated function behaves like an atomic load. The loaded - value is stored to the last argument. 
- """ - ftype = ir.FunctionType(ir.IntType(32), [_word_type.as_pointer(), - _word_type, _word_type, - _word_type.as_pointer()]) - fn_cas = ir.Function(module, ftype, name="nrt_atomic_cas") - - [ptr, cmp, repl, oldptr] = fn_cas.args - bb = fn_cas.append_basic_block() - builder = ir.IRBuilder(bb) - outtup = builder.cmpxchg(ptr, cmp, repl, ordering=ordering) - old, ok = cgutils.unpack_tuple(builder, outtup, 2) - builder.store(old, oldptr) - builder.ret(builder.zext(ok, ftype.return_type)) - - return fn_cas - - -def _define_nrt_unresolved_abort(ctx, module): - """ - Defines an abort function due to unresolved symbol. - - The function takes no args and will always raise an exception. - It should be safe to call this function with incorrect number of arguments. - """ - fnty = ctx.call_conv.get_function_type(types.none, ()) - fn = ir.Function(module, fnty, name="nrt_unresolved_abort") - bb = fn.append_basic_block() - builder = ir.IRBuilder(bb) - msg = "numba jitted function aborted due to unresolved symbol" - ctx.call_conv.return_user_exc(builder, RuntimeError, (msg,)) - return fn - - -def create_nrt_module(ctx): - """ - Create an IR module defining the LLVM NRT functions. - A (IR module, library) tuple is returned. - """ - codegen = ctx.codegen() - library = codegen.create_library("nrt") - - # Implement LLVM module with atomic ops - ir_mod = library.create_ir_module("nrt_module") - - atomic_inc = _define_atomic_inc_dec(ir_mod, "add", ordering='monotonic') - atomic_dec = _define_atomic_inc_dec(ir_mod, "sub", ordering='monotonic') - _define_atomic_cas(ir_mod, ordering='monotonic') - - _define_nrt_meminfo_data(ir_mod) - _define_nrt_incref(ir_mod, atomic_inc) - _define_nrt_decref(ir_mod, atomic_dec) - - _define_nrt_unresolved_abort(ctx, ir_mod) - - return ir_mod, library - - -def compile_nrt_functions(ctx): - """ - Compile all LLVM NRT functions and return a library containing them. - The library is created using the given target context. 
- """ - ir_mod, library = create_nrt_module(ctx) - - library.add_ir_module(ir_mod) - library.finalize() - - return library diff --git a/numba/numba/runtime/nrtopt.py b/numba/numba/runtime/nrtopt.py deleted file mode 100644 index 0ba3f5de1..000000000 --- a/numba/numba/runtime/nrtopt.py +++ /dev/null @@ -1,171 +0,0 @@ -""" -NRT specific optimizations -""" -import re -from collections import defaultdict, deque -from llvmlite import binding as ll -from numba import cgutils - -_regex_incref = re.compile(r'\s*(?:tail)?\s*call void @NRT_incref\((.*)\)') -_regex_decref = re.compile(r'\s*(?:tail)?\s*call void @NRT_decref\((.*)\)') -_regex_bb = re.compile(r'([\'"]?[-a-zA-Z$._][-a-zA-Z$._0-9]*[\'"]?:)|^define') - - -def _remove_redundant_nrt_refct(llvmir): - # Note: As soon as we have better utility in analyzing materialized LLVM - # module in llvmlite, we can redo this without so much string - # processing. - def _extract_functions(module): - cur = [] - for line in str(module).splitlines(): - if line.startswith('define'): - # start of function - assert not cur - cur.append(line) - elif line.startswith('}'): - # end of function - assert cur - cur.append(line) - yield True, cur - cur = [] - elif cur: - cur.append(line) - else: - yield False, [line] - - def _process_function(func_lines): - out = [] - for is_bb, bb_lines in _extract_basic_blocks(func_lines): - if is_bb and bb_lines: - bb_lines = _process_basic_block(bb_lines) - out += bb_lines - return out - - def _extract_basic_blocks(func_lines): - assert func_lines[0].startswith('define') - assert func_lines[-1].startswith('}') - yield False, [func_lines[0]] - - cur = [] - for ln in func_lines[1:-1]: - m = _regex_bb.match(ln) - if m is not None: - # line is a basic block separator - yield True, cur - cur = [] - yield False, [ln] - elif ln: - cur.append(ln) - - yield True, cur - yield False, [func_lines[-1]] - - def _process_basic_block(bb_lines): - bb_lines = _move_and_group_decref_after_all_increfs(bb_lines) - bb_lines = 
_prune_redundant_refct_ops(bb_lines) - return bb_lines - - def _examine_refct_op(bb_lines): - for num, ln in enumerate(bb_lines): - m = _regex_incref.match(ln) - if m is not None: - yield num, m.group(1), None - continue - - m = _regex_decref.match(ln) - if m is not None: - yield num, None, m.group(1) - continue - - yield ln, None, None - - def _prune_redundant_refct_ops(bb_lines): - incref_map = defaultdict(deque) - decref_map = defaultdict(deque) - to_remove = set() - for num, incref_var, decref_var in _examine_refct_op(bb_lines): - assert not (incref_var and decref_var) - if incref_var: - if incref_var == 'i8* null': - to_remove.add(num) - else: - incref_map[incref_var].append(num) - elif decref_var: - if decref_var == 'i8* null': - to_remove.add(num) - else: - decref_map[decref_var].append(num) - - for var, decops in decref_map.items(): - incops = incref_map[var] - ct = min(len(incops), len(decops)) - for _ in range(ct): - to_remove.add(incops.pop()) - to_remove.add(decops.popleft()) - - return [ln for num, ln in enumerate(bb_lines) - if num not in to_remove] - - def _move_and_group_decref_after_all_increfs(bb_lines): - # find last incref - last_incref_pos = 0 - for pos, ln in enumerate(bb_lines): - if _regex_incref.match(ln) is not None: - last_incref_pos = pos + 1 - - # find last decref - last_decref_pos = 0 - for pos, ln in enumerate(bb_lines): - if _regex_decref.match(ln) is not None: - last_decref_pos = pos + 1 - - last_pos = max(last_incref_pos, last_decref_pos) - - # find decrefs before last_pos - decrefs = [] - head = [] - for ln in bb_lines[:last_pos]: - if _regex_decref.match(ln) is not None: - decrefs.append(ln) - else: - head.append(ln) - - # insert decrefs at last_pos - return head + decrefs + bb_lines[last_pos:] - - # Driver - processed = [] - - for is_func, lines in _extract_functions(llvmir): - if is_func: - lines = _process_function(lines) - - processed += lines - - return '\n'.join(processed) - - -def remove_redundant_nrt_refct(ll_module): - 
""" - Remove redundant reference count operations from the - `llvmlite.binding.ModuleRef`. This parses the ll_module as a string and - line by line to remove the unnecessary nrt refct pairs within each block. - Decref calls are moved after the last incref call in the block to avoid - temporarily decref'ing to zero (which can happen due to hidden decref from - alias). - - Note: non-threadsafe due to usage of global LLVMcontext - """ - # Early escape if NRT_incref is not used - try: - ll_module.get_function('NRT_incref') - except NameError: - return ll_module - - # the optimisation pass loses the name of module as it operates on - # strings, so back it up and reset it on completion - name = ll_module.name - newll = _remove_redundant_nrt_refct(str(ll_module)) - new_mod = ll.parse_assembly(newll) - new_mod.name = cgutils.normalize_ir_text(name) - return new_mod diff --git a/numba/numba/scripts/__init__.py b/numba/numba/scripts/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/numba/numba/scripts/generate_lower_listing.py b/numba/numba/scripts/generate_lower_listing.py deleted file mode 100644 index ceef17207..000000000 --- a/numba/numba/scripts/generate_lower_listing.py +++ /dev/null @@ -1,169 +0,0 @@ -""" -Generate documentation for all registered implementation for lowering -using reStructured text. 
-""" - -from __future__ import print_function - -from subprocess import check_output - -import os.path -try: - from StringIO import StringIO # py2 -except ImportError: - from io import StringIO -from collections import defaultdict -import inspect -from functools import partial - -import numba -from numba.targets.registry import cpu_target - - -def git_hash(): - out = check_output(['git', 'log', "--pretty=format:'%H'", '-n', '1']) - return out.decode('ascii').strip("'\"") - - -def get_func_name(fn): - return getattr(fn, '__qualname__', fn.__name__) - - -def gather_function_info(backend): - fninfos = defaultdict(list) - basepath = os.path.dirname(os.path.dirname(numba.__file__)) - for fn, osel in backend._defns.items(): - for sig, impl in osel.versions: - info = {} - fninfos[fn].append(info) - info['fn'] = fn - info['sig'] = sig - code, firstlineno = inspect.getsourcelines(impl) - path = inspect.getsourcefile(impl) - info['impl'] = { - 'name': get_func_name(impl), - 'filename': os.path.relpath(path, start=basepath), - 'lines': (firstlineno, firstlineno + len(code) - 1), - 'docstring': impl.__doc__ - } - - return fninfos - - -def bind_file_to_print(fobj): - return partial(print, file=fobj) - - -def format_signature(sig): - def fmt(c): - try: - return c.__name__ - except AttributeError: - return repr(c).strip('\'"') - out = tuple(map(fmt, sig)) - return '`({0})`'.format(', '.join(out)) - - -github_url = 'https://github.com/numba/numba/blob/{commit}/{path}#L{firstline}-L{lastline}' - -description = """ -This lists all lowering definition registered to the CPU target. -Each subsection corresponds to a Python function that is supported by numba -nopython mode. These functions have one or more lower implementation with -different signatures. The compiler chooses the most specific implementation -from all overloads. 
-""" - - -def format_function_infos(fninfos): - buf = StringIO() - try: - print = bind_file_to_print(buf) - - title_line = "Lowering Listing" - print(title_line) - print('=' * len(title_line)) - - print(description) - - commit = git_hash() - - def format_fname(fn): - try: - fname = "{0}.{1}".format(fn.__module__, get_func_name(fn)) - except AttributeError: - fname = repr(fn) - return fn, fname - - for fn, fname in sorted(map(format_fname, fninfos), key=lambda x: x[1]): - impinfos = fninfos[fn] - header_line = "``{0}``".format(fname) - print(header_line) - print('-' * len(header_line)) - print() - - formatted_sigs = map( - lambda x: format_signature(x['sig']), impinfos) - sorted_impinfos = sorted(zip(formatted_sigs, impinfos), - key=lambda x: x[0]) - - col_signatures = ['Signature'] - col_urls = ['Definition'] - - for fmtsig, info in sorted_impinfos: - impl = info['impl'] - - filename = impl['filename'] - lines = impl['lines'] - fname = impl['name'] - - source = '{0} lines {1}-{2}'.format(filename, *lines) - link = github_url.format(commit=commit, path=filename, - firstline=lines[0], lastline=lines[1]) - url = '``{0}`` `{1} <{2}>`_'.format(fname, source, link) - - col_signatures.append(fmtsig) - col_urls.append(url) - - # table formatting - max_width_col_sig = max(map(len, col_signatures)) - max_width_col_url = max(map(len, col_urls)) - padding = 2 - width_col_sig = padding * 2 + max_width_col_sig - width_col_url = padding * 2 + max_width_col_url - line_format = "{{0:^{0}}} {{1:^{1}}}".format(width_col_sig, - width_col_url) - print(line_format.format('=' * width_col_sig, '=' * width_col_url)) - print(line_format.format(col_signatures[0], col_urls[0])) - print(line_format.format('=' * width_col_sig, '=' * width_col_url)) - for sig, url in zip(col_signatures[1:], col_urls[1:]): - print(line_format.format(sig, url)) - print(line_format.format('=' * width_col_sig, '=' * width_col_url)) - print() - - return buf.getvalue() - finally: - buf.close() - - -# Main routine for 
this module: - -def gen_lower_listing(path=None): - """ - Generate lowering listing to ``path`` or (if None) to stdout. - """ - cpu_backend = cpu_target.target_context - cpu_backend.refresh() - - fninfos = gather_function_info(cpu_backend) - out = format_function_infos(fninfos) - - if path is None: - print(out) - else: - with open(path, 'w') as fobj: - print(out, file=fobj) - - -if __name__ == '__main__': - gen_lower_listing() diff --git a/numba/numba/serialize.py b/numba/numba/serialize.py deleted file mode 100644 index 81700fde0..000000000 --- a/numba/numba/serialize.py +++ /dev/null @@ -1,112 +0,0 @@ -""" -Serialization support for compiled functions. -""" - -from __future__ import print_function, division, absolute_import - -import imp -import marshal -import sys -from types import FunctionType, ModuleType - -from . import bytecode, compiler - - -# -# Pickle support -# - -def _rebuild_reduction(cls, *args): - """ - Global hook to rebuild a given class from its __reduce__ arguments. - """ - return cls._rebuild(*args) - - -class _ModuleRef(object): - - def __init__(self, name): - self.name = name - - def __reduce__(self): - return _rebuild_module, (self.name,) - - -def _rebuild_module(name): - if name is None: - raise ImportError("cannot import None") - __import__(name) - return sys.modules[name] - - -def _get_function_globals_for_reduction(func): - """ - Analyse *func* and return a dictionary of global values suitable for - reduction. - """ - func_id = bytecode.FunctionIdentity.from_function(func) - bc = bytecode.ByteCode(func_id) - globs = bc.get_used_globals() - for k, v in globs.items(): - # Make modules picklable by name - if isinstance(v, ModuleType): - globs[k] = _ModuleRef(v.__name__) - # Remember the module name so that the function gets a proper __module__ - # when rebuilding. This is used to recreate the environment. 
- globs['__name__'] = func.__module__ - return globs - -def _reduce_function(func, globs): - """ - Reduce a Python function and its globals to picklable components. - If there are cell variables (i.e. references to a closure), their - values will be frozen. - """ - if func.__closure__: - cells = [cell.cell_contents for cell in func.__closure__] - else: - cells = None - return _reduce_code(func.__code__), globs, func.__name__, cells - -def _reduce_code(code): - """ - Reduce a code object to picklable components. - """ - return marshal.version, imp.get_magic(), marshal.dumps(code) - -def _dummy_closure(x): - """ - A dummy function allowing us to build cell objects. - """ - return lambda: x - -def _rebuild_function(code_reduced, globals, name, cell_values): - """ - Rebuild a function from its _reduce_function() results. - """ - if cell_values: - cells = tuple(_dummy_closure(v).__closure__[0] for v in cell_values) - else: - cells = () - code = _rebuild_code(*code_reduced) - modname = globals['__name__'] - try: - _rebuild_module(modname) - except ImportError: - # If the module can't be found, avoid passing it (it would produce - # errors when lowering). - del globals['__name__'] - return FunctionType(code, globals, name, (), cells) - -def _rebuild_code(marshal_version, bytecode_magic, marshalled): - """ - Rebuild a code object from its _reduce_code() results. 
- """ - if marshal.version != marshal_version: - raise RuntimeError("incompatible marshal version: " - "interpreter has %r, marshalled code has %r" - % (marshal.version, marshal_version)) - if imp.get_magic() != bytecode_magic: - raise RuntimeError("incompatible bytecode version") - return marshal.loads(marshalled) - diff --git a/numba/numba/servicelib/__init__.py b/numba/numba/servicelib/__init__.py deleted file mode 100644 index fd378494b..000000000 --- a/numba/numba/servicelib/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from __future__ import absolute_import -from .service import Service -from .threadlocal import TLStack diff --git a/numba/numba/servicelib/service.py b/numba/numba/servicelib/service.py deleted file mode 100644 index 8103d251a..000000000 --- a/numba/numba/servicelib/service.py +++ /dev/null @@ -1,89 +0,0 @@ -""" -Implement background services for the application. -This is implemented as a cooperative concurrent task. -""" -from __future__ import absolute_import, print_function, division - -import functools - - -class Service(object): - def __init__(self, name="unnamed", arg=None): - self.name = name - self.enabled = True - self.arg = arg - self._task = self.process(self.arg) - next(self._task) - - def service(self): - """ - Request for the service task. - Servicing is disabled if it is disabled thourght the "enabled" - attribute. When the task is executing, the service is disabled to - avoid recursion. - """ - if self.enabled: - enable = self.enabled - try: - # Prevent recursion - self.enabled = False - next(self._task) - finally: - self.enabled = enable - - def process(self, arg): - """ - Overrided to implement the service task. - This must be a generator. - Use `yield` to return control. - """ - raise NotImplementedError - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.service() - - def after(self, fn): - """ - A decorator for a function. Service is triggered on return. 
- """ - @functools.wraps(fn) - def wrap(*args, **kws): - with self: - return fn(*args, **kws) - return wrap - -# ----------------------------------------------------------------------------- -# The rest are for testing - - -class HelloService(Service): - def process(self, arg): - count = 0 - yield - while True: - print("Hello", count) - count += 1 - yield - -def test(): - serv = HelloService("my.hello") - print("1") - serv.service() - print("2") - serv.service() - - with serv: - print("3") - - @serv.after - def nested(): - print("4") - - nested() - - -if __name__ == '__main__': - test() diff --git a/numba/numba/servicelib/threadlocal.py b/numba/numba/servicelib/threadlocal.py deleted file mode 100644 index ca115dd11..000000000 --- a/numba/numba/servicelib/threadlocal.py +++ /dev/null @@ -1,47 +0,0 @@ -""" -Implements: -- Threadlocal stack -""" -from __future__ import print_function, absolute_import, division -import threading - - -class TLStack(object): - def __init__(self): - self.local = threading.local() - - @property - def stack(self): - try: - # Retrieve thread local stack - return self.local.stack - except AttributeError: - # Initialize stack for the thread - self.local.stack = [] - return self.local.stack - - def push(self, item): - self.stack.append(item) - - def pop(self): - return self.stack.pop() - - @property - def top(self): - return self.stack[-1] - - @property - def is_empty(self): - return not self.stack - - def __bool__(self): - return not self.is_empty - - def __nonzero__(self): - return self.__bool__() - - def __len__(self): - return len(self.stack) - - def clear(self): - self.__init__() diff --git a/numba/numba/sigutils.py b/numba/numba/sigutils.py deleted file mode 100644 index e37b2c0c0..000000000 --- a/numba/numba/sigutils.py +++ /dev/null @@ -1,47 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from numba import types, typing - - -def is_signature(sig): - """ - Return whether *sig* is a potentially valid 
signature - specification (for user-facing APIs). - """ - return isinstance(sig, (str, tuple, typing.Signature)) - - -def _parse_signature_string(signature_str): - # Just eval signature_str using the types submodules as globals - return eval(signature_str, {}, types.__dict__) - - -def normalize_signature(sig): - """ - From *sig* (a signature specification), return a ``(return_type, args)`` - tuple, where ``args`` itself is a tuple of types, and ``return_type`` - can be None if not specified. - """ - if isinstance(sig, str): - parsed = _parse_signature_string(sig) - else: - parsed = sig - if isinstance(parsed, tuple): - args, return_type = parsed, None - elif isinstance(parsed, typing.Signature): - args, return_type = parsed.args, parsed.return_type - else: - raise TypeError("invalid signature: %r instance not allowed" - % (sig.__class__.__name__,)) - - def check_type(ty): - if not isinstance(ty, types.Type): - raise TypeError("invalid signature: expected a type instance, " - "got %r" % (ty,)) - - if return_type is not None: - check_type(return_type) - for ty in args: - check_type(ty) - - return args, return_type diff --git a/numba/numba/six.py b/numba/numba/six.py deleted file mode 100644 index ffa3fe166..000000000 --- a/numba/numba/six.py +++ /dev/null @@ -1,838 +0,0 @@ -"""Utilities for writing code that runs on Python 2 and 3""" - -# Copyright (c) 2010-2015 Benjamin Peterson -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from __future__ import absolute_import - -import functools -import itertools -import operator -import sys -import types - -__author__ = "Benjamin Peterson " -__version__ = "1.9.0" - - -# Useful for very coarse version differentiation. -PY2 = sys.version_info[0] == 2 -PY3 = sys.version_info[0] == 3 - -if PY3: - string_types = str, - integer_types = int, - class_types = type, - text_type = str - binary_type = bytes - - MAXSIZE = sys.maxsize -else: - string_types = basestring, - integer_types = (int, long) - class_types = (type, types.ClassType) - text_type = unicode - binary_type = str - - if sys.platform.startswith("java"): - # Jython always uses 32 bits. - MAXSIZE = int((1 << 31) - 1) - else: - # It's possible to have sizeof(long) != sizeof(Py_ssize_t). - class X(object): - def __len__(self): - return 1 << 31 - try: - len(X()) - except OverflowError: - # 32-bit - MAXSIZE = int((1 << 31) - 1) - else: - # 64-bit - MAXSIZE = int((1 << 63) - 1) - del X - - -def _add_doc(func, doc): - """Add documentation to a function.""" - func.__doc__ = doc - - -def _import_module(name): - """Import module, returning the module after the last dot.""" - __import__(name) - return sys.modules[name] - - -class _LazyDescr(object): - - def __init__(self, name): - self.name = name - - def __get__(self, obj, tp): - result = self._resolve() - setattr(obj, self.name, result) # Invokes __set__. - try: - # This is a bit ugly, but it avoids running this again by - # removing this descriptor. 
- delattr(obj.__class__, self.name) - except AttributeError: - pass - return result - - -class MovedModule(_LazyDescr): - - def __init__(self, name, old, new=None): - super(MovedModule, self).__init__(name) - if PY3: - if new is None: - new = name - self.mod = new - else: - self.mod = old - - def _resolve(self): - return _import_module(self.mod) - - def __getattr__(self, attr): - _module = self._resolve() - value = getattr(_module, attr) - setattr(self, attr, value) - return value - - -class _LazyModule(types.ModuleType): - - def __init__(self, name): - super(_LazyModule, self).__init__(name) - self.__doc__ = self.__class__.__doc__ - - def __dir__(self): - attrs = ["__doc__", "__name__"] - attrs += [attr.name for attr in self._moved_attributes] - return attrs - - # Subclasses should override this - _moved_attributes = [] - - -class MovedAttribute(_LazyDescr): - - def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): - super(MovedAttribute, self).__init__(name) - if PY3: - if new_mod is None: - new_mod = name - self.mod = new_mod - if new_attr is None: - if old_attr is None: - new_attr = name - else: - new_attr = old_attr - self.attr = new_attr - else: - self.mod = old_mod - if old_attr is None: - old_attr = name - self.attr = old_attr - - def _resolve(self): - module = _import_module(self.mod) - return getattr(module, self.attr) - - -class _SixMetaPathImporter(object): - """ - A meta path importer to import six.moves and its submodules. - - This class implements a PEP302 finder and loader. It should be compatible - with Python 2.5 and all existing versions of Python3 - """ - def __init__(self, six_module_name): - self.name = six_module_name - self.known_modules = {} - - def _add_module(self, mod, *fullnames): - for fullname in fullnames: - self.known_modules[self.name + "." + fullname] = mod - - def _get_module(self, fullname): - return self.known_modules[self.name + "." 
+ fullname] - - def find_module(self, fullname, path=None): - if fullname in self.known_modules: - return self - return None - - def __get_module(self, fullname): - try: - return self.known_modules[fullname] - except KeyError: - raise ImportError("This loader does not know module " + fullname) - - def load_module(self, fullname): - try: - # in case of a reload - return sys.modules[fullname] - except KeyError: - pass - mod = self.__get_module(fullname) - if isinstance(mod, MovedModule): - mod = mod._resolve() - else: - mod.__loader__ = self - sys.modules[fullname] = mod - return mod - - def is_package(self, fullname): - """ - Return true, if the named module is a package. - - We need this method to get correct spec objects with - Python 3.4 (see PEP451) - """ - return hasattr(self.__get_module(fullname), "__path__") - - def get_code(self, fullname): - """Return None - - Required, if is_package is implemented""" - self.__get_module(fullname) # eventually raises ImportError - return None - get_source = get_code # same as get_code - -_importer = _SixMetaPathImporter(__name__) - - -class _MovedItems(_LazyModule): - """Lazy loading of moved objects""" - __path__ = [] # mark as package - - -_moved_attributes = [ - MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), - MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), - MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"), - MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), - MovedAttribute("intern", "__builtin__", "sys"), - MovedAttribute("map", "itertools", "builtins", "imap", "map"), - MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), - MovedAttribute("reload_module", "__builtin__", "imp", "reload"), - MovedAttribute("reduce", "__builtin__", "functools"), - MovedAttribute("shlex_quote", "pipes", "shlex", "quote"), - MovedAttribute("StringIO", "StringIO", "io"), - MovedAttribute("UserDict", "UserDict", 
"collections"), - MovedAttribute("UserList", "UserList", "collections"), - MovedAttribute("UserString", "UserString", "collections"), - MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), - MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), - MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"), - - MovedModule("builtins", "__builtin__"), - MovedModule("configparser", "ConfigParser"), - MovedModule("copyreg", "copy_reg"), - MovedModule("dbm_gnu", "gdbm", "dbm.gnu"), - MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"), - MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), - MovedModule("http_cookies", "Cookie", "http.cookies"), - MovedModule("html_entities", "htmlentitydefs", "html.entities"), - MovedModule("html_parser", "HTMLParser", "html.parser"), - MovedModule("http_client", "httplib", "http.client"), - MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), - MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"), - MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), - MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), - MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), - MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), - MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), - MovedModule("cPickle", "cPickle", "pickle"), - MovedModule("queue", "Queue"), - MovedModule("reprlib", "repr"), - MovedModule("socketserver", "SocketServer"), - MovedModule("_thread", "thread", "_thread"), - MovedModule("tkinter", "Tkinter"), - MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), - MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), - MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), - MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), - MovedModule("tkinter_tix", 
"Tix", "tkinter.tix"), - MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"), - MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), - MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), - MovedModule("tkinter_colorchooser", "tkColorChooser", - "tkinter.colorchooser"), - MovedModule("tkinter_commondialog", "tkCommonDialog", - "tkinter.commondialog"), - MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), - MovedModule("tkinter_font", "tkFont", "tkinter.font"), - MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), - MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", - "tkinter.simpledialog"), - MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"), - MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"), - MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"), - MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), - MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"), - MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"), - MovedModule("winreg", "_winreg"), -] -for attr in _moved_attributes: - setattr(_MovedItems, attr.name, attr) - if isinstance(attr, MovedModule): - _importer._add_module(attr, "moves." 
+ attr.name) -del attr - -_MovedItems._moved_attributes = _moved_attributes - -moves = _MovedItems(__name__ + ".moves") -_importer._add_module(moves, "moves") - - -class Module_six_moves_urllib_parse(_LazyModule): - """Lazy loading of moved objects in six.moves.urllib_parse""" - - -_urllib_parse_moved_attributes = [ - MovedAttribute("ParseResult", "urlparse", "urllib.parse"), - MovedAttribute("SplitResult", "urlparse", "urllib.parse"), - MovedAttribute("parse_qs", "urlparse", "urllib.parse"), - MovedAttribute("parse_qsl", "urlparse", "urllib.parse"), - MovedAttribute("urldefrag", "urlparse", "urllib.parse"), - MovedAttribute("urljoin", "urlparse", "urllib.parse"), - MovedAttribute("urlparse", "urlparse", "urllib.parse"), - MovedAttribute("urlsplit", "urlparse", "urllib.parse"), - MovedAttribute("urlunparse", "urlparse", "urllib.parse"), - MovedAttribute("urlunsplit", "urlparse", "urllib.parse"), - MovedAttribute("quote", "urllib", "urllib.parse"), - MovedAttribute("quote_plus", "urllib", "urllib.parse"), - MovedAttribute("unquote", "urllib", "urllib.parse"), - MovedAttribute("unquote_plus", "urllib", "urllib.parse"), - MovedAttribute("urlencode", "urllib", "urllib.parse"), - MovedAttribute("splitquery", "urllib", "urllib.parse"), - MovedAttribute("splittag", "urllib", "urllib.parse"), - MovedAttribute("splituser", "urllib", "urllib.parse"), - MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), - MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), - MovedAttribute("uses_params", "urlparse", "urllib.parse"), - MovedAttribute("uses_query", "urlparse", "urllib.parse"), - MovedAttribute("uses_relative", "urlparse", "urllib.parse"), -] -for attr in _urllib_parse_moved_attributes: - setattr(Module_six_moves_urllib_parse, attr.name, attr) -del attr - -Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes - -_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"), - "moves.urllib_parse", 
"moves.urllib.parse") - - -class Module_six_moves_urllib_error(_LazyModule): - """Lazy loading of moved objects in six.moves.urllib_error""" - - -_urllib_error_moved_attributes = [ - MovedAttribute("URLError", "urllib2", "urllib.error"), - MovedAttribute("HTTPError", "urllib2", "urllib.error"), - MovedAttribute("ContentTooShortError", "urllib", "urllib.error"), -] -for attr in _urllib_error_moved_attributes: - setattr(Module_six_moves_urllib_error, attr.name, attr) -del attr - -Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes - -_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"), - "moves.urllib_error", "moves.urllib.error") - - -class Module_six_moves_urllib_request(_LazyModule): - """Lazy loading of moved objects in six.moves.urllib_request""" - - -_urllib_request_moved_attributes = [ - MovedAttribute("urlopen", "urllib2", "urllib.request"), - MovedAttribute("install_opener", "urllib2", "urllib.request"), - MovedAttribute("build_opener", "urllib2", "urllib.request"), - MovedAttribute("pathname2url", "urllib", "urllib.request"), - MovedAttribute("url2pathname", "urllib", "urllib.request"), - MovedAttribute("getproxies", "urllib", "urllib.request"), - MovedAttribute("Request", "urllib2", "urllib.request"), - MovedAttribute("OpenerDirector", "urllib2", "urllib.request"), - MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"), - MovedAttribute("ProxyHandler", "urllib2", "urllib.request"), - MovedAttribute("BaseHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"), - MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"), - MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"), - 
MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"), - MovedAttribute("FileHandler", "urllib2", "urllib.request"), - MovedAttribute("FTPHandler", "urllib2", "urllib.request"), - MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"), - MovedAttribute("UnknownHandler", "urllib2", "urllib.request"), - MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"), - MovedAttribute("urlretrieve", "urllib", "urllib.request"), - MovedAttribute("urlcleanup", "urllib", "urllib.request"), - MovedAttribute("URLopener", "urllib", "urllib.request"), - MovedAttribute("FancyURLopener", "urllib", "urllib.request"), - MovedAttribute("proxy_bypass", "urllib", "urllib.request"), -] -for attr in _urllib_request_moved_attributes: - setattr(Module_six_moves_urllib_request, attr.name, attr) -del attr - -Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes - -_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"), - "moves.urllib_request", "moves.urllib.request") - - -class Module_six_moves_urllib_response(_LazyModule): - """Lazy loading of moved objects in six.moves.urllib_response""" - - -_urllib_response_moved_attributes = [ - MovedAttribute("addbase", "urllib", "urllib.response"), - MovedAttribute("addclosehook", "urllib", "urllib.response"), - MovedAttribute("addinfo", "urllib", "urllib.response"), - MovedAttribute("addinfourl", "urllib", "urllib.response"), -] -for attr in _urllib_response_moved_attributes: - setattr(Module_six_moves_urllib_response, attr.name, attr) -del attr - -Module_six_moves_urllib_response._moved_attributes = 
_urllib_response_moved_attributes - -_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"), - "moves.urllib_response", "moves.urllib.response") - - -class Module_six_moves_urllib_robotparser(_LazyModule): - """Lazy loading of moved objects in six.moves.urllib_robotparser""" - - -_urllib_robotparser_moved_attributes = [ - MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"), -] -for attr in _urllib_robotparser_moved_attributes: - setattr(Module_six_moves_urllib_robotparser, attr.name, attr) -del attr - -Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes - -_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser"), - "moves.urllib_robotparser", "moves.urllib.robotparser") - - -class Module_six_moves_urllib(types.ModuleType): - """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" - __path__ = [] # mark as package - parse = _importer._get_module("moves.urllib_parse") - error = _importer._get_module("moves.urllib_error") - request = _importer._get_module("moves.urllib_request") - response = _importer._get_module("moves.urllib_response") - robotparser = _importer._get_module("moves.urllib_robotparser") - - def __dir__(self): - return ['parse', 'error', 'request', 'response', 'robotparser'] - -_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"), - "moves.urllib") - - -def add_move(move): - """Add an item to six.moves.""" - setattr(_MovedItems, move.name, move) - - -def remove_move(name): - """Remove item from six.moves.""" - try: - delattr(_MovedItems, name) - except AttributeError: - try: - del moves.__dict__[name] - except KeyError: - raise AttributeError("no such move, %r" % (name,)) - - -if PY3: - _meth_func = "__func__" - _meth_self = "__self__" - - _func_closure = "__closure__" - _func_code = "__code__" - _func_defaults = "__defaults__" - _func_globals = "__globals__" -else: - 
_meth_func = "im_func" - _meth_self = "im_self" - - _func_closure = "func_closure" - _func_code = "func_code" - _func_defaults = "func_defaults" - _func_globals = "func_globals" - - -try: - advance_iterator = next -except NameError: - def advance_iterator(it): - return it.next() -next = advance_iterator - - -try: - callable = callable -except NameError: - def callable(obj): - return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) - - -if PY3: - def get_unbound_function(unbound): - return unbound - - create_bound_method = types.MethodType - - Iterator = object -else: - def get_unbound_function(unbound): - return unbound.im_func - - def create_bound_method(func, obj): - return types.MethodType(func, obj, obj.__class__) - - class Iterator(object): - - def next(self): - return type(self).__next__(self) - - callable = callable -_add_doc(get_unbound_function, - """Get the function out of a possibly unbound function""") - - -get_method_function = operator.attrgetter(_meth_func) -get_method_self = operator.attrgetter(_meth_self) -get_function_closure = operator.attrgetter(_func_closure) -get_function_code = operator.attrgetter(_func_code) -get_function_defaults = operator.attrgetter(_func_defaults) -get_function_globals = operator.attrgetter(_func_globals) - - -if PY3: - def iterkeys(d, **kw): - return iter(d.keys(**kw)) - - def itervalues(d, **kw): - return iter(d.values(**kw)) - - def iteritems(d, **kw): - return iter(d.items(**kw)) - - def iterlists(d, **kw): - return iter(d.lists(**kw)) - - viewkeys = operator.methodcaller("keys") - - viewvalues = operator.methodcaller("values") - - viewitems = operator.methodcaller("items") -else: - def iterkeys(d, **kw): - return iter(d.iterkeys(**kw)) - - def itervalues(d, **kw): - return iter(d.itervalues(**kw)) - - def iteritems(d, **kw): - return iter(d.iteritems(**kw)) - - def iterlists(d, **kw): - return iter(d.iterlists(**kw)) - - viewkeys = operator.methodcaller("viewkeys") - - viewvalues = 
operator.methodcaller("viewvalues") - - viewitems = operator.methodcaller("viewitems") - -_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.") -_add_doc(itervalues, "Return an iterator over the values of a dictionary.") -_add_doc(iteritems, - "Return an iterator over the (key, value) pairs of a dictionary.") -_add_doc(iterlists, - "Return an iterator over the (key, [values]) pairs of a dictionary.") - - -if PY3: - def b(s): - return s.encode("latin-1") - def u(s): - return s - unichr = chr - if sys.version_info[1] <= 1: - def int2byte(i): - return bytes((i,)) - else: - # This is about 2x faster than the implementation above on 3.2+ - int2byte = operator.methodcaller("to_bytes", 1, "big") - byte2int = operator.itemgetter(0) - indexbytes = operator.getitem - iterbytes = iter - import io - StringIO = io.StringIO - BytesIO = io.BytesIO - _assertCountEqual = "assertCountEqual" - _assertRaisesRegex = "assertRaisesRegex" - _assertRegex = "assertRegex" -else: - def b(s): - return s - # Workaround for standalone backslash - def u(s): - return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape") - unichr = unichr - int2byte = chr - def byte2int(bs): - return ord(bs[0]) - def indexbytes(buf, i): - return ord(buf[i]) - iterbytes = functools.partial(itertools.imap, ord) - import StringIO - StringIO = BytesIO = StringIO.StringIO - _assertCountEqual = "assertItemsEqual" - _assertRaisesRegex = "assertRaisesRegexp" - _assertRegex = "assertRegexpMatches" -_add_doc(b, """Byte literal""") -_add_doc(u, """Text literal""") - - -def assertCountEqual(self, *args, **kwargs): - return getattr(self, _assertCountEqual)(*args, **kwargs) - - -def assertRaisesRegex(self, *args, **kwargs): - return getattr(self, _assertRaisesRegex)(*args, **kwargs) - - -def assertRegex(self, *args, **kwargs): - return getattr(self, _assertRegex)(*args, **kwargs) - - -if PY3: - exec_ = getattr(moves.builtins, "exec") - - - def reraise(tp, value, tb=None): - if value is None: - value = tp() - if 
value.__traceback__ is not tb: - raise value.with_traceback(tb) - raise value - -else: - def exec_(_code_, _globs_=None, _locs_=None): - """Execute code in a namespace.""" - if _globs_ is None: - frame = sys._getframe(1) - _globs_ = frame.f_globals - if _locs_ is None: - _locs_ = frame.f_locals - del frame - elif _locs_ is None: - _locs_ = _globs_ - exec("""exec _code_ in _globs_, _locs_""") - - - exec_("""def reraise(tp, value, tb=None): - raise tp, value, tb -""") - - -if sys.version_info[:2] == (3, 2): - exec_("""def raise_from(value, from_value): - if from_value is None: - raise value - raise value from from_value -""") -elif sys.version_info[:2] > (3, 2): - exec_("""def raise_from(value, from_value): - raise value from from_value -""") -else: - def raise_from(value, from_value): - raise value - - -print_ = getattr(moves.builtins, "print", None) -if print_ is None: - def print_(*args, **kwargs): - """The new-style print function for Python 2.4 and 2.5.""" - fp = kwargs.pop("file", sys.stdout) - if fp is None: - return - def write(data): - if not isinstance(data, basestring): - data = str(data) - # If the file has an encoding, encode unicode with it. 
- if (isinstance(fp, file) and - isinstance(data, unicode) and - fp.encoding is not None): - errors = getattr(fp, "errors", None) - if errors is None: - errors = "strict" - data = data.encode(fp.encoding, errors) - fp.write(data) - want_unicode = False - sep = kwargs.pop("sep", None) - if sep is not None: - if isinstance(sep, unicode): - want_unicode = True - elif not isinstance(sep, str): - raise TypeError("sep must be None or a string") - end = kwargs.pop("end", None) - if end is not None: - if isinstance(end, unicode): - want_unicode = True - elif not isinstance(end, str): - raise TypeError("end must be None or a string") - if kwargs: - raise TypeError("invalid keyword arguments to print()") - if not want_unicode: - for arg in args: - if isinstance(arg, unicode): - want_unicode = True - break - if want_unicode: - newline = unicode("\n") - space = unicode(" ") - else: - newline = "\n" - space = " " - if sep is None: - sep = space - if end is None: - end = newline - for i, arg in enumerate(args): - if i: - write(sep) - write(arg) - write(end) -if sys.version_info[:2] < (3, 3): - _print = print_ - def print_(*args, **kwargs): - fp = kwargs.get("file", sys.stdout) - flush = kwargs.pop("flush", False) - _print(*args, **kwargs) - if flush and fp is not None: - fp.flush() - -_add_doc(reraise, """Reraise an exception.""") - -if sys.version_info[0:2] < (3, 4): - def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS, - updated=functools.WRAPPER_UPDATES): - def wrapper(f): - f = functools.wraps(wrapped, assigned, updated)(f) - f.__wrapped__ = wrapped - return f - return wrapper -else: - wraps = functools.wraps - -def with_metaclass(meta, *bases): - """Create a base class with a metaclass.""" - # This requires a bit of explanation: the basic idea is to make a dummy - # metaclass for one level of class instantiation that replaces itself with - # the actual metaclass. 
- class metaclass(meta): - def __new__(cls, name, this_bases, d): - return meta(name, bases, d) - return type.__new__(metaclass, 'temporary_class', (), {}) - - -def add_metaclass(metaclass): - """Class decorator for creating a class with a metaclass.""" - def wrapper(cls): - orig_vars = cls.__dict__.copy() - slots = orig_vars.get('__slots__') - if slots is not None: - if isinstance(slots, str): - slots = [slots] - for slots_var in slots: - orig_vars.pop(slots_var) - orig_vars.pop('__dict__', None) - orig_vars.pop('__weakref__', None) - return metaclass(cls.__name__, cls.__bases__, orig_vars) - return wrapper - - -def python_2_unicode_compatible(klass): - """ - A decorator that defines __unicode__ and __str__ methods under Python 2. - Under Python 3 it does nothing. - - To support Python 2 and 3 with a single code base, define a __str__ method - returning text and apply this decorator to the class. - """ - if PY2: - if '__str__' not in klass.__dict__: - raise ValueError("@python_2_unicode_compatible cannot be applied " - "to %s because it doesn't define __str__()." % - klass.__name__) - klass.__unicode__ = klass.__str__ - klass.__str__ = lambda self: self.__unicode__().encode('utf-8') - return klass - - -# Complete the moves implementation. -# This code is at the end of this module to speed up module loading. -# Turn this module into a package. -__path__ = [] # required for PEP 302 and PEP 451 -__package__ = __name__ # see PEP 366 @ReservedAssignment -if globals().get("__spec__") is not None: - __spec__.submodule_search_locations = [] # PEP 451 @UndefinedVariable -# Remove other six meta path importers, since they cause problems. This can -# happen if six is removed from sys.modules and then reloaded. (Setuptools does -# this for some reason.) -if sys.meta_path: - for i, importer in enumerate(sys.meta_path): - # Here's some real nastiness: Another "instance" of the six module might - # be floating around. 
Therefore, we can't use isinstance() to check for - # the six meta path importer, since the other six instance will have - # inserted an importer with different class. - if (type(importer).__name__ == "_SixMetaPathImporter" and - importer.name == __name__): - del sys.meta_path[i] - break - del i, importer -# Finally, add the importer to the meta path import hook. -sys.meta_path.append(_importer) diff --git a/numba/numba/smartarray.py b/numba/numba/smartarray.py deleted file mode 100644 index 6a47b8297..000000000 --- a/numba/numba/smartarray.py +++ /dev/null @@ -1,228 +0,0 @@ -from numba.tracing import trace -from numba.errors import deprecated - -import sys - -import numpy as np - -def _o2s(dtype, shape, order): - # convert order parameter to strides - - if dtype is None or shape is None or order is None: - return None - - if order == 'F': - shape = list(shape) - shape.reverse() - strides = [] - itemsize = dtype.itemsize - for i in range(len(shape), 0, -1): - strides.append(itemsize) - itemsize *= shape[i - 1] - if order in ('C', None): - strides.reverse() - return tuple(strides) - -def _s2o(dtype, shape, strides): - # convert strides parameter to order - # Note: strides must correspond to contiguous data layout - - if strides is None or strides[-1] == dtype.itemsize: - order = 'C' - elif strides[0] == dtype.itemsize: - order = 'F' - else: - raise ValueError('strides do not correspond to contiguous data layout') - s2 = _o2s(dtype, shape, order) - if strides != s2: - raise ValueError('strides do not correspond to contiguous data layout') - return order - -class SmartArray(object): - """An array type that supports host and GPU storage.""" - - _targets = ('host', 'gpu') - - def __init__(self, obj=None, copy=True, - shape=None, dtype=None, order=None, where='host'): - """Construct a SmartArray in the memory space defined by 'where'. - Valid invocations: - - * SmartArray(obj=, copy=): - - to create a SmartArray from an existing array-like object. 
- The 'copy' argument specifies whether to adopt or to copy it. - - * SmartArray(shape=, dtype=, order=) - - to create a new SmartArray from scratch, given the typical NumPy array - attributes. - - (The optional 'where' argument specifies where to allocate the array - initially. (Default: 'host') - """ - - if where not in self._targets: - raise ValueError('"%s" is not a valid target'%where) - # we need either a prototype or proper type info - assert obj is not None or (shape and dtype) - self._host = self._gpu = None - self._host_valid = self._gpu_valid = False - self._allocate(where, obj, dtype, shape, _o2s(dtype, shape, order), copy) - if where == 'host': - self._host_valid = True - t = self._host - else: - self._gpu_valid = True - t = self._gpu - self._shape = t.shape - self._strides = t.strides - self._dtype = t.dtype - self._ndim = t.ndim - self._size = t.size - - @property - def shape(self): return self._shape - - @property - def strides(self): return self._strides - - @property - def dtype(self): return self._dtype - - @property - def ndim(self): return self._ndim - - @property - def size(self): return self._size - - def get(self, where='host'): - """Return the representation of 'self' in the given memory space.""" - - if where not in self._targets: - raise ValueError('"%s" is not a valid target'%where) - self._sync(where) - if where == 'host': return self._host - elif where == 'gpu': return self._gpu - else: raise ValueError('unknown memory space "%s"'%where) - - @deprecated("get('host')") - def host(self): return self.get('host') - @deprecated("get('gpu')") - def gpu(self): return self.get('gpu') - - def mark_changed(self, where='host'): - """Mark the given location as changed, broadcast updates if needed.""" - - if where not in self._targets: - raise ValueError('"%s" is not a valid target'%where) - if where == 'host': - self._invalidate('gpu') - # only sync if there are active views - if self._gpu is not None and sys.getrefcount(self._gpu) > 2: - 
self._sync('gpu') - elif where == 'gpu': - self._invalidate('host') - # only sync if there are active views - if self._host is not None and sys.getrefcount(self._host) > 2: - self._sync('host') - - @deprecated("mark_changed('host')") - def host_changed(self): return self.mark_changed('host') - @deprecated("mark_changed('gpu')") - def gpu_changed(self): return self.mark_changed('gpu') - - def __array__(self, *args): - - self._sync('host') - return np.array(self._host, *args) - - def _sync(self, where): - """Sync the data in one memory space with the other.""" - - if where == 'gpu': - if self._gpu is None: - self._allocate('gpu', None, self.dtype, self.shape, self.strides) - if not self._gpu_valid: - self._copy_to_gpu() - else: - if self._host is None: - self._allocate('host', None, self.dtype, self.shape, self.strides) - if not self._host_valid: - self._copy_to_host() - - @trace - def _invalidate(self, where): - """Mark the host / device array as out-of-date.""" - - if where == 'gpu': - self._gpu_valid = False - else: - self._host_valid = False - - @trace - def _allocate(self, where, obj=None, dtype=None, shape=None, strides=None, - copy=True): - if dtype: - dtype = np.dtype(dtype) - if where == 'host': - if obj is not None: - self._host = np.array(obj, dtype, copy=copy) - else: - self._host = np.empty(shape, dtype, _s2o(dtype, shape, strides)) - else: - # Don't import this at module-scope as it may not be available - # in all environments (e.g., CUDASIM) - from numba.cuda.cudadrv import devicearray as da - if obj is not None: - # If 'obj' is an array-like object but not an ndarray, - # construct an ndarray first to extract all the parameters we need. 
- if not isinstance(obj, np.ndarray): - obj = np.array(obj, copy=False) - self._gpu = da.from_array_like(obj) - else: - if strides is None: - strides = _o2s(dtype, shape, 'C') - self._gpu = da.DeviceNDArray(shape, strides, dtype) - - @trace - def _copy_to_gpu(self): - self._gpu.copy_to_device(self._host) - self._gpu_valid = True - - @trace - def _copy_to_host(self): - self._gpu.copy_to_host(self._host) - self._host_valid = True - - @staticmethod - def _maybe_wrap(value): - """If `value` is an ndarray, wrap it in a SmartArray, - otherwise return `value` itself.""" - - if isinstance(value, np.ndarray): - return SmartArray(value, copy=False) - else: - return value - - @trace - def __getattr__(self, name): - """Transparently forward attribute access to the host array.""" - - if self._host is None: - self._allocate('host', None, self.dtype, self.shape, self.strides) - - # FIXME: for some attributes we need to sync first ! - return self._maybe_wrap(getattr(self._host, name)) - - def __len__(self): return self.shape[0] - def __eq__(self, other): - if type(self) is not type(other): return False - # FIXME: If both arrays have valid GPU data, compare there. - return self._maybe_wrap(self.get('host') == other.get('host')) - def __getitem__(self, *args): - return self._maybe_wrap(self.get('host').__getitem__(*args)) - def __setitem__(self, *args): - return self._maybe_wrap(self.get('host').__setitem__(*args)) - def astype(self, *args): - return self._maybe_wrap(self.get('host').astype(*args)) diff --git a/numba/numba/special.py b/numba/numba/special.py deleted file mode 100644 index 54cabccf2..000000000 --- a/numba/numba/special.py +++ /dev/null @@ -1,19 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from .typing.typeof import typeof -import numpy as np - -def pndindex(*args): - """ Provides an n-dimensional parallel iterator that generates index tuples - for each iteration point. Sequentially, pndindex is identical to np.ndindex. 
- """ - return np.ndindex(*args) - -class prange(object): - """ Provides a 1D parallel iterator that generates a sequence of integers. - Sequentially, prange is identical to range. - """ - def __new__(cls, *args): - return range(*args) - -__all__ = ['typeof', 'prange', 'pndindex'] diff --git a/numba/numba/stencil.py b/numba/numba/stencil.py deleted file mode 100644 index dc7dc32e3..000000000 --- a/numba/numba/stencil.py +++ /dev/null @@ -1,744 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -import copy - -import numpy as np -from llvmlite import ir as lir - -from numba import compiler, types, ir_utils, ir, typing, numpy_support, utils -from numba import config -from numba.typing.templates import (CallableTemplate, signature, infer_global, - AbstractTemplate) -from numba.targets import registry -from numba.targets.imputils import lower_builtin -from numba.extending import register_jitable -from numba.six import exec_ - - -class StencilFuncLowerer(object): - '''Callable class responsible for lowering calls to a specific StencilFunc. - ''' - def __init__(self, sf): - self.stencilFunc = sf - - def __call__(self, context, builder, sig, args): - cres = self.stencilFunc.compile_for_argtys(sig.args, {}, - sig.return_type, None) - return context.call_internal(builder, cres.fndesc, sig, args) - -@register_jitable -def raise_if_incompatible_array_sizes(a, *args): - ashape = a.shape - for arg in args: - if a.ndim != arg.ndim: - raise ValueError("Secondary stencil array does not have same number " - " of dimensions as the first stencil input.") - argshape = arg.shape - for i in range(len(ashape)): - if ashape[i] > argshape[i]: - raise ValueError("Secondary stencil array has some dimension " - "smaller the same dimension in the first " - "stencil input.") - -class StencilFunc(object): - """ - A special type to hold stencil information for the IR. 
- """ - - id_counter = 0 - - def __init__(self, kernel_ir, mode, options): - self.id = type(self).id_counter - type(self).id_counter += 1 - self.kernel_ir = kernel_ir - self.mode = mode - self.options = options - self.kws = [] # remember original kws arguments - - # stencils only supported for CPU context currently - self._typingctx = registry.cpu_target.typing_context - self._targetctx = registry.cpu_target.target_context - self._typingctx.refresh() - self._targetctx.refresh() - self._install_type(self._typingctx) - self.neighborhood = self.options.get("neighborhood") - self._type_cache = {} - self._lower_me = StencilFuncLowerer(self) - - def replace_return_with_setitem(self, blocks, index_vars, out_name): - """ - Find return statements in the IR and replace them with a SetItem - call of the value "returned" by the kernel into the result array. - Returns the block labels that contained return statements. - """ - ret_blocks = [] - - for label, block in blocks.items(): - scope = block.scope - loc = block.loc - new_body = [] - for stmt in block.body: - if isinstance(stmt, ir.Return): - ret_blocks.append(label) - # If 1D array then avoid the tuple construction. - if len(index_vars) == 1: - rvar = ir.Var(scope, out_name, loc) - ivar = ir.Var(scope, index_vars[0], loc) - new_body.append(ir.SetItem(rvar, ivar, stmt.value, loc)) - else: - # Convert the string names of the index variables into - # ir.Var's. - var_index_vars = [] - for one_var in index_vars: - index_var = ir.Var(scope, one_var, loc) - var_index_vars += [index_var] - - s_index_name = ir_utils.mk_unique_var("stencil_index") - s_index_var = ir.Var(scope, s_index_name, loc) - # Build a tuple from the index ir.Var's. - tuple_call = ir.Expr.build_tuple(var_index_vars, loc) - new_body.append(ir.Assign(tuple_call, s_index_var, loc)) - rvar = ir.Var(scope, out_name, loc) - # Write the return statements original value into - # the array using the tuple index. 
- si = ir.SetItem(rvar, s_index_var, stmt.value, loc) - new_body.append(si) - else: - new_body.append(stmt) - block.body = new_body - return ret_blocks - - def add_indices_to_kernel(self, kernel, index_names, ndim, - neighborhood, standard_indexed): - """ - Transforms the stencil kernel as specified by the user into one - that includes each dimension's index variable as part of the getitem - calls. So, in effect array[-1] becomes array[index0-1]. - """ - const_dict = {} - kernel_consts = [] - - if config.DEBUG_ARRAY_OPT == 1: - print("add_indices_to_kernel", ndim, neighborhood) - ir_utils.dump_blocks(kernel.blocks) - - if neighborhood is None: - need_to_calc_kernel = True - else: - need_to_calc_kernel = False - if len(neighborhood) != ndim: - raise ValueError("%d dimensional neighborhood specified for %d " \ - "dimensional input array" % (len(neighborhood), ndim)) - - tuple_table = ir_utils.get_tuple_table(kernel.blocks) - - relatively_indexed = set() - - for block in kernel.blocks.values(): - scope = block.scope - loc = block.loc - new_body = [] - for stmt in block.body: - if (isinstance(stmt, ir.Assign) and - isinstance(stmt.value, ir.Const)): - if config.DEBUG_ARRAY_OPT == 1: - print("remembering in const_dict", stmt.target.name, - stmt.value.value) - # Remember consts for use later. - const_dict[stmt.target.name] = stmt.value.value - if ((isinstance(stmt, ir.Assign) - and isinstance(stmt.value, ir.Expr) - and stmt.value.op in ['setitem', 'static_setitem'] - and stmt.value.value.name in kernel.arg_names) or - (isinstance(stmt, ir.SetItem) - and stmt.target.name in kernel.arg_names)): - raise ValueError("Assignments to arrays passed to stencil " \ - "kernels is not allowed.") - if (isinstance(stmt, ir.Assign) - and isinstance(stmt.value, ir.Expr) - and stmt.value.op in ['getitem', 'static_getitem'] - and stmt.value.value.name in kernel.arg_names - and stmt.value.value.name not in standard_indexed): - # We found a getitem from the input array. 
- if stmt.value.op == 'getitem': - stmt_index_var = stmt.value.index - else: - stmt_index_var = stmt.value.index_var - # allow static_getitem since rewrite passes are applied - #raise ValueError("Unexpected static_getitem in add_indices_to_kernel.") - - relatively_indexed.add(stmt.value.value.name) - - # Store the index used after looking up the variable in - # the const dictionary. - if need_to_calc_kernel: - assert hasattr(stmt_index_var, 'name') - - if stmt_index_var.name in tuple_table: - kernel_consts += [tuple_table[stmt_index_var.name]] - elif stmt_index_var.name in const_dict: - kernel_consts += [const_dict[stmt_index_var.name]] - else: - raise ValueError("Non-constant specified for " - "stencil kernel index.") - - if ndim == 1: - # Single dimension always has index variable 'index0'. - # tmpvar will hold the real index and is computed by - # adding the relative offset in stmt.value.index to - # the current absolute location in index0. - index_var = ir.Var(scope, index_names[0], loc) - tmpname = ir_utils.mk_unique_var("stencil_index") - tmpvar = ir.Var(scope, tmpname, loc) - acc_call = ir.Expr.binop('+', stmt_index_var, - index_var, loc) - new_body.append(ir.Assign(acc_call, tmpvar, loc)) - new_body.append(ir.Assign( - ir.Expr.getitem(stmt.value.value,tmpvar,loc), - stmt.target,loc)) - else: - index_vars = [] - sum_results = [] - s_index_name = ir_utils.mk_unique_var("stencil_index") - s_index_var = ir.Var(scope, s_index_name, loc) - const_index_vars = [] - ind_stencils = [] - - # Same idea as above but you have to extract - # individual elements out of the tuple indexing - # expression and add the corresponding index variable - # to them and then reconstitute as a tuple that can - # index the array. 
- for dim in range(ndim): - tmpname = ir_utils.mk_unique_var("const_index") - tmpvar = ir.Var(scope, tmpname, loc) - new_body.append(ir.Assign(ir.Const(dim, loc), - tmpvar, loc)) - const_index_vars += [tmpvar] - index_var = ir.Var(scope, index_names[dim], loc) - index_vars += [index_var] - - tmpname = ir_utils.mk_unique_var("ind_stencil_index") - tmpvar = ir.Var(scope, tmpname, loc) - ind_stencils += [tmpvar] - getitemname = ir_utils.mk_unique_var("getitem") - getitemvar = ir.Var(scope, getitemname, loc) - getitemcall = ir.Expr.getitem(stmt_index_var, - const_index_vars[dim], loc) - new_body.append(ir.Assign(getitemcall, getitemvar, loc)) - acc_call = ir.Expr.binop('+', getitemvar, - index_vars[dim], loc) - new_body.append(ir.Assign(acc_call, tmpvar, loc)) - - tuple_call = ir.Expr.build_tuple(ind_stencils, loc) - new_body.append(ir.Assign(tuple_call, s_index_var, loc)) - new_body.append(ir.Assign( - ir.Expr.getitem(stmt.value.value,s_index_var,loc), - stmt.target,loc)) - else: - new_body.append(stmt) - block.body = new_body - - if need_to_calc_kernel: - # Find the size of the kernel by finding the maximum absolute value - # index used in the kernel specification. 
- neighborhood = [[0,0] for _ in range(ndim)] - if len(kernel_consts) == 0: - raise ValueError("Stencil kernel with no accesses to " - "relatively indexed arrays.") - - for index in kernel_consts: - if isinstance(index, tuple) or isinstance(index, list): - for i in range(len(index)): - te = index[i] - if isinstance(te, ir.Var) and te.name in const_dict: - te = const_dict[te.name] - if isinstance(te, int): - neighborhood[i][0] = min(neighborhood[i][0], te) - neighborhood[i][1] = max(neighborhood[i][1], te) - else: - raise ValueError( - "Non-constant used as stencil index.") - index_len = len(index) - elif isinstance(index, int): - neighborhood[0][0] = min(neighborhood[0][0], index) - neighborhood[0][1] = max(neighborhood[0][1], index) - index_len = 1 - else: - raise ValueError( - "Non-tuple or non-integer used as stencil index.") - if index_len != ndim: - raise ValueError( - "Stencil index does not match array dimensionality.") - - return (neighborhood, relatively_indexed) - - - def get_return_type(self, argtys): - if config.DEBUG_ARRAY_OPT == 1: - print("get_return_type", argtys) - ir_utils.dump_blocks(self.kernel_ir.blocks) - - if not isinstance(argtys[0], types.npytypes.Array): - raise ValueError("The first argument to a stencil kernel must " - "be the primary input array.") - - typemap, return_type, calltypes = compiler.type_inference_stage( - self._typingctx, - self.kernel_ir, - argtys, - None, - {}) - if isinstance(return_type, types.npytypes.Array): - raise ValueError( - "Stencil kernel must return a scalar and not a numpy array.") - - real_ret = types.npytypes.Array(return_type, argtys[0].ndim, - argtys[0].layout) - return (real_ret, typemap, calltypes) - - def _install_type(self, typingctx): - """Constructs and installs a typing class for a StencilFunc object in - the input typing context. 
- """ - _ty_cls = type('StencilFuncTyping_' + - str(self.id), - (AbstractTemplate,), - dict(key=self, generic=self._type_me)) - typingctx.insert_user_function(self, _ty_cls) - - def compile_for_argtys(self, argtys, kwtys, return_type, sigret): - # look in the type cache to find if result array is passed - (_, result, typemap, calltypes) = self._type_cache[argtys] - new_func = self._stencil_wrapper(result, sigret, return_type, - typemap, calltypes, *argtys) - return new_func - - def _type_me(self, argtys, kwtys): - """ - Implement AbstractTemplate.generic() for the typing class - built by StencilFunc._install_type(). - Return the call-site signature. - """ - if (self.neighborhood is not None and - len(self.neighborhood) != argtys[0].ndim): - raise ValueError("%d dimensional neighborhood specified " - "for %d dimensional input array" % - (len(self.neighborhood), argtys[0].ndim)) - - argtys_extra = argtys - sig_extra = "" - result = None - if 'out' in kwtys: - argtys_extra += (kwtys['out'],) - sig_extra += ", out=None" - result = kwtys['out'] - - if 'neighborhood' in kwtys: - argtys_extra += (kwtys['neighborhood'],) - sig_extra += ", neighborhood=None" - - # look in the type cache first - if argtys_extra in self._type_cache: - (_sig, _, _, _) = self._type_cache[argtys_extra] - return _sig - - (real_ret, typemap, calltypes) = self.get_return_type(argtys) - sig = signature(real_ret, *argtys_extra) - dummy_text = ("def __numba_dummy_stencil({}{}):\n pass\n".format( - ",".join(self.kernel_ir.arg_names), sig_extra)) - exec_(dummy_text) in globals(), locals() - dummy_func = eval("__numba_dummy_stencil") - sig.pysig = utils.pysignature(dummy_func) - self._targetctx.insert_func_defn([(self._lower_me, self, argtys_extra)]) - self._type_cache[argtys_extra] = (sig, result, typemap, calltypes) - return sig - - def copy_ir_with_calltypes(self, ir, calltypes): - """ - Create a copy of a given IR along with its calltype information. 
- We need a copy of the calltypes because copy propagation applied - to the copied IR will change the calltypes and make subsequent - uses of the original IR invalid. - """ - copy_calltypes = {} - kernel_copy = ir.copy() - kernel_copy.blocks = {} - # For each block... - for (block_label, block) in ir.blocks.items(): - new_block = copy.deepcopy(ir.blocks[block_label]) - new_block.body = [] - # For each statement in each block... - for stmt in ir.blocks[block_label].body: - # Copy the statement to the new copy of the kernel - # and if the original statement is in the original - # calltypes then add the type associated with this - # statement to the calltypes copy. - scopy = copy.deepcopy(stmt) - new_block.body.append(scopy) - if stmt in calltypes: - copy_calltypes[scopy] = calltypes[stmt] - kernel_copy.blocks[block_label] = new_block - return (kernel_copy, copy_calltypes) - - def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes, *args): - # Overall approach: - # 1) Construct a string containing a function definition for the stencil function - # that will execute the stencil kernel. This function definition includes a - # unique stencil function name, the parameters to the stencil kernel, loop - # nests across the dimenions of the input array. Those loop nests use the - # computed stencil kernel size so as not to try to compute elements where - # elements outside the bounds of the input array would be needed. - # 2) The but of the loop nest in this new function is a special sentinel - # assignment. - # 3) Get the IR of this new function. - # 4) Split the block containing the sentinel assignment and remove the sentinel - # assignment. Insert the stencil kernel IR into the stencil function IR - # after label and variable renaming of the stencil kernel IR to prevent - # conflicts with the stencil function IR. - # 5) Compile the combined stencil function IR + stencil kernel IR into existence. 
- - # Copy the kernel so that our changes for this callsite - # won't effect other callsites. - (kernel_copy, copy_calltypes) = self.copy_ir_with_calltypes( - self.kernel_ir, calltypes) - # The stencil kernel body becomes the body of a loop, for which args aren't needed. - ir_utils.remove_args(kernel_copy.blocks) - first_arg = kernel_copy.arg_names[0] - - in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap) - name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks) - ir_utils.apply_copy_propagate( - kernel_copy.blocks, - in_cps, - name_var_table, - typemap, - copy_calltypes) - - if "out" in name_var_table: - raise ValueError("Cannot use the reserved word 'out' in stencil kernels.") - - sentinel_name = ir_utils.get_unused_var_name("__sentinel__", name_var_table) - if config.DEBUG_ARRAY_OPT == 1: - print("name_var_table", name_var_table, sentinel_name) - - the_array = args[0] - - if config.DEBUG_ARRAY_OPT == 1: - print("_stencil_wrapper", return_type, return_type.dtype, - type(return_type.dtype), args) - ir_utils.dump_blocks(kernel_copy.blocks) - - # We generate a Numba function to execute this stencil and here - # create the unique name of this function. - stencil_func_name = "__numba_stencil_%s_%s" % ( - hex(id(the_array)).replace("-", "_"), - self.id) - - # We will put a loop nest in the generated function for each - # dimension in the input array. Here we create the name for - # the index variable for each dimension. index0, index1, ... - index_vars = [] - for i in range(the_array.ndim): - index_var_name = ir_utils.get_unused_var_name("index" + str(i), - name_var_table) - index_vars += [index_var_name] - - # Create extra signature for out and neighborhood. 
- out_name = ir_utils.get_unused_var_name("out", name_var_table) - neighborhood_name = ir_utils.get_unused_var_name("neighborhood", - name_var_table) - sig_extra = "" - if result is not None: - sig_extra += ", {}=None".format(out_name) - if "neighborhood" in dict(self.kws): - sig_extra += ", {}=None".format(neighborhood_name) - - # Get a list of the standard indexed array names. - standard_indexed = self.options.get("standard_indexing", []) - - if first_arg in standard_indexed: - raise ValueError("The first argument to a stencil kernel must " - "use relative indexing, not standard indexing.") - - if len(set(standard_indexed) - set(kernel_copy.arg_names)) != 0: - raise ValueError("Standard indexing requested for an array name " - "not present in the stencil kernel definition.") - - # Add index variables to getitems in the IR to transition the accesses - # in the kernel from relative to regular Python indexing. Returns the - # computed size of the stencil kernel and a list of the relatively indexed - # arrays. - kernel_size, relatively_indexed = self.add_indices_to_kernel( - kernel_copy, index_vars, the_array.ndim, - self.neighborhood, standard_indexed) - if self.neighborhood is None: - self.neighborhood = kernel_size - - if config.DEBUG_ARRAY_OPT == 1: - print("After add_indices_to_kernel") - ir_utils.dump_blocks(kernel_copy.blocks) - - # The return in the stencil kernel becomes a setitem for that - # particular point in the iteration space. - ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks, - index_vars, out_name) - - if config.DEBUG_ARRAY_OPT == 1: - print("After replace_return_with_setitem", ret_blocks) - ir_utils.dump_blocks(kernel_copy.blocks) - - # Start to form the new function to execute the stencil kernel. - func_text = "def {}({}{}):\n".format(stencil_func_name, - ",".join(kernel_copy.arg_names), sig_extra) - - # Get loop ranges for each dimension, which could be either int - # or variable. 
In the latter case we'll use the extra neighborhood - # argument to the function. - ranges = [] - for i in range(the_array.ndim): - if isinstance(kernel_size[i][0], int): - lo = kernel_size[i][0] - hi = kernel_size[i][1] - else: - lo = "{}[{}][0]".format(neighborhood_name, i) - hi = "{}[{}][1]".format(neighborhood_name, i) - ranges.append((lo, hi)) - - # If there are more than one relatively indexed arrays, add a call to - # a function that will raise an error if any of the relatively indexed - # arrays are of different size than the first input array. - if len(relatively_indexed) > 1: - func_text += " raise_if_incompatible_array_sizes(" + first_arg - for other_array in relatively_indexed: - if other_array != first_arg: - func_text += "," + other_array - func_text += ")\n" - - # Get the shape of the first input array. - shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table) - func_text += " {} = {}.shape\n".format(shape_name, first_arg) - - - # If we have to allocate the output array (the out argument was not used) - # then us numpy.full if the user specified a cval stencil decorator option - # or np.zeros if they didn't to allocate the array. - if result is None: - if "cval" in self.options: - cval = self.options["cval"] - if return_type.dtype != typing.typeof.typeof(cval): - raise ValueError( - "cval type does not match stencil return type.") - - out_init ="{} = np.full({}, {}, dtype=np.{})\n".format( - out_name, shape_name, cval, return_type.dtype) - - else: - out_init ="{} = np.zeros({}, dtype=np.{})\n".format( - out_name, shape_name, return_type.dtype) - func_text += " " + out_init - - offset = 1 - # Add the loop nests to the new function. - for i in range(the_array.ndim): - for j in range(offset): - func_text += " " - # ranges[i][0] is the minimum index used in the i'th dimension - # but minimum's greater than 0 don't preclude any entry in the array. 
- # So, take the minimum of 0 and the minimum index found in the kernel - # and this will be a negative number (potentially -0). Then, we do - # unary - on that to get the positive offset in this dimension whose - # use is precluded. - # ranges[i][1] is the maximum of 0 and the observed maximum index - # in this dimension because negative maximums would not cause us to - # preclude any entry in the array from being used. - func_text += ("for {} in range(-min(0,{})," - "{}[{}]-max(0,{})):\n").format( - index_vars[i], - ranges[i][0], - shape_name, - i, - ranges[i][1]) - offset += 1 - - for j in range(offset): - func_text += " " - # Put a sentinel in the code so we can locate it in the IR. We will - # remove this sentinel assignment and replace it with the IR for the - # stencil kernel body. - func_text += "{} = 0\n".format(sentinel_name) - func_text += " return {}\n".format(out_name) - - if config.DEBUG_ARRAY_OPT == 1: - print("new stencil func text") - print(func_text) - - # Force the new stencil function into existence. - exec_(func_text) in globals(), locals() - stencil_func = eval(stencil_func_name) - if sigret is not None: - pysig = utils.pysignature(stencil_func) - sigret.pysig = pysig - # Get the IR for the newly created stencil function. - stencil_ir = compiler.run_frontend(stencil_func) - ir_utils.remove_dels(stencil_ir.blocks) - - # rename all variables in stencil_ir afresh - var_table = ir_utils.get_name_var_table(stencil_ir.blocks) - new_var_dict = {} - reserved_names = ([sentinel_name, out_name, neighborhood_name, - shape_name] + kernel_copy.arg_names + index_vars) - for name, var in var_table.items(): - if not name in reserved_names: - new_var_dict[name] = ir_utils.mk_unique_var(name) - ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict) - - stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1 - - # Shift lables in the kernel copy so they are guaranteed unique - # and don't conflict with any labels in the stencil_ir. 
- kernel_copy.blocks = ir_utils.add_offset_to_labels( - kernel_copy.blocks, stencil_stub_last_label) - new_label = max(kernel_copy.blocks.keys()) + 1 - # Adjust ret_blocks to account for addition of the offset. - ret_blocks = [x + stencil_stub_last_label for x in ret_blocks] - - if config.DEBUG_ARRAY_OPT == 1: - print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label) - print("before replace sentinel stencil_ir") - ir_utils.dump_blocks(stencil_ir.blocks) - print("before replace sentinel kernel_copy") - ir_utils.dump_blocks(kernel_copy.blocks) - - # Search all the block in the stencil outline for the sentinel. - for label, block in stencil_ir.blocks.items(): - for i, inst in enumerate(block.body): - if (isinstance( inst, ir.Assign) and - inst.target.name == sentinel_name): - # We found the sentinel assignment. - loc = inst.loc - scope = block.scope - # split block across __sentinel__ - # A new block is allocated for the statements prior to the - # sentinel but the new block maintains the current block - # label. - prev_block = ir.Block(scope, loc) - prev_block.body = block.body[:i] - # The current block is used for statements after sentinel. - block.body = block.body[i + 1:] - # But the current block gets a new label. - body_first_label = min(kernel_copy.blocks.keys()) - - # The previous block jumps to the minimum labelled block of - # the parfor body. - prev_block.append(ir.Jump(body_first_label, loc)) - # Add all the parfor loop body blocks to the gufunc - # function's IR. - for (l, b) in kernel_copy.blocks.items(): - stencil_ir.blocks[l] = b - - stencil_ir.blocks[new_label] = block - stencil_ir.blocks[label] = prev_block - # Add a jump from all the blocks that previously contained - # a return in the stencil kernel to the block - # containing statements after the sentinel. 
- for ret_block in ret_blocks: - stencil_ir.blocks[ret_block].append( - ir.Jump(new_label, loc)) - break - else: - continue - break - - stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks) - ir_utils.remove_dels(stencil_ir.blocks) - - assert(isinstance(the_array, types.Type)) - array_types = args - - new_stencil_param_types = list(array_types) - - if config.DEBUG_ARRAY_OPT == 1: - print("new_stencil_param_types", new_stencil_param_types) - ir_utils.dump_blocks(stencil_ir.blocks) - - # Compile the combined stencil function with the replaced loop - # body in it. - new_func = compiler.compile_ir( - self._typingctx, - self._targetctx, - stencil_ir, - new_stencil_param_types, - None, - compiler.DEFAULT_FLAGS, - {}) - return new_func - - def __call__(self, *args, **kwargs): - if (self.neighborhood is not None and - len(self.neighborhood) != args[0].ndim): - raise ValueError("{} dimensional neighborhood specified for {} " - "dimensional input array".format( - len(self.neighborhood), args[0].ndim)) - - if 'out' in kwargs: - result = kwargs['out'] - rdtype = result.dtype - rttype = numpy_support.from_dtype(rdtype) - result_type = types.npytypes.Array(rttype, result.ndim, - numpy_support.map_layout(result)) - array_types = tuple([typing.typeof.typeof(x) for x in args]) - array_types_full = tuple([typing.typeof.typeof(x) for x in args] + - [result_type]) - else: - result = None - array_types = tuple([typing.typeof.typeof(x) for x in args]) - array_types_full = array_types - - if config.DEBUG_ARRAY_OPT == 1: - print("__call__", array_types, args, kwargs) - - (real_ret, typemap, calltypes) = self.get_return_type(array_types) - new_func = self._stencil_wrapper(result, None, real_ret, typemap, - calltypes, *array_types_full) - - if result is None: - return new_func.entry_point(*args) - else: - return new_func.entry_point(*(args+(result,))) - -def stencil(func_or_mode='constant', **options): - # called on function without specifying mode style - if not 
isinstance(func_or_mode, str): - mode = 'constant' # default style - func = func_or_mode - else: - mode = func_or_mode - func = None - - for option in options: - if option not in ["cval", "standard_indexing", "neighborhood"]: - raise ValueError("Unknown stencil option " + option) - - wrapper = _stencil(mode, options) - if func is not None: - return wrapper(func) - return wrapper - -def _stencil(mode, options): - if mode != 'constant': - raise ValueError("Unsupported mode style " + mode) - - def decorated(func): - kernel_ir = compiler.run_frontend(func) - return StencilFunc(kernel_ir, mode, options) - - return decorated - -@lower_builtin(stencil) -def stencil_dummy_lower(context, builder, sig, args): - "lowering for dummy stencil calls" - return lir.Constant(lir.IntType(types.intp.bitwidth), 0) diff --git a/numba/numba/stencilparfor.py b/numba/numba/stencilparfor.py deleted file mode 100644 index aa906db2b..000000000 --- a/numba/numba/stencilparfor.py +++ /dev/null @@ -1,676 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -import numbers -import copy -import types as pytypes -from operator import add - -import numpy as np - -import numba -from numba import types -from numba.typing.templates import infer_global, AbstractTemplate -from numba.typing import signature -from numba import ir_utils, ir, utils, config, typing -from numba.ir_utils import (get_call_table, mk_unique_var, - compile_to_numba_ir, replace_arg_nodes, guard, - find_callname) -from numba.six import exec_ - - -def _compute_last_ind(dim_size, index_const): - if index_const > 0: - return dim_size - index_const - else: - return dim_size - -class StencilPass(object): - def __init__(self, func_ir, typemap, calltypes, array_analysis, typingctx, flags): - self.func_ir = func_ir - self.typemap = typemap - self.calltypes = calltypes - self.array_analysis = array_analysis - self.typingctx = typingctx - self.flags = flags - - def run(self): - """ Finds all calls to 
StencilFuncs in the IR and converts them to parfor. - """ - from numba.stencil import StencilFunc - - # Get all the calls in the function IR. - call_table, _ = get_call_table(self.func_ir.blocks) - stencil_calls = [] - stencil_dict = {} - for call_varname, call_list in call_table.items(): - if isinstance(call_list[0], StencilFunc): - # Remember all calls to StencilFuncs. - stencil_calls.append(call_varname) - stencil_dict[call_varname] = call_list[0] - if not stencil_calls: - return # return early if no stencil calls found - - # find and transform stencil calls - for label, block in self.func_ir.blocks.items(): - for i, stmt in reversed(list(enumerate(block.body))): - # Found a call to a StencilFunc. - if (isinstance(stmt, ir.Assign) - and isinstance(stmt.value, ir.Expr) - and stmt.value.op == 'call' - and stmt.value.func.name in stencil_calls): - kws = dict(stmt.value.kws) - # Create dictionary of input argument number to - # the argument itself. - input_dict = {i: stmt.value.args[i] for i in - range(len(stmt.value.args))} - in_args = stmt.value.args - arg_typemap = tuple(self.typemap[i.name] for i in in_args) - for arg_type in arg_typemap: - if isinstance(arg_type, types.BaseTuple): - raise ValueError("Tuple parameters not supported " \ - "for stencil kernels in parallel=True mode.") - - out_arr = kws.get('out') - - # Get the StencilFunc object corresponding to this call. - sf = stencil_dict[stmt.value.func.name] - stencil_ir, rt, arg_to_arr_dict = get_stencil_ir(sf, - self.typingctx, arg_typemap, - block.scope, block.loc, input_dict, - self.typemap, self.calltypes) - index_offsets = sf.options.get('index_offsets', None) - gen_nodes = self._mk_stencil_parfor(label, in_args, out_arr, - stencil_ir, index_offsets, stmt.target, rt, sf, - arg_to_arr_dict) - block.body = block.body[:i] + gen_nodes + block.body[i+1:] - # Found a call to a stencil via numba.stencil(). 
- elif (isinstance(stmt, ir.Assign) - and isinstance(stmt.value, ir.Expr) - and stmt.value.op == 'call' - and guard(find_callname, self.func_ir, stmt.value) - == ('stencil', 'numba')): - # remove dummy stencil() call - stmt.value = ir.Const(0, stmt.loc) - - def replace_return_with_setitem(self, blocks, exit_value_var, - parfor_body_exit_label): - """ - Find return statements in the IR and replace them with a SetItem - call of the value "returned" by the kernel into the result array. - Returns the block labels that contained return statements. - """ - for label, block in blocks.items(): - scope = block.scope - loc = block.loc - new_body = [] - for stmt in block.body: - if isinstance(stmt, ir.Return): - # previous stmt should have been a cast - prev_stmt = new_body.pop() - assert (isinstance(prev_stmt, ir.Assign) - and isinstance(prev_stmt.value, ir.Expr) - and prev_stmt.value.op == 'cast') - - new_body.append(ir.Assign(prev_stmt.value.value, exit_value_var, loc)) - new_body.append(ir.Jump(parfor_body_exit_label, loc)) - else: - new_body.append(stmt) - block.body = new_body - - def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir, - index_offsets, target, return_type, stencil_func, - arg_to_arr_dict): - """ Converts a set of stencil kernel blocks to a parfor. - """ - gen_nodes = [] - stencil_blocks = stencil_ir.blocks - - if config.DEBUG_ARRAY_OPT == 1: - print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets, - return_type, stencil_func, stencil_blocks) - ir_utils.dump_blocks(stencil_blocks) - - in_arr = in_args[0] - # run copy propagate to replace in_args copies (e.g. 
a = A) - in_arr_typ = self.typemap[in_arr.name] - in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap) - name_var_table = ir_utils.get_name_var_table(stencil_blocks) - - ir_utils.apply_copy_propagate( - stencil_blocks, - in_cps, - name_var_table, - self.typemap, - self.calltypes) - if config.DEBUG_ARRAY_OPT == 1: - print("stencil_blocks after copy_propagate") - ir_utils.dump_blocks(stencil_blocks) - ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names, stencil_ir, - self.typemap) - if config.DEBUG_ARRAY_OPT == 1: - print("stencil_blocks after removing dead code") - ir_utils.dump_blocks(stencil_blocks) - - # create parfor vars - ndims = self.typemap[in_arr.name].ndim - scope = in_arr.scope - loc = in_arr.loc - parfor_vars = [] - for i in range(ndims): - parfor_var = ir.Var(scope, mk_unique_var( - "$parfor_index_var"), loc) - self.typemap[parfor_var.name] = types.intp - parfor_vars.append(parfor_var) - - start_lengths, end_lengths = self._replace_stencil_accesses( - stencil_blocks, parfor_vars, in_args, index_offsets, stencil_func, - arg_to_arr_dict) - - if config.DEBUG_ARRAY_OPT == 1: - print("stencil_blocks after replace stencil accesses") - ir_utils.dump_blocks(stencil_blocks) - - # create parfor loop nests - loopnests = [] - equiv_set = self.array_analysis.get_equiv_set(label) - in_arr_dim_sizes = equiv_set.get_shape(in_arr) - - assert ndims == len(in_arr_dim_sizes) - for i in range(ndims): - last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i], - end_lengths[i], gen_nodes, scope, loc) - start_ind = self._get_stencil_start_ind( - start_lengths[i], gen_nodes, scope, loc) - # start from stencil size to avoid invalid array access - loopnests.append(numba.parfor.LoopNest(parfor_vars[i], - start_ind, last_ind, 1)) - - # We have to guarantee that the exit block has maximum label and that - # there's only one exit block for the parfor body. - # So, all return statements will change to jump to the parfor exit block. 
- parfor_body_exit_label = max(stencil_blocks.keys()) + 1 - stencil_blocks[parfor_body_exit_label] = ir.Block(scope, loc) - exit_value_var = ir.Var(scope, mk_unique_var("$parfor_exit_value"), loc) - self.typemap[exit_value_var.name] = return_type.dtype - - # create parfor index var - for_replacing_ret = [] - if ndims == 1: - parfor_ind_var = parfor_vars[0] - else: - parfor_ind_var = ir.Var(scope, mk_unique_var( - "$parfor_index_tuple_var"), loc) - self.typemap[parfor_ind_var.name] = types.containers.UniTuple( - types.intp, ndims) - tuple_call = ir.Expr.build_tuple(parfor_vars, loc) - tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc) - for_replacing_ret.append(tuple_assign) - - if config.DEBUG_ARRAY_OPT == 1: - print("stencil_blocks after creating parfor index var") - ir_utils.dump_blocks(stencil_blocks) - - # empty init block - init_block = ir.Block(scope, loc) - if out_arr == None: - in_arr_typ = self.typemap[in_arr.name] - - shape_name = ir_utils.mk_unique_var("in_arr_shape") - shape_var = ir.Var(scope, shape_name, loc) - shape_getattr = ir.Expr.getattr(in_arr, "shape", loc) - self.typemap[shape_name] = types.containers.UniTuple(types.intp, - in_arr_typ.ndim) - init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)]) - - zero_name = ir_utils.mk_unique_var("zero_val") - zero_var = ir.Var(scope, zero_name, loc) - if "cval" in stencil_func.options: - cval = stencil_func.options["cval"] - # TODO: Loosen this restriction to adhere to casting rules. 
- if return_type.dtype != typing.typeof.typeof(cval): - raise ValueError("cval type does not match stencil return type.") - - temp2 = return_type.dtype(cval) - else: - temp2 = return_type.dtype(0) - full_const = ir.Const(temp2, loc) - self.typemap[zero_name] = return_type.dtype - init_block.body.extend([ir.Assign(full_const, zero_var, loc)]) - - so_name = ir_utils.mk_unique_var("stencil_output") - out_arr = ir.Var(scope, so_name, loc) - self.typemap[out_arr.name] = numba.types.npytypes.Array( - return_type.dtype, - in_arr_typ.ndim, - in_arr_typ.layout) - dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) - self.typemap[dtype_g_np_var.name] = types.misc.Module(np) - dtype_g_np = ir.Global('np', np, loc) - dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc) - init_block.body.append(dtype_g_np_assign) - - dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var, return_type.dtype.name, loc) - dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc) - self.typemap[dtype_attr_var.name] = types.functions.NumberClass(return_type.dtype) - dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var, loc) - init_block.body.append(dtype_attr_assign) - - stmts = ir_utils.gen_np_call("full", - np.full, - out_arr, - [shape_var, zero_var, dtype_attr_var], - self.typingctx, - self.typemap, - self.calltypes) - equiv_set.insert_equiv(out_arr, in_arr_dim_sizes) - init_block.body.extend(stmts) - - self.replace_return_with_setitem(stencil_blocks, exit_value_var, - parfor_body_exit_label) - - if config.DEBUG_ARRAY_OPT == 1: - print("stencil_blocks after replacing return") - ir_utils.dump_blocks(stencil_blocks) - - setitem_call = ir.SetItem(out_arr, parfor_ind_var, exit_value_var, loc) - self.calltypes[setitem_call] = signature( - types.none, self.typemap[out_arr.name], - self.typemap[parfor_ind_var.name], - self.typemap[out_arr.name].dtype - ) - stencil_blocks[parfor_body_exit_label].body.extend(for_replacing_ret) - 
stencil_blocks[parfor_body_exit_label].body.append(setitem_call) - - # simplify CFG of parfor body (exit block could be simplified often) - # add dummy return to enable CFG - stencil_blocks[parfor_body_exit_label].body.append(ir.Return(0, - ir.Loc("stencilparfor_dummy", -1))) - stencil_blocks = ir_utils.simplify_CFG(stencil_blocks) - stencil_blocks[max(stencil_blocks.keys())].body.pop() - - if config.DEBUG_ARRAY_OPT == 1: - print("stencil_blocks after adding SetItem") - ir_utils.dump_blocks(stencil_blocks) - - pattern = ('stencil', [start_lengths, end_lengths]) - parfor = numba.parfor.Parfor(loopnests, init_block, stencil_blocks, - loc, parfor_ind_var, equiv_set, pattern, self.flags) - gen_nodes.append(parfor) - gen_nodes.append(ir.Assign(out_arr, target, loc)) - return gen_nodes - - def _get_stencil_last_ind(self, dim_size, end_length, gen_nodes, scope, - loc): - last_ind = dim_size - if end_length != 0: - # set last index to size minus stencil size to avoid invalid - # memory access - index_const = ir.Var(scope, mk_unique_var("stencil_const_var"), - loc) - self.typemap[index_const.name] = types.intp - if isinstance(end_length, numbers.Number): - const_assign = ir.Assign(ir.Const(end_length, loc), - index_const, loc) - else: - const_assign = ir.Assign(end_length, index_const, loc) - - gen_nodes.append(const_assign) - last_ind = ir.Var(scope, mk_unique_var("last_ind"), loc) - self.typemap[last_ind.name] = types.intp - - g_var = ir.Var(scope, mk_unique_var("compute_last_ind_var"), loc) - check_func = numba.njit(_compute_last_ind) - func_typ = types.functions.Dispatcher(check_func) - self.typemap[g_var.name] = func_typ - g_obj = ir.Global("_compute_last_ind", check_func, loc) - g_assign = ir.Assign(g_obj, g_var, loc) - gen_nodes.append(g_assign) - index_call = ir.Expr.call(g_var, [dim_size, index_const], (), loc) - self.calltypes[index_call] = func_typ.get_call_type( - self.typingctx, [types.intp, types.intp], {}) - index_assign = ir.Assign(index_call, last_ind, loc) 
- gen_nodes.append(index_assign) - - return last_ind - - def _get_stencil_start_ind(self, start_length, gen_nodes, scope, loc): - if isinstance(start_length, int): - return abs(min(start_length, 0)) - def get_start_ind(s_length): - return abs(min(s_length, 0)) - f_ir = compile_to_numba_ir(get_start_ind, {}, self.typingctx, - (types.intp,), self.typemap, self.calltypes) - assert len(f_ir.blocks) == 1 - block = f_ir.blocks.popitem()[1] - replace_arg_nodes(block, [start_length]) - gen_nodes += block.body[:-2] - ret_var = block.body[-2].value.value - return ret_var - - def _replace_stencil_accesses(self, stencil_blocks, parfor_vars, in_args, - index_offsets, stencil_func, arg_to_arr_dict): - """ Convert relative indexing in the stencil kernel to standard indexing - by adding the loop index variables to the corresponding dimensions - of the array index tuples. - """ - in_arr = in_args[0] - in_arg_names = [x.name for x in in_args] - - if "standard_indexing" in stencil_func.options: - for x in stencil_func.options["standard_indexing"]: - if x not in arg_to_arr_dict: - raise ValueError("Standard indexing requested for an array " \ - "name not present in the stencil kernel definition.") - standard_indexed = [arg_to_arr_dict[x] for x in - stencil_func.options["standard_indexing"]] - else: - standard_indexed = [] - - if in_arr.name in standard_indexed: - raise ValueError("The first argument to a stencil kernel must use " \ - "relative indexing, not standard indexing.") - - ndims = self.typemap[in_arr.name].ndim - scope = in_arr.scope - loc = in_arr.loc - # replace access indices, find access lengths in each dimension - need_to_calc_kernel = stencil_func.neighborhood is None - - # If we need to infer the kernel size then initialize the minimum and - # maximum seen indices for each dimension to 0. If we already have - # the neighborhood calculated then just convert from neighborhood format - # to the separate start and end lengths format used here. 
- if need_to_calc_kernel: - start_lengths = ndims*[0] - end_lengths = ndims*[0] - else: - start_lengths = [x[0] for x in stencil_func.neighborhood] - end_lengths = [x[1] for x in stencil_func.neighborhood] - - # Get all the tuples defined in the stencil blocks. - tuple_table = ir_utils.get_tuple_table(stencil_blocks) - - found_relative_index = False - - # For all blocks in the stencil kernel... - for label, block in stencil_blocks.items(): - new_body = [] - # For all statements in those blocks... - for stmt in block.body: - # Reject assignments to input arrays. - if ((isinstance(stmt, ir.Assign) - and isinstance(stmt.value, ir.Expr) - and stmt.value.op in ['setitem', 'static_setitem'] - and stmt.value.value.name in in_arg_names) or - ((isinstance(stmt, ir.SetItem) or - isinstance(stmt, ir.StaticSetItem)) - and stmt.target.name in in_arg_names)): - raise ValueError("Assignments to arrays passed to stencil kernels is not allowed.") - # We found a getitem for some array. If that array is an input - # array and isn't in the list of standard indexed arrays then - # update min and max seen indices if we are inferring the - # kernel size and create a new tuple where the relative offsets - # are added to loop index vars to get standard indexing. 
- if (isinstance(stmt, ir.Assign) - and isinstance(stmt.value, ir.Expr) - and stmt.value.op in ['static_getitem', 'getitem'] - and stmt.value.value.name in in_arg_names - and stmt.value.value.name not in standard_indexed): - index_list = stmt.value.index - # handle 1D case - if ndims == 1: - index_list = [index_list] - else: - if hasattr(index_list, 'name') and index_list.name in tuple_table: - index_list = tuple_table[index_list.name] - if index_offsets: - index_list = self._add_index_offsets(index_list, - list(index_offsets), new_body, scope, loc) - - # update min and max indices - if need_to_calc_kernel: - # all indices should be integer to be able to calculate - # neighborhood automatically - if (isinstance(index_list, ir.Var) or - any([not isinstance(v, int) for v in index_list])): - raise ValueError("Variable stencil index only " - "possible with known neighborhood") - start_lengths = list(map(min, start_lengths, - index_list)) - end_lengths = list(map(max, end_lengths, index_list)) - found_relative_index = True - - # update access indices - index_vars = self._add_index_offsets(parfor_vars, - list(index_list), new_body, scope, loc) - - # new access index tuple - if ndims == 1: - ind_var = index_vars[0] - else: - ind_var = ir.Var(scope, mk_unique_var( - "$parfor_index_ind_var"), loc) - self.typemap[ind_var.name] = types.containers.UniTuple( - types.intp, ndims) - tuple_call = ir.Expr.build_tuple(index_vars, loc) - tuple_assign = ir.Assign(tuple_call, ind_var, loc) - new_body.append(tuple_assign) - - # getitem return type is scalar if all indices are integer - if all([self.typemap[v.name] == types.intp - for v in index_vars]): - getitem_return_typ = self.typemap[ - stmt.value.value.name].dtype - else: - # getitem returns an array - getitem_return_typ = self.typemap[stmt.value.value.name] - # new getitem with the new index var - getitem_call = ir.Expr.getitem(stmt.value.value, ind_var, - loc) - self.calltypes[getitem_call] = signature( - getitem_return_typ, - 
self.typemap[stmt.value.value.name], - self.typemap[ind_var.name]) - stmt.value = getitem_call - - new_body.append(stmt) - block.body = new_body - if need_to_calc_kernel and not found_relative_index: - raise ValueError("Stencil kernel with no accesses to " \ - "relatively indexed arrays.") - - return start_lengths, end_lengths - - def _add_index_offsets(self, index_list, index_offsets, new_body, - scope, loc): - """ Does the actual work of adding loop index variables to the - relative index constants or variables. - """ - assert len(index_list) == len(index_offsets) - - # shortcut if all values are integer - if all([isinstance(v, int) for v in index_list+index_offsets]): - # add offsets in all dimensions - return list(map(add, index_list, index_offsets)) - - out_nodes = [] - index_vars = [] - for i in range(len(index_list)): - # new_index = old_index + offset - old_index_var = index_list[i] - if isinstance(old_index_var, int): - old_index_var = ir.Var(scope, - mk_unique_var("old_index_var"), loc) - self.typemap[old_index_var.name] = types.intp - const_assign = ir.Assign(ir.Const(index_list[i], loc), - old_index_var, loc) - out_nodes.append(const_assign) - - offset_var = index_offsets[i] - if isinstance(offset_var, int): - offset_var = ir.Var(scope, - mk_unique_var("offset_var"), loc) - self.typemap[offset_var.name] = types.intp - const_assign = ir.Assign(ir.Const(index_offsets[i], loc), - offset_var, loc) - out_nodes.append(const_assign) - - if (isinstance(old_index_var, slice) - or isinstance(self.typemap[old_index_var.name], - types.misc.SliceType)): - # only one arg can be slice - assert self.typemap[offset_var.name] == types.intp - index_var = self._add_offset_to_slice(old_index_var, offset_var, - out_nodes, scope, loc) - index_vars.append(index_var) - continue - - if (isinstance(offset_var, slice) - or isinstance(self.typemap[offset_var.name], - types.misc.SliceType)): - # only one arg can be slice - assert self.typemap[old_index_var.name] == types.intp - 
index_var = self._add_offset_to_slice(offset_var, old_index_var, - out_nodes, scope, loc) - index_vars.append(index_var) - continue - - index_var = ir.Var(scope, - mk_unique_var("offset_stencil_index"), loc) - self.typemap[index_var.name] = types.intp - index_call = ir.Expr.binop('+', old_index_var, - offset_var, loc) - self.calltypes[index_call] = ir_utils.find_op_typ('+', - [types.intp, types.intp]) - index_assign = ir.Assign(index_call, index_var, loc) - out_nodes.append(index_assign) - index_vars.append(index_var) - - new_body.extend(out_nodes) - return index_vars - - def _add_offset_to_slice(self, slice_var, offset_var, out_nodes, scope, - loc): - if isinstance(slice_var, slice): - f_text = """def f(offset): - return slice({} + offset, {} + offset) - """.format(slice_var.start, slice_var.stop) - loc = {} - exec_(f_text, {}, loc) - f = loc['f'] - args = [offset_var] - arg_typs = (types.intp,) - else: - def f(old_slice, offset): - return slice(old_slice.start + offset, old_slice.stop + offset) - args = [slice_var, offset_var] - slice_type = self.typemap[slice_var.name] - arg_typs = (slice_type, types.intp,) - _globals = self.func_ir.func_id.func.__globals__ - f_ir = compile_to_numba_ir(f, _globals, self.typingctx, arg_typs, - self.typemap, self.calltypes) - _, block = f_ir.blocks.popitem() - replace_arg_nodes(block, args) - new_index = block.body[-2].value.value - out_nodes.extend(block.body[:-2]) # ignore return nodes - return new_index - -def get_stencil_ir(sf, typingctx, args, scope, loc, input_dict, typemap, - calltypes): - """get typed IR from stencil bytecode - """ - from numba.targets.cpu import CPUContext - from numba.targets.registry import cpu_target - from numba.annotations import type_annotations - from numba.compiler import type_inference_stage - - # get untyped IR - stencil_func_ir = sf.kernel_ir.copy() - # copy the IR nodes to avoid changing IR in the StencilFunc object - stencil_blocks = copy.deepcopy(stencil_func_ir.blocks) - 
stencil_func_ir.blocks = stencil_blocks - - name_var_table = ir_utils.get_name_var_table(stencil_func_ir.blocks) - if "out" in name_var_table: - raise ValueError("Cannot use the reserved word 'out' in stencil kernels.") - - # get typed IR with a dummy pipeline (similar to test_parfors.py) - targetctx = CPUContext(typingctx) - with cpu_target.nested_context(typingctx, targetctx): - tp = DummyPipeline(typingctx, targetctx, args, stencil_func_ir) - - numba.rewrites.rewrite_registry.apply( - 'before-inference', tp, tp.func_ir) - - tp.typemap, tp.return_type, tp.calltypes = type_inference_stage( - tp.typingctx, tp.func_ir, tp.args, None) - - type_annotations.TypeAnnotation( - func_ir=tp.func_ir, - typemap=tp.typemap, - calltypes=tp.calltypes, - lifted=(), - lifted_from=None, - args=tp.args, - return_type=tp.return_type, - html_output=numba.config.HTML) - - # make block labels unique - stencil_blocks = ir_utils.add_offset_to_labels(stencil_blocks, - ir_utils.next_label()) - min_label = min(stencil_blocks.keys()) - max_label = max(stencil_blocks.keys()) - ir_utils._max_label = max_label - - if config.DEBUG_ARRAY_OPT == 1: - print("Initial stencil_blocks") - ir_utils.dump_blocks(stencil_blocks) - - # rename variables, - var_dict = {} - for v, typ in tp.typemap.items(): - new_var = ir.Var(scope, mk_unique_var(v), loc) - var_dict[v] = new_var - typemap[new_var.name] = typ # add new var type for overall function - ir_utils.replace_vars(stencil_blocks, var_dict) - - if config.DEBUG_ARRAY_OPT == 1: - print("After replace_vars") - ir_utils.dump_blocks(stencil_blocks) - - # add call types to overall function - for call, call_typ in tp.calltypes.items(): - calltypes[call] = call_typ - - arg_to_arr_dict = {} - # replace arg with arr - for block in stencil_blocks.values(): - for stmt in block.body: - if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Arg): - if config.DEBUG_ARRAY_OPT == 1: - print("input_dict", input_dict, stmt.value.index, - stmt.value.name, 
stmt.value.index in input_dict) - arg_to_arr_dict[stmt.value.name] = input_dict[stmt.value.index].name - stmt.value = input_dict[stmt.value.index] - - if config.DEBUG_ARRAY_OPT == 1: - print("arg_to_arr_dict", arg_to_arr_dict) - print("After replace arg with arr") - ir_utils.dump_blocks(stencil_blocks) - - ir_utils.remove_dels(stencil_blocks) - stencil_func_ir.blocks = stencil_blocks - return stencil_func_ir, sf.get_return_type(args)[0], arg_to_arr_dict - -class DummyPipeline(object): - def __init__(self, typingctx, targetctx, args, f_ir): - self.typingctx = typingctx - self.targetctx = targetctx - self.args = args - self.func_ir = f_ir - self.typemap = None - self.return_type = None - self.calltypes = None diff --git a/numba/numba/targets/__init__.py b/numba/numba/targets/__init__.py deleted file mode 100644 index c3961685a..000000000 --- a/numba/numba/targets/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from __future__ import absolute_import diff --git a/numba/numba/targets/arraymath.py b/numba/numba/targets/arraymath.py deleted file mode 100644 index 68cc4c1a1..000000000 --- a/numba/numba/targets/arraymath.py +++ /dev/null @@ -1,1779 +0,0 @@ -""" -Implementation of math operations on Array objects. 
-""" - -from __future__ import absolute_import, division, print_function - -import math -from collections import namedtuple -from enum import IntEnum - -import llvmlite.llvmpy.core as lc -import numpy as np -from llvmlite import ir -from llvmlite.llvmpy.core import Constant, Type - -from numba import cgutils, types, typing -from numba.errors import RequireConstValue, TypingError -from numba.extending import (intrinsic, overload, overload_method, - register_jitable) -from numba.numpy_support import as_dtype -from numba.numpy_support import version as numpy_version -from numba.targets.imputils import (impl_ret_borrowed, impl_ret_new_ref, - impl_ret_untracked, lower_builtin) -from numba.typing import signature - -from .arrayobj import _empty_nd_impl, load_item, make_array, store_item -from .linalg import ensure_blas - - -def _check_blas(): - # Checks if a BLAS is available so e.g. dot will work - try: - ensure_blas() - except ImportError: - return False - return True - -_HAVE_BLAS = _check_blas() - -@intrinsic -def _create_tuple_result_shape(tyctx, shape_list, shape_tuple): - """ - This routine converts shape list where the axis dimension has already - been popped to a tuple for indexing of the same size. The original shape - tuple is also required because it contains a length field at compile time - whereas the shape list does not. - """ - - # The new tuple's size is one less than the original tuple since axis - # dimension removed. - nd = len(shape_tuple) - 1 - # The return type of this intrinsic is an int tuple of length nd. - tupty = types.UniTuple(types.intp, nd) - # The function signature for this intrinsic. - function_sig = tupty(shape_list, shape_tuple) - - def codegen(cgctx, builder, signature, args): - lltupty = cgctx.get_value_type(tupty) - # Create an empty int tuple. - tup = cgutils.get_null_value(lltupty) - - # Get the shape list from the args and we don't need shape tuple. 
- [in_shape, _] = args - - def array_indexer(a, i): - return a[i] - - # loop to fill the tuple - for i in range(nd): - dataidx = cgctx.get_constant(types.intp, i) - # compile and call array_indexer - data = cgctx.compile_internal(builder, array_indexer, - types.intp(shape_list, types.intp), - [in_shape, dataidx]) - tup = builder.insert_value(tup, data, i) - return tup - - return function_sig, codegen - -@intrinsic(support_literals=True) -def _gen_index_tuple(tyctx, shape_tuple, value, axis): - """ - Generates a tuple that can be used to index a specific slice from an - array for sum with axis. shape_tuple is the size of the dimensions of - the input array. 'value' is the value to put in the indexing tuple - in the axis dimension and 'axis' is that dimension. For this to work, - axis has to be a const. - """ - if not isinstance(axis, types.Const): - raise RequireConstValue('axis argument must be a constant') - # Get the value of the axis constant. - axis_value = axis.value - # The length of the indexing tuple to be output. - nd = len(shape_tuple) - - # If the axis value is impossible for the given size array then - # just fake it like it was for axis 0. This will stop compile errors - # when it looks like it could be called from array_sum_axis but really - # can't because that routine checks the axis mismatch and raise an - # exception. - if axis_value >= nd: - axis_value = 0 - - # Calculate the type of the indexing tuple. All the non-axis - # dimensions have slice2 type and the axis dimension has int type. - before = axis_value - after = nd - before - 1 - types_list = ([types.slice2_type] * before) + \ - [types.intp] + \ - ([types.slice2_type] * after) - - # Creates the output type of the function. - tupty = types.Tuple(types_list) - # Defines the signature of the intrinsic. - function_sig = tupty(shape_tuple, value, axis) - - def codegen(cgctx, builder, signature, args): - lltupty = cgctx.get_value_type(tupty) - # Create an empty indexing tuple. 
- tup = cgutils.get_null_value(lltupty) - - # We only need value of the axis dimension here. - # The rest are constants defined above. - [_, value_arg, _] = args - - def create_full_slice(): - return slice(None, None) - - # loop to fill the tuple with slice(None,None) before - # the axis dimension. - - # compile and call create_full_slice - slice_data = cgctx.compile_internal(builder, create_full_slice, - types.slice2_type(), - []) - for i in range(0, axis_value): - tup = builder.insert_value(tup, slice_data, i) - - # Add the axis dimension 'value'. - tup = builder.insert_value(tup, value_arg, axis_value) - - # loop to fill the tuple with slice(None,None) after - # the axis dimension. - for i in range(axis_value + 1, nd): - tup = builder.insert_value(tup, slice_data, i) - return tup - - return function_sig, codegen - - -#---------------------------------------------------------------------------- -# Basic stats and aggregates - -@lower_builtin(np.sum, types.Array) -@lower_builtin("array.sum", types.Array) -def array_sum(context, builder, sig, args): - zero = sig.return_type(0) - - def array_sum_impl(arr): - c = zero - for v in np.nditer(arr): - c += v.item() - return c - - res = context.compile_internal(builder, array_sum_impl, sig, args, - locals=dict(c=sig.return_type)) - return impl_ret_borrowed(context, builder, sig.return_type, res) - -@lower_builtin(np.sum, types.Array, types.intp) -@lower_builtin(np.sum, types.Array, types.Const) -@lower_builtin("array.sum", types.Array, types.intp) -@lower_builtin("array.sum", types.Array, types.Const) -def array_sum_axis(context, builder, sig, args): - """ - The third parameter to gen_index_tuple that generates the indexing - tuples has to be a const so we can't just pass "axis" through since - that isn't const. We can check for specific values and have - different instances that do take consts. Supporting axis summation - only up to the fourth dimension for now. 
- """ - # typing/arraydecl.py:sum_expand defines the return type for sum with axis. - # It is one dimension less than the input array. - zero = sig.return_type.dtype(0) - - [ty_array, ty_axis] = sig.args - is_axis_const = False - const_axis_val = 0 - if isinstance(ty_axis, types.Const): - # this special-cases for constant axis - const_axis_val = ty_axis.value - # fix negative axis - if const_axis_val < 0: - const_axis_val = ty_array.ndim + const_axis_val - if const_axis_val < 0 or const_axis_val > ty_array.ndim: - raise ValueError("'axis' entry is out of bounds") - - ty_axis = context.typing_context.resolve_value_type(const_axis_val) - axis_val = context.get_constant(ty_axis, const_axis_val) - # rewrite arguments - args = args[0], axis_val - # rewrite sig - sig = sig.replace(args=[ty_array, ty_axis]) - is_axis_const = True - - def array_sum_impl_axis(arr, axis): - ndim = arr.ndim - - if not is_axis_const: - # Catch where axis is negative or greater than 3. - if axis < 0 or axis > 3: - raise ValueError("Numba does not support sum with axis" - "parameter outside the range 0 to 3.") - - # Catch the case where the user misspecifies the axis to be - # more than the number of the array's dimensions. - if axis >= ndim: - raise ValueError("axis is out of bounds for array") - - # Convert the shape of the input array to a list. - ashape = list(arr.shape) - # Get the length of the axis dimension. - axis_len = ashape[axis] - # Remove the axis dimension from the list of dimensional lengths. - ashape.pop(axis) - # Convert this shape list back to a tuple using above intrinsic. - ashape_without_axis = _create_tuple_result_shape(ashape, arr.shape) - # Tuple needed here to create output array with correct size. - result = np.full(ashape_without_axis, zero, type(zero)) - - # Iterate through the axis dimension. 
- for axis_index in range(axis_len): - if is_axis_const: - # constant specialized version works for any valid axis value - index_tuple_generic = _gen_index_tuple(arr.shape, axis_index, - const_axis_val) - result += arr[index_tuple_generic] - else: - # Generate a tuple used to index the input array. - # The tuple is ":" in all dimensions except the axis - # dimension where it is "axis_index". - if axis == 0: - index_tuple1 = _gen_index_tuple(arr.shape, axis_index, 0) - result += arr[index_tuple1] - elif axis == 1: - index_tuple2 = _gen_index_tuple(arr.shape, axis_index, 1) - result += arr[index_tuple2] - elif axis == 2: - index_tuple3 = _gen_index_tuple(arr.shape, axis_index, 2) - result += arr[index_tuple3] - elif axis == 3: - index_tuple4 = _gen_index_tuple(arr.shape, axis_index, 3) - result += arr[index_tuple4] - - return result - - res = context.compile_internal(builder, array_sum_impl_axis, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.prod, types.Array) -@lower_builtin("array.prod", types.Array) -def array_prod(context, builder, sig, args): - - def array_prod_impl(arr): - c = 1 - for v in np.nditer(arr): - c *= v.item() - return c - - res = context.compile_internal(builder, array_prod_impl, sig, args, - locals=dict(c=sig.return_type)) - return impl_ret_borrowed(context, builder, sig.return_type, res) - -@lower_builtin(np.cumsum, types.Array) -@lower_builtin("array.cumsum", types.Array) -def array_cumsum(context, builder, sig, args): - scalar_dtype = sig.return_type.dtype - dtype = as_dtype(scalar_dtype) - zero = scalar_dtype(0) - - def array_cumsum_impl(arr): - size = 1 - for i in arr.shape: - size = size * i - out = np.empty(size, dtype) - c = zero - for idx, v in enumerate(arr.flat): - c += v - out[idx] = c - return out - - res = context.compile_internal(builder, array_cumsum_impl, sig, args, - locals=dict(c=scalar_dtype)) - return impl_ret_new_ref(context, builder, sig.return_type, res) - - - 
-@lower_builtin(np.cumprod, types.Array) -@lower_builtin("array.cumprod", types.Array) -def array_cumprod(context, builder, sig, args): - scalar_dtype = sig.return_type.dtype - dtype = as_dtype(scalar_dtype) - - def array_cumprod_impl(arr): - size = 1 - for i in arr.shape: - size = size * i - out = np.empty(size, dtype) - c = 1 - for idx, v in enumerate(arr.flat): - c *= v - out[idx] = c - return out - - res = context.compile_internal(builder, array_cumprod_impl, sig, args, - locals=dict(c=scalar_dtype)) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.mean, types.Array) -@lower_builtin("array.mean", types.Array) -def array_mean(context, builder, sig, args): - zero = sig.return_type(0) - - def array_mean_impl(arr): - # Can't use the naive `arr.sum() / arr.size`, as it would return - # a wrong result on integer sum overflow. - c = zero - for v in np.nditer(arr): - c += v.item() - return c / arr.size - - res = context.compile_internal(builder, array_mean_impl, sig, args, - locals=dict(c=sig.return_type)) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin(np.var, types.Array) -@lower_builtin("array.var", types.Array) -def array_var(context, builder, sig, args): - def array_var_impl(arr): - # Compute the mean - m = arr.mean() - - # Compute the sum of square diffs - ssd = 0 - for v in np.nditer(arr): - val = (v.item() - m) - ssd += np.real(val * np.conj(val)) - return ssd / arr.size - - res = context.compile_internal(builder, array_var_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin(np.std, types.Array) -@lower_builtin("array.std", types.Array) -def array_std(context, builder, sig, args): - def array_std_impl(arry): - return arry.var() ** 0.5 - res = context.compile_internal(builder, array_std_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin(np.min, types.Array) 
-@lower_builtin("array.min", types.Array) -def array_min(context, builder, sig, args): - ty = sig.args[0].dtype - if isinstance(ty, (types.NPDatetime, types.NPTimedelta)): - # NaT is smaller than every other value, but it is - # ignored as far as min() is concerned. - nat = ty('NaT') - - def array_min_impl(arry): - min_value = nat - it = np.nditer(arry) - for view in it: - v = view.item() - if v != nat: - min_value = v - break - - for view in it: - v = view.item() - if v != nat and v < min_value: - min_value = v - return min_value - - else: - def array_min_impl(arry): - it = np.nditer(arry) - for view in it: - min_value = view.item() - break - - for view in it: - v = view.item() - if v < min_value: - min_value = v - return min_value - res = context.compile_internal(builder, array_min_impl, sig, args) - return impl_ret_borrowed(context, builder, sig.return_type, res) - - -@lower_builtin(np.max, types.Array) -@lower_builtin("array.max", types.Array) -def array_max(context, builder, sig, args): - def array_max_impl(arry): - it = np.nditer(arry) - for view in it: - max_value = view.item() - break - - for view in it: - v = view.item() - if v > max_value: - max_value = v - return max_value - res = context.compile_internal(builder, array_max_impl, sig, args) - return impl_ret_borrowed(context, builder, sig.return_type, res) - - -@lower_builtin(np.argmin, types.Array) -@lower_builtin("array.argmin", types.Array) -def array_argmin(context, builder, sig, args): - ty = sig.args[0].dtype - # NOTE: Under Numpy < 1.10, argmin() is inconsistent with min() on NaT values: - # https://github.com/numpy/numpy/issues/6030 - - if (numpy_version >= (1, 10) and - isinstance(ty, (types.NPDatetime, types.NPTimedelta))): - # NaT is smaller than every other value, but it is - # ignored as far as argmin() is concerned. 
- nat = ty('NaT') - - def array_argmin_impl(arry): - min_value = nat - min_idx = 0 - it = arry.flat - idx = 0 - for v in it: - if v != nat: - min_value = v - min_idx = idx - idx += 1 - break - idx += 1 - - for v in it: - if v != nat and v < min_value: - min_value = v - min_idx = idx - idx += 1 - return min_idx - - else: - def array_argmin_impl(arry): - for v in arry.flat: - min_value = v - min_idx = 0 - break - - idx = 0 - for v in arry.flat: - if v < min_value: - min_value = v - min_idx = idx - idx += 1 - return min_idx - res = context.compile_internal(builder, array_argmin_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin(np.argmax, types.Array) -@lower_builtin("array.argmax", types.Array) -def array_argmax(context, builder, sig, args): - def array_argmax_impl(arry): - for v in arry.flat: - max_value = v - max_idx = 0 - break - - idx = 0 - for v in arry.flat: - if v > max_value: - max_value = v - max_idx = idx - idx += 1 - return max_idx - res = context.compile_internal(builder, array_argmax_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@overload(np.all) -@overload_method(types.Array, "all") -def np_all(a): - def flat_all(a): - for v in np.nditer(a): - if not v.item(): - return False - return True - - return flat_all - -@overload(np.any) -@overload_method(types.Array, "any") -def np_any(a): - def flat_any(a): - for v in np.nditer(a): - if v.item(): - return True - return False - - return flat_any - - -def get_isnan(dtype): - """ - A generic isnan() function - """ - if isinstance(dtype, (types.Float, types.Complex)): - return np.isnan - else: - @register_jitable - def _trivial_isnan(x): - return False - return _trivial_isnan - - -@overload(np.nanmin) -def np_nanmin(a): - if not isinstance(a, types.Array): - return - isnan = get_isnan(a.dtype) - - def nanmin_impl(a): - if a.size == 0: - raise ValueError("nanmin(): empty array") - for view in np.nditer(a): - minval = 
view.item() - break - for view in np.nditer(a): - v = view.item() - if not minval < v and not isnan(v): - minval = v - return minval - - return nanmin_impl - -@overload(np.nanmax) -def np_nanmax(a): - if not isinstance(a, types.Array): - return - isnan = get_isnan(a.dtype) - - def nanmax_impl(a): - if a.size == 0: - raise ValueError("nanmin(): empty array") - for view in np.nditer(a): - maxval = view.item() - break - for view in np.nditer(a): - v = view.item() - if not maxval > v and not isnan(v): - maxval = v - return maxval - - return nanmax_impl - -if numpy_version >= (1, 8): - @overload(np.nanmean) - def np_nanmean(a): - if not isinstance(a, types.Array): - return - isnan = get_isnan(a.dtype) - - def nanmean_impl(arr): - c = 0.0 - count = 0 - for view in np.nditer(arr): - v = view.item() - if not isnan(v): - c += v.item() - count += 1 - # np.divide() doesn't raise ZeroDivisionError - return np.divide(c, count) - - return nanmean_impl - - @overload(np.nanvar) - def np_nanvar(a): - if not isinstance(a, types.Array): - return - isnan = get_isnan(a.dtype) - - def nanvar_impl(arr): - # Compute the mean - m = np.nanmean(arr) - - # Compute the sum of square diffs - ssd = 0.0 - count = 0 - for view in np.nditer(arr): - v = view.item() - if not isnan(v): - val = (v.item() - m) - ssd += np.real(val * np.conj(val)) - count += 1 - # np.divide() doesn't raise ZeroDivisionError - return np.divide(ssd, count) - - return nanvar_impl - - @overload(np.nanstd) - def np_nanstd(a): - if not isinstance(a, types.Array): - return - - def nanstd_impl(arr): - return np.nanvar(arr) ** 0.5 - - return nanstd_impl - -@overload(np.nansum) -def np_nansum(a): - if not isinstance(a, types.Array): - return - if isinstance(a.dtype, types.Integer): - retty = types.intp - else: - retty = a.dtype - zero = retty(0) - isnan = get_isnan(a.dtype) - - def nansum_impl(arr): - c = zero - for view in np.nditer(arr): - v = view.item() - if not isnan(v): - c += v - return c - - return nansum_impl - -if 
numpy_version >= (1, 10): - @overload(np.nanprod) - def np_nanprod(a): - if not isinstance(a, types.Array): - return - if isinstance(a.dtype, types.Integer): - retty = types.intp - else: - retty = a.dtype - one = retty(1) - isnan = get_isnan(a.dtype) - - def nanprod_impl(arr): - c = one - for view in np.nditer(arr): - v = view.item() - if not isnan(v): - c *= v - return c - - return nanprod_impl - -#---------------------------------------------------------------------------- -# Median and partitioning - -@register_jitable -def _partition(A, low, high): - mid = (low + high) >> 1 - # NOTE: the pattern of swaps below for the pivot choice and the - # partitioning gives good results (i.e. regular O(n log n)) - # on sorted, reverse-sorted, and uniform arrays. Subtle changes - # risk breaking this property. - - # Use median of three {low, middle, high} as the pivot - if A[mid] < A[low]: - A[low], A[mid] = A[mid], A[low] - if A[high] < A[mid]: - A[high], A[mid] = A[mid], A[high] - if A[mid] < A[low]: - A[low], A[mid] = A[mid], A[low] - pivot = A[mid] - - A[high], A[mid] = A[mid], A[high] - i = low - j = high - 1 - while True: - while i < high and A[i] < pivot: - i += 1 - while j >= low and pivot < A[j]: - j -= 1 - if i >= j: - break - A[i], A[j] = A[j], A[i] - i += 1 - j -= 1 - # Put the pivot back in its final place (all items before `i` - # are smaller than the pivot, all items at/after `i` are larger) - A[i], A[high] = A[high], A[i] - return i - -@register_jitable -def _select(arry, k, low, high): - """ - Select the k'th smallest element in array[low:high + 1]. - """ - i = _partition(arry, low, high) - while i != k: - if i < k: - low = i + 1 - i = _partition(arry, low, high) - else: - high = i - 1 - i = _partition(arry, low, high) - return arry[k] - -@register_jitable -def _select_two(arry, k, low, high): - """ - Select the k'th and k+1'th smallest elements in array[low:high + 1]. - - This is significantly faster than doing two independent selections - for k and k+1. 
- """ - while True: - assert high > low # by construction - i = _partition(arry, low, high) - if i < k: - low = i + 1 - elif i > k + 1: - high = i - 1 - elif i == k: - _select(arry, k + 1, i + 1, high) - break - else: # i == k + 1 - _select(arry, k, low, i - 1) - break - - return arry[k], arry[k + 1] - -@register_jitable -def _median_inner(temp_arry, n): - """ - The main logic of the median() call. *temp_arry* must be disposable, - as this function will mutate it. - """ - low = 0 - high = n - 1 - half = n >> 1 - if n & 1 == 0: - a, b = _select_two(temp_arry, half - 1, low, high) - return (a + b) / 2 - else: - return _select(temp_arry, half, low, high) - -@overload(np.median) -def np_median(a): - if not isinstance(a, types.Array): - return - - def median_impl(arry): - # np.median() works on the flattened array, and we need a temporary - # workspace anyway - temp_arry = arry.flatten() - n = temp_arry.shape[0] - return _median_inner(temp_arry, n) - - return median_impl - -@register_jitable -def _collect_percentiles_inner(a, q): - n = len(a) - - if n == 1: - # single element array; output same for all percentiles - out = np.full(len(q), a[0], dtype=np.float64) - else: - out = np.empty(len(q), dtype=np.float64) - for i in range(len(q)): - percentile = q[i] - - # bypass pivoting where requested percentile is 100 - if percentile == 100: - val = np.max(a) - # heuristics to handle infinite values a la NumPy - if ~np.all(np.isfinite(a)): - if ~np.isfinite(val): - val = np.nan - - # bypass pivoting where requested percentile is 0 - elif percentile == 0: - val = np.min(a) - # convoluted heuristics to handle infinite values a la NumPy - if ~np.all(np.isfinite(a)): - num_pos_inf = np.sum(a == np.inf) - num_neg_inf = np.sum(a == -np.inf) - num_finite = n - (num_neg_inf + num_pos_inf) - if num_finite == 0: - val = np.nan - if num_pos_inf == 1 and n == 2: - val = np.nan - if num_neg_inf > 1: - val = np.nan - if num_finite == 1: - if num_pos_inf > 1: - if num_neg_inf != 1: - val = 
np.nan - - else: - # linear interp between closest ranks - rank = 1 + (n - 1) * np.true_divide(percentile, 100.0) - f = math.floor(rank) - m = rank - f - lower, upper = _select_two(a, k=int(f - 1), low=0, high=(n - 1)) - val = lower * (1 - m) + upper * m - out[i] = val - - return out - -@register_jitable -def _can_collect_percentiles(a, nan_mask, skip_nan): - if skip_nan: - a = a[~nan_mask] - if len(a) == 0: - return False # told to skip nan, but no elements remain - else: - if np.any(nan_mask): - return False # told *not* to skip nan, but nan encountered - - if len(a) == 1: # single element array - val = a[0] - return np.isfinite(val) # can collect percentiles if element is finite - else: - return True - -@register_jitable -def _collect_percentiles(a, q, skip_nan=False): - if np.any(np.isnan(q)) or np.any(q < 0) or np.any(q > 100): - raise ValueError('Percentiles must be in the range [0,100]') - - temp_arry = a.flatten() - nan_mask = np.isnan(temp_arry) - - if _can_collect_percentiles(temp_arry, nan_mask, skip_nan): - temp_arry = temp_arry[~nan_mask] - out = _collect_percentiles_inner(temp_arry, q) - else: - out = np.full(len(q), np.nan) - - return out - -def _np_percentile_impl(a, q, skip_nan): - def np_percentile_q_scalar_impl(a, q): - percentiles = np.array([q]) - return _collect_percentiles(a, percentiles, skip_nan=skip_nan)[0] - - def np_percentile_q_sequence_impl(a, q): - percentiles = np.array(q) - return _collect_percentiles(a, percentiles, skip_nan=skip_nan) - - def np_percentile_q_array_impl(a, q): - return _collect_percentiles(a, q, skip_nan=skip_nan) - - if isinstance(q, (types.Float, types.Integer, types.Boolean)): - return np_percentile_q_scalar_impl - - elif isinstance(q, (types.Tuple, types.Sequence)): - return np_percentile_q_sequence_impl - - elif isinstance(q, types.Array): - return np_percentile_q_array_impl - -if numpy_version >= (1, 10): - @overload(np.percentile) - def np_percentile(a, q): - # Note: np.percentile behaviour in the case of an 
array containing one or - # more NaNs was changed in numpy 1.10 to return an array of np.NaN of - # length equal to q, hence version guard. - return _np_percentile_impl(a, q, skip_nan=False) - -if numpy_version >= (1, 11): - @overload(np.nanpercentile) - def np_nanpercentile(a, q): - # Note: np.nanpercentile return type in the case of an all-NaN slice - # was changed in 1.11 to be an array of np.NaN of length equal to q, - # hence version guard. - return _np_percentile_impl(a, q, skip_nan=True) - -if numpy_version >= (1, 9): - @overload(np.nanmedian) - def np_nanmedian(a): - if not isinstance(a, types.Array): - return - isnan = get_isnan(a.dtype) - - def nanmedian_impl(arry): - # Create a temporary workspace with only non-NaN values - temp_arry = np.empty(arry.size, arry.dtype) - n = 0 - for view in np.nditer(arry): - v = view.item() - if not isnan(v): - temp_arry[n] = v - n += 1 - - # all NaNs - if n == 0: - return np.nan - - return _median_inner(temp_arry, n) - - return nanmedian_impl - -#---------------------------------------------------------------------------- -# Element-wise computations - -def _np_round_intrinsic(tp): - # np.round() always rounds half to even - return "llvm.rint.f%d" % (tp.bitwidth,) - -def _np_round_float(context, builder, tp, val): - llty = context.get_value_type(tp) - module = builder.module - fnty = lc.Type.function(llty, [llty]) - fn = module.get_or_insert_function(fnty, name=_np_round_intrinsic(tp)) - return builder.call(fn, (val,)) - -@lower_builtin(np.round, types.Float) -def scalar_round_unary(context, builder, sig, args): - res = _np_round_float(context, builder, sig.args[0], args[0]) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin(np.round, types.Integer) -def scalar_round_unary(context, builder, sig, args): - res = args[0] - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin(np.round, types.Complex) -def scalar_round_unary_complex(context, builder, sig, 
args): - fltty = sig.args[0].underlying_float - z = context.make_complex(builder, sig.args[0], args[0]) - z.real = _np_round_float(context, builder, fltty, z.real) - z.imag = _np_round_float(context, builder, fltty, z.imag) - res = z._getvalue() - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin(np.round, types.Float, types.Integer) -@lower_builtin(np.round, types.Integer, types.Integer) -def scalar_round_binary_float(context, builder, sig, args): - def round_ndigits(x, ndigits): - if math.isinf(x) or math.isnan(x): - return x - - # NOTE: this is CPython's algorithm, but perhaps this is overkill - # when emulating Numpy's behaviour. - if ndigits >= 0: - if ndigits > 22: - # pow1 and pow2 are each safe from overflow, but - # pow1*pow2 ~= pow(10.0, ndigits) might overflow. - pow1 = 10.0 ** (ndigits - 22) - pow2 = 1e22 - else: - pow1 = 10.0 ** ndigits - pow2 = 1.0 - y = (x * pow1) * pow2 - if math.isinf(y): - return x - return (np.round(y) / pow2) / pow1 - - else: - pow1 = 10.0 ** (-ndigits) - y = x / pow1 - return np.round(y) * pow1 - - res = context.compile_internal(builder, round_ndigits, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin(np.round, types.Complex, types.Integer) -def scalar_round_binary_complex(context, builder, sig, args): - def round_ndigits(z, ndigits): - return complex(np.round(z.real, ndigits), - np.round(z.imag, ndigits)) - - res = context.compile_internal(builder, round_ndigits, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin(np.round, types.Array, types.Integer, - types.Array) -def array_round(context, builder, sig, args): - def array_round_impl(arr, decimals, out): - if arr.shape != out.shape: - raise ValueError("invalid output shape") - for index, val in np.ndenumerate(arr): - out[index] = np.round(val, decimals) - return out - - res = context.compile_internal(builder, array_round_impl, sig, args) - return 
impl_ret_new_ref(context, builder, sig.return_type, res) - - -@lower_builtin(np.sinc, types.Array) -def array_sinc(context, builder, sig, args): - def array_sinc_impl(arr): - out = np.zeros_like(arr) - for index, val in np.ndenumerate(arr): - out[index] = np.sinc(val) - return out - res = context.compile_internal(builder, array_sinc_impl, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.sinc, types.Number) -def scalar_sinc(context, builder, sig, args): - scalar_dtype = sig.return_type - def scalar_sinc_impl(val): - if val == 0.e0: # to match np impl - val = 1e-20 - val *= np.pi # np sinc is the normalised variant - return np.sin(val)/val - res = context.compile_internal(builder, scalar_sinc_impl, sig, args, - locals=dict(c=scalar_dtype)) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin(np.angle, types.Number) -@lower_builtin(np.angle, types.Number, types.Boolean) -def scalar_angle_kwarg(context, builder, sig, args): - deg_mult = sig.return_type(180 / np.pi) - def scalar_angle_impl(val, deg): - if deg: - return np.arctan2(val.imag, val.real) * deg_mult - else: - return np.arctan2(val.imag, val.real) - - if len(args) == 1: - args = args + (cgutils.false_bit,) - sig = signature(sig.return_type, *(sig.args + (types.boolean,))) - res = context.compile_internal(builder, scalar_angle_impl, - sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin(np.angle, types.Array) -@lower_builtin(np.angle, types.Array, types.Boolean) -def array_angle_kwarg(context, builder, sig, args): - arg = sig.args[0] - ret_dtype = sig.return_type.dtype - - def array_angle_impl(arr, deg): - out = np.zeros_like(arr, dtype=ret_dtype) - for index, val in np.ndenumerate(arr): - out[index] = np.angle(val, deg) - return out - - if len(args) == 1: - args = args + (cgutils.false_bit,) - sig = signature(sig.return_type, *(sig.args + (types.boolean,))) - - res = 
context.compile_internal(builder, array_angle_impl, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - - -@lower_builtin(np.nonzero, types.Array) -@lower_builtin("array.nonzero", types.Array) -@lower_builtin(np.where, types.Array) -def array_nonzero(context, builder, sig, args): - aryty = sig.args[0] - # Return type is a N-tuple of 1D C-contiguous arrays - retty = sig.return_type - outaryty = retty.dtype - ndim = aryty.ndim - nouts = retty.count - - ary = make_array(aryty)(context, builder, args[0]) - shape = cgutils.unpack_tuple(builder, ary.shape) - strides = cgutils.unpack_tuple(builder, ary.strides) - data = ary.data - layout = aryty.layout - - # First count the number of non-zero elements - zero = context.get_constant(types.intp, 0) - one = context.get_constant(types.intp, 1) - count = cgutils.alloca_once_value(builder, zero) - with cgutils.loop_nest(builder, shape, zero.type) as indices: - ptr = cgutils.get_item_pointer2(builder, data, shape, strides, - layout, indices) - val = load_item(context, builder, aryty, ptr) - nz = context.is_true(builder, aryty.dtype, val) - with builder.if_then(nz): - builder.store(builder.add(builder.load(count), one), count) - - # Then allocate output arrays of the right size - out_shape = (builder.load(count),) - outs = [_empty_nd_impl(context, builder, outaryty, out_shape)._getvalue() - for i in range(nouts)] - outarys = [make_array(outaryty)(context, builder, out) for out in outs] - out_datas = [out.data for out in outarys] - - # And fill them up - index = cgutils.alloca_once_value(builder, zero) - with cgutils.loop_nest(builder, shape, zero.type) as indices: - ptr = cgutils.get_item_pointer2(builder, data, shape, strides, - layout, indices) - val = load_item(context, builder, aryty, ptr) - nz = context.is_true(builder, aryty.dtype, val) - with builder.if_then(nz): - # Store element indices in output arrays - if not indices: - # For a 0-d array, store 0 in the unique output array - indices = (zero,) 
- cur = builder.load(index) - for i in range(nouts): - ptr = cgutils.get_item_pointer2(builder, out_datas[i], - out_shape, (), - 'C', [cur]) - store_item(context, builder, outaryty, indices[i], ptr) - builder.store(builder.add(cur, one), index) - - tup = context.make_tuple(builder, sig.return_type, outs) - return impl_ret_new_ref(context, builder, sig.return_type, tup) - - -def array_where(context, builder, sig, args): - """ - np.where(array, array, array) - """ - layouts = set(a.layout for a in sig.args) - if layouts == set('C'): - # Faster implementation for C-contiguous arrays - def where_impl(cond, x, y): - shape = cond.shape - if x.shape != shape or y.shape != shape: - raise ValueError("all inputs should have the same shape") - res = np.empty_like(x) - cf = cond.flat - xf = x.flat - yf = y.flat - rf = res.flat - for i in range(cond.size): - rf[i] = xf[i] if cf[i] else yf[i] - return res - else: - - def where_impl(cond, x, y): - shape = cond.shape - if x.shape != shape or y.shape != shape: - raise ValueError("all inputs should have the same shape") - res = np.empty_like(x) - for idx, c in np.ndenumerate(cond): - res[idx] = x[idx] if c else y[idx] - return res - - res = context.compile_internal(builder, where_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin(np.where, types.Any, types.Any, types.Any) -def any_where(context, builder, sig, args): - cond = sig.args[0] - if isinstance(cond, types.Array): - return array_where(context, builder, sig, args) - - def scalar_where_impl(cond, x, y): - """ - np.where(scalar, scalar, scalar): return a 0-dim array - """ - scal = x if cond else y - # This is the equivalent of np.full_like(scal, scal), - # for compatibility with Numpy < 1.8 - arr = np.empty_like(scal) - arr[()] = scal - return arr - - res = context.compile_internal(builder, scalar_where_impl, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - - -@overload(np.real) -def np_real(a): 
- def np_real_impl(a): - return a.real - - return np_real_impl - - -@overload(np.imag) -def np_imag(a): - def np_imag_impl(a): - return a.imag - - return np_imag_impl - - -#---------------------------------------------------------------------------- -# Misc functions - -@overload(np.diff) -def np_diff_impl(a, n=1): - if not isinstance(a, types.Array) or a.ndim == 0: - return - - def diff_impl(a, n=1): - if n == 0: - return a.copy() - if n < 0: - raise ValueError("diff(): order must be non-negative") - size = a.shape[-1] - out_shape = a.shape[:-1] + (max(size - n, 0),) - out = np.empty(out_shape, a.dtype) - if out.size == 0: - return out - - # np.diff() works on each last dimension subarray independently. - # To make things easier, normalize input and output into 2d arrays - a2 = a.reshape((-1, size)) - out2 = out.reshape((-1, out.shape[-1])) - # A scratchpad for subarrays - work = np.empty(size, a.dtype) - - for major in range(a2.shape[0]): - # First iteration: diff a2 into work - for i in range(size - 1): - work[i] = a2[major, i + 1] - a2[major, i] - # Other iterations: diff work into itself - for niter in range(1, n): - for i in range(size - niter - 1): - work[i] = work[i + 1] - work[i] - # Copy final diff into out2 - out2[major] = work[:size - n] - - return out - - return diff_impl - - -def validate_1d_array_like(func_name, seq): - if isinstance(seq, types.Array): - if seq.ndim != 1: - raise TypeError("{0}(): input should have dimension 1" - .format(func_name)) - elif not isinstance(seq, types.Sequence): - raise TypeError("{0}(): input should be an array or sequence" - .format(func_name)) - - -@overload(np.bincount) -def np_bincount(a, weights=None): - validate_1d_array_like("bincount", a) - if not isinstance(a.dtype, types.Integer): - return - - if weights not in (None, types.none): - validate_1d_array_like("bincount", weights) - out_dtype = weights.dtype - - @register_jitable - def validate_inputs(a, weights): - if len(a) != len(weights): - raise 
ValueError("bincount(): weights and list don't have the same length") - - @register_jitable - def count_item(out, idx, val, weights): - out[val] += weights[idx] - - else: - out_dtype = types.intp - - @register_jitable - def validate_inputs(a, weights): - pass - - @register_jitable - def count_item(out, idx, val, weights): - out[val] += 1 - - def bincount_impl(a, weights=None): - validate_inputs(a, weights) - n = len(a) - - a_max = a[0] if n > 0 else -1 - for i in range(1, n): - if a[i] < 0: - raise ValueError("bincount(): first argument must be non-negative") - a_max = max(a_max, a[i]) - - out = np.zeros(a_max + 1, out_dtype) - for i in range(n): - count_item(out, i, a[i], weights) - return out - - return bincount_impl - -def _searchsorted(func): - def searchsorted_inner(a, v): - n = len(a) - if np.isnan(v): - # Find the first nan (i.e. the last from the end of a, - # since there shouldn't be many of them in practice) - for i in range(n, 0, -1): - if not np.isnan(a[i - 1]): - return i - return 0 - lo = 0 - hi = n - while hi > lo: - mid = (lo + hi) >> 1 - if func(a[mid], (v)): - # mid is too low => go up - lo = mid + 1 - else: - # mid is too high, or is a NaN => go down - hi = mid - return lo - return searchsorted_inner - -_lt = register_jitable(lambda x, y: x < y) -_le = register_jitable(lambda x, y: x <= y) -_searchsorted_left = register_jitable(_searchsorted(_lt)) -_searchsorted_right = register_jitable(_searchsorted(_le)) - -@overload(np.searchsorted) -def searchsorted(a, v, side='left'): - side_val = getattr(side, 'value', side) - if side_val == 'left': - loop_impl = _searchsorted_left - elif side_val == 'right': - loop_impl = _searchsorted_right - else: - raise ValueError("Invalid value given for 'side': %s" % side_val) - - if isinstance(v, types.Array): - # N-d array and output - def searchsorted_impl(a, v, side='left'): - out = np.empty(v.shape, np.intp) - for view, outview in np.nditer((v, out)): - index = loop_impl(a, view.item()) - outview.itemset(index) 
- return out - - elif isinstance(v, types.Sequence): - # 1-d sequence and output - def searchsorted_impl(a, v, side='left'): - out = np.empty(len(v), np.intp) - for i in range(len(v)): - out[i] = loop_impl(a, v[i]) - return out - else: - # Scalar value and output - # Note: NaNs come last in Numpy-sorted arrays - def searchsorted_impl(a, v, side='left'): - return loop_impl(a, v) - - return searchsorted_impl - -@overload(np.digitize) -def np_digitize(x, bins, right=False): - @register_jitable - def are_bins_increasing(bins): - n = len(bins) - is_increasing = True - is_decreasing = True - if n > 1: - prev = bins[0] - for i in range(1, n): - cur = bins[i] - is_increasing = is_increasing and not prev > cur - is_decreasing = is_decreasing and not prev < cur - if not is_increasing and not is_decreasing: - raise ValueError("bins must be monotonically increasing or decreasing") - prev = cur - return is_increasing - - # NOTE: the algorithm is slightly different from searchsorted's, - # as the edge cases (bin boundaries, NaN) give different results. - - @register_jitable - def digitize_scalar(x, bins, right): - # bins are monotonically-increasing - n = len(bins) - lo = 0 - hi = n - - if right: - if np.isnan(x): - # Find the first nan (i.e. 
the last from the end of bins, - # since there shouldn't be many of them in practice) - for i in range(n, 0, -1): - if not np.isnan(bins[i - 1]): - return i - return 0 - while hi > lo: - mid = (lo + hi) >> 1 - if bins[mid] < x: - # mid is too low => narrow to upper bins - lo = mid + 1 - else: - # mid is too high, or is a NaN => narrow to lower bins - hi = mid - else: - if np.isnan(x): - # NaNs end up in the last bin - return n - while hi > lo: - mid = (lo + hi) >> 1 - if bins[mid] <= x: - # mid is too low => narrow to upper bins - lo = mid + 1 - else: - # mid is too high, or is a NaN => narrow to lower bins - hi = mid - - return lo - - @register_jitable - def digitize_scalar_decreasing(x, bins, right): - # bins are monotonically-decreasing - n = len(bins) - lo = 0 - hi = n - - if right: - if np.isnan(x): - # Find the last nan - for i in range(0, n): - if not np.isnan(bins[i]): - return i - return n - while hi > lo: - mid = (lo + hi) >> 1 - if bins[mid] < x: - # mid is too high => narrow to lower bins - hi = mid - else: - # mid is too low, or is a NaN => narrow to upper bins - lo = mid + 1 - else: - if np.isnan(x): - # NaNs end up in the first bin - return 0 - while hi > lo: - mid = (lo + hi) >> 1 - if bins[mid] <= x: - # mid is too high => narrow to lower bins - hi = mid - else: - # mid is too low, or is a NaN => narrow to upper bins - lo = mid + 1 - - return lo - - if isinstance(x, types.Array): - # N-d array and output - - def digitize_impl(x, bins, right=False): - is_increasing = are_bins_increasing(bins) - out = np.empty(x.shape, np.intp) - for view, outview in np.nditer((x, out)): - if is_increasing: - index = digitize_scalar(view.item(), bins, right) - else: - index = digitize_scalar_decreasing(view.item(), bins, right) - outview.itemset(index) - return out - - return digitize_impl - - elif isinstance(x, types.Sequence): - # 1-d sequence and output - - def digitize_impl(x, bins, right=False): - is_increasing = are_bins_increasing(bins) - out = 
np.empty(len(x), np.intp) - for i in range(len(x)): - if is_increasing: - out[i] = digitize_scalar(x[i], bins, right) - else: - out[i] = digitize_scalar_decreasing(x[i], bins, right) - return out - - return digitize_impl - - -_range = range - -@overload(np.histogram) -def np_histogram(a, bins=10, range=None): - if isinstance(bins, (int, types.Integer)): - # With a uniform distribution of bins, use a fast algorithm - # independent of the number of bins - - if range in (None, types.none): - inf = float('inf') - def histogram_impl(a, bins=10, range=None): - bin_min = inf - bin_max = -inf - for view in np.nditer(a): - v = view.item() - if bin_min > v: - bin_min = v - if bin_max < v: - bin_max = v - return np.histogram(a, bins, (bin_min, bin_max)) - - else: - def histogram_impl(a, bins=10, range=None): - if bins <= 0: - raise ValueError("histogram(): `bins` should be a positive integer") - bin_min, bin_max = range - if not bin_min <= bin_max: - raise ValueError("histogram(): max must be larger than min in range parameter") - - hist = np.zeros(bins, np.intp) - if bin_max > bin_min: - bin_ratio = bins / (bin_max - bin_min) - for view in np.nditer(a): - v = view.item() - b = math.floor((v - bin_min) * bin_ratio) - if 0 <= b < bins: - hist[int(b)] += 1 - elif v == bin_max: - hist[bins - 1] += 1 - - bins_array = np.linspace(bin_min, bin_max, bins + 1) - return hist, bins_array - - else: - # With a custom bins array, use a bisection search - - def histogram_impl(a, bins, range=None): - nbins = len(bins) - 1 - for i in _range(nbins): - # Note this also catches NaNs - if not bins[i] <= bins[i + 1]: - raise ValueError("histogram(): bins must increase monotonically") - - bin_min = bins[0] - bin_max = bins[nbins] - hist = np.zeros(nbins, np.intp) - - if nbins > 0: - for view in np.nditer(a): - v = view.item() - if not bin_min <= v <= bin_max: - # Value is out of bounds, ignore (this also catches NaNs) - continue - # Bisect in bins[:-1] - lo = 0 - hi = nbins - 1 - while lo < hi: - 
# Note the `+ 1` is necessary to avoid an infinite - # loop where mid = lo => lo = mid - mid = (lo + hi + 1) >> 1 - if v < bins[mid]: - hi = mid - 1 - else: - lo = mid - hist[lo] += 1 - - return hist, bins - - return histogram_impl - - -# Create np.finfo, np.iinfo and np.MachAr -# machar -_mach_ar_supported = ('ibeta', 'it', 'machep', 'eps', 'negep', 'epsneg', - 'iexp', 'minexp', 'xmin', 'maxexp', 'xmax', 'irnd', - 'ngrd', 'epsilon', 'tiny', 'huge', 'precision', - 'resolution',) -MachAr = namedtuple('MachAr', _mach_ar_supported) - -# Do not support MachAr field -# finfo -_finfo_supported = ('eps', 'epsneg', 'iexp', 'machep', 'max', 'maxexp', 'min', - 'minexp', 'negep', 'nexp', 'nmant', 'precision', - 'resolution', 'tiny',) -if numpy_version >= (1, 12): - _finfo_supported = ('bits',) + _finfo_supported - -finfo = namedtuple('finfo', _finfo_supported) - -# iinfo -_iinfo_supported = ('min', 'max') -if numpy_version >= (1, 12): - _iinfo_supported = _iinfo_supported + ('bits',) - -iinfo = namedtuple('iinfo', _iinfo_supported) - -@overload(np.MachAr) -def MachAr_impl(): - f = np.MachAr() - _mach_ar_data = tuple([getattr(f, x) for x in _mach_ar_supported]) - def impl(): - return MachAr(*_mach_ar_data) - return impl - -def generate_xinfo(np_func, container, attr): - @overload(np_func) - def xinfo_impl(arg): - nbty = getattr(arg, 'dtype', arg) - f = np_func(as_dtype(nbty)) - data = tuple([getattr(f, x) for x in attr]) - def impl(arg): - return container(*data) - return impl - -generate_xinfo(np.finfo, finfo, _finfo_supported) -generate_xinfo(np.iinfo, iinfo, _iinfo_supported) - -def _get_inner_prod(dta, dtb): - # gets an inner product implementation, if both types are float then - # BLAS is used else a local function - - @register_jitable - def _innerprod(a, b): - acc = 0 - for i in range(len(a)): - acc = acc + a[i] * b[i] - return acc - - # no BLAS... 
use local function regardless - if not _HAVE_BLAS: - return _innerprod - - flty = types.real_domain | types.complex_domain - floats = dta in flty and dtb in flty - if not floats: - return _innerprod - else: - a_dt = as_dtype(dta) - b_dt = as_dtype(dtb) - dt = np.promote_types(a_dt, b_dt) - - @register_jitable - def _dot_wrap(a, b): - return np.dot(a.astype(dt), b.astype(dt)) - return _dot_wrap - -def _assert_1d(a, func_name): - if isinstance(a, types.Array): - if not a.ndim <= 1: - raise TypingError("%s() only supported on 1D arrays " % func_name) - -def _np_correlate_core(ap1, ap2, mode, direction): - pass - - -class _corr_conv_Mode(IntEnum): - """ - Enumerated modes for correlate/convolve as per: - https://github.com/numpy/numpy/blob/ac6b1a902b99e340cf7eeeeb7392c91e38db9dd8/numpy/core/numeric.py#L862-L870 - """ - VALID = 0 - SAME = 1 - FULL = 2 - - -@overload(_np_correlate_core) -def _np_correlate_core_impl(ap1, ap2, mode, direction): - a_dt = as_dtype(ap1.dtype) - b_dt = as_dtype(ap2.dtype) - dt = np.promote_types(a_dt, b_dt) - innerprod = _get_inner_prod(ap1.dtype, ap2.dtype) - - Mode = _corr_conv_Mode - - def impl(ap1, ap2, mode, direction): - # Implementation loosely based on `_pyarray_correlate` from - # https://github.com/numpy/numpy/blob/3bce2be74f228684ca2895ad02b63953f37e2a9d/numpy/core/src/multiarray/multiarraymodule.c#L1191 - # For "Mode": - # Convolve uses 'full' by default, this is denoted by the number 2 - # Correlate uses 'valid' by default, this is denoted by the number 0 - # For "direction", +1 to write the return values out in order 0->N - # -1 to write them out N->0. 
- - if not (mode == Mode.VALID or mode == Mode.FULL): - raise ValueError("Invalid mode") - - n1 = len(ap1) - n2 = len(ap2) - length = n1 - n = n2 - if mode == Mode.VALID: # mode == valid == 0, correlate default - length = length - n + 1 - n_left = 0 - n_right = 0 - elif mode == Mode.FULL: # mode == full == 2, convolve default - n_right = n - 1 - n_left = n - 1 - length = length + n - 1 - else: - raise ValueError("Invalid mode") - - ret = np.zeros(length, dt) - n = n - n_left - - if direction == 1: - idx = 0 - inc = 1 - elif direction == -1: - idx = length - 1 - inc = -1 - else: - raise ValueError("Invalid direction") - - for i in range(n_left): - ret[idx] = innerprod(ap1[:idx + 1], ap2[-(idx + 1):]) - idx = idx + inc - - for i in range(n1 - n2 + 1): - ret[idx] = innerprod(ap1[i : i + n2], ap2) - idx = idx + inc - - for i in range(n_right, 0, -1): - ret[idx] = innerprod(ap1[-i:], ap2[:i]) - idx = idx + inc - return ret - - return impl - -@overload(np.correlate) -def _np_correlate(a, v): - _assert_1d(a, 'np.correlate') - _assert_1d(v, 'np.correlate') - - @register_jitable - def op_conj(x): - return np.conj(x) - - @register_jitable - def op_nop(x): - return x - - Mode = _corr_conv_Mode - - if a.dtype in types.complex_domain: - if v.dtype in types.complex_domain: - a_op = op_nop - b_op = op_conj - else: - a_op = op_nop - b_op = op_nop - else: - if v.dtype in types.complex_domain: - a_op = op_nop - b_op = op_conj - else: - a_op = op_conj - b_op = op_nop - - def impl(a, v): - if len(a) < len(v): - return _np_correlate_core(b_op(v), a_op(a), Mode.VALID, -1) - else: - return _np_correlate_core(a_op(a), b_op(v), Mode.VALID, 1) - - return impl - -@overload(np.convolve) -def np_convolve(a, v): - _assert_1d(a, 'np.convolve') - _assert_1d(v, 'np.convolve') - - Mode = _corr_conv_Mode - - def impl(a, v): - la = len(a) - lv = len(v) - - if la == 0: - raise ValueError("'a' cannot be empty") - if lv == 0: - raise ValueError("'v' cannot be empty") - - if la < lv: - return 
_np_correlate_core(v, a[::-1], Mode.FULL, 1) - else: - return _np_correlate_core(a, v[::-1], Mode.FULL, 1) - - return impl diff --git a/numba/numba/targets/arrayobj.py b/numba/numba/targets/arrayobj.py deleted file mode 100644 index 5a847c0da..000000000 --- a/numba/numba/targets/arrayobj.py +++ /dev/null @@ -1,4751 +0,0 @@ -""" -Implementation of operations on Array objects and objects supporting -the buffer protocol. -""" - -from __future__ import print_function, absolute_import, division - -import functools -import math - -from llvmlite import ir -import llvmlite.llvmpy.core as lc -from llvmlite.llvmpy.core import Constant - -import numpy as np - -from numba import types, cgutils, typing, utils, extending, pndindex -from numba.numpy_support import (as_dtype, carray, farray, is_contiguous, - is_fortran) -from numba.numpy_support import version as numpy_version -from numba.targets.imputils import (lower_builtin, lower_getattr, - lower_getattr_generic, - lower_setattr_generic, - lower_cast, lower_constant, - iternext_impl, impl_ret_borrowed, - impl_ret_new_ref, impl_ret_untracked) -from numba.typing import signature -from numba.extending import register_jitable, overload -from . import quicksort, mergesort, slicing - - -def set_range_metadata(builder, load, lower_bound, upper_bound): - """ - Set the "range" metadata on a load instruction. - Note the interval is in the form [lower_bound, upper_bound). - """ - range_operands = [Constant.int(load.type, lower_bound), - Constant.int(load.type, upper_bound)] - md = builder.module.add_metadata(range_operands) - load.set_metadata("range", md) - - -def mark_positive(builder, load): - """ - Mark the result of a load instruction as positive (or zero). - """ - upper_bound = (1 << (load.type.width - 1)) - 1 - set_range_metadata(builder, load, 0, upper_bound) - - -def make_array(array_type): - """ - Return the Structure representation of the given *array_type* - (an instance of types.ArrayCompatible). 
- - Note this does not call __array_wrap__ in case a new array structure - is being created (rather than populated). - """ - real_array_type = array_type.as_array - base = cgutils.create_struct_proxy(real_array_type) - ndim = real_array_type.ndim - - class ArrayStruct(base): - - def _make_refs(self, ref): - sig = signature(real_array_type, array_type) - try: - array_impl = self._context.get_function('__array__', sig) - except NotImplementedError: - return super(ArrayStruct, self)._make_refs(ref) - - # Return a wrapped structure and its unwrapped reference - datamodel = self._context.data_model_manager[array_type] - be_type = self._get_be_type(datamodel) - if ref is None: - outer_ref = cgutils.alloca_once(self._builder, be_type, zfill=True) - else: - outer_ref = ref - # NOTE: __array__ is called with a pointer and expects a pointer - # in return! - ref = array_impl(self._builder, (outer_ref,)) - return outer_ref, ref - - @property - def shape(self): - """ - Override .shape to inform LLVM that its elements are all positive. - """ - builder = self._builder - if ndim == 0: - return base.__getattr__(self, "shape") - - # Unfortunately, we can't use llvm.assume as its presence can - # seriously pessimize performance, - # *and* the range metadata currently isn't improving anything here, - # see https://llvm.org/bugs/show_bug.cgi?id=23848 ! - ptr = self._get_ptr_by_name("shape") - dims = [] - for i in range(ndim): - dimptr = cgutils.gep_inbounds(builder, ptr, 0, i) - load = builder.load(dimptr) - dims.append(load) - mark_positive(builder, load) - - return cgutils.pack_array(builder, dims) - - return ArrayStruct - - -def get_itemsize(context, array_type): - """ - Return the item size for the given array or buffer type. - """ - llty = context.get_data_type(array_type.dtype) - return context.get_abi_sizeof(llty) - - -def load_item(context, builder, arrayty, ptr): - """ - Load the item at the given array pointer. 
- """ - align = None if arrayty.aligned else 1 - return context.unpack_value(builder, arrayty.dtype, ptr, - align=align) - -def store_item(context, builder, arrayty, val, ptr): - """ - Store the item at the given array pointer. - """ - align = None if arrayty.aligned else 1 - return context.pack_value(builder, arrayty.dtype, val, ptr, align=align) - - -def fix_integer_index(context, builder, idxty, idx, size): - """ - Fix the integer index' type and value for the given dimension size. - """ - if idxty.signed: - ind = context.cast(builder, idx, idxty, types.intp) - ind = slicing.fix_index(builder, ind, size) - else: - ind = context.cast(builder, idx, idxty, types.uintp) - return ind - - -def normalize_index(context, builder, idxty, idx): - """ - Normalize the index type and value. 0-d arrays are converted to scalars. - """ - if isinstance(idxty, types.Array) and idxty.ndim == 0: - assert isinstance(idxty.dtype, types.Integer) - idxary = make_array(idxty)(context, builder, idx) - idxval = load_item(context, builder, idxty, idxary.data) - return idxty.dtype, idxval - else: - return idxty, idx - - -def normalize_indices(context, builder, index_types, indices): - """ - Same as normalize_index(), but operating on sequences of - index types and values. - """ - if len(indices): - index_types, indices = zip(*[normalize_index(context, builder, idxty, idx) - for idxty, idx in zip(index_types, indices)]) - return index_types, indices - - -def populate_array(array, data, shape, strides, itemsize, meminfo, - parent=None): - """ - Helper function for populating array structures. - This avoids forgetting to set fields. - - *shape* and *strides* can be Python tuples or LLVM arrays. 
- """ - context = array._context - builder = array._builder - datamodel = array._datamodel - required_fields = set(datamodel._fields) - - if meminfo is None: - meminfo = Constant.null(context.get_value_type( - datamodel.get_type('meminfo'))) - - intp_t = context.get_value_type(types.intp) - if isinstance(shape, (tuple, list)): - shape = cgutils.pack_array(builder, shape, intp_t) - if isinstance(strides, (tuple, list)): - strides = cgutils.pack_array(builder, strides, intp_t) - if isinstance(itemsize, utils.INT_TYPES): - itemsize = intp_t(itemsize) - - attrs = dict(shape=shape, - strides=strides, - data=data, - itemsize=itemsize, - meminfo=meminfo,) - - # Set `parent` attribute - if parent is None: - attrs['parent'] = Constant.null(context.get_value_type( - datamodel.get_type('parent'))) - else: - attrs['parent'] = parent - # Calc num of items from shape - nitems = context.get_constant(types.intp, 1) - unpacked_shape = cgutils.unpack_tuple(builder, shape, shape.type.count) - # (note empty shape => 0d array therefore nitems = 1) - for axlen in unpacked_shape: - nitems = builder.mul(nitems, axlen, flags=['nsw']) - attrs['nitems'] = nitems - - # Make sure that we have all the fields - got_fields = set(attrs.keys()) - if got_fields != required_fields: - raise ValueError("missing {0}".format(required_fields - got_fields)) - - # Set field value - for k, v in attrs.items(): - setattr(array, k, v) - - return array - - -def update_array_info(aryty, array): - """ - Update some auxiliary information in *array* after some of its fields - were changed. `itemsize` and `nitems` are updated. 
- """ - context = array._context - builder = array._builder - - # Calc num of items from shape - nitems = context.get_constant(types.intp, 1) - unpacked_shape = cgutils.unpack_tuple(builder, array.shape, aryty.ndim) - for axlen in unpacked_shape: - nitems = builder.mul(nitems, axlen, flags=['nsw']) - array.nitems = nitems - - array.itemsize = context.get_constant(types.intp, - get_itemsize(context, aryty)) - - -@lower_builtin('getiter', types.Buffer) -def getiter_array(context, builder, sig, args): - [arrayty] = sig.args - [array] = args - - iterobj = context.make_helper(builder, sig.return_type) - - zero = context.get_constant(types.intp, 0) - indexptr = cgutils.alloca_once_value(builder, zero) - - iterobj.index = indexptr - iterobj.array = array - - # Incref array - if context.enable_nrt: - context.nrt.incref(builder, arrayty, array) - - res = iterobj._getvalue() - - # Note: a decref on the iterator will dereference all internal MemInfo* - out = impl_ret_new_ref(context, builder, sig.return_type, res) - return out - - -def _getitem_array1d(context, builder, arrayty, array, idx, wraparound): - """ - Look up and return an element from a 1D array. 
- """ - ptr = cgutils.get_item_pointer(builder, arrayty, array, [idx], - wraparound=wraparound) - return load_item(context, builder, arrayty, ptr) - -@lower_builtin('iternext', types.ArrayIterator) -@iternext_impl -def iternext_array(context, builder, sig, args, result): - [iterty] = sig.args - [iter] = args - arrayty = iterty.array_type - - if arrayty.ndim != 1: - # TODO - raise NotImplementedError("iterating over %dD array" % arrayty.ndim) - - iterobj = context.make_helper(builder, iterty, value=iter) - ary = make_array(arrayty)(context, builder, value=iterobj.array) - - nitems, = cgutils.unpack_tuple(builder, ary.shape, count=1) - - index = builder.load(iterobj.index) - is_valid = builder.icmp(lc.ICMP_SLT, index, nitems) - result.set_valid(is_valid) - - with builder.if_then(is_valid): - value = _getitem_array1d(context, builder, arrayty, ary, index, - wraparound=False) - result.yield_(value) - nindex = cgutils.increment_index(builder, index) - builder.store(nindex, iterobj.index) - - -#------------------------------------------------------------------------------- -# Basic indexing (with integers and slices only) - -def basic_indexing(context, builder, aryty, ary, index_types, indices): - """ - Perform basic indexing on the given array. - A (data pointer, shapes, strides) tuple is returned describing - the corresponding view. 
- """ - zero = context.get_constant(types.intp, 0) - - shapes = cgutils.unpack_tuple(builder, ary.shape, aryty.ndim) - strides = cgutils.unpack_tuple(builder, ary.strides, aryty.ndim) - - output_indices = [] - output_shapes = [] - output_strides = [] - - ax = 0 - for indexval, idxty in zip(indices, index_types): - if idxty is types.ellipsis: - # Fill up missing dimensions at the middle - n_missing = aryty.ndim - len(indices) + 1 - for i in range(n_missing): - output_indices.append(zero) - output_shapes.append(shapes[ax]) - output_strides.append(strides[ax]) - ax += 1 - continue - # Regular index value - if isinstance(idxty, types.SliceType): - slice = context.make_helper(builder, idxty, value=indexval) - slicing.guard_invalid_slice(context, builder, idxty, slice) - slicing.fix_slice(builder, slice, shapes[ax]) - output_indices.append(slice.start) - sh = slicing.get_slice_length(builder, slice) - st = slicing.fix_stride(builder, slice, strides[ax]) - output_shapes.append(sh) - output_strides.append(st) - elif isinstance(idxty, types.Integer): - ind = fix_integer_index(context, builder, idxty, indexval, - shapes[ax]) - output_indices.append(ind) - else: - raise NotImplementedError("unexpected index type: %s" % (idxty,)) - ax += 1 - - # Fill up missing dimensions at the end - assert ax <= aryty.ndim - while ax < aryty.ndim: - output_shapes.append(shapes[ax]) - output_strides.append(strides[ax]) - ax += 1 - - # No need to check wraparound, as negative indices were already - # fixed in the loop above. - dataptr = cgutils.get_item_pointer(builder, aryty, ary, - output_indices, - wraparound=False) - return (dataptr, output_shapes, output_strides) - - -def make_view(context, builder, aryty, ary, return_type, - data, shapes, strides): - """ - Build a view over the given array with the given parameters. 
- """ - retary = make_array(return_type)(context, builder) - populate_array(retary, - data=data, - shape=shapes, - strides=strides, - itemsize=ary.itemsize, - meminfo=ary.meminfo, - parent=ary.parent) - return retary - - -def _getitem_array_generic(context, builder, return_type, aryty, ary, - index_types, indices): - """ - Return the result of indexing *ary* with the given *indices*, - returning either a scalar or a view. - """ - dataptr, view_shapes, view_strides = \ - basic_indexing(context, builder, aryty, ary, index_types, indices) - - if isinstance(return_type, types.Buffer): - # Build array view - retary = make_view(context, builder, aryty, ary, return_type, - dataptr, view_shapes, view_strides) - return retary._getvalue() - else: - # Load scalar from 0-d result - assert not view_shapes - return load_item(context, builder, aryty, dataptr) - - -@lower_builtin('getitem', types.Buffer, types.Integer) -@lower_builtin('getitem', types.Buffer, types.SliceType) -def getitem_arraynd_intp(context, builder, sig, args): - """ - Basic indexing with an integer or a slice. - """ - aryty, idxty = sig.args - ary, idx = args - - assert aryty.ndim >= 1 - ary = make_array(aryty)(context, builder, ary) - - res = _getitem_array_generic(context, builder, sig.return_type, - aryty, ary, (idxty,), (idx,)) - return impl_ret_borrowed(context, builder, sig.return_type, res) - - -@lower_builtin('getitem', types.Buffer, types.BaseTuple) -def getitem_array_tuple(context, builder, sig, args): - """ - Basic or advanced indexing with a tuple. 
- """ - aryty, tupty = sig.args - ary, tup = args - ary = make_array(aryty)(context, builder, ary) - - index_types = tupty.types - indices = cgutils.unpack_tuple(builder, tup, count=len(tupty)) - - index_types, indices = normalize_indices(context, builder, - index_types, indices) - - if any(isinstance(ty, types.Array) for ty in index_types): - # Advanced indexing - return fancy_getitem(context, builder, sig, args, - aryty, ary, index_types, indices) - - res = _getitem_array_generic(context, builder, sig.return_type, - aryty, ary, index_types, indices) - return impl_ret_borrowed(context, builder, sig.return_type, res) - - -@lower_builtin('setitem', types.Buffer, types.Any, types.Any) -def setitem_array(context, builder, sig, args): - """ - array[a] = scalar_or_array - array[a,..,b] = scalar_or_array - """ - aryty, idxty, valty = sig.args - ary, idx, val = args - - if isinstance(idxty, types.BaseTuple): - index_types = idxty.types - indices = cgutils.unpack_tuple(builder, idx, count=len(idxty)) - else: - index_types = (idxty,) - indices = (idx,) - - ary = make_array(aryty)(context, builder, ary) - - # First try basic indexing to see if a single array location is denoted. 
- index_types, indices = normalize_indices(context, builder, - index_types, indices) - try: - dataptr, shapes, strides = \ - basic_indexing(context, builder, aryty, ary, index_types, indices) - except NotImplementedError: - use_fancy_indexing = True - else: - use_fancy_indexing = bool(shapes) - - if use_fancy_indexing: - # Index describes a non-trivial view => use generic slice assignment - # (NOTE: this also handles scalar broadcasting) - return fancy_setslice(context, builder, sig, args, - index_types, indices) - - # Store source value the given location - val = context.cast(builder, val, valty, aryty.dtype) - store_item(context, builder, aryty, val, dataptr) - - -@lower_builtin(len, types.Buffer) -def array_len(context, builder, sig, args): - (aryty,) = sig.args - (ary,) = args - arystty = make_array(aryty) - ary = arystty(context, builder, ary) - shapeary = ary.shape - res = builder.extract_value(shapeary, 0) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin("array.item", types.Array) -def array_item(context, builder, sig, args): - aryty, = sig.args - ary, = args - ary = make_array(aryty)(context, builder, ary) - - nitems = ary.nitems - with builder.if_then(builder.icmp_signed('!=', nitems, nitems.type(1)), - likely=False): - msg = "item(): can only convert an array of size 1 to a Python scalar" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) - - return load_item(context, builder, aryty, ary.data) - - -@lower_builtin("array.itemset", types.Array, types.Any) -def array_itemset(context, builder, sig, args): - aryty, valty = sig.args - ary, val = args - assert valty == aryty.dtype - ary = make_array(aryty)(context, builder, ary) - - nitems = ary.nitems - with builder.if_then(builder.icmp_signed('!=', nitems, nitems.type(1)), - likely=False): - msg = "itemset(): can only write to an array of size 1" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) - - store_item(context, builder, aryty, val, 
ary.data) - return context.get_dummy_value() - - -#------------------------------------------------------------------------------- -# Advanced / fancy indexing - - -class Indexer(object): - """ - Generic indexer interface, for generating indices over a fancy indexed - array on a single dimension. - """ - - def prepare(self): - """ - Prepare the indexer by initializing any required variables, basic - blocks... - """ - raise NotImplementedError - - def get_size(self): - """ - Return this dimension's size as an integer. - """ - raise NotImplementedError - - def get_shape(self): - """ - Return this dimension's shape as a tuple. - """ - raise NotImplementedError - - def get_index_bounds(self): - """ - Return a half-open [lower, upper) range of indices this dimension - is guaranteed not to step out of. - """ - raise NotImplementedError - - def loop_head(self): - """ - Start indexation loop. Return a (index, count) tuple. - *index* is an integer LLVM value representing the index over this - dimension. - *count* is either an integer LLVM value representing the current - iteration count, or None if this dimension should be omitted from - the indexation result. - """ - raise NotImplementedError - - def loop_tail(self): - """ - Finish indexation loop. - """ - raise NotImplementedError - - -class EntireIndexer(Indexer): - """ - Compute indices along an entire array dimension. 
- """ - - def __init__(self, context, builder, aryty, ary, dim): - self.context = context - self.builder = builder - self.aryty = aryty - self.ary = ary - self.dim = dim - self.ll_intp = self.context.get_value_type(types.intp) - - def prepare(self): - builder = self.builder - self.size = builder.extract_value(self.ary.shape, self.dim) - self.index = cgutils.alloca_once(builder, self.ll_intp) - self.bb_start = builder.append_basic_block() - self.bb_end = builder.append_basic_block() - - def get_size(self): - return self.size - - def get_shape(self): - return (self.size,) - - def get_index_bounds(self): - # [0, size) - return (self.ll_intp(0), self.size) - - def loop_head(self): - builder = self.builder - # Initialize loop variable - self.builder.store(Constant.int(self.ll_intp, 0), self.index) - builder.branch(self.bb_start) - builder.position_at_end(self.bb_start) - cur_index = builder.load(self.index) - with builder.if_then(builder.icmp_signed('>=', cur_index, self.size), - likely=False): - builder.branch(self.bb_end) - return cur_index, cur_index - - def loop_tail(self): - builder = self.builder - next_index = cgutils.increment_index(builder, builder.load(self.index)) - builder.store(next_index, self.index) - builder.branch(self.bb_start) - builder.position_at_end(self.bb_end) - - -class IntegerIndexer(Indexer): - """ - Compute indices from a single integer. - """ - - def __init__(self, context, builder, idx): - self.context = context - self.builder = builder - self.idx = idx - self.ll_intp = self.context.get_value_type(types.intp) - - def prepare(self): - pass - - def get_size(self): - return Constant.int(self.ll_intp, 1) - - def get_shape(self): - return () - - def get_index_bounds(self): - # [idx, idx+1) - return (self.idx, self.builder.add(self.idx, self.get_size())) - - def loop_head(self): - return self.idx, None - - def loop_tail(self): - pass - - -class IntegerArrayIndexer(Indexer): - """ - Compute indices from an array of integer indices. 
- """ - - def __init__(self, context, builder, idxty, idxary, size): - self.context = context - self.builder = builder - self.idxty = idxty - self.idxary = idxary - self.size = size - assert idxty.ndim == 1 - self.ll_intp = self.context.get_value_type(types.intp) - - def prepare(self): - builder = self.builder - self.idx_size = cgutils.unpack_tuple(builder, self.idxary.shape)[0] - self.idx_index = cgutils.alloca_once(builder, self.ll_intp) - self.bb_start = builder.append_basic_block() - self.bb_end = builder.append_basic_block() - - def get_size(self): - return self.idx_size - - def get_shape(self): - return (self.idx_size,) - - def get_index_bounds(self): - # Pessimal heuristic, as we don't want to scan for the min and max - return (self.ll_intp(0), self.size) - - def loop_head(self): - builder = self.builder - # Initialize loop variable - self.builder.store(Constant.int(self.ll_intp, 0), self.idx_index) - builder.branch(self.bb_start) - builder.position_at_end(self.bb_start) - cur_index = builder.load(self.idx_index) - with builder.if_then(builder.icmp_signed('>=', cur_index, self.idx_size), - likely=False): - builder.branch(self.bb_end) - # Load the actual index from the array of indices - index = _getitem_array1d(self.context, builder, - self.idxty, self.idxary, - cur_index, wraparound=False) - index = fix_integer_index(self.context, builder, - self.idxty.dtype, index, self.size) - return index, cur_index - - def loop_tail(self): - builder = self.builder - next_index = cgutils.increment_index(builder, - builder.load(self.idx_index)) - builder.store(next_index, self.idx_index) - builder.branch(self.bb_start) - builder.position_at_end(self.bb_end) - - -class BooleanArrayIndexer(Indexer): - """ - Compute indices from an array of boolean predicates. 
- """ - - def __init__(self, context, builder, idxty, idxary): - self.context = context - self.builder = builder - self.idxty = idxty - self.idxary = idxary - assert idxty.ndim == 1 - self.ll_intp = self.context.get_value_type(types.intp) - self.zero = Constant.int(self.ll_intp, 0) - - def prepare(self): - builder = self.builder - self.size = cgutils.unpack_tuple(builder, self.idxary.shape)[0] - self.idx_index = cgutils.alloca_once(builder, self.ll_intp) - self.count = cgutils.alloca_once(builder, self.ll_intp) - self.bb_start = builder.append_basic_block() - self.bb_tail = builder.append_basic_block() - self.bb_end = builder.append_basic_block() - - def get_size(self): - builder = self.builder - count = cgutils.alloca_once_value(builder, self.zero) - # Sum all true values - with cgutils.for_range(builder, self.size) as loop: - c = builder.load(count) - pred = _getitem_array1d(self.context, builder, - self.idxty, self.idxary, - loop.index, wraparound=False) - c = builder.add(c, builder.zext(pred, c.type)) - builder.store(c, count) - - return builder.load(count) - - def get_shape(self): - return (self.get_size(),) - - def get_index_bounds(self): - # Pessimal heuristic, as we don't want to scan for the - # first and last true items - return (self.ll_intp(0), self.size) - - def loop_head(self): - builder = self.builder - # Initialize loop variable - self.builder.store(self.zero, self.idx_index) - self.builder.store(self.zero, self.count) - builder.branch(self.bb_start) - builder.position_at_end(self.bb_start) - cur_index = builder.load(self.idx_index) - cur_count = builder.load(self.count) - with builder.if_then(builder.icmp_signed('>=', cur_index, self.size), - likely=False): - builder.branch(self.bb_end) - # Load the predicate and branch if false - pred = _getitem_array1d(self.context, builder, - self.idxty, self.idxary, - cur_index, wraparound=False) - with builder.if_then(builder.not_(pred)): - builder.branch(self.bb_tail) - # Increment the count for next 
iteration - next_count = cgutils.increment_index(builder, cur_count) - builder.store(next_count, self.count) - return cur_index, cur_count - - def loop_tail(self): - builder = self.builder - builder.branch(self.bb_tail) - builder.position_at_end(self.bb_tail) - next_index = cgutils.increment_index(builder, - builder.load(self.idx_index)) - builder.store(next_index, self.idx_index) - builder.branch(self.bb_start) - builder.position_at_end(self.bb_end) - - -class SliceIndexer(Indexer): - """ - Compute indices along a slice. - """ - - def __init__(self, context, builder, aryty, ary, dim, idxty, slice): - self.context = context - self.builder = builder - self.aryty = aryty - self.ary = ary - self.dim = dim - self.idxty = idxty - self.slice = slice - self.ll_intp = self.context.get_value_type(types.intp) - self.zero = Constant.int(self.ll_intp, 0) - - def prepare(self): - builder = self.builder - # Fix slice for the dimension's size - self.dim_size = builder.extract_value(self.ary.shape, self.dim) - slicing.guard_invalid_slice(self.context, builder, self.idxty, - self.slice) - slicing.fix_slice(builder, self.slice, self.dim_size) - self.is_step_negative = cgutils.is_neg_int(builder, self.slice.step) - # Create loop entities - self.index = cgutils.alloca_once(builder, self.ll_intp) - self.count = cgutils.alloca_once(builder, self.ll_intp) - self.bb_start = builder.append_basic_block() - self.bb_end = builder.append_basic_block() - - def get_size(self): - return slicing.get_slice_length(self.builder, self.slice) - - def get_shape(self): - return (self.get_size(),) - - def get_index_bounds(self): - lower, upper = slicing.get_slice_bounds(self.builder, self.slice) - return lower, upper - - def loop_head(self): - builder = self.builder - # Initialize loop variable - self.builder.store(self.slice.start, self.index) - self.builder.store(self.zero, self.count) - builder.branch(self.bb_start) - builder.position_at_end(self.bb_start) - cur_index = builder.load(self.index) - 
cur_count = builder.load(self.count) - is_finished = builder.select(self.is_step_negative, - builder.icmp_signed('<=', cur_index, - self.slice.stop), - builder.icmp_signed('>=', cur_index, - self.slice.stop)) - with builder.if_then(is_finished, likely=False): - builder.branch(self.bb_end) - return cur_index, cur_count - - def loop_tail(self): - builder = self.builder - next_index = builder.add(builder.load(self.index), self.slice.step, - flags=['nsw']) - builder.store(next_index, self.index) - next_count = cgutils.increment_index(builder, builder.load(self.count)) - builder.store(next_count, self.count) - builder.branch(self.bb_start) - builder.position_at_end(self.bb_end) - - -class FancyIndexer(object): - """ - Perform fancy indexing on the given array. - """ - - def __init__(self, context, builder, aryty, ary, index_types, indices): - self.context = context - self.builder = builder - self.aryty = aryty - self.shapes = cgutils.unpack_tuple(builder, ary.shape, aryty.ndim) - self.strides = cgutils.unpack_tuple(builder, ary.strides, aryty.ndim) - self.ll_intp = self.context.get_value_type(types.intp) - - indexers = [] - - ax = 0 - for indexval, idxty in zip(indices, index_types): - if idxty is types.ellipsis: - # Fill up missing dimensions at the middle - n_missing = aryty.ndim - len(indices) + 1 - for i in range(n_missing): - indexer = EntireIndexer(context, builder, aryty, ary, ax) - indexers.append(indexer) - ax += 1 - continue - - # Regular index value - if isinstance(idxty, types.SliceType): - slice = context.make_helper(builder, idxty, indexval) - indexer = SliceIndexer(context, builder, aryty, ary, ax, - idxty, slice) - indexers.append(indexer) - elif isinstance(idxty, types.Integer): - ind = fix_integer_index(context, builder, idxty, indexval, - self.shapes[ax]) - indexer = IntegerIndexer(context, builder, ind) - indexers.append(indexer) - elif isinstance(idxty, types.Array): - idxary = make_array(idxty)(context, builder, indexval) - if 
isinstance(idxty.dtype, types.Integer): - indexer = IntegerArrayIndexer(context, builder, - idxty, idxary, - self.shapes[ax]) - elif isinstance(idxty.dtype, types.Boolean): - indexer = BooleanArrayIndexer(context, builder, - idxty, idxary) - else: - assert 0 - indexers.append(indexer) - else: - raise AssertionError("unexpected index type: %s" % (idxty,)) - ax += 1 - - # Fill up missing dimensions at the end - assert ax <= aryty.ndim, (ax, aryty.ndim) - while ax < aryty.ndim: - indexer = EntireIndexer(context, builder, aryty, ary, ax) - indexers.append(indexer) - ax += 1 - - assert len(indexers) == aryty.ndim, (len(indexers), aryty.ndim) - self.indexers = indexers - - def prepare(self): - for i in self.indexers: - i.prepare() - # Compute the resulting shape - self.indexers_shape = sum([i.get_shape() for i in self.indexers], ()) - - def get_shape(self): - """ - Get the resulting data shape as Python tuple. - """ - return self.indexers_shape - - def get_offset_bounds(self, strides, itemsize): - """ - Get a half-open [lower, upper) range of byte offsets spanned by - the indexer with the given strides and itemsize. The indexer is - guaranteed to not go past those bounds. 
- """ - assert len(strides) == self.aryty.ndim - builder = self.builder - is_empty = cgutils.false_bit - zero = self.ll_intp(0) - one = self.ll_intp(1) - lower = zero - upper = zero - for indexer, shape, stride in zip(self.indexers, self.indexers_shape, - strides): - is_empty = builder.or_(is_empty, - builder.icmp_unsigned('==', shape, zero)) - # Compute [lower, upper) indices on this dimension - lower_index, upper_index = indexer.get_index_bounds() - lower_offset = builder.mul(stride, lower_index) - upper_offset = builder.mul(stride, builder.sub(upper_index, one)) - # Adjust total interval - is_downwards = builder.icmp_signed('<', stride, zero) - lower = builder.add(lower, - builder.select(is_downwards, upper_offset, lower_offset)) - upper = builder.add(upper, - builder.select(is_downwards, lower_offset, upper_offset)) - # Make interval half-open - upper = builder.add(upper, itemsize) - # Adjust for empty shape - lower = builder.select(is_empty, zero, lower) - upper = builder.select(is_empty, zero, upper) - return lower, upper - - def begin_loops(self): - indices, counts = zip(*(i.loop_head() for i in self.indexers)) - return indices, counts - - def end_loops(self): - for i in reversed(self.indexers): - i.loop_tail() - - -def fancy_getitem(context, builder, sig, args, - aryty, ary, index_types, indices): - - shapes = cgutils.unpack_tuple(builder, ary.shape) - strides = cgutils.unpack_tuple(builder, ary.strides) - data = ary.data - - indexer = FancyIndexer(context, builder, aryty, ary, - index_types, indices) - indexer.prepare() - - # Construct output array - out_ty = sig.return_type - out_shapes = indexer.get_shape() - - out = _empty_nd_impl(context, builder, out_ty, out_shapes) - out_data = out.data - out_idx = cgutils.alloca_once_value(builder, - context.get_constant(types.intp, 0)) - - # Loop on source and copy to destination - indices, _ = indexer.begin_loops() - - # No need to check for wraparound, as the indexers all ensure - # a positive index is returned. 
- ptr = cgutils.get_item_pointer2(builder, data, shapes, strides, - aryty.layout, indices, wraparound=False) - val = load_item(context, builder, aryty, ptr) - - # Since the destination is C-contiguous, no need for multi-dimensional - # indexing. - cur = builder.load(out_idx) - ptr = builder.gep(out_data, [cur]) - store_item(context, builder, out_ty, val, ptr) - next_idx = cgutils.increment_index(builder, cur) - builder.store(next_idx, out_idx) - - indexer.end_loops() - - return impl_ret_new_ref(context, builder, out_ty, out._getvalue()) - - -@lower_builtin('getitem', types.Buffer, types.Array) -def fancy_getitem_array(context, builder, sig, args): - """ - Advanced or basic indexing with an array. - """ - aryty, idxty = sig.args - ary, idx = args - ary = make_array(aryty)(context, builder, ary) - if idxty.ndim == 0: - # 0-d array index acts as a basic integer index - idxty, idx = normalize_index(context, builder, idxty, idx) - res = _getitem_array_generic(context, builder, sig.return_type, - aryty, ary, (idxty,), (idx,)) - return impl_ret_borrowed(context, builder, sig.return_type, res) - else: - # Advanced indexing - return fancy_getitem(context, builder, sig, args, - aryty, ary, (idxty,), (idx,)) - - -def offset_bounds_from_strides(context, builder, arrty, arr, shapes, strides): - """ - Compute a half-open range [lower, upper) of byte offsets from the - array's data pointer, that bound the in-memory extent of the array. 
- - This mimicks offset_bounds_from_strides() from numpy/core/src/private/mem_overlap.c - """ - itemsize = arr.itemsize - zero = itemsize.type(0) - one = zero.type(1) - if arrty.layout in 'CF': - # Array is contiguous: contents are laid out sequentially - # starting from arr.data and upwards - lower = zero - upper = builder.mul(itemsize, arr.nitems) - else: - # Non-contiguous array: need to examine strides - lower = zero - upper = zero - for i in range(arrty.ndim): - # Compute the largest byte offset on this dimension - # max_axis_offset = strides[i] * (shapes[i] - 1) - # (shapes[i] == 0 is catered for by the empty array case below) - max_axis_offset = builder.mul(strides[i], - builder.sub(shapes[i], one)) - is_upwards = builder.icmp_signed('>=', max_axis_offset, zero) - # Expand either upwards or downwards depending on stride - upper = builder.select(is_upwards, - builder.add(upper, max_axis_offset), upper) - lower = builder.select(is_upwards, - lower, builder.add(lower, max_axis_offset)) - # Return a half-open range - upper = builder.add(upper, itemsize) - # Adjust for empty arrays - is_empty = builder.icmp_signed('==', arr.nitems, zero) - upper = builder.select(is_empty, zero, upper) - lower = builder.select(is_empty, zero, lower) - - return lower, upper - - -def compute_memory_extents(context, builder, lower, upper, data): - """ - Given [lower, upper) byte offsets and a base data pointer, - compute the memory pointer bounds as pointer-sized integers. - """ - data_ptr_as_int = builder.ptrtoint(data, lower.type) - start = builder.add(data_ptr_as_int, lower) - end = builder.add(data_ptr_as_int, upper) - return start, end - -def get_array_memory_extents(context, builder, arrty, arr, shapes, strides, data): - """ - Compute a half-open range [start, end) of pointer-sized integers - which fully contain the array data. 
- """ - lower, upper = offset_bounds_from_strides(context, builder, arrty, arr, - shapes, strides) - return compute_memory_extents(context, builder, lower, upper, data) - - -def extents_may_overlap(context, builder, a_start, a_end, b_start, b_end): - """ - Whether two memory extents [a_start, a_end) and [b_start, b_end) - may overlap. - """ - # Comparisons are unsigned, since we are really comparing pointers - may_overlap = builder.and_( - builder.icmp_unsigned('<', a_start, b_end), - builder.icmp_unsigned('<', b_start, a_end), - ) - return may_overlap - - -def maybe_copy_source(context, builder, use_copy, - srcty, src, src_shapes, src_strides, src_data): - ptrty = src_data.type - - copy_layout = 'C' - copy_data = cgutils.alloca_once_value(builder, src_data) - copy_shapes = src_shapes - copy_strides = None # unneeded for contiguous arrays - - with builder.if_then(use_copy, likely=False): - # Allocate temporary scratchpad - # XXX: should we use a stack-allocated array for very small - # data sizes? 
- allocsize = builder.mul(src.itemsize, src.nitems) - data = context.nrt.allocate(builder, allocsize) - voidptrty = data.type - data = builder.bitcast(data, ptrty) - builder.store(data, copy_data) - - # Copy source data into scratchpad - intp_t = context.get_value_type(types.intp) - - with cgutils.loop_nest(builder, src_shapes, intp_t) as indices: - src_ptr = cgutils.get_item_pointer2(builder, src_data, - src_shapes, src_strides, - srcty.layout, indices) - dest_ptr = cgutils.get_item_pointer2(builder, data, - copy_shapes, copy_strides, - copy_layout, indices) - builder.store(builder.load(src_ptr), dest_ptr) - - def src_getitem(source_indices): - assert len(source_indices) == srcty.ndim - src_ptr = cgutils.alloca_once(builder, ptrty) - with builder.if_else(use_copy, likely=False) as (if_copy, otherwise): - with if_copy: - builder.store( - cgutils.get_item_pointer2(builder, builder.load(copy_data), - copy_shapes, copy_strides, - copy_layout, source_indices, - wraparound=False), - src_ptr) - with otherwise: - builder.store( - cgutils.get_item_pointer2(builder, src_data, - src_shapes, src_strides, - srcty.layout, source_indices, - wraparound=False), - src_ptr) - return load_item(context, builder, srcty, builder.load(src_ptr)) - - def src_cleanup(): - # Deallocate memory - with builder.if_then(use_copy, likely=False): - data = builder.load(copy_data) - data = builder.bitcast(data, voidptrty) - context.nrt.free(builder, data) - - return src_getitem, src_cleanup - - -def _bc_adjust_dimension(context, builder, shapes, strides, target_shape): - """ - Preprocess dimension for broadcasting. - Returns (shapes, strides) such that the ndim match *target_shape*. - When expanding to higher ndim, the returning shapes and strides are - prepended with ones and zeros, respectively. - When truncating to lower ndim, the shapes are checked (in runtime). - All extra dimension must have size of 1. 
- """ - zero = context.get_constant(types.uintp, 0) - one = context.get_constant(types.uintp, 1) - - # Adjust for broadcasting to higher dimension - if len(target_shape) > len(shapes): - nd_diff = len(target_shape) - len(shapes) - # Fill missing shapes with one, strides with zeros - shapes = [one] * nd_diff + shapes - strides = [zero] * nd_diff + strides - # Adjust for broadcasting to lower dimension - elif len(target_shape) < len(shapes): - # Accepted if all extra dims has shape 1 - nd_diff = len(shapes) - len(target_shape) - dim_is_one = [builder.icmp_unsigned('==', sh, one) - for sh in shapes[:nd_diff]] - accepted = functools.reduce(builder.and_, dim_is_one, - cgutils.true_bit) - # Check error - with builder.if_then(builder.not_(accepted), likely=False): - msg = "cannot broadcast source array for assignment" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) - # Truncate extra shapes, strides - shapes = shapes[nd_diff:] - strides = strides[nd_diff:] - - return shapes, strides - - -def _bc_adjust_shape_strides(context, builder, shapes, strides, target_shape): - """ - Broadcast shapes and strides to target_shape given that their ndim already - matches. For each location where the shape is 1 and does not match the - dim for target, it is set to the value at the target and the stride is - set to zero. 
- """ - bc_shapes = [] - bc_strides = [] - zero = context.get_constant(types.uintp, 0) - one = context.get_constant(types.uintp, 1) - # Adjust all mismatching ones in shape - mismatch = [builder.icmp_signed('!=', tar, old) - for tar, old in zip(target_shape, shapes)] - src_is_one = [builder.icmp_signed('==', old, one) for old in shapes] - preds = [builder.and_(x, y) for x, y in zip(mismatch, src_is_one)] - bc_shapes = [builder.select(p, tar, old) - for p, tar, old in zip(preds, target_shape, shapes)] - bc_strides = [builder.select(p, zero, old) - for p, old in zip(preds, strides)] - return bc_shapes, bc_strides - - -def _broadcast_to_shape(context, builder, arrtype, arr, target_shape): - """ - Broadcast the given array to the target_shape. - Returns (array_type, array) - """ - # Compute broadcasted shape and strides - shapes = cgutils.unpack_tuple(builder, arr.shape) - strides = cgutils.unpack_tuple(builder, arr.strides) - - shapes, strides = _bc_adjust_dimension(context, builder, shapes, strides, - target_shape) - shapes, strides = _bc_adjust_shape_strides(context, builder, shapes, - strides, target_shape) - new_arrtype = arrtype.copy(ndim=len(target_shape), layout='A') - # Create new view - new_arr = make_array(new_arrtype)(context, builder) - repl = dict(shape=cgutils.pack_array(builder, shapes), - strides=cgutils.pack_array(builder, strides)) - cgutils.copy_struct(new_arr, arr, repl) - return new_arrtype, new_arr - - -def fancy_setslice(context, builder, sig, args, index_types, indices): - """ - Implement slice assignment for arrays. This implementation works for - basic as well as fancy indexing, since there's no functional difference - between the two for indexed assignment. 
- """ - aryty, _, srcty = sig.args - ary, _, src = args - - ary = make_array(aryty)(context, builder, ary) - dest_shapes = cgutils.unpack_tuple(builder, ary.shape) - dest_strides = cgutils.unpack_tuple(builder, ary.strides) - dest_data = ary.data - - indexer = FancyIndexer(context, builder, aryty, ary, - index_types, indices) - indexer.prepare() - - if isinstance(srcty, types.Buffer): - # Source is an array - src_dtype = srcty.dtype - index_shape = indexer.get_shape() - src = make_array(srcty)(context, builder, src) - # Broadcast source array to shape - srcty, src = _broadcast_to_shape(context, builder, srcty, src, index_shape) - src_shapes = cgutils.unpack_tuple(builder, src.shape) - src_strides = cgutils.unpack_tuple(builder, src.strides) - src_data = src.data - - # Check shapes are equal - shape_error = cgutils.false_bit - assert len(index_shape) == len(src_shapes) - - for u, v in zip(src_shapes, index_shape): - shape_error = builder.or_(shape_error, - builder.icmp_signed('!=', u, v)) - - with builder.if_then(shape_error, likely=False): - msg = "cannot assign slice from input of different size" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) - - # Check for array overlap - src_start, src_end = get_array_memory_extents(context, builder, srcty, src, - src_shapes, src_strides, src_data) - - dest_lower, dest_upper = indexer.get_offset_bounds(dest_strides, ary.itemsize) - dest_start, dest_end = compute_memory_extents(context, builder, - dest_lower, dest_upper, dest_data) - - use_copy = extents_may_overlap(context, builder, src_start, src_end, dest_start, dest_end) - - src_getitem, src_cleanup = maybe_copy_source(context, builder, use_copy, - srcty, src, src_shapes, - src_strides, src_data) - - elif isinstance(srcty, types.Sequence): - src_dtype = srcty.dtype - - # Check shape is equal to sequence length - index_shape = indexer.get_shape() - assert len(index_shape) == 1 - len_impl = context.get_function(len, signature(types.intp, srcty)) - seq_len = 
len_impl(builder, (src,)) - - shape_error = builder.icmp_signed('!=', index_shape[0], seq_len) - - with builder.if_then(shape_error, likely=False): - msg = "cannot assign slice from input of different size" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) - - def src_getitem(source_indices): - idx, = source_indices - getitem_impl = context.get_function('getitem', - signature(src_dtype, srcty, types.intp)) - return getitem_impl(builder, (src, idx)) - - def src_cleanup(): - pass - - else: - # Source is a scalar (broadcast or not, depending on destination - # shape). - src_dtype = srcty - - def src_getitem(source_indices): - return src - - def src_cleanup(): - pass - - # Loop on destination and copy from source to destination - dest_indices, counts = indexer.begin_loops() - - # Source is iterated in natural order - source_indices = tuple(c for c in counts if c is not None) - val = src_getitem(source_indices) - - # Cast to the destination dtype (cross-dtype slice assignement is allowed) - val = context.cast(builder, val, src_dtype, aryty.dtype) - - # No need to check for wraparound, as the indexers all ensure - # a positive index is returned. 
- dest_ptr = cgutils.get_item_pointer2(builder, dest_data, - dest_shapes, dest_strides, - aryty.layout, dest_indices, - wraparound=False) - store_item(context, builder, aryty, val, dest_ptr) - - indexer.end_loops() - - src_cleanup() - - return context.get_dummy_value() - - -#------------------------------------------------------------------------------- -# Shape / layout altering - -def vararg_to_tuple(context, builder, sig, args): - aryty = sig.args[0] - dimtys = sig.args[1:] - # values - ary = args[0] - dims = args[1:] - # coerce all types to intp - dims = [context.cast(builder, val, ty, types.intp) - for ty, val in zip(dimtys, dims)] - # make a tuple - shape = cgutils.pack_array(builder, dims, dims[0].type) - - shapety = types.UniTuple(dtype=types.intp, count=len(dims)) - new_sig = typing.signature(sig.return_type, aryty, shapety) - new_args = ary, shape - - return new_sig, new_args - - -@lower_builtin('array.transpose', types.Array) -def array_transpose(context, builder, sig, args): - return array_T(context, builder, sig.args[0], args[0]) - - -def permute_arrays(axis, shape, strides): - if len(axis) != len(set(axis)): - raise ValueError("repeated axis in transpose") - dim = len(shape) - for x in axis: - if x >= dim or abs(x) > dim: - raise ValueError("axis is out of bounds for array of given dimension") - - shape[:] = shape[axis] - strides[:] = strides[axis] - - -# Transposing an array involves permuting the shape and strides of the array -# based on the given axes. -@lower_builtin('array.transpose', types.Array, types.BaseTuple) -def array_transpose_tuple(context, builder, sig, args): - aryty = sig.args[0] - ary = make_array(aryty)(context, builder, args[0]) - - axisty, axis = sig.args[1], args[1] - num_axis, dtype = axisty.count, axisty.dtype - - ll_intp = context.get_value_type(types.intp) - ll_ary_size = lc.Type.array(ll_intp, num_axis) - - # Allocate memory for axes, shapes, and strides arrays. 
- arys = [axis, ary.shape, ary.strides] - ll_arys = [cgutils.alloca_once(builder, ll_ary_size) for _ in arys] - - # Store axes, shapes, and strides arrays to the allocated memory. - for src, dst in zip(arys, ll_arys): - builder.store(src, dst) - - np_ary_ty = types.Array(dtype=dtype, ndim=1, layout='C') - np_itemsize = context.get_constant(types.intp, - context.get_abi_sizeof(ll_intp)) - - # Form NumPy arrays for axes, shapes, and strides arrays. - np_arys = [make_array(np_ary_ty)(context, builder) for _ in arys] - - # Roughly, `np_ary = np.array(ll_ary)` for each of axes, shapes, and strides. - for np_ary, ll_ary in zip(np_arys, ll_arys): - populate_array(np_ary, - data=builder.bitcast(ll_ary, ll_intp.as_pointer()), - shape=[context.get_constant(types.intp, num_axis)], - strides=[np_itemsize], - itemsize=np_itemsize, - meminfo=None) - - # Pass NumPy arrays formed above to permute_arrays function that permutes - # shapes and strides based on axis contents. - context.compile_internal(builder, permute_arrays, - typing.signature(types.void, - np_ary_ty, np_ary_ty, np_ary_ty), - [a._getvalue() for a in np_arys]) - - # Make a new array based on permuted shape and strides and return it. 
- ret = make_array(sig.return_type)(context, builder) - populate_array(ret, - data=ary.data, - shape=builder.load(ll_arys[1]), - strides=builder.load(ll_arys[2]), - itemsize=ary.itemsize, - meminfo=ary.meminfo, - parent=ary.parent) - res = ret._getvalue() - return impl_ret_borrowed(context, builder, sig.return_type, res) - - -@lower_builtin('array.transpose', types.Array, types.VarArg(types.Any)) -def array_transpose_vararg(context, builder, sig, args): - new_sig, new_args = vararg_to_tuple(context, builder, sig, args) - return array_transpose_tuple(context, builder, new_sig, new_args) - - -@overload(np.transpose) -def numpy_transpose(a, axes=None): - - if axes is None: - def np_transpose_impl(arr): - return arr.transpose() - else: - def np_transpose_impl(arr, axes=None): - return arr.transpose(axes) - - return np_transpose_impl - - -@lower_getattr(types.Array, 'T') -def array_T(context, builder, typ, value): - if typ.ndim <= 1: - res = value - else: - ary = make_array(typ)(context, builder, value) - ret = make_array(typ)(context, builder) - shapes = cgutils.unpack_tuple(builder, ary.shape, typ.ndim) - strides = cgutils.unpack_tuple(builder, ary.strides, typ.ndim) - populate_array(ret, - data=ary.data, - shape=cgutils.pack_array(builder, shapes[::-1]), - strides=cgutils.pack_array(builder, strides[::-1]), - itemsize=ary.itemsize, - meminfo=ary.meminfo, - parent=ary.parent) - res = ret._getvalue() - return impl_ret_borrowed(context, builder, typ, res) - - -def _attempt_nocopy_reshape(context, builder, aryty, ary, - newnd, newshape, newstrides): - """ - Call into Numba_attempt_nocopy_reshape() for the given array type - and instance, and the specified new shape. - - Return value is non-zero if successful, and the array pointed to - by *newstrides* will be filled up with the computed results. 
- """ - ll_intp = context.get_value_type(types.intp) - ll_intp_star = ll_intp.as_pointer() - ll_intc = context.get_value_type(types.intc) - fnty = lc.Type.function(ll_intc, [ - # nd, *dims, *strides - ll_intp, ll_intp_star, ll_intp_star, - # newnd, *newdims, *newstrides - ll_intp, ll_intp_star, ll_intp_star, - # itemsize, is_f_order - ll_intp, ll_intc]) - fn = builder.module.get_or_insert_function( - fnty, name="numba_attempt_nocopy_reshape") - - nd = ll_intp(aryty.ndim) - shape = cgutils.gep_inbounds(builder, ary._get_ptr_by_name('shape'), 0, 0) - strides = cgutils.gep_inbounds(builder, ary._get_ptr_by_name('strides'), 0, 0) - newnd = ll_intp(newnd) - newshape = cgutils.gep_inbounds(builder, newshape, 0, 0) - newstrides = cgutils.gep_inbounds(builder, newstrides, 0, 0) - is_f_order = ll_intc(0) - res = builder.call(fn, [nd, shape, strides, - newnd, newshape, newstrides, - ary.itemsize, is_f_order]) - return res - - -def normalize_reshape_value(origsize, shape): - num_neg_value = 0 - known_size = 1 - for ax, s in enumerate(shape): - if s < 0: - num_neg_value += 1 - neg_ax = ax - else: - known_size *= s - - if num_neg_value == 0: - if origsize != known_size: - raise ValueError("total size of new array must be unchanged") - - elif num_neg_value == 1: - # Infer negative dimension - if known_size == 0: - inferred = 0 - ok = origsize == 0 - else: - inferred = origsize // known_size - ok = origsize % known_size == 0 - if not ok: - raise ValueError("total size of new array must be unchanged") - shape[neg_ax] = inferred - - else: - raise ValueError("multiple negative shape values") - - -@lower_builtin('array.reshape', types.Array, types.BaseTuple) -def array_reshape(context, builder, sig, args): - aryty = sig.args[0] - retty = sig.return_type - - shapety = sig.args[1] - shape = args[1] - - ll_intp = context.get_value_type(types.intp) - ll_shape = lc.Type.array(ll_intp, shapety.count) - - ary = make_array(aryty)(context, builder, args[0]) - - # We will change the target 
shape in this slot - # (see normalize_reshape_value() below) - newshape = cgutils.alloca_once(builder, ll_shape) - builder.store(shape, newshape) - - # Create a shape array pointing to the value of newshape. - # (roughly, `shape_ary = np.array(ary.shape)`) - shape_ary_ty = types.Array(dtype=shapety.dtype, ndim=1, layout='C') - shape_ary = make_array(shape_ary_ty)(context, builder) - shape_itemsize = context.get_constant(types.intp, - context.get_abi_sizeof(ll_intp)) - populate_array(shape_ary, - data=builder.bitcast(newshape, ll_intp.as_pointer()), - shape=[context.get_constant(types.intp, shapety.count)], - strides=[shape_itemsize], - itemsize=shape_itemsize, - meminfo=None) - - # Compute the original array size - size = ary.nitems - - # Call our normalizer which will fix the shape array in case of negative - # shape value - context.compile_internal(builder, normalize_reshape_value, - typing.signature(types.void, - types.uintp, shape_ary_ty), - [size, shape_ary._getvalue()]) - - # Perform reshape (nocopy) - newnd = shapety.count - newstrides = cgutils.alloca_once(builder, ll_shape) - - ok = _attempt_nocopy_reshape(context, builder, aryty, ary, newnd, - newshape, newstrides) - fail = builder.icmp_unsigned('==', ok, ok.type(0)) - - with builder.if_then(fail): - msg = "incompatible shape for array" - context.call_conv.return_user_exc(builder, NotImplementedError, (msg,)) - - ret = make_array(retty)(context, builder) - populate_array(ret, - data=ary.data, - shape=builder.load(newshape), - strides=builder.load(newstrides), - itemsize=ary.itemsize, - meminfo=ary.meminfo, - parent=ary.parent) - res = ret._getvalue() - return impl_ret_borrowed(context, builder, sig.return_type, res) - - -@lower_builtin('array.reshape', types.Array, types.VarArg(types.Any)) -def array_reshape_vararg(context, builder, sig, args): - new_sig, new_args = vararg_to_tuple(context, builder, sig, args) - return array_reshape(context, builder, new_sig, new_args) - - -@overload(np.reshape) -def 
np_reshape(a, shape): - def np_reshape_impl(a, shape): - return a.reshape(shape) - return np_reshape_impl - - -@lower_builtin('array.ravel', types.Array) -def array_ravel(context, builder, sig, args): - # Only support no argument version (default order='C') - def imp_nocopy(ary): - """No copy version""" - return ary.reshape(ary.size) - - def imp_copy(ary): - """Copy version""" - return ary.flatten() - - # If the input array is C layout already, use the nocopy version - if sig.args[0].layout == 'C': - imp = imp_nocopy - # otherwise, use flatten under-the-hood - else: - imp = imp_copy - - res = context.compile_internal(builder, imp, sig, args) - res = impl_ret_new_ref(context, builder, sig.return_type, res) - return res - - -@lower_builtin(np.ravel, types.Array) -def np_ravel(context, builder, sig, args): - def np_ravel_impl(a): - return a.ravel() - - return context.compile_internal(builder, np_ravel_impl, sig, args) - - -@lower_builtin('array.flatten', types.Array) -def array_flatten(context, builder, sig, args): - # Only support flattening to C layout currently. - def imp(ary): - return ary.copy().reshape(ary.size) - - res = context.compile_internal(builder, imp, sig, args) - res = impl_ret_new_ref(context, builder, sig.return_type, res) - return res - - -def _change_dtype(context, builder, oldty, newty, ary): - """ - Attempt to fix up *ary* for switching from *oldty* to *newty*. - - See Numpy's array_descr_set() - (np/core/src/multiarray/getset.c). - Attempt to fix the array's shape and strides for a new dtype. - False is returned on failure, True on success. - """ - assert oldty.ndim == newty.ndim - assert oldty.layout == newty.layout - - new_layout = ord(newty.layout) - any_layout = ord('A') - c_layout = ord('C') - f_layout = ord('F') - - int8 = types.int8 - - def imp(nd, dims, strides, old_itemsize, new_itemsize, layout): - # Attempt to update the layout due to limitation of the numba - # type system. 
- if layout == any_layout: - # Test rightmost stride to be contiguous - if strides[-1] == old_itemsize: - # Process this as if it is C contiguous - layout = int8(c_layout) - # Test leftmost stride to be F contiguous - elif strides[0] == old_itemsize: - # Process this as if it is F contiguous - layout = int8(f_layout) - - if old_itemsize != new_itemsize and (layout == any_layout or nd == 0): - return False - - if layout == c_layout: - i = nd - 1 - else: - i = 0 - - if new_itemsize < old_itemsize: - # If it is compatible, increase the size of the dimension - # at the end (or at the front if F-contiguous) - if (old_itemsize % new_itemsize) != 0: - return False - - newdim = old_itemsize // new_itemsize - dims[i] *= newdim - strides[i] = new_itemsize - - elif new_itemsize > old_itemsize: - # Determine if last (or first if F-contiguous) dimension - # is compatible - bytelength = dims[i] * old_itemsize - if (bytelength % new_itemsize) != 0: - return False - - dims[i] = bytelength // new_itemsize - strides[i] = new_itemsize - - else: - # Same item size: nothing to do (this also works for - # non-contiguous arrays). 
- pass - - return True - - old_itemsize = context.get_constant(types.intp, - get_itemsize(context, oldty)) - new_itemsize = context.get_constant(types.intp, - get_itemsize(context, newty)) - - nd = context.get_constant(types.intp, newty.ndim) - shape_data = cgutils.gep_inbounds(builder, ary._get_ptr_by_name('shape'), - 0, 0) - strides_data = cgutils.gep_inbounds(builder, - ary._get_ptr_by_name('strides'), 0, 0) - - shape_strides_array_type = types.Array(dtype=types.intp, ndim=1, layout='C') - arycls = context.make_array(shape_strides_array_type) - - shape_constant = cgutils.pack_array(builder, - [context.get_constant(types.intp, - newty.ndim)]) - - sizeof_intp = context.get_abi_sizeof(context.get_data_type(types.intp)) - sizeof_intp = context.get_constant(types.intp, sizeof_intp) - strides_constant = cgutils.pack_array(builder, [sizeof_intp]) - - shape_ary = arycls(context, builder) - - populate_array(shape_ary, - data=shape_data, - shape=shape_constant, - strides=strides_constant, - itemsize=sizeof_intp, - meminfo=None) - - strides_ary = arycls(context, builder) - populate_array(strides_ary, - data=strides_data, - shape=shape_constant, - strides=strides_constant, - itemsize=sizeof_intp, - meminfo=None) - - shape = shape_ary._getvalue() - strides = strides_ary._getvalue() - args = [nd, shape, strides, old_itemsize, new_itemsize, - context.get_constant(types.int8, new_layout)] - - sig = signature(types.boolean, - types.intp, # nd - shape_strides_array_type, # dims - shape_strides_array_type, # strides - types.intp, # old_itemsize - types.intp, # new_itemsize - types.int8, # layout - ) - - res = context.compile_internal(builder, imp, sig, args) - update_array_info(newty, ary) - res = impl_ret_borrowed(context, builder, sig.return_type, res) - return res - - -@overload(np.unique) -def np_unique(a): - def np_unique_impl(a): - b = np.sort(a.ravel()) - head = list(b[:1]) - tail = [x for i, x in enumerate(b[1:]) if b[i] != x] - return np.array(head + tail) - return 
np_unique_impl - - -@lower_builtin('array.view', types.Array, types.DTypeSpec) -def array_view(context, builder, sig, args): - aryty = sig.args[0] - retty = sig.return_type - - ary = make_array(aryty)(context, builder, args[0]) - ret = make_array(retty)(context, builder) - # Copy all fields, casting the "data" pointer appropriately - fields = set(ret._datamodel._fields) - for k in sorted(fields): - val = getattr(ary, k) - if k == 'data': - ptrty = ret.data.type - ret.data = builder.bitcast(val, ptrty) - else: - setattr(ret, k, val) - - ok = _change_dtype(context, builder, aryty, retty, ret) - fail = builder.icmp_unsigned('==', ok, lc.Constant.int(ok.type, 0)) - - with builder.if_then(fail): - msg = "new type not compatible with array" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) - - res = ret._getvalue() - return impl_ret_borrowed(context, builder, sig.return_type, res) - - -#------------------------------------------------------------------------------- -# Array attributes - -@lower_getattr(types.Array, "dtype") -def array_dtype(context, builder, typ, value): - res = context.get_dummy_value() - return impl_ret_untracked(context, builder, typ, res) - -@lower_getattr(types.Array, "shape") -@lower_getattr(types.MemoryView, "shape") -def array_shape(context, builder, typ, value): - arrayty = make_array(typ) - array = arrayty(context, builder, value) - res = array.shape - return impl_ret_untracked(context, builder, typ, res) - - -@lower_getattr(types.Array, "strides") -@lower_getattr(types.MemoryView, "strides") -def array_strides(context, builder, typ, value): - arrayty = make_array(typ) - array = arrayty(context, builder, value) - res = array.strides - return impl_ret_untracked(context, builder, typ, res) - - -@lower_getattr(types.Array, "ndim") -@lower_getattr(types.MemoryView, "ndim") -def array_ndim(context, builder, typ, value): - res = context.get_constant(types.intp, typ.ndim) - return impl_ret_untracked(context, builder, typ, res) - - 
-@lower_getattr(types.Array, "size") -def array_size(context, builder, typ, value): - arrayty = make_array(typ) - array = arrayty(context, builder, value) - res = array.nitems - return impl_ret_untracked(context, builder, typ, res) - - -@lower_getattr(types.Array, "itemsize") -@lower_getattr(types.MemoryView, "itemsize") -def array_itemsize(context, builder, typ, value): - arrayty = make_array(typ) - array = arrayty(context, builder, value) - res = array.itemsize - return impl_ret_untracked(context, builder, typ, res) - - -@lower_getattr(types.MemoryView, "nbytes") -def array_nbytes(context, builder, typ, value): - """ - nbytes = size * itemsize - """ - arrayty = make_array(typ) - array = arrayty(context, builder, value) - dims = cgutils.unpack_tuple(builder, array.shape, typ.ndim) - res = builder.mul(array.nitems, array.itemsize) - return impl_ret_untracked(context, builder, typ, res) - - -@lower_getattr(types.MemoryView, "contiguous") -def array_contiguous(context, builder, typ, value): - res = context.get_constant(types.boolean, typ.is_contig) - return impl_ret_untracked(context, builder, typ, res) - -@lower_getattr(types.MemoryView, "c_contiguous") -def array_c_contiguous(context, builder, typ, value): - res = context.get_constant(types.boolean, typ.is_c_contig) - return impl_ret_untracked(context, builder, typ, res) - -@lower_getattr(types.MemoryView, "f_contiguous") -def array_f_contiguous(context, builder, typ, value): - res = context.get_constant(types.boolean, typ.is_f_contig) - return impl_ret_untracked(context, builder, typ, res) - - -@lower_getattr(types.MemoryView, "readonly") -def array_readonly(context, builder, typ, value): - res = context.get_constant(types.boolean, not typ.mutable) - return impl_ret_untracked(context, builder, typ, res) - - -# array.ctypes - -@lower_getattr(types.Array, "ctypes") -def array_ctypes(context, builder, typ, value): - arrayty = make_array(typ) - array = arrayty(context, builder, value) - # Create new ArrayCType 
structure - act = types.ArrayCTypes(typ) - ctinfo = context.make_helper(builder, act) - ctinfo.data = array.data - ctinfo.meminfo = array.meminfo - res = ctinfo._getvalue() - return impl_ret_borrowed(context, builder, act, res) - -@lower_getattr(types.ArrayCTypes, "data") -def array_ctypes_data(context, builder, typ, value): - ctinfo = context.make_helper(builder, typ, value=value) - res = ctinfo.data - # Convert it to an integer - res = builder.ptrtoint(res, context.get_value_type(types.intp)) - return impl_ret_untracked(context, builder, typ, res) - -@lower_cast(types.ArrayCTypes, types.CPointer) -@lower_cast(types.ArrayCTypes, types.voidptr) -def array_ctypes_to_pointer(context, builder, fromty, toty, val): - ctinfo = context.make_helper(builder, fromty, value=val) - res = ctinfo.data - res = builder.bitcast(res, context.get_value_type(toty)) - return impl_ret_untracked(context, builder, toty, res) - - -def _call_contiguous_check(checker, context, builder, aryty, ary): - """Helper to invoke the contiguous checker function on an array - - Args - ---- - checker : - ``numba.numpy_supports.is_contiguous``, or - ``numba.numpy_supports.is_fortran``. 
- context : target context - builder : llvm ir builder - aryty : numba type - ary : llvm value - """ - ary = make_array(aryty)(context, builder, value=ary) - tup_intp = types.UniTuple(types.intp, aryty.ndim) - itemsize = context.get_abi_sizeof(context.get_value_type(aryty.dtype)) - check_sig = signature(types.bool_, tup_intp, tup_intp, types.intp) - check_args = [ary.shape, ary.strides, - context.get_constant(types.intp, itemsize)] - is_contig = context.compile_internal(builder, checker, check_sig, - check_args) - return is_contig - - -# array.flags - -@lower_getattr(types.Array, "flags") -def array_flags(context, builder, typ, value): - flagsobj = context.make_helper(builder, types.ArrayFlags(typ)) - flagsobj.parent = value - res = flagsobj._getvalue() - return impl_ret_new_ref(context, builder, typ, res) - -@lower_getattr(types.ArrayFlags, "contiguous") -@lower_getattr(types.ArrayFlags, "c_contiguous") -def array_flags_c_contiguous(context, builder, typ, value): - if typ.array_type.layout != 'C': - # any layout can stil be contiguous - flagsobj = context.make_helper(builder, typ, value=value) - res = _call_contiguous_check(is_contiguous, context, builder, - typ.array_type, flagsobj.parent) - else: - val = typ.array_type.layout == 'C' - res = context.get_constant(types.boolean, val) - return impl_ret_untracked(context, builder, typ, res) - -@lower_getattr(types.ArrayFlags, "f_contiguous") -def array_flags_f_contiguous(context, builder, typ, value): - if typ.array_type.layout != 'F': - # any layout can stil be contiguous - flagsobj = context.make_helper(builder, typ, value=value) - res = _call_contiguous_check(is_fortran, context, builder, - typ.array_type, flagsobj.parent) - else: - layout = typ.array_type.layout - val = layout == 'F' if typ.array_type.ndim > 1 else layout in 'CF' - res = context.get_constant(types.boolean, val) - return impl_ret_untracked(context, builder, typ, res) - - 
-#------------------------------------------------------------------------------- -# .real / .imag - -@lower_getattr(types.Array, "real") -def array_real_part(context, builder, typ, value): - if typ.dtype in types.complex_domain: - return array_complex_attr(context, builder, typ, value, attr='real') - elif typ.dtype in types.number_domain: - # as an identity function - return impl_ret_borrowed(context, builder, typ, value) - else: - raise NotImplementedError('unsupported .real for {}'.format(type.dtype)) - - -@lower_getattr(types.Array, "imag") -def array_imag_part(context, builder, typ, value): - if typ.dtype in types.complex_domain: - return array_complex_attr(context, builder, typ, value, attr='imag') - elif typ.dtype in types.number_domain: - # return a readonly zero array - sig = signature(typ.copy(readonly=True), typ) - return numpy_zeros_like_nd(context, builder, sig, [value]) - else: - raise NotImplementedError('unsupported .imag for {}'.format(type.dtype)) - - -def array_complex_attr(context, builder, typ, value, attr): - """ - Given a complex array, it's memory layout is: - - R C R C R C - ^ ^ ^ - - (`R` indicates a float for the real part; - `C` indicates a float for the imaginery part; - the `^` indicates the start of each element) - - To get the real part, we can simply change the dtype and itemsize to that - of the underlying float type. The new layout is: - - R x R x R x - ^ ^ ^ - - (`x` indicates unused) - - A load operation will use the dtype to determine the number of bytes to - load. - - To get the imaginary part, we shift the pointer by 1 float offset and - change the dtype and itemsize. 
The new layout is: - - x C x C x C - ^ ^ ^ - """ - if attr not in ['real', 'imag'] or typ.dtype not in types.complex_domain: - raise NotImplementedError("cannot get attribute `{}`".format(attr)) - - arrayty = make_array(typ) - array = arrayty(context, builder, value) - - # sizeof underlying float type - flty = typ.dtype.underlying_float - sizeof_flty = context.get_abi_sizeof(context.get_data_type(flty)) - itemsize = array.itemsize.type(sizeof_flty) - - # cast data pointer to float type - llfltptrty = context.get_value_type(flty).as_pointer() - dataptr = builder.bitcast(array.data, llfltptrty) - - # add offset - if attr == 'imag': - dataptr = builder.gep(dataptr, [ir.IntType(32)(1)]) - - # make result - resultty = typ.copy(dtype=flty, layout='A') - result = make_array(resultty)(context, builder) - repl = dict(data=dataptr, itemsize=itemsize) - cgutils.copy_struct(result, array, repl) - return impl_ret_borrowed(context, builder, resultty, result._getvalue()) - - -#------------------------------------------------------------------------------- -# DType attribute - -@lower_getattr(types.DType, 'type') -def dtype_type(context, builder, dtypety, dtypeval): - # Just return a dummy opaque value - return context.get_dummy_value() - -@lower_getattr(types.DType, 'kind') -def dtype_type(context, builder, dtypety, dtypeval): - # Just return a dummy opaque value - return context.get_dummy_value() - -#------------------------------------------------------------------------------- -# Structured / record lookup - -@lower_getattr_generic(types.Array) -def array_record_getattr(context, builder, typ, value, attr): - """ - Generic getattr() implementation for record arrays: fetch the given - record member, i.e. a subarray. 
- """ - arrayty = make_array(typ) - array = arrayty(context, builder, value) - - rectype = typ.dtype - if not isinstance(rectype, types.Record): - raise NotImplementedError("attribute %r of %s not defined" % (attr, typ)) - dtype = rectype.typeof(attr) - offset = rectype.offset(attr) - - resty = typ.copy(dtype=dtype, layout='A') - - raryty = make_array(resty) - - rary = raryty(context, builder) - - constoffset = context.get_constant(types.intp, offset) - - llintp = context.get_value_type(types.intp) - newdata = builder.add(builder.ptrtoint(array.data, llintp), constoffset) - newdataptr = builder.inttoptr(newdata, rary.data.type) - - datasize = context.get_abi_sizeof(context.get_data_type(dtype)) - populate_array(rary, - data=newdataptr, - shape=array.shape, - strides=array.strides, - itemsize=context.get_constant(types.intp, datasize), - meminfo=array.meminfo, - parent=array.parent) - res = rary._getvalue() - return impl_ret_borrowed(context, builder, resty, res) - -@lower_builtin('static_getitem', types.Array, types.Const) -def array_record_getitem(context, builder, sig, args): - index = args[1] - if not isinstance(index, str): - # This will fallback to normal getitem - raise NotImplementedError - return array_record_getattr(context, builder, sig.args[0], args[0], index) - - -@lower_getattr_generic(types.Record) -def record_getattr(context, builder, typ, value, attr): - """ - Generic getattr() implementation for records: fetch the given - record member, i.e. a scalar. - """ - context.sentry_record_alignment(typ, attr) - offset = typ.offset(attr) - elemty = typ.typeof(attr) - - if isinstance(elemty, types.NestedArray): - # Only a nested array's *data* is stored in a structured array, - # so we create an array structure to point to that data. 
- aryty = make_array(elemty) - ary = aryty(context, builder) - dtype = elemty.dtype - newshape = [context.get_constant(types.intp, s) for s in - elemty.shape] - newstrides = [context.get_constant(types.intp, s) for s in - elemty.strides] - newdata = cgutils.get_record_member(builder, value, offset, - context.get_data_type(dtype)) - populate_array( - ary, - data=newdata, - shape=cgutils.pack_array(builder, newshape), - strides=cgutils.pack_array(builder, newstrides), - itemsize=context.get_constant(types.intp, elemty.size), - meminfo=None, - parent=None, - ) - res = ary._getvalue() - return impl_ret_borrowed(context, builder, typ, res) - else: - dptr = cgutils.get_record_member(builder, value, offset, - context.get_data_type(elemty)) - align = None if typ.aligned else 1 - res = context.unpack_value(builder, elemty, dptr, align) - return impl_ret_borrowed(context, builder, typ, res) - -@lower_setattr_generic(types.Record) -def record_setattr(context, builder, sig, args, attr): - """ - Generic setattr() implementation for records: set the given - record member, i.e. a scalar. 
- """ - typ, valty = sig.args - target, val = args - - context.sentry_record_alignment(typ, attr) - offset = typ.offset(attr) - elemty = typ.typeof(attr) - - dptr = cgutils.get_record_member(builder, target, offset, - context.get_data_type(elemty)) - val = context.cast(builder, val, valty, elemty) - align = None if typ.aligned else 1 - context.pack_value(builder, elemty, val, dptr, align=align) - - -@lower_builtin('static_getitem', types.Record, types.Const) -def record_getitem(context, builder, sig, args): - """ - Record.__getitem__ redirects to getattr() - """ - impl = context.get_getattr(sig.args[0], args[1]) - return impl(context, builder, sig.args[0], args[0], args[1]) - -@lower_builtin('static_setitem', types.Record, types.Const, types.Any) -def record_setitem(context, builder, sig, args): - """ - Record.__setitem__ redirects to setattr() - """ - recty, _, valty = sig.args - rec, idx, val = args - getattr_sig = signature(sig.return_type, recty, valty) - impl = context.get_setattr(idx, getattr_sig) - assert impl is not None - return impl(builder, (rec, val)) - - -#------------------------------------------------------------------------------- -# Constant arrays and records - - -@lower_constant(types.Array) -def constant_record(context, builder, ty, pyval): - """ - Create a constant array (mechanism is target-dependent). - """ - return context.make_constant_array(builder, ty, pyval) - -@lower_constant(types.Record) -def constant_record(context, builder, ty, pyval): - """ - Create a record constant as a stack-allocated array of bytes. 
- """ - lty = ir.ArrayType(ir.IntType(8), pyval.nbytes) - val = lty(bytearray(pyval.tostring())) - return cgutils.alloca_once_value(builder, val) - - -#------------------------------------------------------------------------------- -# Comparisons - -@lower_builtin('is', types.Array, types.Array) -def array_is(context, builder, sig, args): - aty, bty = sig.args - if aty != bty: - return cgutils.false_bit - - def array_is_impl(a, b): - return (a.shape == b.shape and - a.strides == b.strides and - a.ctypes.data == b.ctypes.data) - - return context.compile_internal(builder, array_is_impl, sig, args) - - -#------------------------------------------------------------------------------- -# builtin `np.flat` implementation - -def make_array_flat_cls(flatiterty): - """ - Return the Structure representation of the given *flatiterty* (an - instance of types.NumpyFlatType). - """ - return _make_flattening_iter_cls(flatiterty, 'flat') - - -def make_array_ndenumerate_cls(nditerty): - """ - Return the Structure representation of the given *nditerty* (an - instance of types.NumpyNdEnumerateType). 
- """ - return _make_flattening_iter_cls(nditerty, 'ndenumerate') - - -def _increment_indices(context, builder, ndim, shape, indices, end_flag=None, - loop_continue=None, loop_break=None): - zero = context.get_constant(types.intp, 0) - - bbend = builder.append_basic_block('end_increment') - - if end_flag is not None: - builder.store(cgutils.false_byte, end_flag) - - for dim in reversed(range(ndim)): - idxptr = cgutils.gep_inbounds(builder, indices, dim) - idx = cgutils.increment_index(builder, builder.load(idxptr)) - - count = shape[dim] - in_bounds = builder.icmp_signed('<', idx, count) - with cgutils.if_likely(builder, in_bounds): - # New index is still in bounds - builder.store(idx, idxptr) - if loop_continue is not None: - loop_continue(dim) - builder.branch(bbend) - # Index out of bounds => reset it and proceed it to outer index - builder.store(zero, idxptr) - if loop_break is not None: - loop_break(dim) - - if end_flag is not None: - builder.store(cgutils.true_byte, end_flag) - builder.branch(bbend) - - builder.position_at_end(bbend) - -def _increment_indices_array(context, builder, arrty, arr, indices, end_flag=None): - shape = cgutils.unpack_tuple(builder, arr.shape, arrty.ndim) - _increment_indices(context, builder, arrty.ndim, shape, indices, end_flag) - - -def make_nditer_cls(nditerty): - """ - Return the Structure representation of the given *nditerty* (an - instance of types.NumpyNdIterType). - """ - ndim = nditerty.ndim - layout = nditerty.layout - narrays = len(nditerty.arrays) - nshapes = ndim if nditerty.need_shaped_indexing else 1 - - class BaseSubIter(object): - """ - Base class for sub-iterators of a nditer() instance. 
- """ - - def __init__(self, nditer, member_name, start_dim, end_dim): - self.nditer = nditer - self.member_name = member_name - self.start_dim = start_dim - self.end_dim = end_dim - self.ndim = end_dim - start_dim - - def set_member_ptr(self, ptr): - setattr(self.nditer, self.member_name, ptr) - - @utils.cached_property - def member_ptr(self): - return getattr(self.nditer, self.member_name) - - def init_specific(self, context, builder): - pass - - def loop_continue(self, context, builder, logical_dim): - pass - - def loop_break(self, context, builder, logical_dim): - pass - - - class FlatSubIter(BaseSubIter): - """ - Sub-iterator walking a contiguous array in physical order, with - support for broadcasting (the index is reset on the outer dimension). - """ - - def init_specific(self, context, builder): - zero = context.get_constant(types.intp, 0) - self.set_member_ptr(cgutils.alloca_once_value(builder, zero)) - - def compute_pointer(self, context, builder, indices, arrty, arr): - index = builder.load(self.member_ptr) - return builder.gep(arr.data, [index]) - - def loop_continue(self, context, builder, logical_dim): - if logical_dim == self.ndim - 1: - # Only increment index inside innermost logical dimension - index = builder.load(self.member_ptr) - index = cgutils.increment_index(builder, index) - builder.store(index, self.member_ptr) - - def loop_break(self, context, builder, logical_dim): - if logical_dim == 0: - # At the exit of outermost logical dimension, reset index - zero = context.get_constant(types.intp, 0) - builder.store(zero, self.member_ptr) - elif logical_dim == self.ndim - 1: - # Inside innermost logical dimension, increment index - index = builder.load(self.member_ptr) - index = cgutils.increment_index(builder, index) - builder.store(index, self.member_ptr) - - - class TrivialFlatSubIter(BaseSubIter): - """ - Sub-iterator walking a contiguous array in physical order, - *without* support for broadcasting. 
- """ - - def init_specific(self, context, builder): - assert not nditerty.need_shaped_indexing - - def compute_pointer(self, context, builder, indices, arrty, arr): - assert len(indices) <= 1, len(indices) - return builder.gep(arr.data, indices) - - - class IndexedSubIter(BaseSubIter): - """ - Sub-iterator walking an array in logical order. - """ - - def compute_pointer(self, context, builder, indices, arrty, arr): - assert len(indices) == self.ndim - return cgutils.get_item_pointer(builder, arrty, arr, - indices, wraparound=False) - - - class ZeroDimSubIter(BaseSubIter): - """ - Sub-iterator "walking" a 0-d array. - """ - - def compute_pointer(self, context, builder, indices, arrty, arr): - return arr.data - - - class ScalarSubIter(BaseSubIter): - """ - Sub-iterator "walking" a scalar value. - """ - - def compute_pointer(self, context, builder, indices, arrty, arr): - return arr - - - class NdIter(cgutils.create_struct_proxy(nditerty)): - """ - .nditer() implementation. - - Note: 'F' layout means the shape is iterated in reverse logical order, - so indices and shapes arrays have to be reversed as well. - """ - - @utils.cached_property - def subiters(self): - l = [] - factories = {'flat': FlatSubIter if nditerty.need_shaped_indexing - else TrivialFlatSubIter, - 'indexed': IndexedSubIter, - '0d': ZeroDimSubIter, - 'scalar': ScalarSubIter, - } - for i, sub in enumerate(nditerty.indexers): - kind, start_dim, end_dim, _ = sub - member_name = 'index%d' % i - factory = factories[kind] - l.append(factory(self, member_name, start_dim, end_dim)) - return l - - def init_specific(self, context, builder, arrtys, arrays): - """ - Initialize the nditer() instance for the specific array inputs. 
- """ - zero = context.get_constant(types.intp, 0) - - # Store inputs - self.arrays = context.make_tuple(builder, types.Tuple(arrtys), - arrays) - # Create slots for scalars - for i, ty in enumerate(arrtys): - if not isinstance(ty, types.Array): - member_name = 'scalar%d' % i - # XXX as_data()? - slot = cgutils.alloca_once_value(builder, arrays[i]) - setattr(self, member_name, slot) - - arrays = self._arrays_or_scalars(context, builder, arrtys, arrays) - - # Extract iterator shape (the shape of the most-dimensional input) - main_shape_ty = types.UniTuple(types.intp, ndim) - main_shape = None - main_nitems = None - for i, arrty in enumerate(arrtys): - if isinstance(arrty, types.Array) and arrty.ndim == ndim: - main_shape = arrays[i].shape - main_nitems = arrays[i].nitems - break - else: - # Only scalar inputs => synthesize a dummy shape - assert ndim == 0 - main_shape = context.make_tuple(builder, main_shape_ty, ()) - main_nitems = context.get_constant(types.intp, 1) - - # Validate shapes of array inputs - def check_shape(shape, main_shape): - n = len(shape) - for i in range(n): - if shape[i] != main_shape[len(main_shape) - n + i]: - raise ValueError("nditer(): operands could not be broadcast together") - - for arrty, arr in zip(arrtys, arrays): - if isinstance(arrty, types.Array) and arrty.ndim > 0: - context.compile_internal(builder, check_shape, - signature(types.none, - types.UniTuple(types.intp, arrty.ndim), - main_shape_ty), - (arr.shape, main_shape)) - - # Compute shape and size - shapes = cgutils.unpack_tuple(builder, main_shape) - if layout == 'F': - shapes = shapes[::-1] - - # If shape is empty, mark iterator exhausted - shape_is_empty = builder.icmp_signed('==', main_nitems, zero) - exhausted = builder.select(shape_is_empty, cgutils.true_byte, - cgutils.false_byte) - - if not nditerty.need_shaped_indexing: - # Flatten shape to make iteration faster on small innermost - # dimensions (e.g. 
a (100000, 3) shape) - shapes = (main_nitems,) - assert len(shapes) == nshapes - - indices = cgutils.alloca_once(builder, zero.type, size=nshapes) - for dim in range(nshapes): - idxptr = cgutils.gep_inbounds(builder, indices, dim) - builder.store(zero, idxptr) - - self.indices = indices - self.shape = cgutils.pack_array(builder, shapes, zero.type) - self.exhausted = cgutils.alloca_once_value(builder, exhausted) - - # Initialize subiterators - for subiter in self.subiters: - subiter.init_specific(context, builder) - - def iternext_specific(self, context, builder, result): - """ - Compute next iteration of the nditer() instance. - """ - bbend = builder.append_basic_block('end') - - # Branch early if exhausted - exhausted = cgutils.as_bool_bit(builder, builder.load(self.exhausted)) - with cgutils.if_unlikely(builder, exhausted): - result.set_valid(False) - builder.branch(bbend) - - arrtys = nditerty.arrays - arrays = cgutils.unpack_tuple(builder, self.arrays) - arrays = self._arrays_or_scalars(context, builder, arrtys, arrays) - indices = self.indices - - # Compute iterated results - result.set_valid(True) - views = self._make_views(context, builder, indices, arrtys, arrays) - views = [v._getvalue() for v in views] - if len(views) == 1: - result.yield_(views[0]) - else: - result.yield_(context.make_tuple(builder, nditerty.yield_type, - views)) - - shape = cgutils.unpack_tuple(builder, self.shape) - _increment_indices(context, builder, len(shape), shape, - indices, self.exhausted, - functools.partial(self._loop_continue, context, builder), - functools.partial(self._loop_break, context, builder), - ) - - builder.branch(bbend) - builder.position_at_end(bbend) - - def _loop_continue(self, context, builder, dim): - for sub in self.subiters: - if sub.start_dim <= dim < sub.end_dim: - sub.loop_continue(context, builder, dim - sub.start_dim) - - def _loop_break(self, context, builder, dim): - for sub in self.subiters: - if sub.start_dim <= dim < sub.end_dim: - 
sub.loop_break(context, builder, dim - sub.start_dim) - - def _make_views(self, context, builder, indices, arrtys, arrays): - """ - Compute the views to be yielded. - """ - views = [None] * narrays - indexers = nditerty.indexers - subiters = self.subiters - rettys = nditerty.yield_type - if isinstance(rettys, types.BaseTuple): - rettys = list(rettys) - else: - rettys = [rettys] - indices = [builder.load(cgutils.gep_inbounds(builder, indices, i)) - for i in range(nshapes)] - - for sub, subiter in zip(indexers, subiters): - _, _, _, array_indices = sub - sub_indices = indices[subiter.start_dim:subiter.end_dim] - if layout == 'F': - sub_indices = sub_indices[::-1] - for i in array_indices: - assert views[i] is None - views[i] = self._make_view(context, builder, sub_indices, - rettys[i], - arrtys[i], arrays[i], subiter) - assert all(v for v in views) - return views - - def _make_view(self, context, builder, indices, retty, arrty, arr, subiter): - """ - Compute a 0d view for a given input array. 
- """ - assert isinstance(retty, types.Array) and retty.ndim == 0 - - ptr = subiter.compute_pointer(context, builder, indices, arrty, arr) - view = context.make_array(retty)(context, builder) - - itemsize = get_itemsize(context, retty) - shape = context.make_tuple(builder, types.UniTuple(types.intp, 0), ()) - strides = context.make_tuple(builder, types.UniTuple(types.intp, 0), ()) - # HACK: meminfo=None avoids expensive refcounting operations - # on ephemeral views - populate_array(view, ptr, shape, strides, itemsize, meminfo=None) - return view - - def _arrays_or_scalars(self, context, builder, arrtys, arrays): - # Return a list of either array structures or pointers to - # scalar slots - l = [] - for i, (arrty, arr) in enumerate(zip(arrtys, arrays)): - if isinstance(arrty, types.Array): - l.append(context.make_array(arrty)(context, builder, value=arr)) - else: - l.append(getattr(self, "scalar%d" % i)) - return l - - return NdIter - - -def make_ndindex_cls(nditerty): - """ - Return the Structure representation of the given *nditerty* (an - instance of types.NumpyNdIndexType). - """ - ndim = nditerty.ndim - - class NdIndexIter(cgutils.create_struct_proxy(nditerty)): - """ - .ndindex() implementation. - """ - - def init_specific(self, context, builder, shapes): - zero = context.get_constant(types.intp, 0) - indices = cgutils.alloca_once(builder, zero.type, - size=context.get_constant(types.intp, - ndim)) - exhausted = cgutils.alloca_once_value(builder, cgutils.false_byte) - - for dim in range(ndim): - idxptr = cgutils.gep_inbounds(builder, indices, dim) - builder.store(zero, idxptr) - # 0-sized dimensions really indicate an empty array, - # but we have to catch that condition early to avoid - # a bug inside the iteration logic. 
- dim_size = shapes[dim] - dim_is_empty = builder.icmp(lc.ICMP_EQ, dim_size, zero) - with cgutils.if_unlikely(builder, dim_is_empty): - builder.store(cgutils.true_byte, exhausted) - - self.indices = indices - self.exhausted = exhausted - self.shape = cgutils.pack_array(builder, shapes, zero.type) - - def iternext_specific(self, context, builder, result): - zero = context.get_constant(types.intp, 0) - - bbend = builder.append_basic_block('end') - - exhausted = cgutils.as_bool_bit(builder, builder.load(self.exhausted)) - with cgutils.if_unlikely(builder, exhausted): - result.set_valid(False) - builder.branch(bbend) - - indices = [builder.load(cgutils.gep_inbounds(builder, self.indices, dim)) - for dim in range(ndim)] - for load in indices: - mark_positive(builder, load) - - result.yield_(cgutils.pack_array(builder, indices, zero.type)) - result.set_valid(True) - - shape = cgutils.unpack_tuple(builder, self.shape, ndim) - _increment_indices(context, builder, ndim, shape, - self.indices, self.exhausted) - - builder.branch(bbend) - builder.position_at_end(bbend) - - return NdIndexIter - - -def _make_flattening_iter_cls(flatiterty, kind): - assert kind in ('flat', 'ndenumerate') - - array_type = flatiterty.array_type - dtype = array_type.dtype - - if array_type.layout == 'C': - class CContiguousFlatIter(cgutils.create_struct_proxy(flatiterty)): - """ - .flat() / .ndenumerate() implementation for C-contiguous arrays. - """ - - def init_specific(self, context, builder, arrty, arr): - zero = context.get_constant(types.intp, 0) - self.index = cgutils.alloca_once_value(builder, zero) - # We can't trust strides[-1] to always contain the right - # step value, see - # http://docs.scipy.org/doc/numpy-dev/release.html#npy-relaxed-strides-checking - self.stride = arr.itemsize - - if kind == 'ndenumerate': - # Zero-initialize the indices array. 
- indices = cgutils.alloca_once( - builder, zero.type, - size=context.get_constant(types.intp, arrty.ndim)) - - for dim in range(arrty.ndim): - idxptr = cgutils.gep_inbounds(builder, indices, dim) - builder.store(zero, idxptr) - - self.indices = indices - - # NOTE: Using gep() instead of explicit pointer addition helps - # LLVM vectorize the loop (since the stride is known and - # constant). This is not possible in the non-contiguous case, - # where the strides are unknown at compile-time. - - def iternext_specific(self, context, builder, arrty, arr, result): - zero = context.get_constant(types.intp, 0) - - ndim = arrty.ndim - nitems = arr.nitems - - index = builder.load(self.index) - is_valid = builder.icmp(lc.ICMP_SLT, index, nitems) - result.set_valid(is_valid) - - with cgutils.if_likely(builder, is_valid): - ptr = builder.gep(arr.data, [index]) - value = load_item(context, builder, arrty, ptr) - if kind == 'flat': - result.yield_(value) - else: - # ndenumerate(): fetch and increment indices - indices = self.indices - idxvals = [builder.load(cgutils.gep_inbounds(builder, indices, dim)) - for dim in range(ndim)] - idxtuple = cgutils.pack_array(builder, idxvals) - result.yield_( - cgutils.make_anonymous_struct(builder, [idxtuple, value])) - _increment_indices_array(context, builder, arrty, arr, indices) - - index = cgutils.increment_index(builder, index) - builder.store(index, self.index) - - def getitem(self, context, builder, arrty, arr, index): - ptr = builder.gep(arr.data, [index]) - return load_item(context, builder, arrty, ptr) - - def setitem(self, context, builder, arrty, arr, index, value): - ptr = builder.gep(arr.data, [index]) - store_item(context, builder, arrty, value, ptr) - - return CContiguousFlatIter - - else: - class FlatIter(cgutils.create_struct_proxy(flatiterty)): - """ - Generic .flat() / .ndenumerate() implementation for - non-contiguous arrays. - It keeps track of pointers along each dimension in order to - minimize computations. 
- """ - - def init_specific(self, context, builder, arrty, arr): - zero = context.get_constant(types.intp, 0) - data = arr.data - ndim = arrty.ndim - shapes = cgutils.unpack_tuple(builder, arr.shape, ndim) - - indices = cgutils.alloca_once(builder, zero.type, - size=context.get_constant(types.intp, - arrty.ndim)) - pointers = cgutils.alloca_once(builder, data.type, - size=context.get_constant(types.intp, - arrty.ndim)) - exhausted = cgutils.alloca_once_value(builder, cgutils.false_byte) - - # Initialize indices and pointers with their start values. - for dim in range(ndim): - idxptr = cgutils.gep_inbounds(builder, indices, dim) - ptrptr = cgutils.gep_inbounds(builder, pointers, dim) - builder.store(data, ptrptr) - builder.store(zero, idxptr) - # 0-sized dimensions really indicate an empty array, - # but we have to catch that condition early to avoid - # a bug inside the iteration logic (see issue #846). - dim_size = shapes[dim] - dim_is_empty = builder.icmp(lc.ICMP_EQ, dim_size, zero) - with cgutils.if_unlikely(builder, dim_is_empty): - builder.store(cgutils.true_byte, exhausted) - - self.indices = indices - self.pointers = pointers - self.exhausted = exhausted - - def iternext_specific(self, context, builder, arrty, arr, result): - ndim = arrty.ndim - data = arr.data - shapes = cgutils.unpack_tuple(builder, arr.shape, ndim) - strides = cgutils.unpack_tuple(builder, arr.strides, ndim) - indices = self.indices - pointers = self.pointers - - zero = context.get_constant(types.intp, 0) - - bbend = builder.append_basic_block('end') - - # Catch already computed iterator exhaustion - is_exhausted = cgutils.as_bool_bit( - builder, builder.load(self.exhausted)) - with cgutils.if_unlikely(builder, is_exhausted): - result.set_valid(False) - builder.branch(bbend) - result.set_valid(True) - - # Current pointer inside last dimension - last_ptr = cgutils.gep_inbounds(builder, pointers, ndim - 1) - ptr = builder.load(last_ptr) - value = load_item(context, builder, arrty, ptr) - if 
kind == 'flat': - result.yield_(value) - else: - # ndenumerate() => yield (indices, value) - idxvals = [builder.load(cgutils.gep_inbounds(builder, indices, dim)) - for dim in range(ndim)] - idxtuple = cgutils.pack_array(builder, idxvals) - result.yield_( - cgutils.make_anonymous_struct(builder, [idxtuple, value])) - - # Update indices and pointers by walking from inner - # dimension to outer. - for dim in reversed(range(ndim)): - idxptr = cgutils.gep_inbounds(builder, indices, dim) - idx = cgutils.increment_index(builder, - builder.load(idxptr)) - - count = shapes[dim] - stride = strides[dim] - in_bounds = builder.icmp(lc.ICMP_SLT, idx, count) - with cgutils.if_likely(builder, in_bounds): - # Index is valid => pointer can simply be incremented. - builder.store(idx, idxptr) - ptrptr = cgutils.gep_inbounds(builder, pointers, dim) - ptr = builder.load(ptrptr) - ptr = cgutils.pointer_add(builder, ptr, stride) - builder.store(ptr, ptrptr) - # Reset pointers in inner dimensions - for inner_dim in range(dim + 1, ndim): - ptrptr = cgutils.gep_inbounds(builder, pointers, inner_dim) - builder.store(ptr, ptrptr) - builder.branch(bbend) - # Reset index and continue with next dimension - builder.store(zero, idxptr) - - # End of array - builder.store(cgutils.true_byte, self.exhausted) - builder.branch(bbend) - - builder.position_at_end(bbend) - - def _ptr_for_index(self, context, builder, arrty, arr, index): - ndim = arrty.ndim - shapes = cgutils.unpack_tuple(builder, arr.shape, count=ndim) - strides = cgutils.unpack_tuple(builder, arr.strides, count=ndim) - - # First convert the flattened index into a regular n-dim index - indices = [] - for dim in reversed(range(ndim)): - indices.append(builder.urem(index, shapes[dim])) - index = builder.udiv(index, shapes[dim]) - indices.reverse() - - ptr = cgutils.get_item_pointer2(builder, arr.data, shapes, - strides, arrty.layout, indices) - return ptr - - def getitem(self, context, builder, arrty, arr, index): - ptr = 
self._ptr_for_index(context, builder, arrty, arr, index) - return load_item(context, builder, arrty, ptr) - - def setitem(self, context, builder, arrty, arr, index, value): - ptr = self._ptr_for_index(context, builder, arrty, arr, index) - store_item(context, builder, arrty, value, ptr) - - return FlatIter - - -@lower_getattr(types.Array, "flat") -def make_array_flatiter(context, builder, arrty, arr): - flatitercls = make_array_flat_cls(types.NumpyFlatType(arrty)) - flatiter = flatitercls(context, builder) - - flatiter.array = arr - - arrcls = context.make_array(arrty) - arr = arrcls(context, builder, ref=flatiter._get_ptr_by_name('array')) - - flatiter.init_specific(context, builder, arrty, arr) - - res = flatiter._getvalue() - return impl_ret_borrowed(context, builder, types.NumpyFlatType(arrty), res) - - -@lower_builtin('iternext', types.NumpyFlatType) -@iternext_impl -def iternext_numpy_flatiter(context, builder, sig, args, result): - [flatiterty] = sig.args - [flatiter] = args - - flatitercls = make_array_flat_cls(flatiterty) - flatiter = flatitercls(context, builder, value=flatiter) - - arrty = flatiterty.array_type - arrcls = context.make_array(arrty) - arr = arrcls(context, builder, value=flatiter.array) - - flatiter.iternext_specific(context, builder, arrty, arr, result) - - -@lower_builtin('getitem', types.NumpyFlatType, types.Integer) -def iternext_numpy_getitem(context, builder, sig, args): - flatiterty = sig.args[0] - flatiter, index = args - - flatitercls = make_array_flat_cls(flatiterty) - flatiter = flatitercls(context, builder, value=flatiter) - - arrty = flatiterty.array_type - arrcls = context.make_array(arrty) - arr = arrcls(context, builder, value=flatiter.array) - - res = flatiter.getitem(context, builder, arrty, arr, index) - return impl_ret_borrowed(context, builder, sig.return_type, res) - - -@lower_builtin('setitem', types.NumpyFlatType, types.Integer, - types.Any) -def iternext_numpy_getitem(context, builder, sig, args): - flatiterty = 
sig.args[0] - flatiter, index, value = args - - flatitercls = make_array_flat_cls(flatiterty) - flatiter = flatitercls(context, builder, value=flatiter) - - arrty = flatiterty.array_type - arrcls = context.make_array(arrty) - arr = arrcls(context, builder, value=flatiter.array) - - res = flatiter.setitem(context, builder, arrty, arr, index, value) - return context.get_dummy_value() - - -@lower_builtin(len, types.NumpyFlatType) -def iternext_numpy_getitem(context, builder, sig, args): - flatiterty = sig.args[0] - flatitercls = make_array_flat_cls(flatiterty) - flatiter = flatitercls(context, builder, value=args[0]) - - arrcls = context.make_array(flatiterty.array_type) - arr = arrcls(context, builder, value=flatiter.array) - return arr.nitems - - -@lower_builtin(np.ndenumerate, types.Array) -def make_array_ndenumerate(context, builder, sig, args): - arrty, = sig.args - arr, = args - nditercls = make_array_ndenumerate_cls(types.NumpyNdEnumerateType(arrty)) - nditer = nditercls(context, builder) - - nditer.array = arr - - arrcls = context.make_array(arrty) - arr = arrcls(context, builder, ref=nditer._get_ptr_by_name('array')) - - nditer.init_specific(context, builder, arrty, arr) - - res = nditer._getvalue() - return impl_ret_borrowed(context, builder, sig.return_type, res) - - -@lower_builtin('iternext', types.NumpyNdEnumerateType) -@iternext_impl -def iternext_numpy_nditer(context, builder, sig, args, result): - [nditerty] = sig.args - [nditer] = args - - nditercls = make_array_ndenumerate_cls(nditerty) - nditer = nditercls(context, builder, value=nditer) - - arrty = nditerty.array_type - arrcls = context.make_array(arrty) - arr = arrcls(context, builder, value=nditer.array) - - nditer.iternext_specific(context, builder, arrty, arr, result) - - -@lower_builtin(pndindex, types.VarArg(types.Integer)) -@lower_builtin(np.ndindex, types.VarArg(types.Integer)) -def make_array_ndindex(context, builder, sig, args): - """ndindex(*shape)""" - shape = [context.cast(builder, 
arg, argty, types.intp) - for argty, arg in zip(sig.args, args)] - - nditercls = make_ndindex_cls(types.NumpyNdIndexType(len(shape))) - nditer = nditercls(context, builder) - nditer.init_specific(context, builder, shape) - - res = nditer._getvalue() - return impl_ret_borrowed(context, builder, sig.return_type, res) - -@lower_builtin(pndindex, types.BaseTuple) -@lower_builtin(np.ndindex, types.BaseTuple) -def make_array_ndindex(context, builder, sig, args): - """ndindex(shape)""" - ndim = sig.return_type.ndim - if ndim > 0: - idxty = sig.args[0].dtype - tup = args[0] - - shape = cgutils.unpack_tuple(builder, tup, ndim) - shape = [context.cast(builder, idx, idxty, types.intp) - for idx in shape] - else: - shape = [] - - nditercls = make_ndindex_cls(types.NumpyNdIndexType(len(shape))) - nditer = nditercls(context, builder) - nditer.init_specific(context, builder, shape) - - res = nditer._getvalue() - return impl_ret_borrowed(context, builder, sig.return_type, res) - -@lower_builtin('iternext', types.NumpyNdIndexType) -@iternext_impl -def iternext_numpy_ndindex(context, builder, sig, args, result): - [nditerty] = sig.args - [nditer] = args - - nditercls = make_ndindex_cls(nditerty) - nditer = nditercls(context, builder, value=nditer) - - nditer.iternext_specific(context, builder, result) - - -@lower_builtin(np.nditer, types.Any) -def make_array_nditer(context, builder, sig, args): - """ - nditer(...) 
- """ - nditerty = sig.return_type - arrtys = nditerty.arrays - - if isinstance(sig.args[0], types.BaseTuple): - arrays = cgutils.unpack_tuple(builder, args[0]) - else: - arrays = [args[0]] - - nditer = make_nditer_cls(nditerty)(context, builder) - nditer.init_specific(context, builder, arrtys, arrays) - - res = nditer._getvalue() - return impl_ret_borrowed(context, builder, nditerty, res) - -@lower_builtin('iternext', types.NumpyNdIterType) -@iternext_impl -def iternext_numpy_ndindex(context, builder, sig, args, result): - [nditerty] = sig.args - [nditer] = args - - nditer = make_nditer_cls(nditerty)(context, builder, value=nditer) - nditer.iternext_specific(context, builder, result) - - -# ----------------------------------------------------------------------------- -# Numpy array constructors - -def _empty_nd_impl(context, builder, arrtype, shapes): - """Utility function used for allocating a new array during LLVM code - generation (lowering). Given a target context, builder, array - type, and a tuple or list of lowered dimension sizes, returns a - LLVM value pointing at a Numba runtime allocated array. 
- """ - arycls = make_array(arrtype) - ary = arycls(context, builder) - - datatype = context.get_data_type(arrtype.dtype) - itemsize = context.get_constant(types.intp, get_itemsize(context, arrtype)) - - # compute array length - arrlen = context.get_constant(types.intp, 1) - for s in shapes: - arrlen = builder.mul(arrlen, s) - - if arrtype.ndim == 0: - strides = () - elif arrtype.layout == 'C': - strides = [itemsize] - for dimension_size in reversed(shapes[1:]): - strides.append(builder.mul(strides[-1], dimension_size)) - strides = tuple(reversed(strides)) - elif arrtype.layout == 'F': - strides = [itemsize] - for dimension_size in shapes[:-1]: - strides.append(builder.mul(strides[-1], dimension_size)) - strides = tuple(strides) - else: - raise NotImplementedError( - "Don't know how to allocate array with layout '{0}'.".format( - arrtype.layout)) - - allocsize = builder.mul(itemsize, arrlen) - align = context.get_preferred_array_alignment(arrtype.dtype) - meminfo = context.nrt.meminfo_alloc_aligned(builder, size=allocsize, - align=align) - - data = context.nrt.meminfo_data(builder, meminfo) - - intp_t = context.get_value_type(types.intp) - shape_array = cgutils.pack_array(builder, shapes, ty=intp_t) - strides_array = cgutils.pack_array(builder, strides, ty=intp_t) - - populate_array(ary, - data=builder.bitcast(data, datatype.as_pointer()), - shape=shape_array, - strides=strides_array, - itemsize=itemsize, - meminfo=meminfo) - - return ary - -def _zero_fill_array(context, builder, ary): - """ - Zero-fill an array. The array must be contiguous. - """ - cgutils.memset(builder, ary.data, builder.mul(ary.itemsize, ary.nitems), 0) - - -def _parse_shape(context, builder, ty, val): - """ - Parse the shape argument to an array constructor. 
- """ - if isinstance(ty, types.Integer): - ndim = 1 - shapes = [context.cast(builder, val, ty, types.intp)] - else: - assert isinstance(ty, types.BaseTuple) - ndim = ty.count - arrshape = context.cast(builder, val, ty, - types.UniTuple(types.intp, ndim)) - shapes = cgutils.unpack_tuple(builder, val, count=ndim) - - zero = context.get_constant_generic(builder, types.intp, 0) - for dim in range(ndim): - is_neg = builder.icmp_signed('<', shapes[dim], zero) - with cgutils.if_unlikely(builder, is_neg): - context.call_conv.return_user_exc(builder, ValueError, - ("negative dimensions not allowed",)) - - return shapes - - -def _parse_empty_args(context, builder, sig, args): - """ - Parse the arguments of a np.empty(), np.zeros() or np.ones() call. - """ - arrshapetype = sig.args[0] - arrshape = args[0] - arrtype = sig.return_type - return arrtype, _parse_shape(context, builder, arrshapetype, arrshape) - - -def _parse_empty_like_args(context, builder, sig, args): - """ - Parse the arguments of a np.empty_like(), np.zeros_like() or - np.ones_like() call. 
- """ - arytype = sig.args[0] - if isinstance(arytype, types.Array): - ary = make_array(arytype)(context, builder, value=args[0]) - shapes = cgutils.unpack_tuple(builder, ary.shape, count=arytype.ndim) - return sig.return_type, shapes - else: - return sig.return_type, () - - -@lower_builtin(np.empty, types.Any) -@lower_builtin(np.empty, types.Any, types.Any) -def numpy_empty_nd(context, builder, sig, args): - arrtype, shapes = _parse_empty_args(context, builder, sig, args) - ary = _empty_nd_impl(context, builder, arrtype, shapes) - return impl_ret_new_ref(context, builder, sig.return_type, ary._getvalue()) - -@lower_builtin(np.empty_like, types.Any) -@lower_builtin(np.empty_like, types.Any, types.DTypeSpec) -def numpy_empty_like_nd(context, builder, sig, args): - arrtype, shapes = _parse_empty_like_args(context, builder, sig, args) - ary = _empty_nd_impl(context, builder, arrtype, shapes) - return impl_ret_new_ref(context, builder, sig.return_type, ary._getvalue()) - - -@lower_builtin(np.zeros, types.Any) -@lower_builtin(np.zeros, types.Any, types.Any) -def numpy_zeros_nd(context, builder, sig, args): - arrtype, shapes = _parse_empty_args(context, builder, sig, args) - ary = _empty_nd_impl(context, builder, arrtype, shapes) - _zero_fill_array(context, builder, ary) - return impl_ret_new_ref(context, builder, sig.return_type, ary._getvalue()) - - -@lower_builtin(np.zeros_like, types.Any) -@lower_builtin(np.zeros_like, types.Any, types.DTypeSpec) -def numpy_zeros_like_nd(context, builder, sig, args): - arrtype, shapes = _parse_empty_like_args(context, builder, sig, args) - ary = _empty_nd_impl(context, builder, arrtype, shapes) - _zero_fill_array(context, builder, ary) - return impl_ret_new_ref(context, builder, sig.return_type, ary._getvalue()) - - -if numpy_version >= (1, 8): - @lower_builtin(np.full, types.Any, types.Any) - def numpy_full_nd(context, builder, sig, args): - if numpy_version < (1, 12): - # np < 1.12 returns float64 full regardless of value type - 
def full(shape, value): - arr = np.empty(shape) - val = np.float64(value.real) - for idx in np.ndindex(arr.shape): - arr[idx] = val - return arr - else: - def full(shape, value): - arr = np.empty(shape, type(value)) - for idx in np.ndindex(arr.shape): - arr[idx] = value - return arr - - res = context.compile_internal(builder, full, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - - @lower_builtin(np.full, types.Any, types.Any, types.DTypeSpec) - def numpy_full_dtype_nd(context, builder, sig, args): - - def full(shape, value, dtype): - arr = np.empty(shape, dtype) - for idx in np.ndindex(arr.shape): - arr[idx] = value - return arr - - res = context.compile_internal(builder, full, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - - - @lower_builtin(np.full_like, types.Any, types.Any) - def numpy_full_like_nd(context, builder, sig, args): - - def full_like(arr, value): - arr = np.empty_like(arr) - for idx in np.ndindex(arr.shape): - arr[idx] = value - return arr - - res = context.compile_internal(builder, full_like, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - - - @lower_builtin(np.full_like, types.Any, types.Any, types.DTypeSpec) - def numpy_full_like_nd(context, builder, sig, args): - - def full_like(arr, value, dtype): - arr = np.empty_like(arr, dtype) - for idx in np.ndindex(arr.shape): - arr[idx] = value - return arr - - res = context.compile_internal(builder, full_like, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - - -@lower_builtin(np.ones, types.Any) -def numpy_ones_nd(context, builder, sig, args): - - def ones(shape): - arr = np.empty(shape) - for idx in np.ndindex(arr.shape): - arr[idx] = 1 - return arr - - valty = sig.return_type.dtype - res = context.compile_internal(builder, ones, sig, args, - locals={'c': valty}) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.ones, types.Any, 
types.DTypeSpec) -def numpy_ones_dtype_nd(context, builder, sig, args): - - def ones(shape, dtype): - arr = np.empty(shape, dtype) - for idx in np.ndindex(arr.shape): - arr[idx] = 1 - return arr - - res = context.compile_internal(builder, ones, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.ones_like, types.Any) -def numpy_ones_like_nd(context, builder, sig, args): - - def ones_like(arr): - arr = np.empty_like(arr) - for idx in np.ndindex(arr.shape): - arr[idx] = 1 - return arr - - res = context.compile_internal(builder, ones_like, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.ones_like, types.Any, types.DTypeSpec) -def numpy_ones_like_dtype_nd(context, builder, sig, args): - - def ones_like(arr, dtype): - arr = np.empty_like(arr, dtype) - for idx in np.ndindex(arr.shape): - arr[idx] = 1 - return arr - - res = context.compile_internal(builder, ones_like, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - - -@lower_builtin(np.identity, types.Integer) -def numpy_identity(context, builder, sig, args): - - def identity(n): - arr = np.zeros((n, n)) - for i in range(n): - arr[i, i] = 1 - return arr - - res = context.compile_internal(builder, identity, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.identity, types.Integer, types.DTypeSpec) -def numpy_identity(context, builder, sig, args): - - def identity(n, dtype): - arr = np.zeros((n, n), dtype) - for i in range(n): - arr[i, i] = 1 - return arr - - res = context.compile_internal(builder, identity, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - - -@lower_builtin(np.eye, types.Integer) -def numpy_eye(context, builder, sig, args): - - def eye(n): - return np.identity(n) - - res = context.compile_internal(builder, eye, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - 
-@lower_builtin(np.eye, types.Integer, types.Integer) -def numpy_eye(context, builder, sig, args): - - def eye(n, m): - return np.eye(n, m, 0, np.float64) - - res = context.compile_internal(builder, eye, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.eye, types.Integer, types.Integer, - types.Integer) -def numpy_eye(context, builder, sig, args): - - def eye(n, m, k): - return np.eye(n, m, k, np.float64) - - res = context.compile_internal(builder, eye, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.eye, types.Integer, types.Integer, - types.Integer, types.DTypeSpec) -def numpy_eye(context, builder, sig, args): - - def eye(n, m, k, dtype): - arr = np.zeros((n, m), dtype) - if k >= 0: - d = min(n, m - k) - for i in range(d): - arr[i, i + k] = 1 - else: - d = min(n + k, m) - for i in range(d): - arr[i - k, i] = 1 - return arr - - res = context.compile_internal(builder, eye, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.diag, types.Array) -def numpy_diag(context, builder, sig, args): - def diag_impl(val): - return np.diag(val, k=0) - return context.compile_internal(builder, diag_impl, sig, args) - -@lower_builtin(np.diag, types.Array, types.Integer) -def numpy_diag_kwarg(context, builder, sig, args): - arg = sig.args[0] - if arg.ndim == 1: - # vector context - def diag_impl(arr, k=0): - s = arr.shape - n = s[0] + abs(k) - ret = np.zeros((n, n), arr.dtype) - if k >= 0: - for i in range(n - k): - ret[i, k + i] = arr[i] - else: - for i in range(n + k): - ret[i - k, i] = arr[i] - return ret - elif arg.ndim == 2: - # matrix context - def diag_impl(arr, k=0): - #Will return arr.diagonal(v, k) when axis args are supported - rows, cols = arr.shape - r = rows - c = cols - if k < 0: - rows = rows + k - if k > 0: - cols = cols - k - n = max(min(rows, cols), 0) - ret = np.empty(n, arr.dtype) - if k >= 0: - for i in range(n): - 
ret[i] = arr[i, k + i] - else: - for i in range(n): - ret[i] = arr[i - k, i] - return ret - else: - #invalid input - raise ValueError("Input must be 1- or 2-d.") - - res = context.compile_internal(builder, diag_impl, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.take, types.Array, types.Integer) -@lower_builtin('array.take', types.Array, types.Integer) -def numpy_take_1(context, builder, sig, args): - - def take_impl(a, indices): - if indices > (a.size - 1) or indices < -a.size: - raise IndexError("Index out of bounds") - return a.ravel()[np.int(indices)] - - res = context.compile_internal(builder, take_impl, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin('array.take', types.Array, types.Array) -@lower_builtin(np.take, types.Array, types.Array) -def numpy_take_2(context, builder, sig, args): - - F_order = sig.args[1].layout == 'F' - - def take_impl(a, indices): - ret = np.empty(indices.size, dtype=a.dtype) - if F_order: - walker = indices.copy() # get C order - else: - walker = indices - it = np.nditer(walker) - i = 0 - flat = a.ravel() - for x in it: - if x > (a.size - 1) or x < -a.size: - raise IndexError("Index out of bounds") - ret[i] = flat[x] - i = i + 1 - return ret.reshape(indices.shape) - - res = context.compile_internal(builder, take_impl, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin('array.take', types.Array, types.List) -@lower_builtin(np.take, types.Array, types.List) -@lower_builtin('array.take', types.Array, types.BaseTuple) -@lower_builtin(np.take, types.Array, types.BaseTuple) -def numpy_take_3(context, builder, sig, args): - - def take_impl(a, indices): - convert = np.array(indices) - ret = np.empty(convert.size, dtype=a.dtype) - it = np.nditer(convert) - i = 0 - flat = a.ravel() - for x in it: - if x > (a.size - 1) or x < -a.size: - raise IndexError("Index out of bounds") - ret[i] = flat[x] - i = i 
+ 1 - return ret.reshape(convert.shape) - - res = context.compile_internal(builder, take_impl, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.arange, types.Number) -def numpy_arange_1(context, builder, sig, args): - dtype = as_dtype(sig.return_type.dtype) - - def arange(stop): - return np.arange(0, stop, 1, dtype) - - res = context.compile_internal(builder, arange, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.arange, types.Number, types.Number) -def numpy_arange_2(context, builder, sig, args): - dtype = as_dtype(sig.return_type.dtype) - - def arange(start, stop): - return np.arange(start, stop, 1, dtype) - - res = context.compile_internal(builder, arange, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - - -@lower_builtin(np.arange, types.Number, types.Number, - types.Number) -def numpy_arange_3(context, builder, sig, args): - dtype = as_dtype(sig.return_type.dtype) - - def arange(start, stop, step): - return np.arange(start, stop, step, dtype) - - res = context.compile_internal(builder, arange, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.arange, types.Number, types.Number, - types.Number, types.DTypeSpec) -def numpy_arange_4(context, builder, sig, args): - - if any(isinstance(a, types.Complex) for a in sig.args): - def arange(start, stop, step, dtype): - nitems_c = (stop - start) / step - nitems_r = math.ceil(nitems_c.real) - nitems_i = math.ceil(nitems_c.imag) - nitems = max(min(nitems_i, nitems_r), 0) - arr = np.empty(nitems, dtype) - val = start - for i in range(nitems): - arr[i] = val - val += step - return arr - else: - def arange(start, stop, step, dtype): - nitems_r = math.ceil((stop - start) / step) - nitems = max(nitems_r, 0) - arr = np.empty(nitems, dtype) - val = start - for i in range(nitems): - arr[i] = val - val += step - return arr - - res = 
context.compile_internal(builder, arange, sig, args, - locals={'nitems': types.intp}) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.linspace, types.Number, types.Number) -def numpy_linspace_2(context, builder, sig, args): - - def linspace(start, stop): - return np.linspace(start, stop, 50) - - res = context.compile_internal(builder, linspace, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin(np.linspace, types.Number, types.Number, - types.Integer) -def numpy_linspace_3(context, builder, sig, args): - dtype = as_dtype(sig.return_type.dtype) - - def linspace(start, stop, num): - arr = np.empty(num, dtype) - div = num - 1 - delta = stop - start - arr[0] = start - for i in range(1, num): - arr[i] = start + delta * (i / div) - return arr - - res = context.compile_internal(builder, linspace, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - - -def _array_copy(context, builder, sig, args): - """ - Array copy. 
- """ - arytype = sig.args[0] - ary = make_array(arytype)(context, builder, value=args[0]) - shapes = cgutils.unpack_tuple(builder, ary.shape) - - rettype = sig.return_type - ret = _empty_nd_impl(context, builder, rettype, shapes) - - src_data = ary.data - dest_data = ret.data - - assert rettype.layout in "CF" - if arytype.layout == rettype.layout: - # Fast path: memcpy - cgutils.raw_memcpy(builder, dest_data, src_data, ary.nitems, - ary.itemsize, align=1) - - else: - src_strides = cgutils.unpack_tuple(builder, ary.strides) - dest_strides = cgutils.unpack_tuple(builder, ret.strides) - intp_t = context.get_value_type(types.intp) - - with cgutils.loop_nest(builder, shapes, intp_t) as indices: - src_ptr = cgutils.get_item_pointer2(builder, src_data, - shapes, src_strides, - arytype.layout, indices) - dest_ptr = cgutils.get_item_pointer2(builder, dest_data, - shapes, dest_strides, - rettype.layout, indices) - builder.store(builder.load(src_ptr), dest_ptr) - - return impl_ret_new_ref(context, builder, sig.return_type, ret._getvalue()) - - -@lower_builtin("array.copy", types.Array) -def array_copy(context, builder, sig, args): - return _array_copy(context, builder, sig, args) - -@lower_builtin(np.copy, types.Array) -def numpy_copy(context, builder, sig, args): - return _array_copy(context, builder, sig, args) - - -def _as_layout_array(context, builder, sig, args, output_layout): - """ - Common logic for layout conversion function; - e.g. 
ascontiguousarray and asfortranarray - """ - retty = sig.return_type - aryty = sig.args[0] - assert retty.layout == output_layout, 'return-type has incorrect layout' - - if aryty.ndim == 0: - # 0-dim input => asfortranarray() returns a 1-dim array - assert retty.ndim == 1 - ary = make_array(aryty)(context, builder, value=args[0]) - ret = make_array(retty)(context, builder) - - shape = context.get_constant(types.UniTuple(types.intp, 1), (1,)) - strides = context.make_tuple(builder, - types.UniTuple(types.intp, 1), - (ary.itemsize,)) - populate_array(ret, ary.data, shape, strides, ary.itemsize, - ary.meminfo, ary.parent) - return impl_ret_borrowed(context, builder, retty, ret._getvalue()) - - elif (retty.layout == aryty.layout - or (aryty.ndim == 1 and aryty.layout in 'CF')): - # 1-dim contiguous input => return the same array - return impl_ret_borrowed(context, builder, retty, args[0]) - - else: - if aryty.layout == 'A': - # There's still chance the array is in contiguous layout, - # just that we don't know at compile time. - # We can do a runtime check. 
- - # Prepare and call is_contiguous or is_fortran - assert output_layout in 'CF' - check_func = is_contiguous if output_layout == 'C' else is_fortran - is_contig = _call_contiguous_check(check_func, context, builder, aryty, args[0]) - with builder.if_else(is_contig) as (then, orelse): - # If the array is already contiguous, just return it - with then: - out_then = impl_ret_borrowed(context, builder, retty, - args[0]) - then_blk = builder.block - # Otherwise, copy to a new contiguous region - with orelse: - out_orelse = _array_copy(context, builder, sig, args) - orelse_blk = builder.block - # Phi node for the return value - ret_phi = builder.phi(out_then.type) - ret_phi.add_incoming(out_then, then_blk) - ret_phi.add_incoming(out_orelse, orelse_blk) - return ret_phi - - else: - # Return a copy with the right layout - return _array_copy(context, builder, sig, args) - - -@lower_builtin(np.asfortranarray, types.Array) -def array_asfortranarray(context, builder, sig, args): - return _as_layout_array(context, builder, sig, args, output_layout='F') - - -@lower_builtin(np.ascontiguousarray, types.Array) -def array_ascontiguousarray(context, builder, sig, args): - return _as_layout_array(context, builder, sig, args, output_layout='C') - - -@lower_builtin("array.astype", types.Array, types.DTypeSpec) -def array_astype(context, builder, sig, args): - arytype = sig.args[0] - ary = make_array(arytype)(context, builder, value=args[0]) - shapes = cgutils.unpack_tuple(builder, ary.shape) - - rettype = sig.return_type - ret = _empty_nd_impl(context, builder, rettype, shapes) - - src_data = ary.data - dest_data = ret.data - - src_strides = cgutils.unpack_tuple(builder, ary.strides) - dest_strides = cgutils.unpack_tuple(builder, ret.strides) - intp_t = context.get_value_type(types.intp) - - with cgutils.loop_nest(builder, shapes, intp_t) as indices: - src_ptr = cgutils.get_item_pointer2(builder, src_data, - shapes, src_strides, - arytype.layout, indices) - dest_ptr = 
cgutils.get_item_pointer2(builder, dest_data, - shapes, dest_strides, - rettype.layout, indices) - item = load_item(context, builder, arytype, src_ptr) - item = context.cast(builder, item, arytype.dtype, rettype.dtype) - store_item(context, builder, rettype, item, dest_ptr) - - return impl_ret_new_ref(context, builder, sig.return_type, ret._getvalue()) - - -@lower_builtin(np.frombuffer, types.Buffer) -@lower_builtin(np.frombuffer, types.Buffer, types.DTypeSpec) -def np_frombuffer(context, builder, sig, args): - bufty = sig.args[0] - aryty = sig.return_type - - buf = make_array(bufty)(context, builder, value=args[0]) - out_ary_ty = make_array(aryty) - out_ary = out_ary_ty(context, builder) - out_datamodel = out_ary._datamodel - - itemsize = get_itemsize(context, aryty) - ll_itemsize = lc.Constant.int(buf.itemsize.type, itemsize) - nbytes = builder.mul(buf.nitems, buf.itemsize) - - # Check that the buffer size is compatible - rem = builder.srem(nbytes, ll_itemsize) - is_incompatible = cgutils.is_not_null(builder, rem) - with builder.if_then(is_incompatible, likely=False): - msg = "buffer size must be a multiple of element size" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) - - shape = cgutils.pack_array(builder, [builder.sdiv(nbytes, ll_itemsize)]) - strides = cgutils.pack_array(builder, [ll_itemsize]) - data = builder.bitcast(buf.data, - context.get_value_type(out_datamodel.get_type('data'))) - - populate_array(out_ary, - data=data, - shape=shape, - strides=strides, - itemsize=ll_itemsize, - meminfo=buf.meminfo, - parent=buf.parent,) - - res = out_ary._getvalue() - return impl_ret_borrowed(context, builder, sig.return_type, res) - - -@lower_builtin(carray, types.Any, types.Any) -@lower_builtin(carray, types.Any, types.Any, types.DTypeSpec) -@lower_builtin(farray, types.Any, types.Any) -@lower_builtin(farray, types.Any, types.Any, types.DTypeSpec) -def np_cfarray(context, builder, sig, args): - """ - numba.numpy_support.carray(...) 
and - numba.numpy_support.farray(...). - """ - ptrty, shapety = sig.args[:2] - ptr, shape = args[:2] - - aryty = sig.return_type - dtype = aryty.dtype - assert aryty.layout in 'CF' - - out_ary = make_array(aryty)(context, builder) - - itemsize = get_itemsize(context, aryty) - ll_itemsize = cgutils.intp_t(itemsize) - - if isinstance(shapety, types.BaseTuple): - shapes = cgutils.unpack_tuple(builder, shape) - else: - shapety = (shapety,) - shapes = (shape,) - shapes = [context.cast(builder, value, fromty, types.intp) - for fromty, value in zip(shapety, shapes)] - - off = ll_itemsize - strides = [] - if aryty.layout == 'F': - for s in shapes: - strides.append(off) - off = builder.mul(off, s) - else: - for s in reversed(shapes): - strides.append(off) - off = builder.mul(off, s) - strides.reverse() - - data = builder.bitcast(ptr, - context.get_data_type(aryty.dtype).as_pointer()) - - populate_array(out_ary, - data=data, - shape=shapes, - strides=strides, - itemsize=ll_itemsize, - # Array is not memory-managed - meminfo=None, - ) - - res = out_ary._getvalue() - return impl_ret_new_ref(context, builder, sig.return_type, res) - - -def _get_seq_size(context, builder, seqty, seq): - if isinstance(seqty, types.BaseTuple): - return context.get_constant(types.intp, len(seqty)) - elif isinstance(seqty, types.Sequence): - len_impl = context.get_function(len, signature(types.intp, seqty,)) - return len_impl(builder, (seq,)) - else: - assert 0 - -def _get_borrowing_getitem(context, seqty): - """ - Return a getitem() implementation that doesn't incref its result. - """ - retty = seqty.dtype - getitem_impl = context.get_function('getitem', - signature(retty, seqty, types.intp)) - def wrap(builder, args): - ret = getitem_impl(builder, args) - if context.enable_nrt: - context.nrt.decref(builder, retty, ret) - return ret - - return wrap - - -def compute_sequence_shape(context, builder, ndim, seqty, seq): - """ - Compute the likely shape of a nested sequence (possibly 0d). 
- """ - intp_t = context.get_value_type(types.intp) - zero = Constant.int(intp_t, 0) - - def get_first_item(seqty, seq): - if isinstance(seqty, types.BaseTuple): - if len(seqty) == 0: - return None, None - else: - return seqty[0], builder.extract_value(seq, 0) - else: - getitem_impl = _get_borrowing_getitem(context, seqty) - return seqty.dtype, getitem_impl(builder, (seq, zero)) - - # Compute shape by traversing the first element of each nested - # sequence - shapes = [] - innerty, inner = seqty, seq - - for i in range(ndim): - if i > 0: - innerty, inner = get_first_item(innerty, inner) - shapes.append(_get_seq_size(context, builder, innerty, inner)) - - return tuple(shapes) - - -def check_sequence_shape(context, builder, seqty, seq, shapes): - """ - Check the nested sequence matches the given *shapes*. - """ - intp_t = context.get_value_type(types.intp) - - def _fail(): - context.call_conv.return_user_exc(builder, ValueError, - ("incompatible sequence shape",)) - - def check_seq_size(seqty, seq, shapes): - if len(shapes) == 0: - return - - size = _get_seq_size(context, builder, seqty, seq) - expected = shapes[0] - mismatch = builder.icmp_signed('!=', size, expected) - with builder.if_then(mismatch, likely=False): - _fail() - - if len(shapes) == 1: - return - - if isinstance(seqty, types.Sequence): - getitem_impl = _get_borrowing_getitem(context, seqty) - with cgutils.for_range(builder, size) as loop: - innerty = seqty.dtype - inner = getitem_impl(builder, (seq, loop.index)) - check_seq_size(innerty, inner, shapes[1:]) - - elif isinstance(seqty, types.BaseTuple): - for i in range(len(seqty)): - innerty = seqty[i] - inner = builder.extract_value(seq, i) - check_seq_size(innerty, inner, shapes[1:]) - - else: - assert 0, seqty - - check_seq_size(seqty, seq, shapes) - - -def assign_sequence_to_array(context, builder, data, shapes, strides, - arrty, seqty, seq): - """ - Assign a nested sequence contents to an array. The shape must match - the sequence's structure. 
- """ - - def assign_item(indices, valty, val): - ptr = cgutils.get_item_pointer2(builder, data, shapes, strides, - arrty.layout, indices, wraparound=False) - val = context.cast(builder, val, valty, arrty.dtype) - store_item(context, builder, arrty, val, ptr) - - def assign(seqty, seq, shapes, indices): - if len(shapes) == 0: - assert not isinstance(seqty, (types.Sequence, types.BaseTuple)) - assign_item(indices, seqty, seq) - return - - size = shapes[0] - - if isinstance(seqty, types.Sequence): - getitem_impl = _get_borrowing_getitem(context, seqty) - with cgutils.for_range(builder, size) as loop: - innerty = seqty.dtype - inner = getitem_impl(builder, (seq, loop.index)) - assign(innerty, inner, shapes[1:], indices + (loop.index,)) - - elif isinstance(seqty, types.BaseTuple): - for i in range(len(seqty)): - innerty = seqty[i] - inner = builder.extract_value(seq, i) - index = context.get_constant(types.intp, i) - assign(innerty, inner, shapes[1:], indices + (index,)) - - else: - assert 0, seqty - - assign(seqty, seq, shapes, ()) - - -@lower_builtin(np.array, types.Any) -@lower_builtin(np.array, types.Any, types.DTypeSpec) -def np_array(context, builder, sig, args): - arrty = sig.return_type - ndim = arrty.ndim - seqty = sig.args[0] - seq = args[0] - - shapes = compute_sequence_shape(context, builder, ndim, seqty, seq) - assert len(shapes) == ndim - - check_sequence_shape(context, builder, seqty, seq, shapes) - arr = _empty_nd_impl(context, builder, arrty, shapes) - assign_sequence_to_array(context, builder, arr.data, shapes, arr.strides, - arrty, seqty, seq) - - return impl_ret_new_ref(context, builder, sig.return_type, arr._getvalue()) - - -def _normalize_axis(context, builder, func_name, ndim, axis): - zero = axis.type(0) - ll_ndim = axis.type(ndim) - - # Normalize negative axis - is_neg_axis = builder.icmp_signed('<', axis, zero) - axis = builder.select(is_neg_axis, builder.add(axis, ll_ndim), axis) - - # Check axis for bounds - axis_out_of_bounds = builder.or_( 
- builder.icmp_signed('<', axis, zero), - builder.icmp_signed('>=', axis, ll_ndim)) - with builder.if_then(axis_out_of_bounds, likely=False): - msg = "%s(): axis out of bounds" % func_name - context.call_conv.return_user_exc(builder, IndexError, (msg,)) - - return axis - - -def _insert_axis_in_shape(context, builder, orig_shape, ndim, axis): - """ - Compute shape with the new axis inserted - e.g. given original shape (2, 3, 4) and axis=2, - the returned new shape is (2, 3, 1, 4). - """ - assert len(orig_shape) == ndim - 1 - - ll_shty = ir.ArrayType(cgutils.intp_t, ndim) - shapes = cgutils.alloca_once(builder, ll_shty) - - one = cgutils.intp_t(1) - - # 1. copy original sizes at appropriate places - for dim in range(ndim - 1): - ll_dim = cgutils.intp_t(dim) - after_axis = builder.icmp_signed('>=', ll_dim, axis) - sh = orig_shape[dim] - idx = builder.select(after_axis, - builder.add(ll_dim, one), - ll_dim) - builder.store(sh, cgutils.gep_inbounds(builder, shapes, 0, idx)) - - # 2. insert new size (1) at axis dimension - builder.store(one, cgutils.gep_inbounds(builder, shapes, 0, axis)) - - return cgutils.unpack_tuple(builder, builder.load(shapes)) - - -def _insert_axis_in_strides(context, builder, orig_strides, ndim, axis): - """ - Same as _insert_axis_in_shape(), but with a strides array. - """ - assert len(orig_strides) == ndim - 1 - - ll_shty = ir.ArrayType(cgutils.intp_t, ndim) - strides = cgutils.alloca_once(builder, ll_shty) - - one = cgutils.intp_t(1) - zero = cgutils.intp_t(0) - - # 1. copy original strides at appropriate places - for dim in range(ndim - 1): - ll_dim = cgutils.intp_t(dim) - after_axis = builder.icmp_signed('>=', ll_dim, axis) - idx = builder.select(after_axis, - builder.add(ll_dim, one), - ll_dim) - builder.store(orig_strides[dim], - cgutils.gep_inbounds(builder, strides, 0, idx)) - - # 2. 
insert new stride at axis dimension - # (the value is indifferent for a 1-sized dimension, we use 0) - builder.store(zero, cgutils.gep_inbounds(builder, strides, 0, axis)) - - return cgutils.unpack_tuple(builder, builder.load(strides)) - - -def expand_dims(context, builder, sig, args, axis): - """ - np.expand_dims() with the given axis. - """ - retty = sig.return_type - ndim = retty.ndim - arrty = sig.args[0] - - arr = make_array(arrty)(context, builder, value=args[0]) - ret = make_array(retty)(context, builder) - - shapes = cgutils.unpack_tuple(builder, arr.shape) - strides = cgutils.unpack_tuple(builder, arr.strides) - - new_shapes = _insert_axis_in_shape(context, builder, shapes, ndim, axis) - new_strides = _insert_axis_in_strides(context, builder, strides, ndim, axis) - - populate_array(ret, - data=arr.data, - shape=new_shapes, - strides=new_strides, - itemsize=arr.itemsize, - meminfo=arr.meminfo, - parent=arr.parent) - - return ret._getvalue() - - -@lower_builtin(np.expand_dims, types.Array, types.Integer) -def np_expand_dims(context, builder, sig, args): - axis = context.cast(builder, args[1], sig.args[1], types.intp) - axis = _normalize_axis(context, builder, "np.expand_dims", - sig.return_type.ndim, axis) - - ret = expand_dims(context, builder, sig, args, axis) - return impl_ret_borrowed(context, builder, sig.return_type, ret) - - -def _atleast_nd(context, builder, sig, args, transform): - arrtys = sig.args - arrs = args - - if isinstance(sig.return_type, types.BaseTuple): - rettys = list(sig.return_type) - else: - rettys = [sig.return_type] - assert len(rettys) == len(arrtys) - - rets = [transform(context, builder, arr, arrty, retty) - for arr, arrty, retty in zip(arrs, arrtys, rettys)] - - if isinstance(sig.return_type, types.BaseTuple): - ret = context.make_tuple(builder, sig.return_type, rets) - else: - ret = rets[0] - return impl_ret_borrowed(context, builder, sig.return_type, ret) - - -def _atleast_nd_transform(min_ndim, axes): - """ - Return a 
callback successively inserting 1-sized dimensions at the - following axes. - """ - assert min_ndim == len(axes) - - def transform(context, builder, arr, arrty, retty): - for i in range(min_ndim): - ndim = i + 1 - if arrty.ndim < ndim: - axis = cgutils.intp_t(axes[i]) - newarrty = arrty.copy(ndim=arrty.ndim + 1) - arr = expand_dims(context, builder, - typing.signature(newarrty, arrty), (arr,), - axis) - arrty = newarrty - - return arr - - return transform - - -@lower_builtin(np.atleast_1d, types.VarArg(types.Array)) -def np_atleast_1d(context, builder, sig, args): - transform = _atleast_nd_transform(1, [0]) - - return _atleast_nd(context, builder, sig, args, transform) - -@lower_builtin(np.atleast_2d, types.VarArg(types.Array)) -def np_atleast_2d(context, builder, sig, args): - transform = _atleast_nd_transform(2, [0, 0]) - - return _atleast_nd(context, builder, sig, args, transform) - -@lower_builtin(np.atleast_3d, types.VarArg(types.Array)) -def np_atleast_2d(context, builder, sig, args): - transform = _atleast_nd_transform(3, [0, 0, 2]) - - return _atleast_nd(context, builder, sig, args, transform) - - -def _do_concatenate(context, builder, axis, - arrtys, arrs, arr_shapes, arr_strides, - retty, ret_shapes): - """ - Concatenate arrays along the given axis. - """ - assert len(arrtys) == len(arrs) == len(arr_shapes) == len(arr_strides) - - zero = cgutils.intp_t(0) - - # Allocate return array - ret = _empty_nd_impl(context, builder, retty, ret_shapes) - ret_strides = cgutils.unpack_tuple(builder, ret.strides) - - # Compute the offset by which to bump the destination pointer - # after copying each input array. - # Morally, we need to copy each input array at different start indices - # into the destination array; bumping the destination pointer - # is simply easier than offsetting all destination indices. 
- copy_offsets = [] - - for arr_sh in arr_shapes: - # offset = ret_strides[axis] * input_shape[axis] - offset = zero - for dim, (size, stride) in enumerate(zip(arr_sh, ret_strides)): - is_axis = builder.icmp_signed('==', axis.type(dim), axis) - addend = builder.mul(size, stride) - offset = builder.select(is_axis, - builder.add(offset, addend), - offset) - copy_offsets.append(offset) - - # Copy input arrays into the return array - ret_data = ret.data - - for arrty, arr, arr_sh, arr_st, offset in zip(arrtys, arrs, arr_shapes, - arr_strides, copy_offsets): - arr_data = arr.data - - # Do the copy loop - # Note the loop nesting is optimized for the destination layout - loop_nest = cgutils.loop_nest(builder, arr_sh, cgutils.intp_t, - order=retty.layout) - - with loop_nest as indices: - src_ptr = cgutils.get_item_pointer2(builder, arr_data, - arr_sh, arr_st, - arrty.layout, indices) - val = load_item(context, builder, arrty, src_ptr) - val = context.cast(builder, val, arrty.dtype, retty.dtype) - dest_ptr = cgutils.get_item_pointer2(builder, ret_data, - ret_shapes, ret_strides, - retty.layout, indices) - store_item(context, builder, retty, val, dest_ptr) - - # Bump destination pointer - ret_data = cgutils.pointer_add(builder, ret_data, offset) - - return ret - - -def _np_concatenate(context, builder, arrtys, arrs, retty, axis): - ndim = retty.ndim - - zero = cgutils.intp_t(0) - - arrs = [make_array(aty)(context, builder, value=a) - for aty, a in zip(arrtys, arrs)] - - axis = _normalize_axis(context, builder, "np.concatenate", ndim, axis) - - # Get input shapes - arr_shapes = [cgutils.unpack_tuple(builder, arr.shape) for arr in arrs] - arr_strides = [cgutils.unpack_tuple(builder, arr.strides) for arr in arrs] - - # Compute return shape: - # - the dimension for the concatenation axis is summed over all inputs - # - other dimensions must match exactly for each input - ret_shapes = [cgutils.alloca_once_value(builder, sh) - for sh in arr_shapes[0]] - - for dim in range(ndim): - 
is_axis = builder.icmp_signed('==', axis.type(dim), axis) - ret_shape_ptr = ret_shapes[dim] - ret_sh = builder.load(ret_shape_ptr) - other_shapes = [sh[dim] for sh in arr_shapes[1:]] - - with builder.if_else(is_axis) as (on_axis, on_other_dim): - with on_axis: - sh = functools.reduce( - builder.add, - other_shapes + [ret_sh]) - builder.store(sh, ret_shape_ptr) - - with on_other_dim: - is_ok = cgutils.true_bit - for sh in other_shapes: - is_ok = builder.and_(is_ok, - builder.icmp_signed('==', sh, ret_sh)) - with builder.if_then(builder.not_(is_ok), likely=False): - context.call_conv.return_user_exc( - builder, ValueError, - ("np.concatenate(): input sizes over dimension %d do not match" % dim,)) - - ret_shapes = [builder.load(sh) for sh in ret_shapes] - - ret = _do_concatenate(context, builder, axis, - arrtys, arrs, arr_shapes, arr_strides, - retty, ret_shapes) - return impl_ret_new_ref(context, builder, retty, ret._getvalue()) - - -def _np_stack(context, builder, arrtys, arrs, retty, axis): - ndim = retty.ndim - - zero = cgutils.intp_t(0) - one = cgutils.intp_t(1) - ll_ndim = cgutils.intp_t(ndim) - ll_narrays = cgutils.intp_t(len(arrs)) - - arrs = [make_array(aty)(context, builder, value=a) - for aty, a in zip(arrtys, arrs)] - - axis = _normalize_axis(context, builder, "np.stack", ndim, axis) - - # Check input arrays have the same shape - orig_shape = cgutils.unpack_tuple(builder, arrs[0].shape) - - for arr in arrs[1:]: - is_ok = cgutils.true_bit - for sh, orig_sh in zip(cgutils.unpack_tuple(builder, arr.shape), - orig_shape): - is_ok = builder.and_(is_ok, builder.icmp_signed('==', sh, orig_sh)) - with builder.if_then(builder.not_(is_ok), likely=False): - context.call_conv.return_user_exc( - builder, ValueError, - ("np.stack(): all input arrays must have the same shape",)) - - orig_strides = [cgutils.unpack_tuple(builder, arr.strides) for arr in arrs] - - # Compute input shapes and return shape with the new axis inserted - # e.g. 
given 5 input arrays of shape (2, 3, 4) and axis=1, - # corrected input shape is (2, 1, 3, 4) and return shape is (2, 5, 3, 4). - ll_shty = ir.ArrayType(cgutils.intp_t, ndim) - - input_shapes = cgutils.alloca_once(builder, ll_shty) - ret_shapes = cgutils.alloca_once(builder, ll_shty) - - # 1. copy original sizes at appropriate places - for dim in range(ndim - 1): - ll_dim = cgutils.intp_t(dim) - after_axis = builder.icmp_signed('>=', ll_dim, axis) - sh = orig_shape[dim] - idx = builder.select(after_axis, - builder.add(ll_dim, one), - ll_dim) - builder.store(sh, cgutils.gep_inbounds(builder, input_shapes, 0, idx)) - builder.store(sh, cgutils.gep_inbounds(builder, ret_shapes, 0, idx)) - - # 2. insert new size at axis dimension - builder.store(one, cgutils.gep_inbounds(builder, input_shapes, 0, axis)) - builder.store(ll_narrays, cgutils.gep_inbounds(builder, ret_shapes, 0, axis)) - - input_shapes = [cgutils.unpack_tuple(builder, builder.load(input_shapes))] * len(arrs) - ret_shapes = cgutils.unpack_tuple(builder, builder.load(ret_shapes)) - - # Compute input strides for each array with the new axis inserted - input_strides = [cgutils.alloca_once(builder, ll_shty) - for i in range(len(arrs))] - - # 1. copy original strides at appropriate places - for dim in range(ndim - 1): - ll_dim = cgutils.intp_t(dim) - after_axis = builder.icmp_signed('>=', ll_dim, axis) - idx = builder.select(after_axis, - builder.add(ll_dim, one), - ll_dim) - for i in range(len(arrs)): - builder.store(orig_strides[i][dim], - cgutils.gep_inbounds(builder, input_strides[i], 0, idx)) - - # 2. 
insert new stride at axis dimension - # (the value is indifferent for a 1-sized dimension, we put 0) - for i in range(len(arrs)): - builder.store(zero, cgutils.gep_inbounds(builder, input_strides[i], 0, axis)) - - input_strides = [cgutils.unpack_tuple(builder, builder.load(st)) - for st in input_strides] - - # Create concatenated array - ret = _do_concatenate(context, builder, axis, - arrtys, arrs, input_shapes, input_strides, - retty, ret_shapes) - return impl_ret_new_ref(context, builder, retty, ret._getvalue()) - - - -@lower_builtin(np.concatenate, types.BaseTuple) -def np_concatenate(context, builder, sig, args): - axis = context.get_constant(types.intp, 0) - return _np_concatenate(context, builder, - list(sig.args[0]), - cgutils.unpack_tuple(builder, args[0]), - sig.return_type, - axis) - -@lower_builtin(np.concatenate, types.BaseTuple, types.Integer) -def np_concatenate_axis(context, builder, sig, args): - axis = context.cast(builder, args[1], sig.args[1], types.intp) - return _np_concatenate(context, builder, - list(sig.args[0]), - cgutils.unpack_tuple(builder, args[0]), - sig.return_type, - axis) - - -@lower_builtin(np.column_stack, types.BaseTuple) -def np_column_stack(context, builder, sig, args): - orig_arrtys = list(sig.args[0]) - orig_arrs = cgutils.unpack_tuple(builder, args[0]) - - arrtys = [] - arrs = [] - - axis = context.get_constant(types.intp, 1) - - for arrty, arr in zip(orig_arrtys, orig_arrs): - if arrty.ndim == 2: - arrtys.append(arrty) - arrs.append(arr) - else: - # Convert 1d array to 2d column array: np.expand_dims(a, 1) - assert arrty.ndim == 1 - newty = arrty.copy(ndim=2) - expand_sig = typing.signature(newty, arrty) - newarr = expand_dims(context, builder, expand_sig, (arr,), axis) - - arrtys.append(newty) - arrs.append(newarr) - - return _np_concatenate(context, builder, arrtys, arrs, - sig.return_type, axis) - - -def _np_stack_common(context, builder, sig, args, axis): - """ - np.stack() with the given axis value. 
- """ - return _np_stack(context, builder, - list(sig.args[0]), - cgutils.unpack_tuple(builder, args[0]), - sig.return_type, - axis) - -if numpy_version >= (1, 10): - @lower_builtin(np.stack, types.BaseTuple) - def np_stack(context, builder, sig, args): - axis = context.get_constant(types.intp, 0) - return _np_stack_common(context, builder, sig, args, axis) - - @lower_builtin(np.stack, types.BaseTuple, types.Integer) - def np_stack_axis(context, builder, sig, args): - axis = context.cast(builder, args[1], sig.args[1], types.intp) - return _np_stack_common(context, builder, sig, args, axis) - - -@lower_builtin(np.hstack, types.BaseTuple) -def np_hstack(context, builder, sig, args): - tupty = sig.args[0] - ndim = tupty[0].ndim - - if ndim == 0: - # hstack() on 0-d arrays returns a 1-d array - axis = context.get_constant(types.intp, 0) - return _np_stack_common(context, builder, sig, args, axis) - - else: - # As a special case, dimension 0 of 1-dimensional arrays is "horizontal" - axis = 0 if ndim == 1 else 1 - - def np_hstack_impl(arrays): - return np.concatenate(arrays, axis=axis) - - return context.compile_internal(builder, np_hstack_impl, sig, args) - -@lower_builtin(np.vstack, types.BaseTuple) -def np_vstack(context, builder, sig, args): - tupty = sig.args[0] - ndim = tupty[0].ndim - - if ndim == 0: - def np_vstack_impl(arrays): - return np.expand_dims(np.hstack(arrays), 1) - - elif ndim == 1: - # np.stack(arrays, axis=0) - axis = context.get_constant(types.intp, 0) - return _np_stack_common(context, builder, sig, args, axis) - - else: - def np_vstack_impl(arrays): - return np.concatenate(arrays, axis=0) - - return context.compile_internal(builder, np_vstack_impl, sig, args) - -@lower_builtin(np.dstack, types.BaseTuple) -def np_dstack(context, builder, sig, args): - tupty = sig.args[0] - retty = sig.return_type - ndim = tupty[0].ndim - - if ndim == 0: - def np_vstack_impl(arrays): - return np.hstack(arrays).reshape(1, 1, -1) - - return 
context.compile_internal(builder, np_vstack_impl, sig, args) - - elif ndim == 1: - # np.expand_dims(np.stack(arrays, axis=1), axis=0) - axis = context.get_constant(types.intp, 1) - stack_retty = retty.copy(ndim=retty.ndim - 1) - stack_sig = typing.signature(stack_retty, *sig.args) - stack_ret = _np_stack_common(context, builder, stack_sig, args, axis) - - axis = context.get_constant(types.intp, 0) - expand_sig = typing.signature(retty, stack_retty) - return expand_dims(context, builder, expand_sig, (stack_ret,), axis) - - elif ndim == 2: - # np.stack(arrays, axis=2) - axis = context.get_constant(types.intp, 2) - return _np_stack_common(context, builder, sig, args, axis) - - else: - def np_vstack_impl(arrays): - return np.concatenate(arrays, axis=2) - - return context.compile_internal(builder, np_vstack_impl, sig, args) - - -@extending.overload_method(types.Array, 'fill') -def arr_fill(arr, val): - - def fill_impl(arr, val): - arr[:] = val - return None - - return fill_impl - - -@extending.overload_method(types.Array, 'dot') -def array_dot(arr, other): - def dot_impl(arr, other): - return np.dot(arr, other) - - return dot_impl - -# ----------------------------------------------------------------------------- -# Sorting - -_sorts = {} - -def lt_floats(a, b): - return math.isnan(b) or a < b - -def get_sort_func(kind, is_float, is_argsort=False): - """ - Get a sort implementation of the given kind. 
- """ - key = kind, is_float, is_argsort - try: - return _sorts[key] - except KeyError: - if kind == 'quicksort': - sort = quicksort.make_jit_quicksort( - lt=lt_floats if is_float else None, - is_argsort=is_argsort) - func = sort.run_quicksort - elif kind == 'mergesort': - sort = mergesort.make_jit_mergesort( - lt=lt_floats if is_float else None, - is_argsort=is_argsort) - func = sort.run_mergesort - _sorts[key] = func - return func - - -@lower_builtin("array.sort", types.Array) -def array_sort(context, builder, sig, args): - arytype = sig.args[0] - sort_func = get_sort_func(kind='quicksort', - is_float=isinstance(arytype.dtype, types.Float)) - - def array_sort_impl(arr): - # Note we clobber the return value - sort_func(arr) - - return context.compile_internal(builder, array_sort_impl, sig, args) - -@lower_builtin(np.sort, types.Array) -def np_sort(context, builder, sig, args): - - def np_sort_impl(a): - res = a.copy() - res.sort() - return res - - return context.compile_internal(builder, np_sort_impl, sig, args) - -@lower_builtin("array.argsort", types.Array, types.Const) -@lower_builtin(np.argsort, types.Array, types.Const) -def array_argsort(context, builder, sig, args): - arytype, kind = sig.args - sort_func = get_sort_func(kind=kind.value, - is_float=isinstance(arytype.dtype, types.Float), - is_argsort=True) - - def array_argsort_impl(arr): - return sort_func(arr) - - innersig = sig.replace(args=sig.args[:1]) - innerargs = args[:1] - return context.compile_internal(builder, array_argsort_impl, - innersig, innerargs) - - -# ----------------------------------------------------------------------------- -# Implicit cast - -@lower_cast(types.Array, types.Array) -def array_to_array(context, builder, fromty, toty, val): - # Type inference should have prevented illegal array casting. 
- assert fromty.mutable != toty.mutable or toty.layout == 'A' - return val - - -# ----------------------------------------------------------------------------- -# Stride tricks - -def reshape_unchecked(a, shape, strides): - """ - An intrinsic returning a derived array with the given shape and strides. - """ - raise NotImplementedError - -@extending.type_callable(reshape_unchecked) -def type_reshape_unchecked(context): - def check_shape(shape): - return (isinstance(shape, types.BaseTuple) and - all(isinstance(v, types.Integer) for v in shape)) - - def typer(a, shape, strides): - if not isinstance(a, types.Array): - return - if not check_shape(shape) or not check_shape(strides): - return - if len(shape) != len(strides): - return - return a.copy(ndim=len(shape), layout='A') - - return typer - -@lower_builtin(reshape_unchecked, types.Array, types.BaseTuple, types.BaseTuple) -def impl_shape_unchecked(context, builder, sig, args): - aryty = sig.args[0] - retty = sig.return_type - - ary = make_array(aryty)(context, builder, args[0]) - out = make_array(retty)(context, builder) - shape = cgutils.unpack_tuple(builder, args[1]) - strides = cgutils.unpack_tuple(builder, args[2]) - - populate_array(out, - data=ary.data, - shape=shape, - strides=strides, - itemsize=ary.itemsize, - meminfo=ary.meminfo, - ) - - res = out._getvalue() - return impl_ret_borrowed(context, builder, retty, res) - - -@extending.overload(np.lib.stride_tricks.as_strided) -def as_strided(x, shape=None, strides=None): - if shape in (None, types.none): - @register_jitable - def get_shape(x, shape): - return x.shape - else: - @register_jitable - def get_shape(x, shape): - return shape - - if strides in (None, types.none): - # When *strides* is not passed, as_strided() does a non-size-checking - # reshape(), possibly changing the original strides. This is too - # cumbersome to support right now, and a Web search shows all example - # use cases of as_strided() pass explicit *strides*. 
- raise NotImplementedError("as_strided() strides argument is mandatory") - else: - @register_jitable - def get_strides(x, strides): - return strides - - def as_strided_impl(x, shape=None, strides=None): - x = reshape_unchecked(x, get_shape(x, shape), get_strides(x, strides)) - return x - - return as_strided_impl diff --git a/numba/numba/targets/base.py b/numba/numba/targets/base.py deleted file mode 100644 index ae5ba0e88..000000000 --- a/numba/numba/targets/base.py +++ /dev/null @@ -1,1083 +0,0 @@ -from __future__ import print_function - -import copy -import os -import sys -from collections import defaultdict, namedtuple -from itertools import permutations, takewhile - -import llvmlite.binding as ll -import llvmlite.llvmpy.core as lc -import numpy as np -from llvmlite import ir as llvmir -from llvmlite.llvmpy.core import Constant, LLVMException, Type - -from numba import (_dynfunc, _helperlib, cgutils, datamodel, debuginfo, - funcdesc, types, typing, utils) -from numba.pythonapi import PythonAPI - -from . import arrayobj, builtins, imputils -from .imputils import (RegistryLoader, builtin_registry, impl_ret_borrowed, - user_function, user_generator) - -GENERIC_POINTER = Type.pointer(Type.int(8)) -PYOBJECT = GENERIC_POINTER -void_ptr = GENERIC_POINTER - - -class OverloadSelector(object): - """ - An object matching an actual signature against a registry of formal - signatures and choosing the best candidate, if any. 
- - In the current implementation: - - a "signature" is a tuple of type classes or type instances - - the "best candidate" is the most specific match - """ - - def __init__(self): - # A list of (formal args tuple, value) - self.versions = [] - self._cache = {} - - def find(self, sig): - out = self._cache.get(sig) - if out is None: - out = self._find(sig) - self._cache[sig] = out - return out - - def _find(self, sig): - candidates = self._select_compatible(sig) - if candidates: - return candidates[self._best_signature(candidates)] - else: - raise NotImplementedError(self, sig) - - def _select_compatible(self, sig): - """ - Select all compatible signatures and their implementation. - """ - out = {} - for ver_sig, impl in self.versions: - if self._match_arglist(ver_sig, sig): - out[ver_sig] = impl - return out - - def _best_signature(self, candidates): - """ - Returns the best signature out of the candidates - """ - ordered, genericity = self._sort_signatures(candidates) - # check for ambiguous signatures - if len(ordered) > 1: - firstscore = genericity[ordered[0]] - same = list(takewhile(lambda x: genericity[x] == firstscore, - ordered)) - if len(same) > 1: - msg = ["{n} ambiguous signatures".format(n=len(same))] - for sig in same: - msg += ["{0} => {1}".format(sig, candidates[sig])] - raise TypeError('\n'.join(msg)) - return ordered[0] - - def _sort_signatures(self, candidates): - """ - Sort signatures in ascending level of genericity. 
- - Returns a 2-tuple: - - * ordered list of signatures - * dictionary containing genericity scores - """ - # score by genericity - genericity = defaultdict(int) - for this, other in permutations(candidates.keys(), r=2): - matched = self._match_arglist(formal_args=this, actual_args=other) - if matched: - # genericity score +1 for every another compatible signature - genericity[this] += 1 - # order candidates in ascending level of genericity - ordered = sorted(candidates.keys(), key=lambda x: genericity[x]) - return ordered, genericity - - def _match_arglist(self, formal_args, actual_args): - """ - Returns True if the the signature is "matching". - A formal signature is "matching" if the actual signature matches exactly - or if the formal signature is a compatible generic signature. - """ - # normalize VarArg - if formal_args and isinstance(formal_args[-1], types.VarArg): - ndiff = len(actual_args) - len(formal_args) + 1 - formal_args = formal_args[:-1] + (formal_args[-1].dtype,) * ndiff - - if len(formal_args) != len(actual_args): - return False - - for formal, actual in zip(formal_args, actual_args): - if not self._match(formal, actual): - return False - - return True - - def _match(self, formal, actual): - if formal == actual: - # formal argument matches actual arguments - return True - elif types.Any == formal: - # formal argument is any - return True - elif isinstance(formal, type) and issubclass(formal, types.Type): - if isinstance(actual, type) and issubclass(actual, formal): - # formal arg is a type class and actual arg is a subclass - return True - elif isinstance(actual, formal): - # formal arg is a type class of which actual arg is an instance - return True - - def append(self, value, sig): - """ - Add a formal signature and its associated value. 
- """ - assert isinstance(sig, tuple), (value, sig) - self.versions.append((sig, value)) - self._cache.clear() - - -@utils.runonce -def _load_global_helpers(): - """ - Execute once to install special symbols into the LLVM symbol table. - """ - # This is Py_None's real C name - ll.add_symbol("_Py_NoneStruct", id(None)) - - # Add Numba C helper functions - for c_helpers in (_helperlib.c_helpers, _dynfunc.c_helpers): - for py_name, c_address in c_helpers.items(): - c_name = "numba_" + py_name - ll.add_symbol(c_name, c_address) - - # Add Numpy C helpers (npy_XXX) - for c_name, c_address in _helperlib.npymath_exports.items(): - ll.add_symbol(c_name, c_address) - - # Add all built-in exception classes - for obj in utils.builtins.__dict__.values(): - if isinstance(obj, type) and issubclass(obj, BaseException): - ll.add_symbol("PyExc_%s" % (obj.__name__), id(obj)) - - -class BaseContext(object): - """ - - Notes on Structure - ------------------ - - Most objects are lowered as plain-old-data structure in the generated - llvm. They are passed around by reference (a pointer to the structure). - Only POD structure can life across function boundaries by copying the - data. - """ - # True if the target requires strict alignment - # Causes exception to be raised if the record members are not aligned. 
- strict_alignment = False - - # Force powi implementation as math.pow call - implement_powi_as_math_call = False - implement_pow_as_math_call = False - - # Emit Debug info - enable_debuginfo = False - DIBuilder = debuginfo.DIBuilder - - # Bound checking - enable_boundcheck = False - - # NRT - enable_nrt = False - - # Auto parallelization - auto_parallel = False - - # PYCC - aot_mode = False - - # Error model for various operations (only FP exceptions currently) - error_model = None - - # Whether dynamic globals (CPU runtime addresses) is allowed - allow_dynamic_globals = False - - # Fast math flags - enable_fastmath = False - - # python exceution environment - environment = None - - # the function descriptor - fndesc = None - - def __init__(self, typing_context): - _load_global_helpers() - - self.address_size = utils.MACHINE_BITS - self.typing_context = typing_context - - # A mapping of installed registries to their loaders - self._registries = {} - # Declarations loaded from registries and other sources - self._defns = defaultdict(OverloadSelector) - self._getattrs = defaultdict(OverloadSelector) - self._setattrs = defaultdict(OverloadSelector) - self._casts = OverloadSelector() - self._get_constants = OverloadSelector() - # Other declarations - self._generators = {} - self.special_ops = {} - self.cached_internal_func = {} - self._pid = None - - self.data_model_manager = datamodel.default_manager - - # Initialize - self.init() - - def init(self): - """ - For subclasses to add initializer - """ - - def refresh(self): - """ - Refresh context with new declarations from known registries. - Useful for third-party extensions. - """ - # Populate built-in registry - from . import (arraymath, enumimpl, iterators, linalg, numbers, - optional, polynomial, rangeobj, slicing, smartarray, - tupleobj) - try: - from . 
import npdatetime - except NotImplementedError: - pass - self.install_registry(builtin_registry) - self.load_additional_registries() - # Also refresh typing context, since @overload declarations can - # affect it. - self.typing_context.refresh() - - def load_additional_registries(self): - """ - Load target-specific registries. Can be overriden by subclasses. - """ - - def mangler(self, name, types): - """ - Perform name mangling. - """ - return funcdesc.default_mangler(name, types) - - def get_env_name(self, fndesc): - """Get the environment name given a FunctionDescriptior. - - Use this instead of the ``fndesc.env_name`` so that the target-context - can provide necessary mangling of the symbol to meet ABI requirements. - """ - return fndesc.env_name - - def declare_env_global(self, module, envname): - """Declare the Environment pointer as a global of the module. - - The pointer is initialized to NULL. It must be filled by the runtime - with the actual address of the Env before the associated function - can be executed. - - Parameters - ---------- - module : - The LLVM Module - envname : str - The name of the global variable. 
- """ - if envname not in module.globals: - gv = llvmir.GlobalVariable(module, cgutils.voidptr_t, name=envname) - gv.linkage = 'common' - gv.initializer = cgutils.get_null_value(gv.type.pointee) - - return module.globals[envname] - - def get_arg_packer(self, fe_args): - return datamodel.ArgPacker(self.data_model_manager, fe_args) - - def get_data_packer(self, fe_types): - return datamodel.DataPacker(self.data_model_manager, fe_types) - - @property - def target_data(self): - raise NotImplementedError - - @utils.cached_property - def nrt(self): - from numba.runtime.context import NRTContext - return NRTContext(self, self.enable_nrt) - - def subtarget(self, **kws): - obj = copy.copy(self) # shallow copy - for k, v in kws.items(): - if not hasattr(obj, k): - raise NameError("unknown option {0!r}".format(k)) - setattr(obj, k, v) - if obj.codegen() is not self.codegen(): - # We can't share functions accross different codegens - obj.cached_internal_func = {} - return obj - - def install_registry(self, registry): - """ - Install a *registry* (a imputils.Registry instance) of function - and attribute implementations. 
- """ - try: - loader = self._registries[registry] - except KeyError: - loader = RegistryLoader(registry) - self._registries[registry] = loader - self.insert_func_defn(loader.new_registrations('functions')) - self._insert_getattr_defn(loader.new_registrations('getattrs')) - self._insert_setattr_defn(loader.new_registrations('setattrs')) - self._insert_cast_defn(loader.new_registrations('casts')) - self._insert_get_constant_defn(loader.new_registrations('constants')) - - def insert_func_defn(self, defns): - for impl, func, sig in defns: - self._defns[func].append(impl, sig) - - def _insert_getattr_defn(self, defns): - for impl, attr, sig in defns: - self._getattrs[attr].append(impl, sig) - - def _insert_setattr_defn(self, defns): - for impl, attr, sig in defns: - self._setattrs[attr].append(impl, sig) - - def _insert_cast_defn(self, defns): - for impl, sig in defns: - self._casts.append(impl, sig) - - def _insert_get_constant_defn(self, defns): - for impl, sig in defns: - self._get_constants.append(impl, sig) - - def insert_user_function(self, func, fndesc, libs=()): - impl = user_function(fndesc, libs) - self._defns[func].append(impl, impl.signature) - - def add_user_function(self, func, fndesc, libs=()): - if func not in self._defns: - msg = "{func} is not a registered user function" - raise KeyError(msg.format(func=func)) - impl = user_function(fndesc, libs) - self._defns[func].append(impl, impl.signature) - - def insert_generator(self, genty, gendesc, libs=()): - assert isinstance(genty, types.Generator) - impl = user_generator(gendesc, libs) - self._generators[genty] = gendesc, impl - - def remove_user_function(self, func): - """ - Remove user function *func*. - KeyError is raised if the function isn't known to us. 
- """ - del self._defns[func] - - def get_external_function_type(self, fndesc): - argtypes = [self.get_argument_type(aty) - for aty in fndesc.argtypes] - # don't wrap in pointer - restype = self.get_argument_type(fndesc.restype) - fnty = Type.function(restype, argtypes) - return fnty - - def declare_function(self, module, fndesc): - fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) - fn = module.get_or_insert_function(fnty, name=fndesc.mangled_name) - self.call_conv.decorate_function(fn, fndesc.args, fndesc.argtypes, noalias=fndesc.noalias) - if fndesc.inline: - fn.attributes.add('alwaysinline') - return fn - - def declare_external_function(self, module, fndesc): - fnty = self.get_external_function_type(fndesc) - fn = module.get_or_insert_function(fnty, name=fndesc.mangled_name) - assert fn.is_declaration - for ak, av in zip(fndesc.args, fn.args): - av.name = "arg.%s" % ak - return fn - - def insert_const_string(self, mod, string): - """ - Insert constant *string* (a str object) into module *mod*. - """ - stringtype = GENERIC_POINTER - name = ".const.%s" % string - text = cgutils.make_bytearray(string.encode("utf-8") + b"\x00") - gv = self.insert_unique_const(mod, name, text) - return Constant.bitcast(gv, stringtype) - - def insert_unique_const(self, mod, name, val): - """ - Insert a unique internal constant named *name*, with LLVM value - *val*, into module *mod*. - """ - try: - gv = mod.get_global(name) - except KeyError: - return cgutils.global_constant(mod, name, val) - else: - return gv - - def get_argument_type(self, ty): - return self.data_model_manager[ty].get_argument_type() - - def get_return_type(self, ty): - return self.data_model_manager[ty].get_return_type() - - def get_data_type(self, ty): - """ - Get a LLVM data representation of the Numba type *ty* that is safe - for storage. Record data are stored as byte array. - - The return value is a llvmlite.ir.Type object, or None if the type - is an opaque pointer (???). 
- """ - return self.data_model_manager[ty].get_data_type() - - def get_value_type(self, ty): - return self.data_model_manager[ty].get_value_type() - - def pack_value(self, builder, ty, value, ptr, align=None): - """ - Pack value into the array storage at *ptr*. - If *align* is given, it is the guaranteed alignment for *ptr* - (by default, the standard ABI alignment). - """ - dataval = self.data_model_manager[ty].as_data(builder, value) - builder.store(dataval, ptr, align=align) - - def unpack_value(self, builder, ty, ptr, align=None): - """ - Unpack value from the array storage at *ptr*. - If *align* is given, it is the guaranteed alignment for *ptr* - (by default, the standard ABI alignment). - """ - dm = self.data_model_manager[ty] - return dm.load_from_data_pointer(builder, ptr, align) - - def get_constant_generic(self, builder, ty, val): - """ - Return a LLVM constant representing value *val* of Numba type *ty*. - """ - try: - impl = self._get_constants.find((ty,)) - return impl(self, builder, ty, val) - except NotImplementedError: - raise NotImplementedError("Cannot lower constant of type '%s'" % (ty,)) - - def get_constant(self, ty, val): - """ - Same as get_constant_generic(), but without specifying *builder*. - Works only for simple types. - """ - # HACK: pass builder=None to preserve get_constant() API - return self.get_constant_generic(None, ty, val) - - def get_constant_undef(self, ty): - lty = self.get_value_type(ty) - return Constant.undef(lty) - - def get_constant_null(self, ty): - lty = self.get_value_type(ty) - return Constant.null(lty) - - def get_function(self, fn, sig, _firstcall=True): - """ - Return the implementation of function *fn* for signature *sig*. - The return value is a callable with the signature (builder, args). 
- """ - sig = sig.as_function() - if isinstance(fn, (types.Function, types.BoundFunction, - types.Dispatcher)): - key = fn.get_impl_key(sig) - overloads = self._defns[key] - else: - key = fn - overloads = self._defns[key] - - try: - return _wrap_impl(overloads.find(sig.args), self, sig) - except NotImplementedError: - pass - if isinstance(fn, types.Type): - # It's a type instance => try to find a definition for the type class - try: - return self.get_function(type(fn), sig) - except NotImplementedError: - # Raise exception for the type instance, for a better error message - pass - - # Automatically refresh the context to load new registries if we are - # calling the first time. - if _firstcall: - self.refresh() - return self.get_function(fn, sig, _firstcall=False) - - raise NotImplementedError("No definition for lowering %s%s" % (key, sig)) - - def get_generator_desc(self, genty): - """ - """ - return self._generators[genty][0] - - def get_generator_impl(self, genty): - """ - """ - return self._generators[genty][1] - - def get_bound_function(self, builder, obj, ty): - assert self.get_value_type(ty) == obj.type - return obj - - def get_getattr(self, typ, attr): - """ - Get the getattr() implementation for the given type and attribute name. - The return value is a callable with the signature - (context, builder, typ, val, attr). - """ - if isinstance(typ, types.Module): - # Implement getattr for module-level globals. - # We are treating them as constants. 
- # XXX We shouldn't have to retype this - attrty = self.typing_context.resolve_module_constants(typ, attr) - if attrty is None or isinstance(attrty, types.Dummy): - # No implementation required for dummies (functions, modules...), - # which are dealt with later - return None - else: - pyval = getattr(typ.pymod, attr) - llval = self.get_constant(attrty, pyval) - def imp(context, builder, typ, val, attr): - return impl_ret_borrowed(context, builder, attrty, llval) - return imp - - # Lookup specific getattr implementation for this type and attribute - overloads = self._getattrs[attr] - try: - return overloads.find((typ,)) - except NotImplementedError: - pass - # Lookup generic getattr implementation for this type - overloads = self._getattrs[None] - try: - return overloads.find((typ,)) - except NotImplementedError: - pass - - raise NotImplementedError("No definition for lowering %s.%s" % (typ, attr)) - - def get_setattr(self, attr, sig): - """ - Get the setattr() implementation for the given attribute name - and signature. - The return value is a callable with the signature (builder, args). 
- """ - assert len(sig.args) == 2 - typ = sig.args[0] - valty = sig.args[1] - - def wrap_setattr(impl): - def wrapped(builder, args): - return impl(self, builder, sig, args, attr) - return wrapped - - # Lookup specific setattr implementation for this type and attribute - overloads = self._setattrs[attr] - try: - return wrap_setattr(overloads.find((typ, valty))) - except NotImplementedError: - pass - # Lookup generic setattr implementation for this type - overloads = self._setattrs[None] - try: - return wrap_setattr(overloads.find((typ, valty))) - except NotImplementedError: - pass - - raise NotImplementedError("No definition for lowering %s.%s = %s" - % (typ, attr, valty)) - - def get_argument_value(self, builder, ty, val): - """ - Argument representation to local value representation - """ - return self.data_model_manager[ty].from_argument(builder, val) - - def get_returned_value(self, builder, ty, val): - """ - Return value representation to local value representation - """ - return self.data_model_manager[ty].from_return(builder, val) - - def get_return_value(self, builder, ty, val): - """ - Local value representation to return type representation - """ - return self.data_model_manager[ty].as_return(builder, val) - - def get_value_as_argument(self, builder, ty, val): - """Prepare local value representation as argument type representation - """ - return self.data_model_manager[ty].as_argument(builder, val) - - def get_value_as_data(self, builder, ty, val): - return self.data_model_manager[ty].as_data(builder, val) - - def get_data_as_value(self, builder, ty, val): - return self.data_model_manager[ty].from_data(builder, val) - - def pair_first(self, builder, val, ty): - """ - Extract the first element of a heterogeneous pair. - """ - pair = self.make_helper(builder, ty, val) - return pair.first - - def pair_second(self, builder, val, ty): - """ - Extract the second element of a heterogeneous pair. 
- """ - pair = self.make_helper(builder, ty, val) - return pair.second - - def cast(self, builder, val, fromty, toty): - """ - Cast a value of type *fromty* to type *toty*. - This implements implicit conversions as can happen due to the - granularity of the Numba type system, or lax Python semantics. - """ - if fromty == toty or toty == types.Any: - return val - try: - impl = self._casts.find((fromty, toty)) - return impl(self, builder, fromty, toty, val) - except NotImplementedError: - raise NotImplementedError( - "Cannot cast %s to %s: %s" % (fromty, toty, val)) - - def generic_compare(self, builder, key, argtypes, args): - """ - Compare the given LLVM values of the given Numba types using - the comparison *key* (e.g. '=='). The values are first cast to - a common safe conversion type. - """ - at, bt = argtypes - av, bv = args - ty = self.typing_context.unify_types(at, bt) - assert ty is not None - cav = self.cast(builder, av, at, ty) - cbv = self.cast(builder, bv, bt, ty) - cmpsig = typing.signature(types.boolean, ty, ty) - cmpfunc = self.get_function(key, cmpsig) - return cmpfunc(builder, (cav, cbv)) - - def make_optional_none(self, builder, valtype): - optval = self.make_helper(builder, types.Optional(valtype)) - optval.valid = cgutils.false_bit - return optval._getvalue() - - def make_optional_value(self, builder, valtype, value): - optval = self.make_helper(builder, types.Optional(valtype)) - optval.valid = cgutils.true_bit - optval.data = value - return optval._getvalue() - - def is_true(self, builder, typ, val): - """ - Return the truth value of a value of the given Numba type. - """ - impl = self.get_function(bool, typing.signature(types.boolean, typ)) - return impl(builder, (val,)) - - def get_c_value(self, builder, typ, name, dllimport=False): - """ - Get a global value through its C-accessible *name*, with the given - LLVM type. - If *dllimport* is true, the symbol will be marked as imported - from a DLL (necessary for AOT compilation under Windows). 
- """ - module = builder.function.module - try: - gv = module.get_global_variable_named(name) - except LLVMException: - gv = module.add_global_variable(typ, name) - if dllimport and self.aot_mode and sys.platform == 'win32': - gv.storage_class = "dllimport" - return gv - - def call_external_function(self, builder, callee, argtys, args): - args = [self.get_value_as_argument(builder, ty, arg) - for ty, arg in zip(argtys, args)] - retval = builder.call(callee, args) - return retval - - def get_function_pointer_type(self, typ): - return self.data_model_manager[typ].get_data_type() - - def call_function_pointer(self, builder, funcptr, args, cconv=None): - return builder.call(funcptr, args, cconv=cconv) - - def print_string(self, builder, text): - mod = builder.module - cstring = GENERIC_POINTER - fnty = Type.function(Type.int(), [cstring]) - puts = mod.get_or_insert_function(fnty, "puts") - return builder.call(puts, [text]) - - def debug_print(self, builder, text): - mod = builder.module - cstr = self.insert_const_string(mod, str(text)) - self.print_string(builder, cstr) - - def printf(self, builder, format_string, *args): - mod = builder.module - if isinstance(format_string, str): - cstr = self.insert_const_string(mod, format_string) - else: - cstr = format_string - fnty = Type.function(Type.int(), (GENERIC_POINTER,), var_arg=True) - fn = mod.get_or_insert_function(fnty, "printf") - return builder.call(fn, (cstr,) + tuple(args)) - - def get_struct_type(self, struct): - """ - Get the LLVM struct type for the given Structure class *struct*. 
- """ - fields = [self.get_value_type(v) for _, v in struct._fields] - return Type.struct(fields) - - def get_dummy_value(self): - return Constant.null(self.get_dummy_type()) - - def get_dummy_type(self): - return GENERIC_POINTER - - def compile_subroutine_no_cache(self, builder, impl, sig, locals={}, flags=None): - """ - Invoke the compiler to compile a function to be used inside a - nopython function, but without generating code to call that - function. - - Note this context's flags are not inherited. - """ - # Compile - from numba import compiler - - codegen = self.codegen() - library = codegen.create_library(impl.__name__) - if flags is None: - flags = compiler.Flags() - flags.set('no_compile') - flags.set('no_cpython_wrapper') - cres = compiler.compile_internal(self.typing_context, self, - library, - impl, sig.args, - sig.return_type, flags, - locals=locals) - - # Allow inlining the function inside callers. - codegen.add_linking_library(cres.library) - return cres - - def compile_subroutine(self, builder, impl, sig, locals={}): - """ - Compile the function *impl* for the given *sig* (in nopython mode). - Return a placeholder object that's callable from another Numba - function. - """ - cache_key = (impl.__code__, sig, type(self.error_model)) - if impl.__closure__: - # XXX This obviously won't work if a cell's value is - # unhashable. - cache_key += tuple(c.cell_contents for c in impl.__closure__) - ty = self.cached_internal_func.get(cache_key) - if ty is None: - cres = self.compile_subroutine_no_cache(builder, impl, sig, - locals=locals) - ty = types.NumbaFunction(cres.fndesc, sig) - self.cached_internal_func[cache_key] = ty - return ty - - def compile_internal(self, builder, impl, sig, args, locals={}): - """ - Like compile_subroutine(), but also call the function with the given - *args*. 
- """ - ty = self.compile_subroutine(builder, impl, sig, locals) - return self.call_internal(builder, ty.fndesc, sig, args) - - def call_internal(self, builder, fndesc, sig, args): - """ - Given the function descriptor of an internally compiled function, - emit a call to that function with the given arguments. - """ - # Add call to the generated function - llvm_mod = builder.module - fn = self.declare_function(llvm_mod, fndesc) - status, res = self.call_conv.call_function(builder, fn, sig.return_type, - sig.args, args) - - with cgutils.if_unlikely(builder, status.is_error): - self.call_conv.return_status_propagate(builder, status) - - res = imputils.fix_returning_optional(self, builder, sig, status, res) - return res - - def call_unresolved(self, builder, name, sig, args): - """ - Insert a function call to an unresolved symbol with the given *name*. - - Note: this is used for recursive call. - - In the mutual recursion case:: - - @njit - def foo(): - ... # calls bar() - - @njit - def bar(): - ... # calls foo() - - foo() - - When foo() is called, the compilation of bar() is fully completed - (codegen'ed and loaded) before foo() is. Since MCJIT's eager compilation - doesn't allow loading modules with declare-only functions (which is - needed for foo() in bar()), the call_unresolved injects a global - variable that the "linker" can update even after the module is loaded by - MCJIT. The linker would allocate space for the global variable before - the bar() module is loaded. When later foo() module is defined, it will - update bar()'s reference to foo(). - - The legacy lazy JIT and the new ORC JIT would allow a declare-only - function be used in a module as long as it is defined by the time of its - first use. - """ - # Insert an unresolved reference to the function being called. 
- codegen = self.codegen() - fnty = self.call_conv.get_function_type(sig.return_type, sig.args) - fn = codegen.insert_unresolved_ref(builder, fnty, name) - # Normal call sequence - status, res = self.call_conv.call_function(builder, fn, sig.return_type, - sig.args, args) - with cgutils.if_unlikely(builder, status.is_error): - self.call_conv.return_status_propagate(builder, status) - - res = imputils.fix_returning_optional(self, builder, sig, status, res) - return res - - def get_executable(self, func, fndesc): - raise NotImplementedError - - def get_python_api(self, builder): - return PythonAPI(self, builder) - - def sentry_record_alignment(self, rectyp, attr): - """ - Assumes offset starts from a properly aligned location - """ - if self.strict_alignment: - offset = rectyp.offset(attr) - elemty = rectyp.typeof(attr) - align = self.get_abi_alignment(self.get_data_type(elemty)) - if offset % align: - msg = "{rec}.{attr} of type {type} is not aligned".format( - rec=rectyp, attr=attr, type=elemty) - raise TypeError(msg) - - def get_helper_class(self, typ, kind='value'): - """ - Get a helper class for the given *typ*. - """ - # XXX handle all types: complex, array, etc. - # XXX should it be a method on the model instead? this would allow a default kind... - return cgutils.create_struct_proxy(typ, kind) - - def _make_helper(self, builder, typ, value=None, ref=None, kind='value'): - cls = self.get_helper_class(typ, kind) - return cls(self, builder, value=value, ref=ref) - - def make_helper(self, builder, typ, value=None, ref=None): - """ - Get a helper object to access the *typ*'s members, - for the given value or reference. - """ - return self._make_helper(builder, typ, value, ref, kind='value') - - def make_data_helper(self, builder, typ, ref=None): - """ - As make_helper(), but considers the value as stored in memory, - rather than a live value. 
- """ - return self._make_helper(builder, typ, ref=ref, kind='data') - - def make_array(self, typ): - return arrayobj.make_array(typ) - - def populate_array(self, arr, **kwargs): - """ - Populate array structure. - """ - return arrayobj.populate_array(arr, **kwargs) - - def make_complex(self, builder, typ, value=None): - """ - Get a helper object to access the given complex numbers' members. - """ - assert isinstance(typ, types.Complex), typ - return self.make_helper(builder, typ, value) - - def make_tuple(self, builder, typ, values): - """ - Create a tuple of the given *typ* containing the *values*. - """ - tup = self.get_constant_undef(typ) - for i, val in enumerate(values): - tup = builder.insert_value(tup, val, i) - return tup - - def make_constant_array(self, builder, typ, ary): - """ - Create an array structure reifying the given constant array. - A low-level contiguous array constant is created in the LLVM IR. - """ - datatype = self.get_data_type(typ.dtype) - # don't freeze ary of non-contig or bigger than 1MB - size_limit = 10**6 - - if (self.allow_dynamic_globals and - (typ.layout not in 'FC' or ary.nbytes > size_limit)): - # get pointer from the ary - dataptr = ary.ctypes.data - data = self.add_dynamic_addr(builder, dataptr, info=str(type(dataptr))) - rt_addr = self.add_dynamic_addr(builder, id(ary), info=str(type(ary))) - else: - # Handle data: reify the flattened array in "C" or "F" order as a - # global array of bytes. 
- flat = ary.flatten(order=typ.layout) - # Note: we use `bytearray(flat.data)` instead of `bytearray(flat)` to - # workaround issue #1850 which is due to numpy issue #3147 - consts = Constant.array(Type.int(8), bytearray(flat.data)) - data = cgutils.global_constant(builder, ".const.array.data", consts) - # Ensure correct data alignment (issue #1933) - data.align = self.get_abi_alignment(datatype) - # No reference to parent ndarray - rt_addr = None - - # Handle shape - llintp = self.get_value_type(types.intp) - shapevals = [self.get_constant(types.intp, s) for s in ary.shape] - cshape = Constant.array(llintp, shapevals) - - # Handle strides - stridevals = [self.get_constant(types.intp, s) for s in ary.strides] - cstrides = Constant.array(llintp, stridevals) - - # Create array structure - cary = self.make_array(typ)(self, builder) - - intp_itemsize = self.get_constant(types.intp, ary.dtype.itemsize) - self.populate_array(cary, - data=builder.bitcast(data, cary.data.type), - shape=cshape, - strides=cstrides, - itemsize=intp_itemsize, - parent=rt_addr, - meminfo=None) - - return cary._getvalue() - - def add_dynamic_addr(self, builder, intaddr, info): - """ - Returns dynamic address as a void pointer `i8*`. - - Internally, a global variable is added to inform the lowerer about - the usage of dynamic addresses. Caching will be disabled. - """ - assert self.allow_dynamic_globals, "dyn globals disabled in this target" - assert isinstance(intaddr, utils.INT_TYPES), 'dyn addr not of int type' - mod = builder.module - llvoidptr = self.get_value_type(types.voidptr) - addr = self.get_constant(types.uintp, intaddr).inttoptr(llvoidptr) - # Use a unique name by embedding the address value - symname = 'numba.dynamic.globals.{:x}'.format(intaddr) - gv = mod.add_global_variable(llvoidptr, name=symname) - # Use linkonce linkage to allow merging with other GV of the same name. - # And, avoid optimization from assuming its value. 
- gv.linkage = 'linkonce' - gv.initializer = addr - return builder.load(gv) - - def get_abi_sizeof(self, ty): - """ - Get the ABI size of LLVM type *ty*. - """ - assert isinstance(ty, llvmir.Type), "Expected LLVM type" - return ty.get_abi_size(self.target_data) - - def get_abi_alignment(self, ty): - """ - Get the ABI alignment of LLVM type *ty*. - """ - assert isinstance(ty, llvmir.Type), "Expected LLVM type" - return ty.get_abi_alignment(self.target_data) - - def get_preferred_array_alignment(context, ty): - """ - Get preferred array alignment for Numba type *ty*. - """ - # AVX prefers 32-byte alignment - return 32 - - def post_lowering(self, mod, library): - """Run target specific post-lowering transformation here. - """ - - def create_module(self, name): - """Create a LLVM module - """ - return lc.Module(name) - - -class _wrap_impl(object): - """ - A wrapper object to call an implementation function with some predefined - (context, signature) arguments. - The wrapper also forwards attribute queries, which is important. - """ - - def __init__(self, imp, context, sig): - self._imp = imp - self._context = context - self._sig = sig - - def __call__(self, builder, args): - return self._imp(self._context, builder, self._sig, args) - - def __getattr__(self, item): - return getattr(self._imp, item) - - def __repr__(self): - return "" % self._imp diff --git a/numba/numba/targets/boxing.py b/numba/numba/targets/boxing.py deleted file mode 100644 index eee9c9d50..000000000 --- a/numba/numba/targets/boxing.py +++ /dev/null @@ -1,1018 +0,0 @@ -""" -Boxing and unboxing of native Numba values to / from CPython objects. -""" - -from llvmlite import ir - -from .. import cgutils, numpy_support, types -from ..pythonapi import box, unbox, reflect, NativeValue - -from . 
import listobj, setobj -from ..utils import IS_PY3 - - -# -# Scalar types -# - -@box(types.Boolean) -def box_bool(typ, val, c): - longval = c.builder.zext(val, c.pyapi.long) - return c.pyapi.bool_from_long(longval) - -@unbox(types.Boolean) -def unbox_boolean(typ, obj, c): - istrue = c.pyapi.object_istrue(obj) - zero = ir.Constant(istrue.type, 0) - val = c.builder.icmp_signed('!=', istrue, zero) - return NativeValue(val, is_error=c.pyapi.c_api_error()) - - -@box(types.Integer) -def box_integer(typ, val, c): - if typ.signed: - ival = c.builder.sext(val, c.pyapi.longlong) - return c.pyapi.long_from_longlong(ival) - else: - ullval = c.builder.zext(val, c.pyapi.ulonglong) - return c.pyapi.long_from_ulonglong(ullval) - -@unbox(types.Integer) -def unbox_integer(typ, obj, c): - ll_type = c.context.get_argument_type(typ) - val = cgutils.alloca_once(c.builder, ll_type) - longobj = c.pyapi.number_long(obj) - with c.pyapi.if_object_ok(longobj): - if typ.signed: - llval = c.pyapi.long_as_longlong(longobj) - else: - llval = c.pyapi.long_as_ulonglong(longobj) - c.pyapi.decref(longobj) - c.builder.store(c.builder.trunc(llval, ll_type), val) - return NativeValue(c.builder.load(val), - is_error=c.pyapi.c_api_error()) - - -@box(types.Float) -def box_float(typ, val, c): - if typ == types.float32: - dbval = c.builder.fpext(val, c.pyapi.double) - else: - assert typ == types.float64 - dbval = val - return c.pyapi.float_from_double(dbval) - -@unbox(types.Float) -def unbox_float(typ, obj, c): - fobj = c.pyapi.number_float(obj) - dbval = c.pyapi.float_as_double(fobj) - c.pyapi.decref(fobj) - if typ == types.float32: - val = c.builder.fptrunc(dbval, - c.context.get_argument_type(typ)) - else: - assert typ == types.float64 - val = dbval - return NativeValue(val, is_error=c.pyapi.c_api_error()) - - -@box(types.Complex) -def box_complex(typ, val, c): - cval = c.context.make_complex(c.builder, typ, value=val) - - if typ == types.complex64: - freal = c.builder.fpext(cval.real, c.pyapi.double) - 
fimag = c.builder.fpext(cval.imag, c.pyapi.double) - else: - assert typ == types.complex128 - freal, fimag = cval.real, cval.imag - return c.pyapi.complex_from_doubles(freal, fimag) - -@unbox(types.Complex) -def unbox_complex(typ, obj, c): - # First unbox to complex128, since that's what CPython gives us - c128 = c.context.make_complex(c.builder, types.complex128) - ok = c.pyapi.complex_adaptor(obj, c128._getpointer()) - failed = cgutils.is_false(c.builder, ok) - - with cgutils.if_unlikely(c.builder, failed): - c.pyapi.err_set_string("PyExc_TypeError", - "conversion to %s failed" % (typ,)) - - if typ == types.complex64: - # Downcast to complex64 if necessary - cplx = c.context.make_complex(c.builder, typ) - cplx.real = c.context.cast(c.builder, c128.real, - types.float64, types.float32) - cplx.imag = c.context.cast(c.builder, c128.imag, - types.float64, types.float32) - else: - assert typ == types.complex128 - cplx = c128 - return NativeValue(cplx._getvalue(), is_error=failed) - - -@box(types.NoneType) -def box_none(typ, val, c): - return c.pyapi.make_none() - -@unbox(types.NoneType) -@unbox(types.EllipsisType) -def unbox_none(typ, val, c): - return NativeValue(c.context.get_dummy_value()) - - -@box(types.NPDatetime) -def box_npdatetime(typ, val, c): - return c.pyapi.create_np_datetime(val, typ.unit_code) - -@unbox(types.NPDatetime) -def unbox_npdatetime(typ, obj, c): - val = c.pyapi.extract_np_datetime(obj) - return NativeValue(val, is_error=c.pyapi.c_api_error()) - - -@box(types.NPTimedelta) -def box_nptimedelta(typ, val, c): - return c.pyapi.create_np_timedelta(val, typ.unit_code) - -@unbox(types.NPTimedelta) -def unbox_nptimedelta(typ, obj, c): - val = c.pyapi.extract_np_timedelta(obj) - return NativeValue(val, is_error=c.pyapi.c_api_error()) - - -@box(types.RawPointer) -def box_raw_pointer(typ, val, c): - """ - Convert a raw pointer to a Python int. 
- """ - ll_intp = c.context.get_value_type(types.uintp) - addr = c.builder.ptrtoint(val, ll_intp) - return c.box(types.uintp, addr) - - -@box(types.EnumMember) -def box_enum(typ, val, c): - """ - Fetch an enum member given its native value. - """ - valobj = c.box(typ.dtype, val) - # Call the enum class with the value object - cls_obj = c.pyapi.unserialize(c.pyapi.serialize_object(typ.instance_class)) - return c.pyapi.call_function_objargs(cls_obj, (valobj,)) - -@unbox(types.EnumMember) -def unbox_enum(typ, obj, c): - """ - Convert an enum member's value to its native value. - """ - valobj = c.pyapi.object_getattr_string(obj, "value") - return c.unbox(typ.dtype, valobj) - - -# -# Composite types -# - -@box(types.Record) -def box_record(typ, val, c): - # Note we will create a copy of the record - # This is the only safe way. - size = ir.Constant(ir.IntType(32), val.type.pointee.count) - ptr = c.builder.bitcast(val, ir.PointerType(ir.IntType(8))) - return c.pyapi.recreate_record(ptr, size, typ.dtype, c.env_manager) - -@unbox(types.Record) -def unbox_record(typ, obj, c): - buf = c.pyapi.alloca_buffer() - ptr = c.pyapi.extract_record_data(obj, buf) - is_error = cgutils.is_null(c.builder, ptr) - - ltyp = c.context.get_value_type(typ) - val = c.builder.bitcast(ptr, ltyp) - - def cleanup(): - c.pyapi.release_buffer(buf) - return NativeValue(val, cleanup=cleanup, is_error=is_error) - - -@box(types.CharSeq) -def box_charseq(typ, val, c): - rawptr = cgutils.alloca_once_value(c.builder, value=val) - strptr = c.builder.bitcast(rawptr, c.pyapi.cstring) - fullsize = c.context.get_constant(types.intp, typ.count) - zero = fullsize.type(0) - one = fullsize.type(1) - count = cgutils.alloca_once_value(c.builder, zero) - - # Find the length of the string, mimicking Numpy's behaviour: - # search for the last non-null byte in the underlying storage - # (e.g. 
b'A\0\0B\0\0\0' will return the logical string b'A\0\0B') - with cgutils.loop_nest(c.builder, [fullsize], fullsize.type) as [idx]: - # Get char at idx - ch = c.builder.load(c.builder.gep(strptr, [idx])) - # If the char is a non-null-byte, store the next index as count - with c.builder.if_then(cgutils.is_not_null(c.builder, ch)): - c.builder.store(c.builder.add(idx, one), count) - - strlen = c.builder.load(count) - return c.pyapi.bytes_from_string_and_size(strptr, strlen) - -@unbox(types.CharSeq) -def unbox_charseq(typ, obj, c): - lty = c.context.get_value_type(typ) - ok, buffer, size = c.pyapi.string_as_string_and_size(obj) - - # If conversion is ok, copy the buffer to the output storage. - with cgutils.if_likely(c.builder, ok): - # Check if the returned string size fits in the charseq - storage_size = ir.Constant(size.type, typ.count) - size_fits = c.builder.icmp_unsigned("<=", size, storage_size) - - # Allow truncation of string - size = c.builder.select(size_fits, size, storage_size) - - # Initialize output to zero bytes - null_string = ir.Constant(lty, None) - outspace = cgutils.alloca_once_value(c.builder, null_string) - - # We don't need to set the NULL-terminator because the storage - # is already zero-filled. - cgutils.memcpy(c.builder, - c.builder.bitcast(outspace, buffer.type), - buffer, size) - - ret = c.builder.load(outspace) - return NativeValue(ret, is_error=c.builder.not_(ok)) - - -@unbox(types.Optional) -def unbox_optional(typ, obj, c): - """ - Convert object *obj* to a native optional structure. 
- """ - noneval = c.context.make_optional_none(c.builder, typ.type) - is_not_none = c.builder.icmp_signed('!=', obj, c.pyapi.borrow_none()) - - retptr = cgutils.alloca_once(c.builder, noneval.type) - errptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) - - with c.builder.if_else(is_not_none) as (then, orelse): - with then: - native = c.unbox(typ.type, obj) - just = c.context.make_optional_value(c.builder, - typ.type, native.value) - c.builder.store(just, retptr) - c.builder.store(native.is_error, errptr) - - with orelse: - c.builder.store(noneval, retptr) - - if native.cleanup is not None: - def cleanup(): - with c.builder.if_then(is_not_none): - native.cleanup() - else: - cleanup = None - - ret = c.builder.load(retptr) - return NativeValue(ret, is_error=c.builder.load(errptr), - cleanup=cleanup) - - -@unbox(types.SliceType) -def unbox_slice(typ, obj, c): - """ - Convert object *obj* to a native slice structure. - """ - from . import slicing - ok, start, stop, step = c.pyapi.slice_as_ints(obj) - sli = c.context.make_helper(c.builder, typ) - sli.start = start - sli.stop = stop - sli.step = step - return NativeValue(sli._getvalue(), is_error=c.builder.not_(ok)) - - -# -# Collections -# - -# NOTE: boxing functions are supposed to steal any NRT references in -# the given native value. 
- -@box(types.Array) -def box_array(typ, val, c): - nativearycls = c.context.make_array(typ) - nativeary = nativearycls(c.context, c.builder, value=val) - if c.context.enable_nrt: - np_dtype = numpy_support.as_dtype(typ.dtype) - dtypeptr = c.env_manager.read_const(c.env_manager.add_const(np_dtype)) - # Steals NRT ref - newary = c.pyapi.nrt_adapt_ndarray_to_python(typ, val, dtypeptr) - return newary - else: - parent = nativeary.parent - c.pyapi.incref(parent) - return parent - -@box(types.SmartArrayType) -def box_smart_array(typ, value, c): - # First build a Numpy array object, then wrap it in a SmartArray - a = c.context.make_helper(c.builder, typ, value=value) - # if 'parent' is set, we are re-boxing an object, so use the same logic - # as reflect. - obj = a.parent - res = cgutils.alloca_once_value(c.builder, obj) - with c.builder.if_else(cgutils.is_not_null(c.builder, obj)) as (has_parent, otherwise): - with has_parent: - c.pyapi.incref(obj) - host = c.pyapi.string_from_constant_string('host') - retn = c.pyapi.call_method(obj, 'mark_changed', [host]) - with c.builder.if_else(cgutils.is_not_null(c.builder, retn)) as (success, failure): - with success: - c.pyapi.decref(retn) - with failure: - c.builder.store(c.pyapi.get_null_object(), res) - c.pyapi.decref(host) - with otherwise: - # box into a new array: - classobj = c.pyapi.unserialize(c.pyapi.serialize_object(typ.pyclass)) - arrayobj = c.box(typ.as_array, a.data) - # Adopt arrayobj rather than copying it. - false = c.pyapi.bool_from_bool(cgutils.false_bit) - obj = c.pyapi.call_function_objargs(classobj, (arrayobj,false)) - c.pyapi.decref(classobj) - c.pyapi.decref(arrayobj) - c.pyapi.decref(false) - c.builder.store(obj, res) - - return c.builder.load(res) - -@unbox(types.Buffer) -def unbox_buffer(typ, obj, c): - """ - Convert a Py_buffer-providing object to a native array structure. 
- """ - buf = c.pyapi.alloca_buffer() - res = c.pyapi.get_buffer(obj, buf) - is_error = cgutils.is_not_null(c.builder, res) - - nativearycls = c.context.make_array(typ) - nativeary = nativearycls(c.context, c.builder) - aryptr = nativeary._getpointer() - - with cgutils.if_likely(c.builder, c.builder.not_(is_error)): - ptr = c.builder.bitcast(aryptr, c.pyapi.voidptr) - if c.context.enable_nrt: - c.pyapi.nrt_adapt_buffer_from_python(buf, ptr) - else: - c.pyapi.numba_buffer_adaptor(buf, ptr) - - def cleanup(): - c.pyapi.release_buffer(buf) - - return NativeValue(c.builder.load(aryptr), is_error=is_error, - cleanup=cleanup) - -@unbox(types.Array) -def unbox_array(typ, obj, c): - """ - Convert a Numpy array object to a native array structure. - """ - # This is necessary because unbox_buffer() does not work on some - # dtypes, e.g. datetime64 and timedelta64. - # TODO check matching dtype. - # currently, mismatching dtype will still work and causes - # potential memory corruption - nativearycls = c.context.make_array(typ) - nativeary = nativearycls(c.context, c.builder) - aryptr = nativeary._getpointer() - - ptr = c.builder.bitcast(aryptr, c.pyapi.voidptr) - if c.context.enable_nrt: - errcode = c.pyapi.nrt_adapt_ndarray_from_python(obj, ptr) - else: - errcode = c.pyapi.numba_array_adaptor(obj, ptr) - failed = cgutils.is_not_null(c.builder, errcode) - # Handle error - with c.builder.if_then(failed, likely=False): - c.pyapi.err_set_string("PyExc_TypeError", - "can't unbox array from PyObject into " - "native value. 
The object maybe of a " - "different type") - return NativeValue(c.builder.load(aryptr), is_error=failed) - - -@unbox(types.SmartArrayType) -def unbox_smart_array(typ, obj, c): - a = c.context.make_helper(c.builder, typ) - host = c.pyapi.string_from_constant_string('host') - arr = c.pyapi.call_method(obj, 'get', [host]) - with c.builder.if_else(cgutils.is_not_null(c.builder, arr)) as (success, failure): - with success: - a.data = c.unbox(typ.as_array, arr).value - a.parent = obj - c.pyapi.decref(arr) - with failure: - c.pyapi.raise_object() - - c.pyapi.decref(host) - return NativeValue(a._getvalue()) - - -@reflect(types.SmartArrayType) -def reflect_smart_array(typ, value, c): - a = c.context.make_helper(c.builder, typ, value) - arr = a.parent - host = c.pyapi.string_from_constant_string('host') - retn = c.pyapi.call_method(arr, 'mark_changed', [host]) - with c.builder.if_else(cgutils.is_not_null(c.builder, retn)) as (success, failure): - with success: - c.pyapi.decref(retn) - with failure: - c.pyapi.raise_object() - - c.pyapi.decref(host) - -@box(types.Tuple) -@box(types.UniTuple) -def box_tuple(typ, val, c): - """ - Convert native array or structure *val* to a tuple object. - """ - tuple_val = c.pyapi.tuple_new(typ.count) - - for i, dtype in enumerate(typ): - item = c.builder.extract_value(val, i) - obj = c.box(dtype, item) - c.pyapi.tuple_setitem(tuple_val, i, obj) - - return tuple_val - -@box(types.NamedTuple) -@box(types.NamedUniTuple) -def box_namedtuple(typ, val, c): - """ - Convert native array or structure *val* to a namedtuple object. - """ - cls_obj = c.pyapi.unserialize(c.pyapi.serialize_object(typ.instance_class)) - tuple_obj = box_tuple(typ, val, c) - obj = c.pyapi.call(cls_obj, tuple_obj) - c.pyapi.decref(cls_obj) - c.pyapi.decref(tuple_obj) - return obj - - -@unbox(types.BaseTuple) -def unbox_tuple(typ, obj, c): - """ - Convert tuple *obj* to a native array (if homogeneous) or structure. 
- """ - n = len(typ) - values = [] - cleanups = [] - lty = c.context.get_value_type(typ) - - is_error_ptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) - value_ptr = cgutils.alloca_once(c.builder, lty) - - # Issue #1638: need to check the tuple size - actual_size = c.pyapi.tuple_size(obj) - size_matches = c.builder.icmp_unsigned('==', actual_size, - ir.Constant(actual_size.type, n)) - with c.builder.if_then(c.builder.not_(size_matches), likely=False): - c.pyapi.err_format( - "PyExc_ValueError", - "size mismatch for tuple, expected %d element(s) but got %%zd" % (n,), - actual_size) - c.builder.store(cgutils.true_bit, is_error_ptr) - - # We unbox the items even if not `size_matches`, to avoid issues with - # the generated IR (instruction doesn't dominate all uses) - for i, eltype in enumerate(typ): - elem = c.pyapi.tuple_getitem(obj, i) - native = c.unbox(eltype, elem) - values.append(native.value) - with c.builder.if_then(native.is_error, likely=False): - c.builder.store(cgutils.true_bit, is_error_ptr) - if native.cleanup is not None: - cleanups.append(native.cleanup) - - value = c.context.make_tuple(c.builder, typ, values) - c.builder.store(value, value_ptr) - - if cleanups: - with c.builder.if_then(size_matches, likely=True): - def cleanup(): - for func in reversed(cleanups): - func() - else: - cleanup = None - - return NativeValue(c.builder.load(value_ptr), cleanup=cleanup, - is_error=c.builder.load(is_error_ptr)) - - -@box(types.List) -def box_list(typ, val, c): - """ - Convert native list *val* to a list object. 
- """ - list = listobj.ListInstance(c.context, c.builder, typ, val) - obj = list.parent - res = cgutils.alloca_once_value(c.builder, obj) - with c.builder.if_else(cgutils.is_not_null(c.builder, obj)) as (has_parent, otherwise): - with has_parent: - # List is actually reflected => return the original object - # (note not all list instances whose *type* is reflected are - # actually reflected; see numba.tests.test_lists for an example) - c.pyapi.incref(obj) - - with otherwise: - # Build a new Python list - nitems = list.size - obj = c.pyapi.list_new(nitems) - with c.builder.if_then(cgutils.is_not_null(c.builder, obj), - likely=True): - with cgutils.for_range(c.builder, nitems) as loop: - item = list.getitem(loop.index) - list.incref_value(item) - itemobj = c.box(typ.dtype, item) - c.pyapi.list_setitem(obj, loop.index, itemobj) - - c.builder.store(obj, res) - - # Steal NRT ref - c.context.nrt.decref(c.builder, typ, val) - return c.builder.load(res) - - -class _NumbaTypeHelper(object): - """A helper for acquiring `numba.typeof` for type checking. - - Usage - ----- - - # `c` is the boxing context. - with _NumbaTypeHelper(c) as nth: - # This contextmanager maintains the lifetime of the `numba.typeof` - # function. - the_numba_type = nth.typeof(some_object) - # Do work on the type object - do_checks(the_numba_type) - # Cleanup - c.pyapi.decref(the_numba_type) - # At this point *nth* should not be used. 
- """ - def __init__(self, c): - self.c = c - - def __enter__(self): - c = self.c - numba_name = c.context.insert_const_string(c.builder.module, 'numba') - numba_mod = c.pyapi.import_module_noblock(numba_name) - typeof_fn = c.pyapi.object_getattr_string(numba_mod, 'typeof') - self.typeof_fn = typeof_fn - c.pyapi.decref(numba_mod) - return self - - def __exit__(self, *args, **kwargs): - c = self.c - c.pyapi.decref(self.typeof_fn) - - def typeof(self, obj): - res = self.c.pyapi.call_function_objargs(self.typeof_fn, [obj]) - return res - - -def _python_list_to_native(typ, obj, c, size, listptr, errorptr): - """ - Construct a new native list from a Python list. - """ - def check_element_type(nth, itemobj, expected_typobj): - typobj = nth.typeof(itemobj) - # Check if *typobj* is NULL - with c.builder.if_then( - cgutils.is_null(c.builder, typobj), - likely=False, - ): - c.builder.store(cgutils.true_bit, errorptr) - loop.do_break() - # Mandate that objects all have the same exact type - type_mismatch = c.builder.icmp_signed('!=', typobj, expected_typobj) - - with c.builder.if_then(type_mismatch, likely=False): - c.builder.store(cgutils.true_bit, errorptr) - if IS_PY3: - c.pyapi.err_format( - "PyExc_TypeError", - "can't unbox heterogeneous list: %S != %S", - expected_typobj, typobj, - ) - else: - # Python2 doesn't have "%S" format string. 
- c.pyapi.err_set_string( - "PyExc_TypeError", - "can't unbox heterogeneous list", - ) - c.pyapi.decref(typobj) - loop.do_break() - c.pyapi.decref(typobj) - - # Allocate a new native list - ok, list = listobj.ListInstance.allocate_ex(c.context, c.builder, typ, size) - with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok): - with if_ok: - list.size = size - zero = ir.Constant(size.type, 0) - with c.builder.if_then(c.builder.icmp_signed('>', size, zero), - likely=True): - # Traverse Python list and unbox objects into native list - with _NumbaTypeHelper(c) as nth: - # Note: *expected_typobj* can't be NULL - expected_typobj = nth.typeof(c.pyapi.list_getitem(obj, zero)) - with cgutils.for_range(c.builder, size) as loop: - itemobj = c.pyapi.list_getitem(obj, loop.index) - check_element_type(nth, itemobj, expected_typobj) - # XXX we don't call native cleanup for each - # list element, since that would require keeping - # of which unboxings have been successful. - native = c.unbox(typ.dtype, itemobj) - with c.builder.if_then(native.is_error, likely=False): - c.builder.store(cgutils.true_bit, errorptr) - loop.do_break() - # The reference is borrowed so incref=False - list.setitem(loop.index, native.value, incref=False) - c.pyapi.decref(expected_typobj) - if typ.reflected: - list.parent = obj - # Stuff meminfo pointer into the Python object for - # later reuse. - with c.builder.if_then(c.builder.not_(c.builder.load(errorptr)), - likely=False): - c.pyapi.object_set_private_data(obj, list.meminfo) - list.set_dirty(False) - c.builder.store(list.value, listptr) - - with if_not_ok: - c.builder.store(cgutils.true_bit, errorptr) - - # If an error occurred, drop the whole native list - with c.builder.if_then(c.builder.load(errorptr)): - c.context.nrt.decref(c.builder, typ, list.value) - - -@unbox(types.List) -def unbox_list(typ, obj, c): - """ - Convert list *obj* to a native list. - - If list was previously unboxed, we reuse the existing native list - to ensure consistency. 
- """ - size = c.pyapi.list_size(obj) - - errorptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) - listptr = cgutils.alloca_once(c.builder, c.context.get_value_type(typ)) - - # See if the list was previously unboxed, if so, re-use the meminfo. - ptr = c.pyapi.object_get_private_data(obj) - - with c.builder.if_else(cgutils.is_not_null(c.builder, ptr)) \ - as (has_meminfo, otherwise): - - with has_meminfo: - # List was previously unboxed => reuse meminfo - list = listobj.ListInstance.from_meminfo(c.context, c.builder, typ, ptr) - list.size = size - if typ.reflected: - list.parent = obj - c.builder.store(list.value, listptr) - - with otherwise: - _python_list_to_native(typ, obj, c, size, listptr, errorptr) - - def cleanup(): - # Clean up the associated pointer, as the meminfo is now invalid. - c.pyapi.object_reset_private_data(obj) - - return NativeValue(c.builder.load(listptr), - is_error=c.builder.load(errorptr), - cleanup=cleanup) - - -@reflect(types.List) -def reflect_list(typ, val, c): - """ - Reflect the native list's contents into the Python object. 
- """ - if not typ.reflected: - return - if typ.dtype.reflected: - msg = "cannot reflect element of reflected container: {}\n".format(typ) - raise TypeError(msg) - - list = listobj.ListInstance(c.context, c.builder, typ, val) - with c.builder.if_then(list.dirty, likely=False): - obj = list.parent - size = c.pyapi.list_size(obj) - new_size = list.size - diff = c.builder.sub(new_size, size) - diff_gt_0 = c.builder.icmp_signed('>=', diff, - ir.Constant(diff.type, 0)) - with c.builder.if_else(diff_gt_0) as (if_grow, if_shrink): - # XXX no error checking below - with if_grow: - # First overwrite existing items - with cgutils.for_range(c.builder, size) as loop: - item = list.getitem(loop.index) - list.incref_value(item) - itemobj = c.box(typ.dtype, item) - c.pyapi.list_setitem(obj, loop.index, itemobj) - # Then add missing items - with cgutils.for_range(c.builder, diff) as loop: - idx = c.builder.add(size, loop.index) - item = list.getitem(idx) - list.incref_value(item) - itemobj = c.box(typ.dtype, item) - c.pyapi.list_append(obj, itemobj) - c.pyapi.decref(itemobj) - - with if_shrink: - # First delete list tail - c.pyapi.list_setslice(obj, new_size, size, None) - # Then overwrite remaining items - with cgutils.for_range(c.builder, new_size) as loop: - item = list.getitem(loop.index) - list.incref_value(item) - itemobj = c.box(typ.dtype, item) - c.pyapi.list_setitem(obj, loop.index, itemobj) - - # Mark the list clean, in case it is reflected twice - list.set_dirty(False) - - -def _python_set_to_native(typ, obj, c, size, setptr, errorptr): - """ - Construct a new native set from a Python set. 
- """ - # Allocate a new native set - ok, inst = setobj.SetInstance.allocate_ex(c.context, c.builder, typ, size) - with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok): - with if_ok: - # Traverse Python set and unbox objects into native set - typobjptr = cgutils.alloca_once_value(c.builder, - ir.Constant(c.pyapi.pyobj, None)) - - with c.pyapi.set_iterate(obj) as loop: - itemobj = loop.value - # Mandate that objects all have the same exact type - typobj = c.pyapi.get_type(itemobj) - expected_typobj = c.builder.load(typobjptr) - - with c.builder.if_else( - cgutils.is_null(c.builder, expected_typobj), - likely=False) as (if_first, if_not_first): - with if_first: - # First iteration => store item type - c.builder.store(typobj, typobjptr) - with if_not_first: - # Otherwise, check item type - type_mismatch = c.builder.icmp_signed('!=', typobj, - expected_typobj) - with c.builder.if_then(type_mismatch, likely=False): - c.builder.store(cgutils.true_bit, errorptr) - c.pyapi.err_set_string("PyExc_TypeError", - "can't unbox heterogeneous set") - loop.do_break() - - # XXX we don't call native cleanup for each set element, - # since that would require keeping track - # of which unboxings have been successful. - native = c.unbox(typ.dtype, itemobj) - with c.builder.if_then(native.is_error, likely=False): - c.builder.store(cgutils.true_bit, errorptr) - inst.add(native.value, do_resize=False) - - if typ.reflected: - inst.parent = obj - # Associate meminfo pointer with the Python object for later reuse. 
- with c.builder.if_then(c.builder.not_(c.builder.load(errorptr)), - likely=False): - c.pyapi.object_set_private_data(obj, inst.meminfo) - inst.set_dirty(False) - c.builder.store(inst.value, setptr) - - with if_not_ok: - c.builder.store(cgutils.true_bit, errorptr) - - # If an error occurred, drop the whole native set - with c.builder.if_then(c.builder.load(errorptr)): - c.context.nrt.decref(c.builder, typ, inst.value) - - -@unbox(types.Set) -def unbox_set(typ, obj, c): - """ - Convert set *obj* to a native set. - - If set was previously unboxed, we reuse the existing native set - to ensure consistency. - """ - size = c.pyapi.set_size(obj) - - errorptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) - setptr = cgutils.alloca_once(c.builder, c.context.get_value_type(typ)) - - # See if the set was previously unboxed, if so, re-use the meminfo. - ptr = c.pyapi.object_get_private_data(obj) - - with c.builder.if_else(cgutils.is_not_null(c.builder, ptr)) \ - as (has_meminfo, otherwise): - - with has_meminfo: - # Set was previously unboxed => reuse meminfo - inst = setobj.SetInstance.from_meminfo(c.context, c.builder, typ, ptr) - if typ.reflected: - inst.parent = obj - c.builder.store(inst.value, setptr) - - with otherwise: - _python_set_to_native(typ, obj, c, size, setptr, errorptr) - - def cleanup(): - # Clean up the associated pointer, as the meminfo is now invalid. - c.pyapi.object_reset_private_data(obj) - - return NativeValue(c.builder.load(setptr), - is_error=c.builder.load(errorptr), - cleanup=cleanup) - - -def _native_set_to_python_list(typ, payload, c): - """ - Create a Python list from a native set's items. 
- """ - nitems = payload.used - listobj = c.pyapi.list_new(nitems) - ok = cgutils.is_not_null(c.builder, listobj) - with c.builder.if_then(ok, likely=True): - index = cgutils.alloca_once_value(c.builder, - ir.Constant(nitems.type, 0)) - with payload._iterate() as loop: - i = c.builder.load(index) - item = loop.entry.key - itemobj = c.box(typ.dtype, item) - c.pyapi.list_setitem(listobj, i, itemobj) - i = c.builder.add(i, ir.Constant(i.type, 1)) - c.builder.store(i, index) - - return ok, listobj - - -@box(types.Set) -def box_set(typ, val, c): - """ - Convert native set *val* to a set object. - """ - inst = setobj.SetInstance(c.context, c.builder, typ, val) - obj = inst.parent - res = cgutils.alloca_once_value(c.builder, obj) - - with c.builder.if_else(cgutils.is_not_null(c.builder, obj)) as (has_parent, otherwise): - with has_parent: - # Set is actually reflected => return the original object - # (note not all set instances whose *type* is reflected are - # actually reflected; see numba.tests.test_sets for an example) - c.pyapi.incref(obj) - - with otherwise: - # Build a new Python list and then create a set from that - payload = inst.payload - ok, listobj = _native_set_to_python_list(typ, payload, c) - with c.builder.if_then(ok, likely=True): - obj = c.pyapi.set_new(listobj) - c.pyapi.decref(listobj) - c.builder.store(obj, res) - - # Steal NRT ref - c.context.nrt.decref(c.builder, typ, val) - return c.builder.load(res) - -@reflect(types.Set) -def reflect_set(typ, val, c): - """ - Reflect the native set's contents into the Python object. 
- """ - if not typ.reflected: - return - inst = setobj.SetInstance(c.context, c.builder, typ, val) - payload = inst.payload - - with c.builder.if_then(payload.dirty, likely=False): - obj = inst.parent - # XXX errors are not dealt with below - c.pyapi.set_clear(obj) - - # Build a new Python list and then update the set with that - ok, listobj = _native_set_to_python_list(typ, payload, c) - with c.builder.if_then(ok, likely=True): - c.pyapi.set_update(obj, listobj) - c.pyapi.decref(listobj) - - # Mark the set clean, in case it is reflected twice - inst.set_dirty(False) - - -# -# Other types -# - -@box(types.Generator) -def box_generator(typ, val, c): - return c.pyapi.from_native_generator(val, typ, c.env_manager.env_ptr) - -@unbox(types.Generator) -def unbox_generator(typ, obj, c): - return c.pyapi.to_native_generator(obj, typ) - - -@box(types.DType) -def box_dtype(typ, val, c): - np_dtype = numpy_support.as_dtype(typ.dtype) - return c.pyapi.unserialize(c.pyapi.serialize_object(np_dtype)) - - -@box(types.PyObject) -@box(types.Object) -def box_pyobject(typ, val, c): - return val - -@unbox(types.PyObject) -@unbox(types.Object) -def unbox_pyobject(typ, obj, c): - return NativeValue(obj) - - -@unbox(types.ExternalFunctionPointer) -def unbox_funcptr(typ, obj, c): - if typ.get_pointer is None: - raise NotImplementedError(typ) - - # Call get_pointer() on the object to get the raw pointer value - ptrty = c.context.get_function_pointer_type(typ) - ret = cgutils.alloca_once_value(c.builder, - ir.Constant(ptrty, None), - name='fnptr') - ser = c.pyapi.serialize_object(typ.get_pointer) - get_pointer = c.pyapi.unserialize(ser) - with cgutils.if_likely(c.builder, - cgutils.is_not_null(c.builder, get_pointer)): - intobj = c.pyapi.call_function_objargs(get_pointer, (obj,)) - c.pyapi.decref(get_pointer) - with cgutils.if_likely(c.builder, - cgutils.is_not_null(c.builder, intobj)): - ptr = c.pyapi.long_as_voidptr(intobj) - c.pyapi.decref(intobj) - c.builder.store(c.builder.bitcast(ptr, 
ptrty), ret) - return NativeValue(c.builder.load(ret), is_error=c.pyapi.c_api_error()) - -@box(types.DeferredType) -def box_deferred(typ, val, c): - out = c.pyapi.from_native_value(typ.get(), - c.builder.extract_value(val, [0]), - env_manager=c.env_manager) - return out - - -@unbox(types.DeferredType) -def unbox_deferred(typ, obj, c): - native_value = c.pyapi.to_native_value(typ.get(), obj) - model = c.context.data_model_manager[typ] - res = model.set(c.builder, model.make_uninitialized(), native_value.value) - return NativeValue(res, is_error=native_value.is_error, - cleanup=native_value.cleanup) - - -@unbox(types.Dispatcher) -def unbox_dispatcher(typ, obj, c): - # A dispatcher object has no meaningful value in native code - res = c.context.get_constant_undef(typ) - return NativeValue(res) - - -def unbox_unsupported(typ, obj, c): - c.pyapi.err_set_string("PyExc_TypeError", - "can't unbox {!r} type".format(typ)) - res = c.pyapi.get_null_object() - return NativeValue(res, is_error=cgutils.true_bit) - - -def box_unsupported(typ, val, c): - msg = "cannot convert native %s to Python object" % (typ,) - c.pyapi.err_set_string("PyExc_TypeError", msg) - res = c.pyapi.get_null_object() - return res - diff --git a/numba/numba/targets/builtins.py b/numba/numba/targets/builtins.py deleted file mode 100644 index 4347310b2..000000000 --- a/numba/numba/targets/builtins.py +++ /dev/null @@ -1,433 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import math -from functools import reduce - -import numpy as np - -from llvmlite import ir -from llvmlite.llvmpy.core import Type, Constant -import llvmlite.llvmpy.core as lc - -from .imputils import (lower_builtin, lower_getattr, lower_getattr_generic, - lower_cast, lower_constant, iternext_impl, - call_getiter, call_iternext, - impl_ret_borrowed, impl_ret_untracked) -from .. 
import typing, types, cgutils, utils - - -@lower_builtin('is not', types.Any, types.Any) -def generic_is_not(context, builder, sig, args): - """ - Implement `x is not y` as `not (x is y)`. - """ - is_impl = context.get_function('is', sig) - return builder.not_(is_impl(builder, args)) - - -@lower_builtin('is', types.Any, types.Any) -def generic_is(context, builder, sig, args): - """ - Default implementation for `x is y` - """ - lhs_type, rhs_type = sig.args - # the lhs and rhs have the same type - if lhs_type == rhs_type: - # mutable types - if lhs_type.mutable: - raise NotImplementedError('no default `is` implementation') - # immutable types - else: - # fallbacks to `==` - try: - eq_impl = context.get_function('==', sig) - except NotImplementedError: - # no `==` implemented for this type - return cgutils.false_bit - else: - return eq_impl(builder, args) - else: - return cgutils.false_bit - -#------------------------------------------------------------------------------- - -@lower_getattr_generic(types.DeferredType) -def deferred_getattr(context, builder, typ, value, attr): - """ - Deferred.__getattr__ => redirect to the actual type. 
- """ - inner_type = typ.get() - val = context.cast(builder, value, typ, inner_type) - imp = context.get_getattr(inner_type, attr) - return imp(context, builder, inner_type, val, attr) - -@lower_cast(types.Any, types.DeferredType) -@lower_cast(types.Optional, types.DeferredType) -@lower_cast(types.Boolean, types.DeferredType) -def any_to_deferred(context, builder, fromty, toty, val): - actual = context.cast(builder, val, fromty, toty.get()) - model = context.data_model_manager[toty] - return model.set(builder, model.make_uninitialized(), actual) - -@lower_cast(types.DeferredType, types.Any) -@lower_cast(types.DeferredType, types.Boolean) -@lower_cast(types.DeferredType, types.Optional) -def deferred_to_any(context, builder, fromty, toty, val): - model = context.data_model_manager[fromty] - val = model.get(builder, val) - return context.cast(builder, val, fromty.get(), toty) - - -#------------------------------------------------------------------------------ - -@lower_builtin('getitem', types.CPointer, types.Integer) -def getitem_cpointer(context, builder, sig, args): - base_ptr, idx = args - elem_ptr = builder.gep(base_ptr, [idx]) - res = builder.load(elem_ptr) - return impl_ret_borrowed(context, builder, sig.return_type, res) - - -@lower_builtin('setitem', types.CPointer, types.Integer, types.Any) -def setitem_cpointer(context, builder, sig, args): - base_ptr, idx, val = args - elem_ptr = builder.gep(base_ptr, [idx]) - builder.store(val, elem_ptr) - - -#------------------------------------------------------------------------------- - -def do_minmax(context, builder, argtys, args, cmpop): - assert len(argtys) == len(args), (argtys, args) - assert len(args) > 0 - - def binary_minmax(accumulator, value): - # This is careful to reproduce Python's algorithm, e.g. 
- # max(1.5, nan, 2.5) should return 2.5 (not nan or 1.5) - accty, acc = accumulator - vty, v = value - ty = context.typing_context.unify_types(accty, vty) - assert ty is not None - acc = context.cast(builder, acc, accty, ty) - v = context.cast(builder, v, vty, ty) - cmpsig = typing.signature(types.boolean, ty, ty) - ge = context.get_function(cmpop, cmpsig) - pred = ge(builder, (v, acc)) - res = builder.select(pred, v, acc) - return ty, res - - typvals = zip(argtys, args) - resty, resval = reduce(binary_minmax, typvals) - return resval - - -@lower_builtin(max, types.BaseTuple) -def max_iterable(context, builder, sig, args): - argtys = list(sig.args[0]) - args = cgutils.unpack_tuple(builder, args[0]) - return do_minmax(context, builder, argtys, args, '>') - -@lower_builtin(max, types.VarArg(types.Any)) -def max_vararg(context, builder, sig, args): - return do_minmax(context, builder, sig.args, args, '>') - -@lower_builtin(min, types.BaseTuple) -def min_iterable(context, builder, sig, args): - argtys = list(sig.args[0]) - args = cgutils.unpack_tuple(builder, args[0]) - return do_minmax(context, builder, argtys, args, '<') - -@lower_builtin(min, types.VarArg(types.Any)) -def min_vararg(context, builder, sig, args): - return do_minmax(context, builder, sig.args, args, '<') - - -def _round_intrinsic(tp): - # round() rounds half to even on Python 3, away from zero on Python 2. 
- if utils.IS_PY3: - return "llvm.rint.f%d" % (tp.bitwidth,) - else: - return "llvm.round.f%d" % (tp.bitwidth,) - -@lower_builtin(round, types.Float) -def round_impl_unary(context, builder, sig, args): - fltty = sig.args[0] - llty = context.get_value_type(fltty) - module = builder.module - fnty = Type.function(llty, [llty]) - fn = module.get_or_insert_function(fnty, name=_round_intrinsic(fltty)) - res = builder.call(fn, args) - if utils.IS_PY3: - # unary round() returns an int on Python 3 - res = builder.fptosi(res, context.get_value_type(sig.return_type)) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin(round, types.Float, types.Integer) -def round_impl_binary(context, builder, sig, args): - fltty = sig.args[0] - # Allow calling the intrinsic from the Python implementation below. - # This avoids the conversion to an int in Python 3's unary round(). - _round = types.ExternalFunction( - _round_intrinsic(fltty), typing.signature(fltty, fltty)) - - def round_ndigits(x, ndigits): - if math.isinf(x) or math.isnan(x): - return x - - if ndigits >= 0: - if ndigits > 22: - # pow1 and pow2 are each safe from overflow, but - # pow1*pow2 ~= pow(10.0, ndigits) might overflow. 
- pow1 = 10.0 ** (ndigits - 22) - pow2 = 1e22 - else: - pow1 = 10.0 ** ndigits - pow2 = 1.0 - y = (x * pow1) * pow2 - if math.isinf(y): - return x - return (_round(y) / pow2) / pow1 - - else: - pow1 = 10.0 ** (-ndigits) - y = x / pow1 - return _round(y) * pow1 - - res = context.compile_internal(builder, round_ndigits, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -#------------------------------------------------------------------------------- -# Numeric constructors - -@lower_builtin(int, types.Any) -@lower_builtin(float, types.Any) -def int_impl(context, builder, sig, args): - [ty] = sig.args - [val] = args - res = context.cast(builder, val, ty, sig.return_type) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin(complex, types.VarArg(types.Any)) -def complex_impl(context, builder, sig, args): - complex_type = sig.return_type - float_type = complex_type.underlying_float - if len(sig.args) == 1: - [argty] = sig.args - [arg] = args - if isinstance(argty, types.Complex): - # Cast Complex* to Complex* - res = context.cast(builder, arg, argty, complex_type) - return impl_ret_untracked(context, builder, sig.return_type, res) - else: - real = context.cast(builder, arg, argty, float_type) - imag = context.get_constant(float_type, 0) - - elif len(sig.args) == 2: - [realty, imagty] = sig.args - [real, imag] = args - real = context.cast(builder, real, realty, float_type) - imag = context.cast(builder, imag, imagty, float_type) - - cmplx = context.make_complex(builder, complex_type) - cmplx.real = real - cmplx.imag = imag - res = cmplx._getvalue() - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin(types.NumberClass, types.Any) -def number_constructor(context, builder, sig, args): - """ - Call a number class, e.g. np.int32(...) 
- """ - if isinstance(sig.return_type, types.Array): - # Array constructor - impl = context.get_function(np.array, sig) - return impl(builder, args) - else: - # Scalar constructor - [val] = args - [valty] = sig.args - return context.cast(builder, val, valty, sig.return_type) - - -#------------------------------------------------------------------------------- -# Constants - -@lower_constant(types.Dummy) -def constant_dummy(context, builder, ty, pyval): - # This handles None, etc. - return context.get_dummy_value() - -@lower_constant(types.ExternalFunctionPointer) -def constant_function_pointer(context, builder, ty, pyval): - ptrty = context.get_function_pointer_type(ty) - ptrval = context.add_dynamic_addr(builder, ty.get_pointer(pyval), - info=str(pyval)) - return builder.bitcast(ptrval, ptrty) - - -# ----------------------------------------------------------------------------- - -@lower_builtin(type, types.Any) -def type_impl(context, builder, sig, args): - """ - One-argument type() builtin. 
- """ - return context.get_dummy_value() - - -@lower_builtin(iter, types.IterableType) -def iter_impl(context, builder, sig, args): - ty, = sig.args - val, = args - iterval = call_getiter(context, builder, ty, val) - return iterval - - -@lower_builtin(next, types.IteratorType) -def next_impl(context, builder, sig, args): - iterty, = sig.args - iterval, = args - - res = call_iternext(context, builder, iterty, iterval) - - with builder.if_then(builder.not_(res.is_valid()), likely=False): - context.call_conv.return_user_exc(builder, StopIteration, ()) - - return res.yielded_value() - - -# ----------------------------------------------------------------------------- - -@lower_builtin("not in", types.Any, types.Any) -def not_in(context, builder, sig, args): - def in_impl(a, b): - return a in b - - res = context.compile_internal(builder, in_impl, sig, args) - return builder.not_(res) - - -# ----------------------------------------------------------------------------- - -@lower_builtin(len, types.ConstSized) -def constsized_len(context, builder, sig, args): - [ty] = sig.args - retty = sig.return_type - res = context.get_constant(retty, len(ty.types)) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin(bool, types.Sized) -def sized_bool(context, builder, sig, args): - [ty] = sig.args - if len(ty): - return cgutils.true_bit - else: - return cgutils.false_bit - -# ----------------------------------------------------------------------------- - -def get_type_max_value(typ): - if isinstance(typ, types.Float): - bw = typ.bitwidth - if bw == 32: - return np.finfo(np.float32).max - if bw == 64: - return np.finfo(np.float64).max - raise NotImplementedError("Unsupported floating point type") - if isinstance(typ, types.Integer): - return typ.maxval - raise NotImplementedError("Unsupported type") - -def get_type_min_value(typ): - if isinstance(typ, types.Float): - bw = typ.bitwidth - if bw == 32: - return np.finfo(np.float32).min - if bw == 64: - 
return np.finfo(np.float64).min - raise NotImplementedError("Unsupported floating point type") - if isinstance(typ, types.Integer): - return typ.minval - raise NotImplementedError("Unsupported type") - -@lower_builtin(get_type_min_value, types.NumberClass) -@lower_builtin(get_type_min_value, types.DType) -def lower_get_type_min_value(context, builder, sig, args): - typ = sig.args[0].dtype - bw = typ.bitwidth - - if isinstance(typ, types.Integer): - lty = ir.IntType(bw) - val = typ.minval - res = ir.Constant(lty, val) - elif isinstance(typ, types.Float): - if bw == 32: - lty = ir.FloatType() - elif bw == 64: - lty = ir.DoubleType() - else: - raise NotImplementedError("llvmlite only supports 32 and 64 bit floats") - npty = getattr(np, 'float{}'.format(bw)) - res = ir.Constant(lty, np.finfo(npty).min) - return impl_ret_untracked(context, builder, lty, res) - -@lower_builtin(get_type_max_value, types.NumberClass) -@lower_builtin(get_type_max_value, types.DType) -def lower_get_type_max_value(context, builder, sig, args): - typ = sig.args[0].dtype - bw = typ.bitwidth - - if isinstance(typ, types.Integer): - lty = ir.IntType(bw) - val = typ.maxval - res = ir.Constant(lty, val) - elif isinstance(typ, types.Float): - if bw == 32: - lty = ir.FloatType() - elif bw == 64: - lty = ir.DoubleType() - else: - raise NotImplementedError("llvmlite only supports 32 and 64 bit floats") - npty = getattr(np, 'float{}'.format(bw)) - res = ir.Constant(lty, np.finfo(npty).max) - return impl_ret_untracked(context, builder, lty, res) - -# ----------------------------------------------------------------------------- - -from numba.typing.builtins import IndexValue, IndexValueType -from numba.extending import overload - -@lower_builtin(IndexValue, types.intp, types.Type) -@lower_builtin(IndexValue, types.uintp, types.Type) -def impl_index_value(context, builder, sig, args): - typ = sig.return_type - index, value = args - index_value = cgutils.create_struct_proxy(typ)(context, builder) - 
index_value.index = index - index_value.value = value - return index_value._getvalue() - -@overload(min) -def indval_min(*args): - if len(args) == 2 and (isinstance(args[0], IndexValueType) - and isinstance(args[1], IndexValueType)): - def min_impl(indval1, indval2): - if indval1.value > indval2.value: - return indval2 - return indval1 - return min_impl - -@overload(max) -def indval_max(*args): - if len(args) == 2 and (isinstance(args[0], IndexValueType) - and isinstance(args[1], IndexValueType)): - def max_impl(indval1, indval2): - if indval2.value > indval1.value: - return indval2 - return indval1 - return max_impl diff --git a/numba/numba/targets/callconv.py b/numba/numba/targets/callconv.py deleted file mode 100644 index ecdacff80..000000000 --- a/numba/numba/targets/callconv.py +++ /dev/null @@ -1,497 +0,0 @@ -""" -Calling conventions for Numba-compiled functions. -""" - -from collections import namedtuple -import itertools - -from llvmlite import ir as ir - -from numba import cgutils, types -from .base import PYOBJECT, GENERIC_POINTER - - -Status = namedtuple("Status", - ("code", - # If the function returned ok (a value or None) - "is_ok", - # If the function returned None - "is_none", - # If the function errored out (== not is_ok) - "is_error", - # If the generator exited with StopIteration - "is_stop_iteration", - # If the function errored with an already set exception - "is_python_exc", - # If the function errored with a user exception - "is_user_exc", - # The pointer to the exception info structure (for user exceptions) - "excinfoptr", - )) - -int32_t = ir.IntType(32) -errcode_t = int32_t - -def _const_int(code): - return ir.Constant(errcode_t, code) - -RETCODE_OK = _const_int(0) -RETCODE_EXC = _const_int(-1) -RETCODE_NONE = _const_int(-2) -# StopIteration -RETCODE_STOPIT = _const_int(-3) - -FIRST_USEREXC = 1 - -RETCODE_USEREXC = _const_int(FIRST_USEREXC) - - - - -class BaseCallConv(object): - - def __init__(self, context): - self.context = context - - 
def return_optional_value(self, builder, retty, valty, value): - if valty == types.none: - # Value is none - self.return_native_none(builder) - - elif retty == valty: - # Value is an optional, need a runtime switch - optval = self.context.make_helper(builder, retty, value=value) - - validbit = cgutils.as_bool_bit(builder, optval.valid) - with builder.if_then(validbit): - retval = self.context.get_return_value(builder, retty.type, - optval.data) - self.return_value(builder, retval) - - self.return_native_none(builder) - - elif not isinstance(valty, types.Optional): - # Value is not an optional, need a cast - if valty != retty.type: - value = self.context.cast(builder, value, fromty=valty, - toty=retty.type) - retval = self.context.get_return_value(builder, retty.type, value) - self.return_value(builder, retval) - - else: - raise NotImplementedError("returning {0} for {1}".format(valty, - retty)) - - def return_native_none(self, builder): - self._return_errcode_raw(builder, RETCODE_NONE) - - def return_exc(self, builder): - self._return_errcode_raw(builder, RETCODE_EXC) - - def return_stop_iteration(self, builder): - self._return_errcode_raw(builder, RETCODE_STOPIT) - - def get_return_type(self, ty): - """ - Get the actual type of the return argument for Numba type *ty*. - """ - restype = self.context.data_model_manager[ty].get_return_type() - return restype.as_pointer() - - def init_call_helper(self, builder): - """ - Initialize and return a call helper object for the given builder. - """ - ch = self._make_call_helper(builder) - builder.__call_helper = ch - return ch - - def _get_call_helper(self, builder): - return builder.__call_helper - - def raise_error(self, builder, api, status): - """ - Given a non-ok *status*, raise the corresponding Python exception. - """ - bbend = builder.function.append_basic_block() - - with builder.if_then(status.is_user_exc): - # Unserialize user exception. - # Make sure another error may not interfere. 
- api.err_clear() - exc = api.unserialize(status.excinfoptr) - with cgutils.if_likely(builder, - cgutils.is_not_null(builder, exc)): - api.raise_object(exc) # steals ref - builder.branch(bbend) - - with builder.if_then(status.is_stop_iteration): - api.err_set_none("PyExc_StopIteration") - builder.branch(bbend) - - with builder.if_then(status.is_python_exc): - # Error already raised => nothing to do - builder.branch(bbend) - - api.err_set_string("PyExc_SystemError", - "unknown error when calling native function") - builder.branch(bbend) - - builder.position_at_end(bbend) - - def decode_arguments(self, builder, argtypes, func): - """ - Get the decoded (unpacked) Python arguments with *argtypes* - from LLVM function *func*. A tuple of LLVM values is returned. - """ - raw_args = self.get_arguments(func) - arginfo = self._get_arg_packer(argtypes) - return arginfo.from_arguments(builder, raw_args) - - def _fix_argtypes(self, argtypes): - """ - Fix argument types, removing any omitted arguments. - """ - return tuple(ty for ty in argtypes - if not isinstance(ty, types.Omitted)) - - def _get_arg_packer(self, argtypes): - """ - Get an argument packer for the given argument types. - """ - return self.context.get_arg_packer(argtypes) - - -class MinimalCallConv(BaseCallConv): - """ - A minimal calling convention, suitable for e.g. GPU targets. - The implemented function signature is: - - retcode_t (*, ... ) - - The return code will be one of the RETCODE_* constants or a - function-specific user exception id (>= RETCODE_USEREXC). - - Caller is responsible for allocating a slot for the return value - (passed as a pointer in the first argument). 
- """ - - def _make_call_helper(self, builder): - return _MinimalCallHelper() - - def return_value(self, builder, retval): - retptr = builder.function.args[0] - assert retval.type == retptr.type.pointee, \ - (str(retval.type), str(retptr.type.pointee)) - builder.store(retval, retptr) - self._return_errcode_raw(builder, RETCODE_OK) - - def return_user_exc(self, builder, exc, exc_args=None): - if exc is not None and not issubclass(exc, BaseException): - raise TypeError("exc should be None or exception class, got %r" - % (exc,)) - if exc_args is not None and not isinstance(exc_args, tuple): - raise TypeError("exc_args should be None or tuple, got %r" - % (exc_args,)) - call_helper = self._get_call_helper(builder) - exc_id = call_helper._add_exception(exc, exc_args) - self._return_errcode_raw(builder, _const_int(exc_id)) - - def return_status_propagate(self, builder, status): - self._return_errcode_raw(builder, status.code) - - def _return_errcode_raw(self, builder, code): - if isinstance(code, int): - code = _const_int(code) - builder.ret(code) - - def _get_return_status(self, builder, code): - """ - Given a return *code*, get a Status instance. - """ - norm = builder.icmp_signed('==', code, RETCODE_OK) - none = builder.icmp_signed('==', code, RETCODE_NONE) - ok = builder.or_(norm, none) - err = builder.not_(ok) - exc = builder.icmp_signed('==', code, RETCODE_EXC) - is_stop_iteration = builder.icmp_signed('==', code, RETCODE_STOPIT) - is_user_exc = builder.icmp_signed('>=', code, RETCODE_USEREXC) - - status = Status(code=code, - is_ok=ok, - is_error=err, - is_python_exc=exc, - is_none=none, - is_user_exc=is_user_exc, - is_stop_iteration=is_stop_iteration, - excinfoptr=None) - return status - - def get_function_type(self, restype, argtypes): - """ - Get the implemented Function type for *restype* and *argtypes*. 
- """ - arginfo = self._get_arg_packer(argtypes) - argtypes = list(arginfo.argument_types) - resptr = self.get_return_type(restype) - fnty = ir.FunctionType(errcode_t, [resptr] + argtypes) - return fnty - - def decorate_function(self, fn, args, fe_argtypes, noalias=False): - """ - Set names and attributes of function arguments. - """ - assert not noalias - arginfo = self._get_arg_packer(fe_argtypes) - arginfo.assign_names(self.get_arguments(fn), - ['arg.' + a for a in args]) - fn.args[0].name = ".ret" - return fn - - def get_arguments(self, func): - """ - Get the Python-level arguments of LLVM *func*. - """ - return func.args[1:] - - def call_function(self, builder, callee, resty, argtys, args): - """ - Call the Numba-compiled *callee*. - """ - retty = callee.args[0].type.pointee - retvaltmp = cgutils.alloca_once(builder, retty) - # initialize return value - builder.store(cgutils.get_null_value(retty), retvaltmp) - - arginfo = self._get_arg_packer(argtys) - args = arginfo.as_arguments(builder, args) - realargs = [retvaltmp] + list(args) - code = builder.call(callee, realargs) - status = self._get_return_status(builder, code) - retval = builder.load(retvaltmp) - out = self.context.get_returned_value(builder, resty, retval) - return status, out - - -class _MinimalCallHelper(object): - """ - A call helper object for the "minimal" calling convention. - User exceptions are represented as integer codes and stored in - a mapping for retrieval from the caller. 
- """ - - def __init__(self): - self.exceptions = {} - - def _add_exception(self, exc, exc_args): - exc_id = len(self.exceptions) + FIRST_USEREXC - self.exceptions[exc_id] = exc, exc_args - return exc_id - - def get_exception(self, exc_id): - try: - return self.exceptions[exc_id] - except KeyError: - msg = "unknown error %d in native function" % exc_id - return SystemError, (msg,) - - -excinfo_t = ir.LiteralStructType([GENERIC_POINTER, int32_t]) -excinfo_ptr_t = ir.PointerType(excinfo_t) - - -class CPUCallConv(BaseCallConv): - """ - The calling convention for CPU targets. - The implemented function signature is: - - retcode_t (*, excinfo **, ... ) - - The return code will be one of the RETCODE_* constants. - If RETCODE_USEREXC, the exception info pointer will be filled with - a pointer to a constant struct describing the raised exception. - - Caller is responsible for allocating slots for the return value - and the exception info pointer (passed as first and second arguments, - respectively). 
- """ - _status_ids = itertools.count(1) - - def _make_call_helper(self, builder): - return None - - def return_value(self, builder, retval): - retptr = self._get_return_argument(builder.function) - assert retval.type == retptr.type.pointee, \ - (str(retval.type), str(retptr.type.pointee)) - builder.store(retval, retptr) - self._return_errcode_raw(builder, RETCODE_OK) - - def return_user_exc(self, builder, exc, exc_args=None): - if exc is not None and not issubclass(exc, BaseException): - raise TypeError("exc should be None or exception class, got %r" - % (exc,)) - if exc_args is not None and not isinstance(exc_args, tuple): - raise TypeError("exc_args should be None or tuple, got %r" - % (exc_args,)) - pyapi = self.context.get_python_api(builder) - # Build excinfo struct - if exc_args is not None: - exc = (exc, exc_args) - struct_gv = pyapi.serialize_object(exc) - excptr = self._get_excinfo_argument(builder.function) - builder.store(struct_gv, excptr) - self._return_errcode_raw(builder, RETCODE_USEREXC) - - def return_status_propagate(self, builder, status): - excptr = self._get_excinfo_argument(builder.function) - builder.store(status.excinfoptr, excptr) - self._return_errcode_raw(builder, status.code) - - def _return_errcode_raw(self, builder, code): - builder.ret(code) - - def _get_return_status(self, builder, code, excinfoptr): - """ - Given a return *code* and *excinfoptr*, get a Status instance. 
- """ - norm = builder.icmp_signed('==', code, RETCODE_OK) - none = builder.icmp_signed('==', code, RETCODE_NONE) - exc = builder.icmp_signed('==', code, RETCODE_EXC) - is_stop_iteration = builder.icmp_signed('==', code, RETCODE_STOPIT) - ok = builder.or_(norm, none) - err = builder.not_(ok) - is_user_exc = builder.icmp_signed('>=', code, RETCODE_USEREXC) - excinfoptr = builder.select(is_user_exc, excinfoptr, - ir.Constant(excinfo_ptr_t, ir.Undefined)) - - status = Status(code=code, - is_ok=ok, - is_error=err, - is_python_exc=exc, - is_none=none, - is_user_exc=is_user_exc, - is_stop_iteration=is_stop_iteration, - excinfoptr=excinfoptr) - return status - - def get_function_type(self, restype, argtypes): - """ - Get the implemented Function type for *restype* and *argtypes*. - """ - arginfo = self._get_arg_packer(argtypes) - argtypes = list(arginfo.argument_types) - resptr = self.get_return_type(restype) - fnty = ir.FunctionType(errcode_t, - [resptr, ir.PointerType(excinfo_ptr_t)] - + argtypes) - return fnty - - def decorate_function(self, fn, args, fe_argtypes, noalias=False): - """ - Set names of function arguments, and add useful attributes to them. - """ - arginfo = self._get_arg_packer(fe_argtypes) - arginfo.assign_names(self.get_arguments(fn), - ['arg.' + a for a in args]) - retarg = self._get_return_argument(fn) - retarg.name = "retptr" - retarg.add_attribute("nocapture") - retarg.add_attribute("noalias") - excarg = self._get_excinfo_argument(fn) - excarg.name = "excinfo" - excarg.add_attribute("nocapture") - excarg.add_attribute("noalias") - - if noalias: - args = self.get_arguments(fn) - for a in args: - if isinstance(a.type, ir.PointerType): - a.add_attribute("nocapture") - a.add_attribute("noalias") - return fn - - def get_arguments(self, func): - """ - Get the Python-level arguments of LLVM *func*. 
- """ - return func.args[2:] - - def _get_return_argument(self, func): - return func.args[0] - - def _get_excinfo_argument(self, func): - return func.args[1] - - def call_function(self, builder, callee, resty, argtys, args): - """ - Call the Numba-compiled *callee*. - """ - # XXX better fix for callees that are not function values - # (pointers to function; thus have no `.args` attribute) - retty = self._get_return_argument(callee.function_type).pointee - - retvaltmp = cgutils.alloca_once(builder, retty) - # initialize return value to zeros - builder.store(cgutils.get_null_value(retty), retvaltmp) - - excinfoptr = cgutils.alloca_once(builder, ir.PointerType(excinfo_t), - name="excinfo") - - arginfo = self._get_arg_packer(argtys) - args = list(arginfo.as_arguments(builder, args)) - realargs = [retvaltmp, excinfoptr] + args - code = builder.call(callee, realargs) - status = self._get_return_status(builder, code, - builder.load(excinfoptr)) - retval = builder.load(retvaltmp) - out = self.context.get_returned_value(builder, resty, retval) - return status, out - - -class ErrorModel(object): - - def __init__(self, call_conv): - self.call_conv = call_conv - - def fp_zero_division(self, builder, exc_args=None): - if self.raise_on_fp_zero_division: - self.call_conv.return_user_exc(builder, ZeroDivisionError, exc_args) - return True - else: - return False - - -class PythonErrorModel(ErrorModel): - """ - The Python error model. Any invalid FP input raises an exception. - """ - raise_on_fp_zero_division = True - - -class NumpyErrorModel(ErrorModel): - """ - In the Numpy error model, floating-point errors don't raise an - exception. The FPU exception state is inspected by Numpy at the - end of a ufunc's execution and a warning is raised if appropriate. - - Note there's no easy way to set the FPU exception state from LLVM. 
- Instructions known to set an FP exception can be optimized away: - https://llvm.org/bugs/show_bug.cgi?id=6050 - http://lists.llvm.org/pipermail/llvm-dev/2014-September/076918.html - http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20140929/237997.html - """ - raise_on_fp_zero_division = False - - -error_models = { - 'python': PythonErrorModel, - 'numpy': NumpyErrorModel, - } - - -def create_error_model(model_name, context): - """ - Create an error model instance for the given target context. - """ - return error_models[model_name](context.call_conv) diff --git a/numba/numba/targets/cffiimpl.py b/numba/numba/targets/cffiimpl.py deleted file mode 100644 index ebc8906a6..000000000 --- a/numba/numba/targets/cffiimpl.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -Implementation of some CFFI functions -""" - -from __future__ import print_function, absolute_import, division - -from numba.targets.imputils import Registry -from numba import types -from . import arrayobj - -registry = Registry() - -@registry.lower('ffi.from_buffer', types.Buffer) -def from_buffer(context, builder, sig, args): - assert len(sig.args) == 1 - assert len(args) == 1 - [fromty] = sig.args - [val] = args - # Type inference should have prevented passing a buffer from an - # array to a pointer of the wrong type - assert fromty.dtype == sig.return_type.dtype - ary = arrayobj.make_array(fromty)(context, builder, val) - return ary.data diff --git a/numba/numba/targets/cmathimpl.py b/numba/numba/targets/cmathimpl.py deleted file mode 100644 index a22390db1..000000000 --- a/numba/numba/targets/cmathimpl.py +++ /dev/null @@ -1,522 +0,0 @@ -""" -Implement the cmath module functions. 
-""" - -from __future__ import print_function, absolute_import, division - -import cmath -import math - -import llvmlite.llvmpy.core as lc -from llvmlite.llvmpy.core import Type - -from numba.targets.imputils import Registry, impl_ret_untracked -from numba import types, cgutils, utils -from numba.typing import signature -from . import builtins, mathimpl - -registry = Registry() -lower = registry.lower - - -def is_nan(builder, z): - return builder.fcmp_unordered('uno', z.real, z.imag) - -def is_inf(builder, z): - return builder.or_(mathimpl.is_inf(builder, z.real), - mathimpl.is_inf(builder, z.imag)) - -def is_finite(builder, z): - return builder.and_(mathimpl.is_finite(builder, z.real), - mathimpl.is_finite(builder, z.imag)) - - -@lower(cmath.isnan, types.Complex) -def isnan_float_impl(context, builder, sig, args): - [typ] = sig.args - [value] = args - z = context.make_complex(builder, typ, value=value) - res = is_nan(builder, z) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower(cmath.isinf, types.Complex) -def isinf_float_impl(context, builder, sig, args): - [typ] = sig.args - [value] = args - z = context.make_complex(builder, typ, value=value) - res = is_inf(builder, z) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -if utils.PYVERSION >= (3, 2): - @lower(cmath.isfinite, types.Complex) - def isfinite_float_impl(context, builder, sig, args): - [typ] = sig.args - [value] = args - z = context.make_complex(builder, typ, value=value) - res = is_finite(builder, z) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower(cmath.rect, types.Float, types.Float) -def rect_impl(context, builder, sig, args): - [r, phi] = args - # We can't call math.isfinite() inside rect() below because it - # only exists on 3.2+. 
- phi_is_finite = mathimpl.is_finite(builder, phi) - - def rect(r, phi, phi_is_finite): - if not phi_is_finite: - if not r: - # cmath.rect(0, phi={inf, nan}) = 0 - return abs(r) - if math.isinf(r): - # cmath.rect(inf, phi={inf, nan}) = inf + j phi - return complex(r, phi) - real = math.cos(phi) - imag = math.sin(phi) - if real == 0. and math.isinf(r): - # 0 * inf would return NaN, we want to keep 0 but xor the sign - real /= r - else: - real *= r - if imag == 0. and math.isinf(r): - # ditto - imag /= r - else: - imag *= r - return complex(real, imag) - - inner_sig = signature(sig.return_type, *sig.args + (types.boolean,)) - res = context.compile_internal(builder, rect, inner_sig, - args + [phi_is_finite]) - return impl_ret_untracked(context, builder, sig, res) - -def intrinsic_complex_unary(inner_func): - def wrapper(context, builder, sig, args): - [typ] = sig.args - [value] = args - z = context.make_complex(builder, typ, value=value) - x = z.real - y = z.imag - # Same as above: math.isfinite() is unavailable on 2.x so we precompute - # its value and pass it to the pure Python implementation. 
- x_is_finite = mathimpl.is_finite(builder, x) - y_is_finite = mathimpl.is_finite(builder, y) - inner_sig = signature(sig.return_type, - *(typ.underlying_float,) * 2 + (types.boolean,) * 2) - res = context.compile_internal(builder, inner_func, inner_sig, - (x, y, x_is_finite, y_is_finite)) - return impl_ret_untracked(context, builder, sig, res) - return wrapper - - -NAN = float('nan') -INF = float('inf') - -@lower(cmath.exp, types.Complex) -@intrinsic_complex_unary -def exp_impl(x, y, x_is_finite, y_is_finite): - """cmath.exp(x + y j)""" - if x_is_finite: - if y_is_finite: - c = math.cos(y) - s = math.sin(y) - r = math.exp(x) - return complex(r * c, r * s) - else: - return complex(NAN, NAN) - elif math.isnan(x): - if y: - return complex(x, x) # nan + j nan - else: - return complex(x, y) # nan + 0j - elif x > 0.0: - # x == +inf - if y_is_finite: - real = math.cos(y) - imag = math.sin(y) - # Avoid NaNs if math.cos(y) or math.sin(y) == 0 - # (e.g. cmath.exp(inf + 0j) == inf + 0j) - if real != 0: - real *= x - if imag != 0: - imag *= x - return complex(real, imag) - else: - return complex(x, NAN) - else: - # x == -inf - if y_is_finite: - r = math.exp(x) - c = math.cos(y) - s = math.sin(y) - return complex(r * c, r * s) - else: - return complex(r, r) - -@lower(cmath.log, types.Complex) -@intrinsic_complex_unary -def log_impl(x, y, x_is_finite, y_is_finite): - """cmath.log(x + y j)""" - a = math.log(math.hypot(x, y)) - b = math.atan2(y, x) - return complex(a, b) - - -@lower(cmath.log, types.Complex, types.Complex) -def log_base_impl(context, builder, sig, args): - """cmath.log(z, base)""" - [z, base] = args - - def log_base(z, base): - return cmath.log(z) / cmath.log(base) - - res = context.compile_internal(builder, log_base, sig, args) - return impl_ret_untracked(context, builder, sig, res) - - -@lower(cmath.log10, types.Complex) -def log10_impl(context, builder, sig, args): - LN_10 = 2.302585092994045684 - - def log10_impl(z): - """cmath.log10(z)""" - z = cmath.log(z) 
- # This formula gives better results on +/-inf than cmath.log(z, 10) - # See http://bugs.python.org/issue22544 - return complex(z.real / LN_10, z.imag / LN_10) - - res = context.compile_internal(builder, log10_impl, sig, args) - return impl_ret_untracked(context, builder, sig, res) - - -@lower(cmath.phase, types.Complex) -@intrinsic_complex_unary -def phase_impl(x, y, x_is_finite, y_is_finite): - """cmath.phase(x + y j)""" - return math.atan2(y, x) - -@lower(cmath.polar, types.Complex) -@intrinsic_complex_unary -def polar_impl(x, y, x_is_finite, y_is_finite): - """cmath.polar(x + y j)""" - return math.hypot(x, y), math.atan2(y, x) - - -@lower(cmath.sqrt, types.Complex) -def sqrt_impl(context, builder, sig, args): - # We risk spurious overflow for components >= FLT_MAX / (1 + sqrt(2)). - THRES = mathimpl.FLT_MAX / (1 + math.sqrt(2)) - - def sqrt_impl(z): - """cmath.sqrt(z)""" - # This is NumPy's algorithm, see npy_csqrt() in npy_math_complex.c.src - a = z.real - b = z.imag - if a == 0.0 and b == 0.0: - return complex(abs(b), b) - if math.isinf(b): - return complex(abs(b), b) - if math.isnan(a): - return complex(a, a) - if math.isinf(a): - if a < 0.0: - return complex(abs(b - b), math.copysign(a, b)) - else: - return complex(a, math.copysign(b - b, b)) - - # The remaining special case (b is NaN) is handled just fine by - # the normal code path below. 
- - # Scale to avoid overflow - if abs(a) >= THRES or abs(b) >= THRES: - a *= 0.25 - b *= 0.25 - scale = True - else: - scale = False - # Algorithm 312, CACM vol 10, Oct 1967 - if a >= 0: - t = math.sqrt((a + math.hypot(a, b)) * 0.5) - real = t - imag = b / (2 * t) - else: - t = math.sqrt((-a + math.hypot(a, b)) * 0.5) - real = abs(b) / (2 * t) - imag = math.copysign(t, b) - # Rescale - if scale: - return complex(real * 2, imag) - else: - return complex(real, imag) - - res = context.compile_internal(builder, sqrt_impl, sig, args) - return impl_ret_untracked(context, builder, sig, res) - - -@lower(cmath.cos, types.Complex) -def cos_impl(context, builder, sig, args): - def cos_impl(z): - """cmath.cos(z) = cmath.cosh(z j)""" - return cmath.cosh(complex(-z.imag, z.real)) - - res = context.compile_internal(builder, cos_impl, sig, args) - return impl_ret_untracked(context, builder, sig, res) - -@lower(cmath.cosh, types.Complex) -def cosh_impl(context, builder, sig, args): - def cosh_impl(z): - """cmath.cosh(z)""" - x = z.real - y = z.imag - if math.isinf(x): - if math.isnan(y): - # x = +inf, y = NaN => cmath.cosh(x + y j) = inf + Nan * j - real = abs(x) - imag = y - elif y == 0.0: - # x = +inf, y = 0 => cmath.cosh(x + y j) = inf + 0j - real = abs(x) - imag = y - else: - real = math.copysign(x, math.cos(y)) - imag = math.copysign(x, math.sin(y)) - if x < 0.0: - # x = -inf => negate imaginary part of result - imag = -imag - return complex(real, imag) - return complex(math.cos(y) * math.cosh(x), - math.sin(y) * math.sinh(x)) - - res = context.compile_internal(builder, cosh_impl, sig, args) - return impl_ret_untracked(context, builder, sig, res) - - -@lower(cmath.sin, types.Complex) -def sin_impl(context, builder, sig, args): - def sin_impl(z): - """cmath.sin(z) = -j * cmath.sinh(z j)""" - r = cmath.sinh(complex(-z.imag, z.real)) - return complex(r.imag, -r.real) - - res = context.compile_internal(builder, sin_impl, sig, args) - return impl_ret_untracked(context, builder, 
sig, res) - -@lower(cmath.sinh, types.Complex) -def sinh_impl(context, builder, sig, args): - def sinh_impl(z): - """cmath.sinh(z)""" - x = z.real - y = z.imag - if math.isinf(x): - if math.isnan(y): - # x = +/-inf, y = NaN => cmath.sinh(x + y j) = x + NaN * j - real = x - imag = y - else: - real = math.cos(y) - imag = math.sin(y) - if real != 0.: - real *= x - if imag != 0.: - imag *= abs(x) - return complex(real, imag) - return complex(math.cos(y) * math.sinh(x), - math.sin(y) * math.cosh(x)) - - res = context.compile_internal(builder, sinh_impl, sig, args) - return impl_ret_untracked(context, builder, sig, res) - - -@lower(cmath.tan, types.Complex) -def tan_impl(context, builder, sig, args): - def tan_impl(z): - """cmath.tan(z) = -j * cmath.tanh(z j)""" - r = cmath.tanh(complex(-z.imag, z.real)) - return complex(r.imag, -r.real) - - res = context.compile_internal(builder, tan_impl, sig, args) - return impl_ret_untracked(context, builder, sig, res) - -@lower(cmath.tanh, types.Complex) -def tanh_impl(context, builder, sig, args): - def tanh_impl(z): - """cmath.tanh(z)""" - x = z.real - y = z.imag - if math.isinf(x): - real = math.copysign(1., x) - if math.isinf(y): - imag = 0. - else: - imag = math.copysign(0., math.sin(2. * y)) - return complex(real, imag) - # This is CPython's algorithm (see c_tanh() in cmathmodule.c). - # XXX how to force float constants into single precision? - tx = math.tanh(x) - ty = math.tan(y) - cx = 1. / math.cosh(x) - txty = tx * ty - denom = 1. + txty * txty - return complex( - tx * (1. 
+ ty * ty) / denom, - ((ty / denom) * cx) * cx) - - res = context.compile_internal(builder, tanh_impl, sig, args) - return impl_ret_untracked(context, builder, sig, res) - - -@lower(cmath.acos, types.Complex) -def acos_impl(context, builder, sig, args): - LN_4 = math.log(4) - THRES = mathimpl.FLT_MAX / 4 - - def acos_impl(z): - """cmath.acos(z)""" - # CPython's algorithm (see c_acos() in cmathmodule.c) - if abs(z.real) > THRES or abs(z.imag) > THRES: - # Avoid unnecessary overflow for large arguments - # (also handles infinities gracefully) - real = math.atan2(abs(z.imag), z.real) - imag = math.copysign( - math.log(math.hypot(z.real * 0.5, z.imag * 0.5)) + LN_4, - -z.imag) - return complex(real, imag) - else: - s1 = cmath.sqrt(complex(1. - z.real, -z.imag)) - s2 = cmath.sqrt(complex(1. + z.real, z.imag)) - real = 2. * math.atan2(s1.real, s2.real) - imag = math.asinh(s2.real * s1.imag - s2.imag * s1.real) - return complex(real, imag) - - res = context.compile_internal(builder, acos_impl, sig, args) - return impl_ret_untracked(context, builder, sig, res) - -@lower(cmath.acosh, types.Complex) -def acosh_impl(context, builder, sig, args): - LN_4 = math.log(4) - THRES = mathimpl.FLT_MAX / 4 - - def acosh_impl(z): - """cmath.acosh(z)""" - # CPython's algorithm (see c_acosh() in cmathmodule.c) - if abs(z.real) > THRES or abs(z.imag) > THRES: - # Avoid unnecessary overflow for large arguments - # (also handles infinities gracefully) - real = math.log(math.hypot(z.real * 0.5, z.imag * 0.5)) + LN_4 - imag = math.atan2(z.imag, z.real) - return complex(real, imag) - else: - s1 = cmath.sqrt(complex(z.real - 1., z.imag)) - s2 = cmath.sqrt(complex(z.real + 1., z.imag)) - real = math.asinh(s1.real * s2.real + s1.imag * s2.imag) - imag = 2. * math.atan2(s1.imag, s2.real) - return complex(real, imag) - # Condensed formula (NumPy) - #return cmath.log(z + cmath.sqrt(z + 1.) 
* cmath.sqrt(z - 1.)) - - res = context.compile_internal(builder, acosh_impl, sig, args) - return impl_ret_untracked(context, builder, sig, res) - -@lower(cmath.asinh, types.Complex) -def asinh_impl(context, builder, sig, args): - LN_4 = math.log(4) - THRES = mathimpl.FLT_MAX / 4 - - def asinh_impl(z): - """cmath.asinh(z)""" - # CPython's algorithm (see c_asinh() in cmathmodule.c) - if abs(z.real) > THRES or abs(z.imag) > THRES: - real = math.copysign( - math.log(math.hypot(z.real * 0.5, z.imag * 0.5)) + LN_4, - z.real) - imag = math.atan2(z.imag, abs(z.real)) - return complex(real, imag) - else: - s1 = cmath.sqrt(complex(1. + z.imag, -z.real)) - s2 = cmath.sqrt(complex(1. - z.imag, z.real)) - real = math.asinh(s1.real * s2.imag - s2.real * s1.imag) - imag = math.atan2(z.imag, s1.real * s2.real - s1.imag * s2.imag) - return complex(real, imag) - - res = context.compile_internal(builder, asinh_impl, sig, args) - return impl_ret_untracked(context, builder, sig, res) - -@lower(cmath.asin, types.Complex) -def asin_impl(context, builder, sig, args): - def asin_impl(z): - """cmath.asin(z) = -j * cmath.asinh(z j)""" - r = cmath.asinh(complex(-z.imag, z.real)) - return complex(r.imag, -r.real) - - res = context.compile_internal(builder, asin_impl, sig, args) - return impl_ret_untracked(context, builder, sig, res) - -@lower(cmath.atan, types.Complex) -def atan_impl(context, builder, sig, args): - def atan_impl(z): - """cmath.atan(z) = -j * cmath.atanh(z j)""" - r = cmath.atanh(complex(-z.imag, z.real)) - if math.isinf(z.real) and math.isnan(z.imag): - # XXX this is odd but necessary - return complex(r.imag, r.real) - else: - return complex(r.imag, -r.real) - - res = context.compile_internal(builder, atan_impl, sig, args) - return impl_ret_untracked(context, builder, sig, res) - -@lower(cmath.atanh, types.Complex) -def atanh_impl(context, builder, sig, args): - LN_4 = math.log(4) - THRES_LARGE = math.sqrt(mathimpl.FLT_MAX / 4) - THRES_SMALL = math.sqrt(mathimpl.FLT_MIN) - 
PI_12 = math.pi / 2 - - def atanh_impl(z): - """cmath.atanh(z)""" - # CPython's algorithm (see c_atanh() in cmathmodule.c) - if z.real < 0.: - # Reduce to case where z.real >= 0., using atanh(z) = -atanh(-z). - negate = True - z = -z - else: - negate = False - - ay = abs(z.imag) - if math.isnan(z.real) or z.real > THRES_LARGE or ay > THRES_LARGE: - if math.isinf(z.imag): - real = math.copysign(0., z.real) - elif math.isinf(z.real): - real = 0. - else: - # may be safe from overflow, depending on hypot's implementation... - h = math.hypot(z.real * 0.5, z.imag * 0.5) - real = z.real/4./h/h - imag = -math.copysign(PI_12, -z.imag) - elif z.real == 1. and ay < THRES_SMALL: - # C99 standard says: atanh(1+/-0.) should be inf +/- 0j - if ay == 0.: - real = INF - imag = z.imag - else: - real = -math.log(math.sqrt(ay) / - math.sqrt(math.hypot(ay, 2.))) - imag = math.copysign(math.atan2(2., -ay) / 2, z.imag) - else: - sqay = ay * ay - zr1 = 1 - z.real - real = math.log1p(4. * z.real / (zr1 * zr1 + sqay)) * 0.25 - imag = -math.atan2(-2. 
* z.imag, - zr1 * (1 + z.real) - sqay) * 0.5 - - if math.isnan(z.imag): - imag = NAN - if negate: - return complex(-real, -imag) - else: - return complex(real, imag) - - res = context.compile_internal(builder, atanh_impl, sig, args) - return impl_ret_untracked(context, builder, sig, res) diff --git a/numba/numba/targets/codegen.py b/numba/numba/targets/codegen.py deleted file mode 100644 index f62e331c0..000000000 --- a/numba/numba/targets/codegen.py +++ /dev/null @@ -1,845 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import warnings -import functools -import locale -import weakref -from collections import defaultdict -import ctypes - -import llvmlite.llvmpy.core as lc -import llvmlite.llvmpy.passes as lp -import llvmlite.binding as ll -import llvmlite.ir as llvmir - -from numba import config, utils, cgutils -from numba.runtime.nrtopt import remove_redundant_nrt_refct -from numba.runtime import rtsys - -_x86arch = frozenset(['x86', 'i386', 'i486', 'i586', 'i686', 'i786', - 'i886', 'i986']) - - -def _is_x86(triple): - arch = triple.split('-')[0] - return arch in _x86arch - - -def dump(header, body): - print(header.center(80, '-')) - print(body) - print('=' * 80) - - -class _CFG(object): - """ - Wraps the CFG graph for different display method. - - Instance of the class can be stringified (``__repr__`` is defined) to get - the graph in DOT format. The ``.display()`` method plots the graph in - PDF. If in IPython notebook, the returned image can be inlined. - """ - def __init__(self, dot): - self.dot = dot - - def display(self, filename=None, view=False): - """ - Plot the CFG. In IPython notebook, the return image object can be - inlined. - - The *filename* option can be set to a specific path for the rendered - output to write to. If *view* option is True, the plot is opened by - the system default application for the image format (PDF). 
- """ - return ll.view_dot_graph(self.dot, filename=filename, view=view) - - def __repr__(self): - return self.dot - - -class CodeLibrary(object): - """ - An interface for bundling LLVM code together and compiling it. - It is tied to a *codegen* instance (e.g. JITCPUCodegen) that will - determine how the LLVM code is transformed and linked together. - """ - - _finalized = False - _object_caching_enabled = False - _disable_inspection = False - - def __init__(self, codegen, name): - self._codegen = codegen - self._name = name - self._linking_libraries = set() - self._final_module = ll.parse_assembly( - str(self._codegen._create_empty_module(self._name))) - self._final_module.name = cgutils.normalize_ir_text(self._name) - self._shared_module = None - # Track names of the dynamic globals - self._dynamic_globals = [] - - @property - def has_dynamic_globals(self): - return len(self._dynamic_globals) > 0 - - @property - def codegen(self): - """ - The codegen object owning this library. - """ - return self._codegen - - def __repr__(self): - return "" % (self._name, id(self)) - - def _raise_if_finalized(self): - if self._finalized: - raise RuntimeError("operation impossible on finalized object %r" - % (self,)) - - def _ensure_finalized(self): - if not self._finalized: - self.finalize() - - def _optimize_functions(self, ll_module): - """ - Internal: run function-level optimizations inside *ll_module*. - """ - # Enforce data layout to enable layout-specific optimizations - ll_module.data_layout = self._codegen._data_layout - with self._codegen._function_pass_manager(ll_module) as fpm: - # Run function-level optimizations to reduce memory usage and improve - # module-level optimization. - for func in ll_module.functions: - fpm.initialize() - fpm.run(func) - fpm.finalize() - - def _optimize_final_module(self): - """ - Internal: optimize this library's final module. 
- """ - self._codegen._mpm.run(self._final_module) - self._final_module = remove_redundant_nrt_refct(self._final_module) - - def _get_module_for_linking(self): - """ - Internal: get a LLVM module suitable for linking multiple times - into another library. Exported functions are made "linkonce_odr" - to allow for multiple definitions, inlining, and removal of - unused exports. - - See discussion in https://github.com/numba/numba/pull/890 - """ - self._ensure_finalized() - if self._shared_module is not None: - return self._shared_module - mod = self._final_module - to_fix = [] - nfuncs = 0 - for fn in mod.functions: - nfuncs += 1 - if not fn.is_declaration and fn.linkage == ll.Linkage.external: - to_fix.append(fn.name) - if nfuncs == 0: - # This is an issue which can occur if loading a module - # from an object file and trying to link with it, so detect it - # here to make debugging easier. - raise RuntimeError("library unfit for linking: " - "no available functions in %s" - % (self,)) - if to_fix: - mod = mod.clone() - for name in to_fix: - # NOTE: this will mark the symbol WEAK if serialized - # to an ELF file - mod.get_function(name).linkage = 'linkonce_odr' - self._shared_module = mod - return mod - - def create_ir_module(self, name): - """ - Create a LLVM IR module for use by this library. - """ - self._raise_if_finalized() - ir_module = self._codegen._create_empty_module(name) - return ir_module - - def add_linking_library(self, library): - """ - Add a library for linking into this library, without losing - the original library. - """ - library._ensure_finalized() - self._linking_libraries.add(library) - - def add_ir_module(self, ir_module): - """ - Add a LLVM IR module's contents to this library. 
- """ - self._raise_if_finalized() - assert isinstance(ir_module, llvmir.Module) - ir = cgutils.normalize_ir_text(str(ir_module)) - ll_module = ll.parse_assembly(ir) - ll_module.name = ir_module.name - ll_module.verify() - self.add_llvm_module(ll_module) - - def _scan_dynamic_globals(self, ll_module): - """ - Scan for dynanmic globals and track their names - """ - for gv in ll_module.global_variables: - if gv.name.startswith("numba.dynamic.globals"): - self._dynamic_globals.append(gv.name) - - def add_llvm_module(self, ll_module): - self._scan_dynamic_globals(ll_module) - self._optimize_functions(ll_module) - # TODO: we shouldn't need to recreate the LLVM module object - ll_module = remove_redundant_nrt_refct(ll_module) - self._final_module.link_in(ll_module) - - def finalize(self): - """ - Finalize the library. After this call, nothing can be added anymore. - Finalization involves various stages of code optimization and - linking. - """ - # Report any LLVM-related problems to the user - self._codegen._check_llvm_bugs() - - self._raise_if_finalized() - - if config.DUMP_FUNC_OPT: - dump("FUNCTION OPTIMIZED DUMP %s" % self._name, self.get_llvm_str()) - - # Link libraries for shared code - for library in self._linking_libraries: - self._final_module.link_in( - library._get_module_for_linking(), preserve=True) - for library in self._codegen._libraries: - self._final_module.link_in( - library._get_module_for_linking(), preserve=True) - - # Optimize the module after all dependences are linked in above, - # to allow for inlining. - self._optimize_final_module() - - self._final_module.verify() - self._finalize_final_module() - - def _finalize_final_module(self): - """ - Make the underlying LLVM module ready to use. 
- """ - # Remember this on the module, for the object cache hooks - self._final_module.__library = weakref.proxy(self) - - # It seems add_module() must be done only here and not before - # linking in other modules, otherwise get_pointer_to_function() - # could fail. - cleanup = self._codegen._add_module(self._final_module) - if cleanup: - utils.finalize(self, cleanup) - self._finalize_specific() - - self._finalized = True - - if config.DUMP_OPTIMIZED: - dump("OPTIMIZED DUMP %s" % self._name, self.get_llvm_str()) - - if config.DUMP_ASSEMBLY: - # CUDA backend cannot return assembly this early, so don't - # attempt to dump assembly if nothing is produced. - asm = self.get_asm_str() - if asm: - dump("ASSEMBLY %s" % self._name, self.get_asm_str()) - - def get_defined_functions(self): - """ - Get all functions defined in the library. The library must have - been finalized. - """ - mod = self._final_module - for fn in mod.functions: - if not fn.is_declaration: - yield fn - - def get_function(self, name): - return self._final_module.get_function(name) - - def _sentry_cache_disable_inspection(self): - if self._disable_inspection: - warnings.warn('Inspection disabled for cached code. ' - 'Invalid result is returned.') - - def get_llvm_str(self): - """ - Get the human-readable form of the LLVM module. - """ - self._sentry_cache_disable_inspection() - return str(self._final_module) - - def get_asm_str(self): - """ - Get the human-readable assembly. 
- """ - self._sentry_cache_disable_inspection() - return str(self._codegen._tm.emit_assembly(self._final_module)) - - def get_function_cfg(self, name): - """ - Get control-flow graph of the LLVM function - """ - self._sentry_cache_disable_inspection() - fn = self.get_function(name) - dot = ll.get_function_cfg(fn) - return _CFG(dot) - - # - # Object cache hooks and serialization - # - - def enable_object_caching(self): - self._object_caching_enabled = True - self._compiled_object = None - self._compiled = False - - def _get_compiled_object(self): - if not self._object_caching_enabled: - raise ValueError("object caching not enabled in %s" % (self,)) - if self._compiled_object is None: - raise RuntimeError("no compiled object yet for %s" % (self,)) - return self._compiled_object - - def _set_compiled_object(self, value): - if not self._object_caching_enabled: - raise ValueError("object caching not enabled in %s" % (self,)) - if self._compiled: - raise ValueError("library already compiled: %s" % (self,)) - self._compiled_object = value - self._disable_inspection = True - - @classmethod - def _dump_elf(cls, buf): - """ - Dump the symbol table of an ELF file. - Needs pyelftools (https://github.com/eliben/pyelftools) - """ - from elftools.elf.elffile import ELFFile - from elftools.elf import descriptions - from io import BytesIO - f = ELFFile(BytesIO(buf)) - print("ELF file:") - for sec in f.iter_sections(): - if sec['sh_type'] == 'SHT_SYMTAB': - symbols = sorted(sec.iter_symbols(), key=lambda sym: sym.name) - print(" symbols:") - for sym in symbols: - if not sym.name: - continue - print(" - %r: size=%d, value=0x%x, type=%s, bind=%s" - % (sym.name.decode(), - sym['st_size'], - sym['st_value'], - descriptions.describe_symbol_type(sym['st_info']['type']), - descriptions.describe_symbol_bind(sym['st_info']['bind']), - )) - print() - - @classmethod - def _object_compiled_hook(cls, ll_module, buf): - """ - `ll_module` was compiled into object code `buf`. 
- """ - try: - self = ll_module.__library - except AttributeError: - return - if self._object_caching_enabled: - self._compiled = True - self._compiled_object = buf - - @classmethod - def _object_getbuffer_hook(cls, ll_module): - """ - Return a cached object code for `ll_module`. - """ - try: - self = ll_module.__library - except AttributeError: - return - if self._object_caching_enabled and self._compiled_object: - buf = self._compiled_object - self._compiled_object = None - return buf - - def serialize_using_bitcode(self): - """ - Serialize this library using its bitcode as the cached representation. - """ - self._ensure_finalized() - return (self._name, 'bitcode', self._final_module.as_bitcode()) - - def serialize_using_object_code(self): - """ - Serialize this library using its object code as the cached - representation. We also include its bitcode for further inlining - with other libraries. - """ - self._ensure_finalized() - data = (self._get_compiled_object(), - self._get_module_for_linking().as_bitcode()) - return (self._name, 'object', data) - - @classmethod - def _unserialize(cls, codegen, state): - name, kind, data = state - self = codegen.create_library(name) - assert isinstance(self, cls) - if kind == 'bitcode': - # No need to re-run optimizations, just make the module ready - self._final_module = ll.parse_bitcode(data) - self._finalize_final_module() - return self - elif kind == 'object': - object_code, shared_bitcode = data - self.enable_object_caching() - self._set_compiled_object(object_code) - self._shared_module = ll.parse_bitcode(shared_bitcode) - self._finalize_final_module() - # Load symbols from cache - self._codegen._engine._load_defined_symbols(self._shared_module) - return self - else: - raise ValueError("unsupported serialization kind %r" % (kind,)) - - -class AOTCodeLibrary(CodeLibrary): - - def emit_native_object(self): - """ - Return this library as a native object (a bytestring) -- for example - ELF under Linux. 
- - This function implicitly calls .finalize(). - """ - self._ensure_finalized() - return self._codegen._tm.emit_object(self._final_module) - - def emit_bitcode(self): - """ - Return this library as LLVM bitcode (a bytestring). - - This function implicitly calls .finalize(). - """ - self._ensure_finalized() - return self._final_module.as_bitcode() - - def _finalize_specific(self): - pass - - -class JITCodeLibrary(CodeLibrary): - - def get_pointer_to_function(self, name): - """ - Generate native code for function named *name* and return a pointer - to the start of the function (as an integer). - - This function implicitly calls .finalize(). - - Returns - ------- - pointer : int - - zero (null) if no symbol of *name* is defined by this code - library. - - non-zero if the symbol is defined. - """ - self._ensure_finalized() - ee = self._codegen._engine - if not ee.is_symbol_defined(name): - return 0 - else: - return self._codegen._engine.get_function_address(name) - - def _finalize_specific(self): - self._codegen._scan_and_fix_unresolved_refs(self._final_module) - self._codegen._engine.finalize_object() - - -class RuntimeLinker(object): - """ - For tracking unresolved symbols generated at runtime due to recursion. - """ - PREFIX = '.numba.unresolved$' - - def __init__(self): - self._unresolved = utils.UniqueDict() - self._defined = set() - self._resolved = [] - - def scan_unresolved_symbols(self, module, engine): - """ - Scan and track all unresolved external symbols in the module and - allocate memory for it. 
- """ - prefix = self.PREFIX - - for gv in module.global_variables: - if gv.name.startswith(prefix): - sym = gv.name[len(prefix):] - # Avoid remapping to existing GV - if engine.is_symbol_defined(gv.name): - continue - # Allocate a memory space for the pointer - abortfn = rtsys.library.get_pointer_to_function("nrt_unresolved_abort") - ptr = ctypes.c_void_p(abortfn) - engine.add_global_mapping(gv, ctypes.addressof(ptr)) - self._unresolved[sym] = ptr - - def scan_defined_symbols(self, module): - """ - Scan and track all defined symbols. - """ - for fn in module.functions: - if not fn.is_declaration: - self._defined.add(fn.name) - - def resolve(self, engine): - """ - Fix unresolved symbols if they are defined. - """ - # An iterator to get all unresolved but available symbols - pending = [name for name in self._unresolved if name in self._defined] - # Resolve pending symbols - for name in pending: - # Get runtime address - fnptr = engine.get_function_address(name) - # Fix all usage - ptr = self._unresolved[name] - ptr.value = fnptr - self._resolved.append((name, ptr)) # keep ptr alive - # Delete resolved - del self._unresolved[name] - - -def _proxy(old): - @functools.wraps(old) - def wrapper(self, *args, **kwargs): - return old(self._ee, *args, **kwargs) - return wrapper - - -class JitEngine(object): - """Wraps an ExecutionEngine to provide custom symbol tracking. - Since the symbol tracking is incomplete (doesn't consider - loaded code object), we are not putting it in llvmlite. - """ - def __init__(self, ee): - self._ee = ee - # Track symbol defined via codegen'd Module - # but not any cached object. - # NOTE: `llvm::ExecutionEngine` will catch duplicated symbols and - # we are not going to protect against that. A proper duplicated - # symbol detection will need a more logic to check for the linkage - # (e.g. like `weak` linkage symbol can override). 
This - # `_defined_symbols` set will be just enough to tell if a symbol - # exists and will not cause the `EE` symbol lookup to `exit(1)` - # when symbol-not-found. - self._defined_symbols = set() - - def is_symbol_defined(self, name): - """Is the symbol defined in this session? - """ - return name in self._defined_symbols - - def _load_defined_symbols(self, mod): - """Extract symbols from the module - """ - for gsets in (mod.functions, mod.global_variables): - self._defined_symbols |= {gv.name for gv in gsets - if not gv.is_declaration} - - def add_module(self, module): - """Override ExecutionEngine.add_module - to keep info about defined symbols. - """ - self._load_defined_symbols(module) - return self._ee.add_module(module) - - def add_global_mapping(self, gv, addr): - """Override ExecutionEngine.add_global_mapping - to keep info about defined symbols. - """ - self._defined_symbols.add(gv.name) - return self._ee.add_global_mapping(gv, addr) - - # - # The remaining methods are re-export of the ExecutionEngine APIs - # - set_object_cache = _proxy(ll.ExecutionEngine.set_object_cache) - finalize_object = _proxy(ll.ExecutionEngine.finalize_object) - get_function_address = _proxy(ll.ExecutionEngine.get_function_address) - get_global_value_address = _proxy( - ll.ExecutionEngine.get_global_value_address - ) - -class BaseCPUCodegen(object): - - def __init__(self, module_name): - initialize_llvm() - - self._libraries = set() - self._data_layout = None - self._llvm_module = ll.parse_assembly( - str(self._create_empty_module(module_name))) - self._llvm_module.name = "global_codegen_module" - self._rtlinker = RuntimeLinker() - self._init(self._llvm_module) - - def _init(self, llvm_module): - assert list(llvm_module.global_variables) == [], "Module isn't empty" - - target = ll.Target.from_triple(ll.get_process_triple()) - tm_options = dict(opt=config.OPT) - self._tm_features = self._customize_tm_features() - self._customize_tm_options(tm_options) - tm = 
target.create_target_machine(**tm_options) - engine = ll.create_mcjit_compiler(llvm_module, tm) - - self._tm = tm - self._engine = JitEngine(engine) - self._target_data = engine.target_data - self._data_layout = str(self._target_data) - self._mpm = self._module_pass_manager() - - self._engine.set_object_cache(self._library_class._object_compiled_hook, - self._library_class._object_getbuffer_hook) - - def _create_empty_module(self, name): - ir_module = lc.Module(cgutils.normalize_ir_text(name)) - ir_module.triple = ll.get_process_triple() - if self._data_layout: - ir_module.data_layout = self._data_layout - return ir_module - - @property - def target_data(self): - """ - The LLVM "target data" object for this codegen instance. - """ - return self._target_data - - def add_linking_library(self, library): - """ - Add a library for linking into all libraries created by this - codegen object, without losing the original library. - """ - library._ensure_finalized() - self._libraries.add(library) - - def create_library(self, name): - """ - Create a :class:`CodeLibrary` object for use with this codegen - instance. - """ - return self._library_class(self, name) - - def unserialize_library(self, serialized): - return self._library_class._unserialize(self, serialized) - - def _module_pass_manager(self): - pm = ll.create_module_pass_manager() - self._tm.add_analysis_passes(pm) - with self._pass_manager_builder() as pmb: - pmb.populate(pm) - return pm - - def _function_pass_manager(self, llvm_module): - pm = ll.create_function_pass_manager(llvm_module) - self._tm.add_analysis_passes(pm) - with self._pass_manager_builder() as pmb: - pmb.populate(pm) - return pm - - def _pass_manager_builder(self): - """ - Create a PassManagerBuilder. - - Note: a PassManagerBuilder seems good only for one use, so you - should call this method each time you want to populate a module - or function pass manager. Otherwise some optimizations will be - missed... 
- """ - pmb = lp.create_pass_manager_builder( - opt=config.OPT, loop_vectorize=config.LOOP_VECTORIZE) - return pmb - - def _check_llvm_bugs(self): - """ - Guard against some well-known LLVM bug(s). - """ - # Check the locale bug at https://github.com/numba/numba/issues/1569 - # Note we can't cache the result as locale settings can change - # accross a process's lifetime. Also, for this same reason, - # the check here is a mere heuristic (there may be a race condition - # between now and actually compiling IR). - ir = """ - define double @func() - { - ret double 1.23e+01 - } - """ - mod = ll.parse_assembly(ir) - ir_out = str(mod) - if "12.3" in ir_out or "1.23" in ir_out: - # Everything ok - return - if "1.0" in ir_out: - loc = locale.getlocale() - raise RuntimeError( - "LLVM will produce incorrect floating-point code " - "in the current locale %s.\nPlease read " - "http://numba.pydata.org/numba-doc/dev/user/faq.html#llvm-locale-bug " - "for more information." - % (loc,)) - raise AssertionError("Unexpected IR:\n%s\n" % (ir_out,)) - - def magic_tuple(self): - """ - Return a tuple unambiguously describing the codegen behaviour. - """ - return (self._llvm_module.triple, self._get_host_cpu_name(), - self._tm_features) - - def _scan_and_fix_unresolved_refs(self, module): - self._rtlinker.scan_unresolved_symbols(module, self._engine) - self._rtlinker.scan_defined_symbols(module) - self._rtlinker.resolve(self._engine) - - def insert_unresolved_ref(self, builder, fnty, name): - voidptr = llvmir.IntType(8).as_pointer() - ptrname = self._rtlinker.PREFIX + name - llvm_mod = builder.module - try: - fnptr = llvm_mod.get_global(ptrname) - except KeyError: - # Not defined? 
- fnptr = llvmir.GlobalVariable(llvm_mod, voidptr, name=ptrname) - fnptr.linkage = 'external' - return builder.bitcast(builder.load(fnptr), fnty.as_pointer()) - - def _get_host_cpu_name(self): - return (ll.get_host_cpu_name() - if config.CPU_NAME is None - else config.CPU_NAME) - - def _get_host_cpu_features(self): - if config.CPU_FEATURES is not None: - return config.CPU_FEATURES - return get_host_cpu_features() - -class AOTCPUCodegen(BaseCPUCodegen): - """ - A codegen implementation suitable for Ahead-Of-Time compilation - (e.g. generation of object files). - """ - - _library_class = AOTCodeLibrary - - def __init__(self, module_name, cpu_name=None): - # By default, use generic cpu model for the arch - self._cpu_name = cpu_name or '' - BaseCPUCodegen.__init__(self, module_name) - - def _customize_tm_options(self, options): - cpu_name = self._cpu_name - if cpu_name == 'host': - cpu_name = self._get_host_cpu_name() - options['cpu'] = cpu_name - options['reloc'] = 'pic' - options['codemodel'] = 'default' - options['features'] = self._tm_features - - def _customize_tm_features(self): - # ISA features are selected according to the requested CPU model - # in _customize_tm_options() - return '' - - def _add_module(self, module): - pass - - -class JITCPUCodegen(BaseCPUCodegen): - """ - A codegen implementation suitable for Just-In-Time compilation. - """ - - _library_class = JITCodeLibrary - - def _customize_tm_options(self, options): - # As long as we don't want to ship the code to another machine, - # we can specialize for this CPU. 
- options['cpu'] = self._get_host_cpu_name() - options['reloc'] = 'default' - options['codemodel'] = 'jitdefault' - - # Set feature attributes (such as ISA extensions) - # This overrides default feature selection by CPU model above - options['features'] = self._tm_features - - # Enable JIT debug - options['jitdebug'] = True - - def _customize_tm_features(self): - # For JIT target, we will use LLVM to get the feature map - return self._get_host_cpu_features() - - def _add_module(self, module): - self._engine.add_module(module) - # XXX: disabling remove module due to MCJIT engine leakage in - # removeModule. The removeModule causes consistent access - # violation with certain test combinations. - # # Early bind the engine method to avoid keeping a reference to self. - # return functools.partial(self._engine.remove_module, module) - - def set_env(self, env_name, env): - """Set the environment address. - - Update the GlobalVariable named *env_name* to the address of *env*. - """ - gvaddr = self._engine.get_global_value_address(env_name) - envptr = (ctypes.c_void_p * 1).from_address(gvaddr) - envptr[0] = ctypes.c_void_p(id(env)) - - -def initialize_llvm(): - """Safe to use multiple times. - """ - ll.initialize() - ll.initialize_native_target() - ll.initialize_native_asmprinter() - - -def get_host_cpu_features(): - """Get host CPU features using LLVM. - - The features may be modified due to user setting. - See numba.config.ENABLE_AVX. 
- """ - try: - features = ll.get_host_cpu_features() - except RuntimeError: - return '' - else: - if not config.ENABLE_AVX: - # Disable all features with name starting with 'avx' - for k in features: - if k.startswith('avx'): - features[k] = False - - # Set feature attributes - return features.flatten() diff --git a/numba/numba/targets/cpu.py b/numba/numba/targets/cpu.py deleted file mode 100644 index 85ec43443..000000000 --- a/numba/numba/targets/cpu.py +++ /dev/null @@ -1,282 +0,0 @@ -from __future__ import print_function, absolute_import - -import sys - -import llvmlite.llvmpy.core as lc - -from numba import _dynfunc, config -from numba.callwrapper import PyCallWrapper -from .base import BaseContext, PYOBJECT -from numba import utils, cgutils, types -from numba.utils import cached_property -from numba.targets import callconv, codegen, externals, intrinsics, listobj, setobj -from .options import TargetOptions -from numba.runtime import rtsys -from . import fastmathpass - -# Keep those structures in sync with _dynfunc.c. - -class ClosureBody(cgutils.Structure): - _fields = [('env', types.pyobject)] - - -class EnvBody(cgutils.Structure): - _fields = [ - ('globals', types.pyobject), - ('consts', types.pyobject), - ] - - -class CPUContext(BaseContext): - """ - Changes BaseContext calling convention - """ - allow_dynamic_globals = True - - # Overrides - def create_module(self, name): - return self._internal_codegen._create_empty_module(name) - - def init(self): - self.is32bit = (utils.MACHINE_BITS == 32) - self._internal_codegen = codegen.JITCPUCodegen("numba.exec") - - # Map external C functions. - externals.c_math_functions.install(self) - - # Initialize NRT runtime - rtsys.initialize(self) - - def load_additional_registries(self): - # Add target specific implementations - from . 
import (cffiimpl, cmathimpl, mathimpl, npyimpl, operatorimpl, - printimpl, randomimpl) - self.install_registry(cmathimpl.registry) - self.install_registry(cffiimpl.registry) - self.install_registry(mathimpl.registry) - self.install_registry(npyimpl.registry) - self.install_registry(operatorimpl.registry) - self.install_registry(printimpl.registry) - self.install_registry(randomimpl.registry) - self.install_registry(randomimpl.registry) - - @property - def target_data(self): - return self._internal_codegen.target_data - - def with_aot_codegen(self, name, **aot_options): - aot_codegen = codegen.AOTCPUCodegen(name, **aot_options) - return self.subtarget(_internal_codegen=aot_codegen, - aot_mode=True) - - def codegen(self): - return self._internal_codegen - - @cached_property - def call_conv(self): - return callconv.CPUCallConv(self) - - def get_env_body(self, builder, envptr): - """ - From the given *envptr* (a pointer to a _dynfunc.Environment object), - get a EnvBody allowing structured access to environment fields. - """ - body_ptr = cgutils.pointer_add( - builder, envptr, _dynfunc._impl_info['offsetof_env_body']) - return EnvBody(self, builder, ref=body_ptr, cast_ref=True) - - def get_env_manager(self, builder): - envgv = self.declare_env_global(builder.module, - self.get_env_name(self.fndesc)) - envarg = builder.load(envgv) - pyapi = self.get_python_api(builder) - pyapi.emit_environment_sentry(envarg) - env_body = self.get_env_body(builder, envarg) - return pyapi.get_env_manager(self.environment, env_body, envarg) - - def get_generator_state(self, builder, genptr, return_type): - """ - From the given *genptr* (a pointer to a _dynfunc.Generator object), - get a pointer to its state area. - """ - return cgutils.pointer_add( - builder, genptr, _dynfunc._impl_info['offsetof_generator_state'], - return_type=return_type) - - def build_list(self, builder, list_type, items): - """ - Build a list from the Numba *list_type* and its initial *items*. 
- """ - return listobj.build_list(self, builder, list_type, items) - - def build_set(self, builder, set_type, items): - """ - Build a set from the Numba *set_type* and its initial *items*. - """ - return setobj.build_set(self, builder, set_type, items) - - def post_lowering(self, mod, library): - if self.enable_fastmath: - fastmathpass.rewrite_module(mod) - - if self.is32bit: - # 32-bit machine needs to replace all 64-bit div/rem to avoid - # calls to compiler-rt - intrinsics.fix_divmod(mod) - - library.add_linking_library(rtsys.library) - - def create_cpython_wrapper(self, library, fndesc, env, call_helper, - release_gil=False): - wrapper_module = self.create_module("wrapper") - fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) - wrapper_callee = wrapper_module.add_function(fnty, fndesc.llvm_func_name) - builder = PyCallWrapper(self, wrapper_module, wrapper_callee, - fndesc, env, call_helper=call_helper, - release_gil=release_gil) - builder.build() - library.add_ir_module(wrapper_module) - - def get_executable(self, library, fndesc, env): - """ - Returns - ------- - (cfunc, fnptr) - - - cfunc - callable function (Can be None) - - fnptr - callable function address - - env - an execution environment (from _dynfunc) - """ - # Code generation - baseptr = library.get_pointer_to_function(fndesc.llvm_func_name) - fnptr = library.get_pointer_to_function(fndesc.llvm_cpython_wrapper_name) - - # Note: we avoid reusing the original docstring to avoid encoding - # issues on Python 2, see issue #1908 - doc = "compiled wrapper for %r" % (fndesc.qualname,) - cfunc = _dynfunc.make_function(fndesc.lookup_module(), - fndesc.qualname.split('.')[-1], - doc, fnptr, env, - # objects to keepalive with the function - (library,) - ) - library.codegen.set_env(self.get_env_name(fndesc), env) - return cfunc - - def calc_array_sizeof(self, ndim): - ''' - Calculate the size of an array struct on the CPU target - ''' - aryty = types.Array(types.int32, ndim, 'A') - return 
self.get_abi_sizeof(self.get_value_type(aryty)) - -class ParallelOptions(object): - """ - Options for controlling auto parallelization. - """ - def __init__(self, value): - if isinstance(value, bool): - self.enabled = value - self.comprehension = value - self.reduction = value - self.setitem = value - self.numpy = value - self.stencil = value - self.fusion = value - self.prange = value - elif isinstance(value, dict): - self.enabled = True - self.comprehension = value.pop('comprehension', True) - self.reduction = value.pop('reduction', True) - self.setitem = value.pop('setitem', True) - self.numpy = value.pop('numpy', True) - self.stencil = value.pop('stencil', True) - self.fusion = value.pop('fusion', True) - self.prange = value.pop('prange', True) - if value: - raise NameError("Unrecognized parallel options: %s" % value.keys()) - else: - raise ValueError("Expect parallel option to be either a bool or a dict") - - -# ---------------------------------------------------------------------------- -# TargetOptions - -class CPUTargetOptions(TargetOptions): - OPTIONS = { - "nopython": bool, - "nogil": bool, - "forceobj": bool, - "looplift": bool, - "boundcheck": bool, - "debug": bool, - "_nrt": bool, - "no_rewrites": bool, - "no_cpython_wrapper": bool, - "fastmath": bool, - "error_model": str, - "parallel": ParallelOptions, - } - - -# ---------------------------------------------------------------------------- -# Internal - -def remove_refct_calls(func): - """ - Remove redundant incref/decref within on a per block basis - """ - for bb in func.basic_blocks: - remove_null_refct_call(bb) - remove_refct_pairs(bb) - - -def remove_null_refct_call(bb): - """ - Remove refct api calls to NULL pointer - """ - pass - ## Skipped for now - # for inst in bb.instructions: - # if isinstance(inst, lc.CallOrInvokeInstruction): - # fname = inst.called_function.name - # if fname == "Py_IncRef" or fname == "Py_DecRef": - # arg = inst.args[0] - # print(type(arg)) - # if isinstance(arg, 
lc.ConstantPointerNull): - # inst.erase_from_parent() - - -def remove_refct_pairs(bb): - """ - Remove incref decref pairs on the same variable - """ - - didsomething = True - - while didsomething: - didsomething = False - - increfs = {} - decrefs = {} - - # Mark - for inst in bb.instructions: - if isinstance(inst, lc.CallOrInvokeInstruction): - fname = inst.called_function.name - if fname == "Py_IncRef": - arg = inst.operands[0] - increfs[arg] = inst - elif fname == "Py_DecRef": - arg = inst.operands[0] - decrefs[arg] = inst - - # Sweep - for val in increfs.keys(): - if val in decrefs: - increfs[val].erase_from_parent() - decrefs[val].erase_from_parent() - didsomething = True diff --git a/numba/numba/targets/descriptors.py b/numba/numba/targets/descriptors.py deleted file mode 100644 index e4b279644..000000000 --- a/numba/numba/targets/descriptors.py +++ /dev/null @@ -1,9 +0,0 @@ -""" -Target Descriptors -""" - -from __future__ import print_function, division, absolute_import - - -class TargetDescriptor(object): - pass diff --git a/numba/numba/targets/enumimpl.py b/numba/numba/targets/enumimpl.py deleted file mode 100644 index 03aaa5a99..000000000 --- a/numba/numba/targets/enumimpl.py +++ /dev/null @@ -1,79 +0,0 @@ -""" -Implementation of enums. -""" - - -from .imputils import (lower_builtin, lower_getattr, lower_getattr_generic, - lower_cast, lower_constant, impl_ret_untracked) -from .. 
import types - - -@lower_builtin('==', types.EnumMember, types.EnumMember) -def enum_eq(context, builder, sig, args): - tu, tv = sig.args - u, v = args - res = context.generic_compare(builder, "==", - (tu.dtype, tv.dtype), (u, v)) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin('is', types.EnumMember, types.EnumMember) -def enum_is(context, builder, sig, args): - tu, tv = sig.args - u, v = args - if tu == tv: - res = context.generic_compare(builder, "==", - (tu.dtype, tv.dtype), (u, v)) - else: - res = context.get_constant(sig.return_type, False) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin('!=', types.EnumMember, types.EnumMember) -def enum_ne(context, builder, sig, args): - tu, tv = sig.args - u, v = args - res = context.generic_compare(builder, "!=", - (tu.dtype, tv.dtype), (u, v)) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_getattr(types.EnumMember, 'value') -def enum_value(context, builder, ty, val): - return val - - -@lower_cast(types.IntEnumMember, types.Integer) -def int_enum_to_int(context, builder, fromty, toty, val): - """ - Convert an IntEnum member to its raw integer value. - """ - return context.cast(builder, val, fromty.dtype, toty) - - -@lower_constant(types.EnumMember) -def enum_constant(context, builder, ty, pyval): - """ - Return a LLVM constant representing enum member *pyval*. - """ - return context.get_constant_generic(builder, ty.dtype, pyval.value) - - -@lower_getattr_generic(types.EnumClass) -def enum_class_getattr(context, builder, ty, val, attr): - """ - Return an enum member by attribute name. - """ - member = getattr(ty.instance_class, attr) - return context.get_constant_generic(builder, ty.dtype, member.value) - - -@lower_builtin('static_getitem', types.EnumClass, types.Const) -def enum_class_getitem(context, builder, sig, args): - """ - Return an enum member by index name. 
- """ - enum_cls_typ, idx = sig.args - member = enum_cls_typ.instance_class[idx.value] - return context.get_constant_generic(builder, enum_cls_typ.dtype, - member.value) diff --git a/numba/numba/targets/externals.py b/numba/numba/targets/externals.py deleted file mode 100644 index b3d4aca97..000000000 --- a/numba/numba/targets/externals.py +++ /dev/null @@ -1,203 +0,0 @@ -""" -Register external C functions necessary for Numba code generation. -""" - -import sys -import ctypes - -from llvmlite import ir -import llvmlite.binding as ll - -from numba import utils, config -from numba import _helperlib -from . import intrinsics - -# Require workaround for https://support.microsoft.com/en-us/kb/982107 ? -need_kb982107 = (config.PYVERSION == (2, 7) and - config.IS_WIN32 and - not config.IS_32BITS) - - -def _add_missing_symbol(symbol, addr): - """Add missing symbol into LLVM internal symtab - """ - if not ll.address_of_symbol(symbol): - ll.add_symbol(symbol, addr) - - -def _get_msvcrt_symbol(symbol): - """ - Under Windows, look up a symbol inside the C runtime - and return the raw pointer value as an integer. - """ - from ctypes import cdll, cast, c_void_p - f = getattr(cdll.msvcrt, symbol) - return cast(f, c_void_p).value - - -def compile_multi3(context): - """ - Compile the multi3() helper function used by LLVM - for 128-bit multiplication on 32-bit platforms. - """ - codegen = context.codegen() - library = codegen.create_library("multi3") - - ir_mod = library.create_ir_module("multi3") - - i64 = ir.IntType(64) - i128 = ir.IntType(128) - lower_mask = ir.Constant(i64, 0xffffffff) - _32 = ir.Constant(i64, 32) - _64 = ir.Constant(i128, 64) - - fn_type = ir.FunctionType(i128, [i128, i128]) - fn = ir.Function(ir_mod, fn_type, name="multi3") - - a, b = fn.args - bb = fn.append_basic_block() - builder = ir.IRBuilder(bb) - - # This implementation mimicks compiler-rt's. 
- al = builder.trunc(a, i64) - bl = builder.trunc(b, i64) - ah = builder.trunc(builder.ashr(a, _64), i64) - bh = builder.trunc(builder.ashr(b, _64), i64) - - # Compute {rh, rl} = al * bl (unsigned 64-bit multiplication) - # rl = (al & 0xffffffff) * (bl & 0xffffffff) - rl = builder.mul(builder.and_(al, lower_mask), builder.and_(bl, lower_mask)) - # t = rl >> 32 - t = builder.lshr(rl, _32) - # rl &= 0xffffffff - rl = builder.and_(rl, lower_mask) - # t += (al >> 32) * (bl & 0xffffffff) - t = builder.add(t, builder.mul(builder.lshr(al, _32), - builder.and_(bl, lower_mask))) - # rl += t << 32 - rl = builder.add(rl, builder.shl(t, _32)) - # rh = t >> 32 - rh = builder.lshr(t, _32) - # t = rl >> 32 - t = builder.lshr(rl, _32) - # rl &= 0xffffffff - rl = builder.and_(rl, lower_mask) - # t += (bl >> 32) * (al & 0xffffffff) - t = builder.add(t, builder.mul(builder.lshr(bl, _32), - builder.and_(al, lower_mask))) - # rl += t << 32 - rl = builder.add(rl, builder.shl(t, _32)) - # rh += t >> 32 - rh = builder.add(rh, builder.lshr(t, _32)) - # rh += (al >> 32) * (bl >> 32) - rh = builder.add(rh, builder.mul(builder.lshr(al, _32), - builder.lshr(bl, _32))) - - # rh += (bh * al) + (bl * ah) - rh = builder.add(rh, builder.mul(bh, al)) - rh = builder.add(rh, builder.mul(bl, ah)) - - # r = rl + (rh << 64) - r = builder.zext(rl, i128) - r = builder.add(r, builder.shl(builder.zext(rh, i128), _64)) - builder.ret(r) - - library.add_ir_module(ir_mod) - library.finalize() - - return library - - -class _Installer(object): - - _installed = False - - def install(self, context): - """ - Install the functions into LLVM. This only needs to be done once, - as the mappings are persistent during the process lifetime. - """ - if not self._installed: - self._do_install(context) - self._installed = True - - -class _ExternalMathFunctions(_Installer): - """ - Map the math functions from the C runtime library into the LLVM - execution environment. 
- """ - - def _do_install(self, context): - is32bit = utils.MACHINE_BITS == 32 - c_helpers = _helperlib.c_helpers - - if sys.platform.startswith('win32') and is32bit: - # For Windows XP _ftol2 is not defined, we will just use - # _ftol as a replacement. - # On Windows 7, this is not necessary but will work anyway. - ftol = _get_msvcrt_symbol("_ftol") - _add_missing_symbol("_ftol2", ftol) - - elif sys.platform.startswith('linux') and is32bit: - _add_missing_symbol("__fixunsdfdi", c_helpers["fptoui"]) - _add_missing_symbol("__fixunssfdi", c_helpers["fptouif"]) - - if is32bit: - # Make the library immortal - self._multi3_lib = compile_multi3(context) - ptr = self._multi3_lib.get_pointer_to_function("multi3") - assert ptr - _add_missing_symbol("__multi3", ptr) - - # List available C-math - for fname in intrinsics.INTR_MATH: - # Force binding from CPython's C runtime library. - # (under Windows, different versions of the C runtime can - # be loaded at the same time, for example msvcrt100 by - # CPython and msvcrt120 by LLVM) - if need_kb982107 and fname.startswith('fmod'): - ll.add_symbol(fname, c_helpers['fixed_' + fname]) - else: - ll.add_symbol(fname, c_helpers[fname]) - - if need_kb982107: - # Make the library immortal - self._kb982107_lib = set_fnclex(context, c_helpers) - - -def set_fnclex(context, c_helpers): - """ - Install fnclex before fmod calls. 
- Workaround for https://support.microsoft.com/en-us/kb/982107 - """ - ptr_set_fnclex = c_helpers['set_fnclex'] - fn = ctypes.CFUNCTYPE(None, ctypes.c_void_p)(ptr_set_fnclex) - - library = compile_fnclex(context) - fnclex_ptr = library.get_pointer_to_function('fnclex') - fn(fnclex_ptr) - - return library - - -def compile_fnclex(context): - """ - Compile a function that calls fnclex to workround - https://support.microsoft.com/en-us/kb/982107 - """ - codegen = context.codegen() - library = codegen.create_library("kb982107") - ir_mod = """ -define void @fnclex() { - call void asm sideeffect "fnclex", ""() - ret void -} - """ - ll.initialize_native_asmparser() - library.add_llvm_module(ll.parse_assembly(ir_mod)) - library.finalize() - return library - - -c_math_functions = _ExternalMathFunctions() diff --git a/numba/numba/targets/fastmathpass.py b/numba/numba/targets/fastmathpass.py deleted file mode 100644 index 42ed31c7b..000000000 --- a/numba/numba/targets/fastmathpass.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import absolute_import, print_function - -from llvmlite import ir -from llvmlite.ir.transforms import Visitor, CallVisitor - - -class FastFloatBinOpVisitor(Visitor): - """ - A pass to add fastmath flag to float-binop instruction if they don't have - any flags. - """ - float_binops = frozenset(['fadd', 'fsub', 'fmul', 'fdiv', 'frem', 'fcmp']) - - def visit_Instruction(self, instr): - if instr.opname in self.float_binops: - if not instr.flags: - instr.flags.append('fast') - - -class FastFloatCallVisitor(CallVisitor): - """ - A pass to change all float function calls to use fastmath. - """ - def visit_Call(self, instr): - # Add to any call that has float/double return type - if instr.type in (ir.FloatType(), ir.DoubleType()): - instr.fastmath.add('fast') - - -def rewrite_module(mod): - """ - Rewrite the given LLVM module to use fastmath everywhere. 
- """ - FastFloatBinOpVisitor().visit(mod) - FastFloatCallVisitor().visit(mod) - diff --git a/numba/numba/targets/imputils.py b/numba/numba/targets/imputils.py deleted file mode 100644 index b71075ab6..000000000 --- a/numba/numba/targets/imputils.py +++ /dev/null @@ -1,427 +0,0 @@ -""" -Utilities to simplify the boilerplate for native lowering. -""" - -from __future__ import print_function, absolute_import, division - -import collections -import contextlib -import inspect -import functools - -from .. import typing, cgutils, types, utils -from .. typing.templates import BaseRegistryLoader - - -class Registry(object): - """ - A registry of function and attribute implementations. - """ - def __init__(self): - self.functions = [] - self.getattrs = [] - self.setattrs = [] - self.casts = [] - self.constants = [] - - def lower(self, func, *argtys): - """ - Decorate an implementation of *func* for the given argument types. - *func* may be an actual global function object, or any - pseudo-function supported by Numba, such as "getitem". - - The decorated implementation has the signature - (context, builder, sig, args). - """ - def decorate(impl): - self.functions.append((impl, func, argtys)) - return impl - return decorate - - def _decorate_attr(self, impl, ty, attr, impl_list, decorator): - real_impl = decorator(impl, ty, attr) - impl_list.append((real_impl, attr, real_impl.signature)) - return impl - - def lower_getattr(self, ty, attr): - """ - Decorate an implementation of __getattr__ for type *ty* and - the attribute *attr*. - - The decorated implementation will have the signature - (context, builder, typ, val). - """ - def decorate(impl): - return self._decorate_attr(impl, ty, attr, self.getattrs, - _decorate_getattr) - return decorate - - def lower_getattr_generic(self, ty): - """ - Decorate the fallback implementation of __getattr__ for type *ty*. - - The decorated implementation will have the signature - (context, builder, typ, val, attr). 
The implementation is - called for attributes which haven't been explicitly registered - with lower_getattr(). - """ - return self.lower_getattr(ty, None) - - def lower_setattr(self, ty, attr): - """ - Decorate an implementation of __setattr__ for type *ty* and - the attribute *attr*. - - The decorated implementation will have the signature - (context, builder, sig, args). - """ - def decorate(impl): - return self._decorate_attr(impl, ty, attr, self.setattrs, - _decorate_setattr) - return decorate - - def lower_setattr_generic(self, ty): - """ - Decorate the fallback implementation of __setattr__ for type *ty*. - - The decorated implementation will have the signature - (context, builder, sig, args, attr). The implementation is - called for attributes which haven't been explicitly registered - with lower_setattr(). - """ - return self.lower_setattr(ty, None) - - def lower_cast(self, fromty, toty): - """ - Decorate the implementation of implicit conversion between - *fromty* and *toty*. - - The decorated implementation will have the signature - (context, builder, fromty, toty, val). - """ - def decorate(impl): - self.casts.append((impl, (fromty, toty))) - return impl - return decorate - - def lower_constant(self, ty): - """ - Decorate the implementation for creating a constant of type *ty*. - - The decorated implementation will have the signature - (context, builder, ty, pyval). - """ - def decorate(impl): - self.constants.append((impl, (ty,))) - return impl - return decorate - - -class RegistryLoader(BaseRegistryLoader): - """ - An incremental loader for a target registry. 
- """ - registry_items = ('functions', 'getattrs', 'setattrs', 'casts', 'constants') - - -# Global registry for implementations of builtin operations -# (functions, attributes, type casts) -builtin_registry = Registry() - -lower_builtin = builtin_registry.lower -lower_getattr = builtin_registry.lower_getattr -lower_getattr_generic = builtin_registry.lower_getattr_generic -lower_setattr = builtin_registry.lower_setattr -lower_setattr_generic = builtin_registry.lower_setattr_generic -lower_cast = builtin_registry.lower_cast -lower_constant = builtin_registry.lower_constant - - -def _decorate_getattr(impl, ty, attr): - real_impl = impl - - if attr is not None: - def res(context, builder, typ, value, attr): - return real_impl(context, builder, typ, value) - else: - def res(context, builder, typ, value, attr): - return real_impl(context, builder, typ, value, attr) - - res.signature = (ty,) - res.attr = attr - return res - -def _decorate_setattr(impl, ty, attr): - real_impl = impl - - if attr is not None: - def res(context, builder, sig, args, attr): - return real_impl(context, builder, sig, args) - else: - def res(context, builder, sig, args, attr): - return real_impl(context, builder, sig, args, attr) - - res.signature = (ty, types.Any) - res.attr = attr - return res - - -def fix_returning_optional(context, builder, sig, status, retval): - # Reconstruct optional return type - if isinstance(sig.return_type, types.Optional): - value_type = sig.return_type.type - optional_none = context.make_optional_none(builder, value_type) - retvalptr = cgutils.alloca_once_value(builder, optional_none) - with builder.if_then(builder.not_(status.is_none)): - optional_value = context.make_optional_value( - builder, value_type, retval, - ) - builder.store(optional_value, retvalptr) - retval = builder.load(retvalptr) - return retval - -def user_function(fndesc, libs): - """ - A wrapper inserting code calling Numba-compiled *fndesc*. 
- """ - - def imp(context, builder, sig, args): - func = context.declare_function(builder.module, fndesc) - # env=None assumes this is a nopython function - status, retval = context.call_conv.call_function( - builder, func, fndesc.restype, fndesc.argtypes, args) - with cgutils.if_unlikely(builder, status.is_error): - context.call_conv.return_status_propagate(builder, status) - assert sig.return_type == fndesc.restype - # Reconstruct optional return type - retval = fix_returning_optional(context, builder, sig, status, retval) - # If the data representations don't match up - if retval.type != context.get_value_type(sig.return_type): - msg = "function returned {0} but expect {1}" - raise TypeError(msg.format(retval.type, sig.return_type)) - - return impl_ret_new_ref(context, builder, fndesc.restype, retval) - - imp.signature = fndesc.argtypes - imp.libs = tuple(libs) - return imp - - -def user_generator(gendesc, libs): - """ - A wrapper inserting code calling Numba-compiled *gendesc*. - """ - - def imp(context, builder, sig, args): - func = context.declare_function(builder.module, gendesc) - # env=None assumes this is a nopython function - status, retval = context.call_conv.call_function( - builder, func, gendesc.restype, gendesc.argtypes, args) - # Return raw status for caller to process StopIteration - return status, retval - - imp.libs = tuple(libs) - return imp - - -def iterator_impl(iterable_type, iterator_type): - """ - Decorator a given class as implementing *iterator_type* - (by providing an `iternext()` method). 
- """ - - def wrapper(cls): - # These are unbound methods - iternext = cls.iternext - - @iternext_impl - def iternext_wrapper(context, builder, sig, args, result): - (value,) = args - iterobj = cls(context, builder, value) - return iternext(iterobj, context, builder, result) - - lower_builtin('iternext', iterator_type)(iternext_wrapper) - return cls - - return wrapper - - -class _IternextResult(object): - """ - A result wrapper for iteration, passed by iternext_impl() into the - wrapped function. - """ - __slots__ = ('_context', '_builder', '_pairobj') - - def __init__(self, context, builder, pairobj): - self._context = context - self._builder = builder - self._pairobj = pairobj - - def set_exhausted(self): - """ - Mark the iterator as exhausted. - """ - self._pairobj.second = self._context.get_constant(types.boolean, False) - - def set_valid(self, is_valid=True): - """ - Mark the iterator as valid according to *is_valid* (which must - be either a Python boolean or a LLVM inst). - """ - if is_valid in (False, True): - is_valid = self._context.get_constant(types.boolean, is_valid) - self._pairobj.second = is_valid - - def yield_(self, value): - """ - Mark the iterator as yielding the given *value* (a LLVM inst). - """ - self._pairobj.first = value - - def is_valid(self): - """ - Return whether the iterator is marked valid. - """ - return self._context.get_argument_value(self._builder, - types.boolean, - self._pairobj.second) - - def yielded_value(self): - """ - Return the iterator's yielded value, if any. - """ - return self._pairobj.first - - -def iternext_impl(func): - """ - Wrap the given iternext() implementation so that it gets passed - an _IternextResult() object easing the returning of the iternext() - result pair. 
- - The wrapped function will be called with the following signature: - (context, builder, sig, args, iternext_result) - """ - - def wrapper(context, builder, sig, args): - pair_type = sig.return_type - pairobj = context.make_helper(builder, pair_type) - func(context, builder, sig, args, - _IternextResult(context, builder, pairobj)) - return impl_ret_borrowed(context, builder, - pair_type, pairobj._getvalue()) - return wrapper - - -def call_getiter(context, builder, iterable_type, val): - """ - Call the `getiter()` implementation for the given *iterable_type* - of value *val*, and return the corresponding LLVM inst. - """ - getiter_sig = typing.signature(iterable_type.iterator_type, iterable_type) - getiter_impl = context.get_function('getiter', getiter_sig) - return getiter_impl(builder, (val,)) - - -def call_iternext(context, builder, iterator_type, val): - """ - Call the `iternext()` implementation for the given *iterator_type* - of value *val*, and return a convenience _IternextResult() object - reflecting the results. - """ - itemty = iterator_type.yield_type - pair_type = types.Pair(itemty, types.boolean) - iternext_sig = typing.signature(pair_type, iterator_type) - iternext_impl = context.get_function('iternext', iternext_sig) - val = iternext_impl(builder, (val,)) - pairobj = context.make_helper(builder, pair_type, val) - return _IternextResult(context, builder, pairobj) - - -def call_len(context, builder, ty, val): - """ - Call len() on the given value. Return None if len() isn't defined on - this type. - """ - try: - len_impl = context.get_function(len, typing.signature(types.intp, ty,)) - except NotImplementedError: - return None - else: - return len_impl(builder, (val,)) - - -_ForIterLoop = collections.namedtuple('_ForIterLoop', - ('value', 'do_break')) - - -@contextlib.contextmanager -def for_iter(context, builder, iterable_type, val): - """ - Simulate a for loop on the given iterable. 
Yields a namedtuple with - the given members: - - `value` is the value being yielded - - `do_break` is a callable to early out of the loop - """ - iterator_type = iterable_type.iterator_type - iterval = call_getiter(context, builder, iterable_type, val) - - bb_body = builder.append_basic_block('for_iter.body') - bb_end = builder.append_basic_block('for_iter.end') - - def do_break(): - builder.branch(bb_end) - - builder.branch(bb_body) - - with builder.goto_block(bb_body): - res = call_iternext(context, builder, iterator_type, iterval) - with builder.if_then(builder.not_(res.is_valid()), likely=False): - builder.branch(bb_end) - yield _ForIterLoop(res.yielded_value(), do_break) - builder.branch(bb_body) - - builder.position_at_end(bb_end) - if context.enable_nrt: - context.nrt.decref(builder, iterator_type, iterval) - - -def impl_ret_new_ref(ctx, builder, retty, ret): - """ - The implementation returns a new reference. - """ - return ret - - -def impl_ret_borrowed(ctx, builder, retty, ret): - """ - The implementation returns a borrowed reference. - This function automatically incref so that the implementation is - returning a new reference. - """ - if ctx.enable_nrt: - ctx.nrt.incref(builder, retty, ret) - return ret - - -def impl_ret_untracked(ctx, builder, retty, ret): - """ - The return type is not a NRT object. - """ - return ret - - -@contextlib.contextmanager -def force_error_model(context, model_name='numpy'): - """ - Temporarily change the context's error model. - """ - from . 
import callconv - - old_error_model = context.error_model - context.error_model = callconv.create_error_model(model_name, context) - try: - yield - finally: - context.error_model = old_error_model diff --git a/numba/numba/targets/intrinsics.py b/numba/numba/targets/intrinsics.py deleted file mode 100644 index 7010a5355..000000000 --- a/numba/numba/targets/intrinsics.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -LLVM pass that converts intrinsic into other math calls -""" -from __future__ import print_function, absolute_import - -from llvmlite import ir - - -class _DivmodFixer(ir.Visitor): - def visit_Instruction(self, instr): - if instr.type == ir.IntType(64): - if instr.opname in ['srem', 'urem', 'sdiv', 'udiv']: - name = 'numba_{op}'.format(op=instr.opname) - fn = self.module.globals.get(name) - # Declare the function if it doesn't already exist - if fn is None: - opty = instr.type - sdivfnty = ir.FunctionType(opty, [opty, opty]) - fn = ir.Function(self.module, sdivfnty, name=name) - # Replace the operation with a call to the builtin - repl = ir.CallInstr(parent=instr.parent, func=fn, - args=instr.operands, name=instr.name) - instr.parent.replace(instr, repl) - - -def fix_divmod(mod): - """Replace division and reminder instructions to builtins calls - """ - _DivmodFixer().visit(mod) - - -INTR_TO_CMATH = { - "llvm.pow.f32": "powf", - "llvm.pow.f64": "pow", - - "llvm.sin.f32": "sinf", - "llvm.sin.f64": "sin", - - "llvm.cos.f32": "cosf", - "llvm.cos.f64": "cos", - - "llvm.sqrt.f32": "sqrtf", - "llvm.sqrt.f64": "sqrt", - - "llvm.exp.f32": "expf", - "llvm.exp.f64": "exp", - - "llvm.log.f32": "logf", - "llvm.log.f64": "log", - - "llvm.log10.f32": "log10f", - "llvm.log10.f64": "log10", - - "llvm.fabs.f32": "fabsf", - "llvm.fabs.f64": "fabs", - - "llvm.floor.f32": "floorf", - "llvm.floor.f64": "floor", - - "llvm.ceil.f32": "ceilf", - "llvm.ceil.f64": "ceil", - - "llvm.trunc.f32": "truncf", - "llvm.trunc.f64": "trunc", -} - -OTHER_CMATHS = ''' -tan -tanf -sinh -sinhf -cosh 
-coshf -tanh -tanhf -asin -asinf -acos -acosf -atan -atanf -atan2 -atan2f -atan2_fixed -asinh -asinhf -acosh -acoshf -atanh -atanhf -expm1 -expm1f -log1p -log1pf -log10 -log10f -fmod -fmodf -round -roundf -'''.split() - -INTR_MATH = frozenset(INTR_TO_CMATH.values()) | frozenset(OTHER_CMATHS) diff --git a/numba/numba/targets/iterators.py b/numba/numba/targets/iterators.py deleted file mode 100644 index 343f38d8f..000000000 --- a/numba/numba/targets/iterators.py +++ /dev/null @@ -1,141 +0,0 @@ -""" -Implementation of various iterable and iterator types. -""" - -from numba import types, cgutils -from numba.targets.imputils import ( - lower_builtin, iternext_impl, call_iternext, call_getiter, - impl_ret_borrowed, impl_ret_new_ref) - - - -@lower_builtin('getiter', types.IteratorType) -def iterator_getiter(context, builder, sig, args): - [it] = args - return impl_ret_borrowed(context, builder, sig.return_type, it) - -#------------------------------------------------------------------------------- -# builtin `enumerate` implementation - -@lower_builtin(enumerate, types.IterableType) -@lower_builtin(enumerate, types.IterableType, types.Integer) -def make_enumerate_object(context, builder, sig, args): - assert len(args) == 1 or len(args) == 2 # enumerate(it) or enumerate(it, start) - srcty = sig.args[0] - - if len(args) == 1: - src = args[0] - start_val = context.get_constant(types.intp, 0) - elif len(args) == 2: - src = args[0] - start_val = context.cast(builder, args[1], sig.args[1], types.intp) - - iterobj = call_getiter(context, builder, srcty, src) - - enum = context.make_helper(builder, sig.return_type) - - countptr = cgutils.alloca_once(builder, start_val.type) - builder.store(start_val, countptr) - - enum.count = countptr - enum.iter = iterobj - - res = enum._getvalue() - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin('iternext', types.EnumerateType) -@iternext_impl -def iternext_enumerate(context, builder, sig, args, result): - 
[enumty] = sig.args - [enum] = args - - enum = context.make_helper(builder, enumty, value=enum) - - count = builder.load(enum.count) - ncount = builder.add(count, context.get_constant(types.intp, 1)) - builder.store(ncount, enum.count) - - srcres = call_iternext(context, builder, enumty.source_type, enum.iter) - is_valid = srcres.is_valid() - result.set_valid(is_valid) - - with builder.if_then(is_valid): - srcval = srcres.yielded_value() - result.yield_(context.make_tuple(builder, enumty.yield_type, - [count, srcval])) - - -#------------------------------------------------------------------------------- -# builtin `zip` implementation - -@lower_builtin(zip, types.VarArg(types.Any)) -def make_zip_object(context, builder, sig, args): - zip_type = sig.return_type - - assert len(args) == len(zip_type.source_types) - - zipobj = context.make_helper(builder, zip_type) - - for i, (arg, srcty) in enumerate(zip(args, sig.args)): - zipobj[i] = call_getiter(context, builder, srcty, arg) - - res = zipobj._getvalue() - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin('iternext', types.ZipType) -@iternext_impl -def iternext_zip(context, builder, sig, args, result): - [zip_type] = sig.args - [zipobj] = args - - zipobj = context.make_helper(builder, zip_type, value=zipobj) - - if len(zipobj) == 0: - # zip() is an empty iterator - result.set_exhausted() - return - - p_ret_tup = cgutils.alloca_once(builder, - context.get_value_type(zip_type.yield_type)) - p_is_valid = cgutils.alloca_once_value(builder, value=cgutils.true_bit) - - for i, (iterobj, srcty) in enumerate(zip(zipobj, zip_type.source_types)): - is_valid = builder.load(p_is_valid) - # Avoid calling the remaining iternext if a iterator has been exhausted - with builder.if_then(is_valid): - srcres = call_iternext(context, builder, srcty, iterobj) - is_valid = builder.and_(is_valid, srcres.is_valid()) - builder.store(is_valid, p_is_valid) - val = srcres.yielded_value() - ptr = 
cgutils.gep_inbounds(builder, p_ret_tup, 0, i) - builder.store(val, ptr) - - is_valid = builder.load(p_is_valid) - result.set_valid(is_valid) - - with builder.if_then(is_valid): - result.yield_(builder.load(p_ret_tup)) - - -#------------------------------------------------------------------------------- -# generator implementation - -@lower_builtin('iternext', types.Generator) -@iternext_impl -def iternext_zip(context, builder, sig, args, result): - genty, = sig.args - gen, = args - # XXX We should link with the generator's library. - # Currently, this doesn't make a difference as the library has already - # been linked for the generator init function. - impl = context.get_generator_impl(genty) - status, retval = impl(context, builder, sig, args) - with cgutils.if_likely(builder, status.is_ok): - result.set_valid(True) - result.yield_(retval) - with cgutils.if_unlikely(builder, status.is_stop_iteration): - result.set_exhausted() - with cgutils.if_unlikely(builder, - builder.and_(status.is_error, - builder.not_(status.is_stop_iteration))): - context.call_conv.return_status_propagate(builder, status) diff --git a/numba/numba/targets/linalg.py b/numba/numba/targets/linalg.py deleted file mode 100644 index 3084fbd72..000000000 --- a/numba/numba/targets/linalg.py +++ /dev/null @@ -1,2795 +0,0 @@ -""" -Implementation of linear algebra operations. 
-""" - -from __future__ import print_function, absolute_import, division - -import contextlib - -from llvmlite import ir - -import numpy as np - -from numba import types, cgutils - -from numba.targets.imputils import (lower_builtin, impl_ret_borrowed, - impl_ret_new_ref, impl_ret_untracked) -from numba.typing import signature -from numba.extending import overload, register_jitable -from numba.numpy_support import version as numpy_version -from numba import types -from numba import numpy_support as np_support -from .arrayobj import make_array, _empty_nd_impl, array_copy -from ..errors import TypingError - -ll_char = ir.IntType(8) -ll_char_p = ll_char.as_pointer() -ll_void_p = ll_char_p -ll_intc = ir.IntType(32) -ll_intc_p = ll_intc.as_pointer() -intp_t = cgutils.intp_t -ll_intp_p = intp_t.as_pointer() - - -# fortran int type, this needs to match the F_INT C declaration in -# _lapack.c and is present to accomodate potential future 64bit int -# based LAPACK use. -F_INT_nptype = np.int32 -F_INT_nbtype = types.int32 - -# BLAS kinds as letters -_blas_kinds = { - types.float32: 's', - types.float64: 'd', - types.complex64: 'c', - types.complex128: 'z', -} - - -def get_blas_kind(dtype, func_name=""): - kind = _blas_kinds.get(dtype) - if kind is None: - raise TypeError("unsupported dtype for %s()" % (func_name,)) - return kind - - -def ensure_blas(): - try: - import scipy.linalg.cython_blas - except ImportError: - raise ImportError("scipy 0.16+ is required for linear algebra") - - -def ensure_lapack(): - try: - import scipy.linalg.cython_lapack - except ImportError: - raise ImportError("scipy 0.16+ is required for linear algebra") - - -def make_constant_slot(context, builder, ty, val): - const = context.get_constant_generic(builder, ty, val) - return cgutils.alloca_once_value(builder, const) - - -class _BLAS: - """ - Functions to return type signatures for wrapped - BLAS functions. 
- """ - - def __init__(self): - ensure_blas() - - @classmethod - def numba_xxnrm2(cls, dtype): - rtype = getattr(dtype, "underlying_float", dtype) - sig = types.intc(types.char, # kind - types.intp, # n - types.CPointer(dtype), # x - types.intp, # incx - types.CPointer(rtype)) # returned - - return types.ExternalFunction("numba_xxnrm2", sig) - - @classmethod - def numba_xxgemm(cls, dtype): - sig = types.intc( - types.char, # kind - types.char, # transa - types.char, # transb - types.intp, # m - types.intp, # n - types.intp, # k - types.CPointer(dtype), # alpha - types.CPointer(dtype), # a - types.intp, # lda - types.CPointer(dtype), # b - types.intp, # ldb - types.CPointer(dtype), # beta - types.CPointer(dtype), # c - types.intp # ldc - ) - return types.ExternalFunction("numba_xxgemm", sig) - - -class _LAPACK: - """ - Functions to return type signatures for wrapped - LAPACK functions. - """ - - def __init__(self): - ensure_lapack() - - @classmethod - def numba_xxgetrf(cls, dtype): - sig = types.intc(types.char, # kind - types.intp, # m - types.intp, # n - types.CPointer(dtype), # a - types.intp, # lda - types.CPointer(F_INT_nbtype) # ipiv - ) - return types.ExternalFunction("numba_xxgetrf", sig) - - @classmethod - def numba_ez_xxgetri(cls, dtype): - sig = types.intc(types.char, # kind - types.intp, # n - types.CPointer(dtype), # a - types.intp, # lda - types.CPointer(F_INT_nbtype) # ipiv - ) - return types.ExternalFunction("numba_ez_xxgetri", sig) - - @classmethod - def numba_ez_rgeev(cls, dtype): - sig = types.intc(types.char, # kind - types.char, # jobvl - types.char, # jobvr - types.intp, # n - types.CPointer(dtype), # a - types.intp, # lda - types.CPointer(dtype), # wr - types.CPointer(dtype), # wi - types.CPointer(dtype), # vl - types.intp, # ldvl - types.CPointer(dtype), # vr - types.intp # ldvr - ) - return types.ExternalFunction("numba_ez_rgeev", sig) - - @classmethod - def numba_ez_cgeev(cls, dtype): - sig = types.intc(types.char, # kind - types.char, # 
jobvl - types.char, # jobvr - types.intp, # n - types.CPointer(dtype), # a - types.intp, # lda - types.CPointer(dtype), # w - types.CPointer(dtype), # vl - types.intp, # ldvl - types.CPointer(dtype), # vr - types.intp # ldvr - ) - return types.ExternalFunction("numba_ez_cgeev", sig) - - @classmethod - def numba_ez_xxxevd(cls, dtype): - wtype = getattr(dtype, "underlying_float", dtype) - sig = types.intc(types.char, # kind - types.char, # jobz - types.char, # uplo - types.intp, # n - types.CPointer(dtype), # a - types.intp, # lda - types.CPointer(wtype), # w - ) - return types.ExternalFunction("numba_ez_xxxevd", sig) - - @classmethod - def numba_xxpotrf(cls, dtype): - sig = types.intc(types.char, # kind - types.char, # uplo - types.intp, # n - types.CPointer(dtype), # a - types.intp # lda - ) - return types.ExternalFunction("numba_xxpotrf", sig) - - @classmethod - def numba_ez_gesdd(cls, dtype): - stype = getattr(dtype, "underlying_float", dtype) - sig = types.intc( - types.char, # kind - types.char, # jobz - types.intp, # m - types.intp, # n - types.CPointer(dtype), # a - types.intp, # lda - types.CPointer(stype), # s - types.CPointer(dtype), # u - types.intp, # ldu - types.CPointer(dtype), # vt - types.intp # ldvt - ) - - return types.ExternalFunction("numba_ez_gesdd", sig) - - @classmethod - def numba_ez_geqrf(cls, dtype): - sig = types.intc( - types.char, # kind - types.intp, # m - types.intp, # n - types.CPointer(dtype), # a - types.intp, # lda - types.CPointer(dtype), # tau - ) - return types.ExternalFunction("numba_ez_geqrf", sig) - - @classmethod - def numba_ez_xxgqr(cls, dtype): - sig = types.intc( - types.char, # kind - types.intp, # m - types.intp, # n - types.intp, # k - types.CPointer(dtype), # a - types.intp, # lda - types.CPointer(dtype), # tau - ) - return types.ExternalFunction("numba_ez_xxgqr", sig) - - @classmethod - def numba_ez_gelsd(cls, dtype): - rtype = getattr(dtype, "underlying_float", dtype) - sig = types.intc( - types.char, # kind - 
types.intp, # m - types.intp, # n - types.intp, # nrhs - types.CPointer(dtype), # a - types.intp, # lda - types.CPointer(dtype), # b - types.intp, # ldb - types.CPointer(rtype), # S - types.float64, # rcond - types.CPointer(types.intc) # rank - ) - return types.ExternalFunction("numba_ez_gelsd", sig) - - @classmethod - def numba_xgesv(cls, dtype): - sig = types.intc( - types.char, # kind - types.intp, # n - types.intp, # nhrs - types.CPointer(dtype), # a - types.intp, # lda - types.CPointer(F_INT_nbtype), # ipiv - types.CPointer(dtype), # b - types.intp # ldb - ) - return types.ExternalFunction("numba_xgesv", sig) - - -@contextlib.contextmanager -def make_contiguous(context, builder, sig, args): - """ - Ensure that all array arguments are contiguous, if necessary by - copying them. - A new (sig, args) tuple is yielded. - """ - newtys = [] - newargs = [] - copies = [] - for ty, val in zip(sig.args, args): - if not isinstance(ty, types.Array) or ty.layout in 'CF': - newty, newval = ty, val - else: - newty = ty.copy(layout='C') - copysig = signature(newty, ty) - newval = array_copy(context, builder, copysig, (val,)) - copies.append((newty, newval)) - newtys.append(newty) - newargs.append(newval) - yield signature(sig.return_type, *newtys), tuple(newargs) - for ty, val in copies: - context.nrt.decref(builder, ty, val) - - -def check_c_int(context, builder, n): - """ - Check whether *n* fits in a C `int`. - """ - _maxint = 2**31 - 1 - - def impl(n): - if n > _maxint: - raise OverflowError("array size too large to fit in C int") - - context.compile_internal(builder, impl, - signature(types.none, types.intp), (n,)) - - -def check_blas_return(context, builder, res): - """ - Check the integer error return from one of the BLAS wrappers in - _helperlib.c. 
- """ - with builder.if_then(cgutils.is_not_null(builder, res), likely=False): - # Those errors shouldn't happen, it's easier to just abort the process - pyapi = context.get_python_api(builder) - pyapi.gil_ensure() - pyapi.fatal_error("BLAS wrapper returned with an error") - - -def check_lapack_return(context, builder, res): - """ - Check the integer error return from one of the LAPACK wrappers in - _helperlib.c. - """ - with builder.if_then(cgutils.is_not_null(builder, res), likely=False): - # Those errors shouldn't happen, it's easier to just abort the process - pyapi = context.get_python_api(builder) - pyapi.gil_ensure() - pyapi.fatal_error("LAPACK wrapper returned with an error") - - -def call_xxdot(context, builder, conjugate, dtype, - n, a_data, b_data, out_data): - """ - Call the BLAS vector * vector product function for the given arguments. - """ - fnty = ir.FunctionType(ir.IntType(32), - [ll_char, ll_char, intp_t, # kind, conjugate, n - ll_void_p, ll_void_p, ll_void_p, # a, b, out - ]) - fn = builder.module.get_or_insert_function(fnty, name="numba_xxdot") - - kind = get_blas_kind(dtype) - kind_val = ir.Constant(ll_char, ord(kind)) - conjugate = ir.Constant(ll_char, int(conjugate)) - - res = builder.call(fn, (kind_val, conjugate, n, - builder.bitcast(a_data, ll_void_p), - builder.bitcast(b_data, ll_void_p), - builder.bitcast(out_data, ll_void_p))) - check_blas_return(context, builder, res) - - -def call_xxgemv(context, builder, do_trans, - m_type, m_shapes, m_data, v_data, out_data): - """ - Call the BLAS matrix * vector product function for the given arguments. 
- """ - fnty = ir.FunctionType(ir.IntType(32), - [ll_char, ll_char, # kind, trans - intp_t, intp_t, # m, n - ll_void_p, ll_void_p, intp_t, # alpha, a, lda - ll_void_p, ll_void_p, ll_void_p, # x, beta, y - ]) - fn = builder.module.get_or_insert_function(fnty, name="numba_xxgemv") - - dtype = m_type.dtype - alpha = make_constant_slot(context, builder, dtype, 1.0) - beta = make_constant_slot(context, builder, dtype, 0.0) - - if m_type.layout == 'F': - m, n = m_shapes - lda = m_shapes[0] - else: - n, m = m_shapes - lda = m_shapes[1] - - kind = get_blas_kind(dtype) - kind_val = ir.Constant(ll_char, ord(kind)) - trans = ir.Constant(ll_char, ord('t') if do_trans else ord('n')) - - res = builder.call(fn, (kind_val, trans, m, n, - builder.bitcast(alpha, ll_void_p), - builder.bitcast(m_data, ll_void_p), lda, - builder.bitcast(v_data, ll_void_p), - builder.bitcast(beta, ll_void_p), - builder.bitcast(out_data, ll_void_p))) - check_blas_return(context, builder, res) - - -def call_xxgemm(context, builder, - x_type, x_shapes, x_data, - y_type, y_shapes, y_data, - out_type, out_shapes, out_data): - """ - Call the BLAS matrix * matrix product function for the given arguments. 
- """ - fnty = ir.FunctionType(ir.IntType(32), - [ll_char, # kind - ll_char, ll_char, # transa, transb - intp_t, intp_t, intp_t, # m, n, k - ll_void_p, ll_void_p, intp_t, # alpha, a, lda - ll_void_p, intp_t, ll_void_p, # b, ldb, beta - ll_void_p, intp_t, # c, ldc - ]) - fn = builder.module.get_or_insert_function(fnty, name="numba_xxgemm") - - m, k = x_shapes - _k, n = y_shapes - dtype = x_type.dtype - alpha = make_constant_slot(context, builder, dtype, 1.0) - beta = make_constant_slot(context, builder, dtype, 0.0) - - trans = ir.Constant(ll_char, ord('t')) - notrans = ir.Constant(ll_char, ord('n')) - - def get_array_param(ty, shapes, data): - return ( - # Transpose if layout different from result's - notrans if ty.layout == out_type.layout else trans, - # Size of the inner dimension in physical array order - shapes[1] if ty.layout == 'C' else shapes[0], - # The data pointer, unit-less - builder.bitcast(data, ll_void_p), - ) - - transa, lda, data_a = get_array_param(y_type, y_shapes, y_data) - transb, ldb, data_b = get_array_param(x_type, x_shapes, x_data) - _, ldc, data_c = get_array_param(out_type, out_shapes, out_data) - - kind = get_blas_kind(dtype) - kind_val = ir.Constant(ll_char, ord(kind)) - - res = builder.call(fn, (kind_val, transa, transb, n, m, k, - builder.bitcast(alpha, ll_void_p), data_a, lda, - data_b, ldb, builder.bitcast(beta, ll_void_p), - data_c, ldc)) - check_blas_return(context, builder, res) - - -def dot_2_mm(context, builder, sig, args): - """ - np.dot(matrix, matrix) - """ - def dot_impl(a, b): - m, k = a.shape - _k, n = b.shape - out = np.empty((m, n), a.dtype) - return np.dot(a, b, out) - - res = context.compile_internal(builder, dot_impl, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - - -def dot_2_vm(context, builder, sig, args): - """ - np.dot(vector, matrix) - """ - def dot_impl(a, b): - m, = a.shape - _m, n = b.shape - out = np.empty((n, ), a.dtype) - return np.dot(a, b, out) - - res = 
context.compile_internal(builder, dot_impl, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - - -def dot_2_mv(context, builder, sig, args): - """ - np.dot(matrix, vector) - """ - def dot_impl(a, b): - m, n = a.shape - _n, = b.shape - out = np.empty((m, ), a.dtype) - return np.dot(a, b, out) - - res = context.compile_internal(builder, dot_impl, sig, args) - return impl_ret_new_ref(context, builder, sig.return_type, res) - - -def dot_2_vv(context, builder, sig, args, conjugate=False): - """ - np.dot(vector, vector) - np.vdot(vector, vector) - """ - aty, bty = sig.args - dtype = sig.return_type - a = make_array(aty)(context, builder, args[0]) - b = make_array(bty)(context, builder, args[1]) - n, = cgutils.unpack_tuple(builder, a.shape) - - def check_args(a, b): - m, = a.shape - n, = b.shape - if m != n: - raise ValueError("incompatible array sizes for np.dot(a, b) " - "(vector * vector)") - - context.compile_internal(builder, check_args, - signature(types.none, *sig.args), args) - check_c_int(context, builder, n) - - out = cgutils.alloca_once(builder, context.get_value_type(dtype)) - call_xxdot(context, builder, conjugate, dtype, n, a.data, b.data, out) - return builder.load(out) - - -@lower_builtin(np.dot, types.Array, types.Array) -@lower_builtin('@', types.Array, types.Array) -def dot_2(context, builder, sig, args): - """ - np.dot(a, b) - a @ b - """ - ensure_blas() - - with make_contiguous(context, builder, sig, args) as (sig, args): - ndims = [x.ndim for x in sig.args[:2]] - if ndims == [2, 2]: - return dot_2_mm(context, builder, sig, args) - elif ndims == [2, 1]: - return dot_2_mv(context, builder, sig, args) - elif ndims == [1, 2]: - return dot_2_vm(context, builder, sig, args) - elif ndims == [1, 1]: - return dot_2_vv(context, builder, sig, args) - else: - assert 0 - - -@lower_builtin(np.vdot, types.Array, types.Array) -def vdot(context, builder, sig, args): - """ - np.vdot(a, b) - """ - ensure_blas() - - with 
make_contiguous(context, builder, sig, args) as (sig, args): - return dot_2_vv(context, builder, sig, args, conjugate=True) - - -def dot_3_vm_check_args(a, b, out): - m, = a.shape - _m, n = b.shape - if m != _m: - raise ValueError("incompatible array sizes for " - "np.dot(a, b) (vector * matrix)") - if out.shape != (n,): - raise ValueError("incompatible output array size for " - "np.dot(a, b, out) (vector * matrix)") - - -def dot_3_mv_check_args(a, b, out): - m, _n = a.shape - n, = b.shape - if n != _n: - raise ValueError("incompatible array sizes for np.dot(a, b) " - "(matrix * vector)") - if out.shape != (m,): - raise ValueError("incompatible output array size for " - "np.dot(a, b, out) (matrix * vector)") - -def dot_3_vm(context, builder, sig, args): - """ - np.dot(vector, matrix, out) - np.dot(matrix, vector, out) - """ - xty, yty, outty = sig.args - assert outty == sig.return_type - dtype = xty.dtype - - x = make_array(xty)(context, builder, args[0]) - y = make_array(yty)(context, builder, args[1]) - out = make_array(outty)(context, builder, args[2]) - x_shapes = cgutils.unpack_tuple(builder, x.shape) - y_shapes = cgutils.unpack_tuple(builder, y.shape) - out_shapes = cgutils.unpack_tuple(builder, out.shape) - if xty.ndim < yty.ndim: - # Vector * matrix - # Asked for x * y, we will compute y.T * x - mty = yty - m_shapes = y_shapes - do_trans = yty.layout == 'F' - m_data, v_data = y.data, x.data - check_args = dot_3_vm_check_args - - - else: - # Matrix * vector - # We will compute x * y - mty = xty - m_shapes = x_shapes - do_trans = xty.layout == 'C' - m_data, v_data = x.data, y.data - check_args = dot_3_mv_check_args - - - context.compile_internal(builder, check_args, - signature(types.none, *sig.args), args) - for val in m_shapes: - check_c_int(context, builder, val) - - call_xxgemv(context, builder, do_trans, mty, m_shapes, m_data, - v_data, out.data) - - return impl_ret_borrowed(context, builder, sig.return_type, - out._getvalue()) - - -def dot_3_mm(context, 
builder, sig, args): - """ - np.dot(matrix, matrix, out) - """ - xty, yty, outty = sig.args - assert outty == sig.return_type - dtype = xty.dtype - - x = make_array(xty)(context, builder, args[0]) - y = make_array(yty)(context, builder, args[1]) - out = make_array(outty)(context, builder, args[2]) - x_shapes = cgutils.unpack_tuple(builder, x.shape) - y_shapes = cgutils.unpack_tuple(builder, y.shape) - out_shapes = cgutils.unpack_tuple(builder, out.shape) - m, k = x_shapes - _k, n = y_shapes - - # The only case Numpy supports - assert outty.layout == 'C' - - def check_args(a, b, out): - m, k = a.shape - _k, n = b.shape - if k != _k: - raise ValueError("incompatible array sizes for np.dot(a, b) " - "(matrix * matrix)") - if out.shape != (m, n): - raise ValueError("incompatible output array size for " - "np.dot(a, b, out) (matrix * matrix)") - - context.compile_internal(builder, check_args, - signature(types.none, *sig.args), args) - check_c_int(context, builder, m) - check_c_int(context, builder, k) - check_c_int(context, builder, n) - - x_data = x.data - y_data = y.data - out_data = out.data - - # Check whether any of the operands is really a 1-d vector represented - # as a (1, k) or (k, 1) 2-d array. In those cases, it is pessimal - # to call the generic matrix * matrix product BLAS function. 
- one = ir.Constant(intp_t, 1) - is_left_vec = builder.icmp_signed('==', m, one) - is_right_vec = builder.icmp_signed('==', n, one) - - with builder.if_else(is_right_vec) as (r_vec, r_mat): - with r_vec: - with builder.if_else(is_left_vec) as (v_v, m_v): - with v_v: - # V * V - call_xxdot(context, builder, False, dtype, - k, x_data, y_data, out_data) - with m_v: - # M * V - do_trans = xty.layout == outty.layout - call_xxgemv(context, builder, do_trans, - xty, x_shapes, x_data, y_data, out_data) - with r_mat: - with builder.if_else(is_left_vec) as (v_m, m_m): - with v_m: - # V * M - do_trans = yty.layout != outty.layout - call_xxgemv(context, builder, do_trans, - yty, y_shapes, y_data, x_data, out_data) - with m_m: - # M * M - call_xxgemm(context, builder, - xty, x_shapes, x_data, - yty, y_shapes, y_data, - outty, out_shapes, out_data) - - return impl_ret_borrowed(context, builder, sig.return_type, - out._getvalue()) - - -@lower_builtin(np.dot, types.Array, types.Array, - types.Array) -def dot_3(context, builder, sig, args): - """ - np.dot(a, b, out) - """ - ensure_blas() - - with make_contiguous(context, builder, sig, args) as (sig, args): - ndims = set(x.ndim for x in sig.args[:2]) - if ndims == set([2]): - return dot_3_mm(context, builder, sig, args) - elif ndims == set([1, 2]): - return dot_3_vm(context, builder, sig, args) - else: - assert 0 - -fatal_error_sig = types.intc() -fatal_error_func = types.ExternalFunction("numba_fatal_error", fatal_error_sig) - - -@register_jitable -def _check_finite_matrix(a): - for v in np.nditer(a): - if not np.isfinite(v.item()): - raise np.linalg.LinAlgError( - "Array must not contain infs or NaNs.") - - -def _check_linalg_matrix(a, func_name, la_prefix=True): - # la_prefix is present as some functions, e.g. 
np.trace() - # are documented under "linear algebra" but aren't in the - # module - prefix = "np.linalg" if la_prefix else "np" - interp = (prefix, func_name) - # Unpack optional type - if isinstance(a, types.Optional): - a = a.type - if not isinstance(a, types.Array): - msg = "%s.%s() only supported for array types" % interp - raise TypingError(msg, highlighting=False) - if not a.ndim == 2: - msg = "%s.%s() only supported on 2-D arrays." % interp - raise TypingError(msg, highlighting=False) - if not isinstance(a.dtype, (types.Float, types.Complex)): - msg = "%s.%s() only supported on "\ - "float and complex arrays." % interp - raise TypingError(msg, highlighting=False) - - -def _check_homogeneous_types(func_name, *types): - t0 = types[0].dtype - for t in types[1:]: - if t.dtype != t0: - msg = "np.linalg.%s() only supports inputs that have homogeneous dtypes." % func_name - raise TypingError(msg, highlighting=False) - - -@register_jitable -def _inv_err_handler(r): - if r != 0: - if r < 0: - fatal_error_func() - assert 0 # unreachable - if r > 0: - raise np.linalg.LinAlgError( - "Matrix is singular to machine precision.") - -@register_jitable -def _dummy_liveness_func(a): - """pass a list of variables to be preserved through dead code elimination""" - return a[0] - - -@overload(np.linalg.inv) -def inv_impl(a): - ensure_lapack() - - _check_linalg_matrix(a, "inv") - - numba_xxgetrf = _LAPACK().numba_xxgetrf(a.dtype) - - numba_xxgetri = _LAPACK().numba_ez_xxgetri(a.dtype) - - kind = ord(get_blas_kind(a.dtype, "inv")) - - F_layout = a.layout == 'F' - - def inv_impl(a): - n = a.shape[-1] - if a.shape[-2] != n: - msg = "Last 2 dimensions of the array must be square." 
- raise np.linalg.LinAlgError(msg) - - _check_finite_matrix(a) - - if F_layout: - acpy = np.copy(a) - else: - acpy = np.asfortranarray(a) - - if n == 0: - return acpy - - ipiv = np.empty(n, dtype=F_INT_nptype) - - r = numba_xxgetrf(kind, n, n, acpy.ctypes, n, ipiv.ctypes) - _inv_err_handler(r) - - r = numba_xxgetri(kind, n, acpy.ctypes, n, ipiv.ctypes) - _inv_err_handler(r) - - # help liveness analysis - _dummy_liveness_func([acpy.size, ipiv.size]) - return acpy - - return inv_impl - - -@register_jitable -def _handle_err_maybe_convergence_problem(r): - if r != 0: - if r < 0: - fatal_error_func() - assert 0 # unreachable - if r > 0: - raise ValueError("Internal algorithm failed to converge.") - - -def _check_linalg_1_or_2d_matrix(a, func_name, la_prefix=True): - # la_prefix is present as some functions, e.g. np.trace() - # are documented under "linear algebra" but aren't in the - # module - prefix = "np.linalg" if la_prefix else "np" - interp = (prefix, func_name) - # checks that a matrix is 1 or 2D - if not isinstance(a, types.Array): - raise TypingError("%s.%s() only supported for array types " - % interp) - if not a.ndim <= 2: - raise TypingError("%s.%s() only supported on 1 and 2-D arrays " - % interp) - if not isinstance(a.dtype, (types.Float, types.Complex)): - raise TypingError("%s.%s() only supported on " - "float and complex arrays." % interp) - -if numpy_version >= (1, 8): - - @overload(np.linalg.cholesky) - def cho_impl(a): - ensure_lapack() - - _check_linalg_matrix(a, "cholesky") - - numba_xxpotrf = _LAPACK().numba_xxpotrf(a.dtype) - - kind = ord(get_blas_kind(a.dtype, "cholesky")) - UP = ord('U') - LO = ord('L') - - def cho_impl(a): - n = a.shape[-1] - if a.shape[-2] != n: - msg = "Last 2 dimensions of the array must be square." 
- raise np.linalg.LinAlgError(msg) - - # The output is allocated in C order - out = a.copy() - - if n == 0: - return out - - # Pass UP since xxpotrf() operates in F order - # The semantics ensure this works fine - # (out is really its Hermitian in F order, but UP instructs - # xxpotrf to compute the Hermitian of the upper triangle - # => they cancel each other) - r = numba_xxpotrf(kind, UP, n, out.ctypes, n) - if r != 0: - if r < 0: - fatal_error_func() - assert 0 # unreachable - if r > 0: - raise np.linalg.LinAlgError( - "Matrix is not positive definite.") - # Zero out upper triangle, in F order - for col in range(n): - out[:col, col] = 0 - return out - - return cho_impl - - @overload(np.linalg.eig) - def eig_impl(a): - ensure_lapack() - - _check_linalg_matrix(a, "eig") - - numba_ez_rgeev = _LAPACK().numba_ez_rgeev(a.dtype) - numba_ez_cgeev = _LAPACK().numba_ez_cgeev(a.dtype) - - kind = ord(get_blas_kind(a.dtype, "eig")) - - JOBVL = ord('N') - JOBVR = ord('V') - - F_layout = a.layout == 'F' - - def real_eig_impl(a): - """ - eig() implementation for real arrays. - """ - n = a.shape[-1] - if a.shape[-2] != n: - msg = "Last 2 dimensions of the array must be square." - raise np.linalg.LinAlgError(msg) - - _check_finite_matrix(a) - - if F_layout: - acpy = np.copy(a) - else: - acpy = np.asfortranarray(a) - - ldvl = 1 - ldvr = n - wr = np.empty(n, dtype=a.dtype) - wi = np.empty(n, dtype=a.dtype) - vl = np.empty((n, ldvl), dtype=a.dtype) - vr = np.empty((n, ldvr), dtype=a.dtype) - - if n == 0: - return (wr, vr.T) - - r = numba_ez_rgeev(kind, - JOBVL, - JOBVR, - n, - acpy.ctypes, - n, - wr.ctypes, - wi.ctypes, - vl.ctypes, - ldvl, - vr.ctypes, - ldvr) - _handle_err_maybe_convergence_problem(r) - - # By design numba does not support dynamic return types, however, - # Numpy does. Numpy uses this ability in the case of returning - # eigenvalues/vectors of a real matrix. 
The return type of - # np.linalg.eig(), when operating on a matrix in real space - # depends on the values present in the matrix itself (recalling - # that eigenvalues are the roots of the characteristic polynomial - # of the system matrix, which will by construction depend on the - # values present in the system matrix). As numba cannot handle - # the case of a runtime decision based domain change relative to - # the input type, if it is required numba raises as below. - if np.any(wi): - raise ValueError( - "eig() argument must not cause a domain change.") - - # put these in to help with liveness analysis, - # `.ctypes` doesn't keep the vars alive - _dummy_liveness_func([acpy.size, vl.size, vr.size, wr.size, wi.size]) - return (wr, vr.T) - - def cmplx_eig_impl(a): - """ - eig() implementation for complex arrays. - """ - n = a.shape[-1] - if a.shape[-2] != n: - msg = "Last 2 dimensions of the array must be square." - raise np.linalg.LinAlgError(msg) - - _check_finite_matrix(a) - - if F_layout: - acpy = np.copy(a) - else: - acpy = np.asfortranarray(a) - - ldvl = 1 - ldvr = n - w = np.empty(n, dtype=a.dtype) - vl = np.empty((n, ldvl), dtype=a.dtype) - vr = np.empty((n, ldvr), dtype=a.dtype) - - if n == 0: - return (w, vr.T) - - r = numba_ez_cgeev(kind, - JOBVL, - JOBVR, - n, - acpy.ctypes, - n, - w.ctypes, - vl.ctypes, - ldvl, - vr.ctypes, - ldvr) - _handle_err_maybe_convergence_problem(r) - - # put these in to help with liveness analysis, - # `.ctypes` doesn't keep the vars alive - _dummy_liveness_func([acpy.size, vl.size, vr.size, w.size]) - return (w, vr.T) - - if isinstance(a.dtype, types.scalars.Complex): - return cmplx_eig_impl - else: - return real_eig_impl - - @overload(np.linalg.eigvals) - def eigvals_impl(a): - ensure_lapack() - - _check_linalg_matrix(a, "eigvals") - - numba_ez_rgeev = _LAPACK().numba_ez_rgeev(a.dtype) - numba_ez_cgeev = _LAPACK().numba_ez_cgeev(a.dtype) - - kind = ord(get_blas_kind(a.dtype, "eigvals")) - - JOBVL = ord('N') - JOBVR = 
ord('N') - - F_layout = a.layout == 'F' - - def real_eigvals_impl(a): - """ - eigvals() implementation for real arrays. - """ - n = a.shape[-1] - if a.shape[-2] != n: - msg = "Last 2 dimensions of the array must be square." - raise np.linalg.LinAlgError(msg) - - _check_finite_matrix(a) - - if F_layout: - acpy = np.copy(a) - else: - acpy = np.asfortranarray(a) - - ldvl = 1 - ldvr = 1 - wr = np.empty(n, dtype=a.dtype) - - if n == 0: - return wr - - wi = np.empty(n, dtype=a.dtype) - - # not referenced but need setting for MKL null check - vl = np.empty((1), dtype=a.dtype) - vr = np.empty((1), dtype=a.dtype) - - r = numba_ez_rgeev(kind, - JOBVL, - JOBVR, - n, - acpy.ctypes, - n, - wr.ctypes, - wi.ctypes, - vl.ctypes, - ldvl, - vr.ctypes, - ldvr) - _handle_err_maybe_convergence_problem(r) - - # By design numba does not support dynamic return types, however, - # Numpy does. Numpy uses this ability in the case of returning - # eigenvalues/vectors of a real matrix. The return type of - # np.linalg.eigvals(), when operating on a matrix in real space - # depends on the values present in the matrix itself (recalling - # that eigenvalues are the roots of the characteristic polynomial - # of the system matrix, which will by construction depend on the - # values present in the system matrix). As numba cannot handle - # the case of a runtime decision based domain change relative to - # the input type, if it is required numba raises as below. - if np.any(wi): - raise ValueError( - "eigvals() argument must not cause a domain change.") - - # put these in to help with liveness analysis, - # `.ctypes` doesn't keep the vars alive - _dummy_liveness_func([acpy.size, vl.size, vr.size, wr.size, wi.size]) - return wr - - def cmplx_eigvals_impl(a): - """ - eigvals() implementation for complex arrays. - """ - n = a.shape[-1] - if a.shape[-2] != n: - msg = "Last 2 dimensions of the array must be square." 
- raise np.linalg.LinAlgError(msg) - - _check_finite_matrix(a) - - if F_layout: - acpy = np.copy(a) - else: - acpy = np.asfortranarray(a) - - ldvl = 1 - ldvr = 1 - w = np.empty(n, dtype=a.dtype) - - if n == 0: - return w - - vl = np.empty((1), dtype=a.dtype) - vr = np.empty((1), dtype=a.dtype) - - r = numba_ez_cgeev(kind, - JOBVL, - JOBVR, - n, - acpy.ctypes, - n, - w.ctypes, - vl.ctypes, - ldvl, - vr.ctypes, - ldvr) - _handle_err_maybe_convergence_problem(r) - - # put these in to help with liveness analysis, - # `.ctypes` doesn't keep the vars alive - _dummy_liveness_func([acpy.size, vl.size, vr.size, w.size]) - return w - - if isinstance(a.dtype, types.scalars.Complex): - return cmplx_eigvals_impl - else: - return real_eigvals_impl - - @overload(np.linalg.eigh) - def eigh_impl(a): - ensure_lapack() - - _check_linalg_matrix(a, "eigh") - - F_layout = a.layout == 'F' - - # convert typing floats to numpy floats for use in the impl - w_type = getattr(a.dtype, "underlying_float", a.dtype) - w_dtype = np_support.as_dtype(w_type) - - numba_ez_xxxevd = _LAPACK().numba_ez_xxxevd(a.dtype) - - kind = ord(get_blas_kind(a.dtype, "eigh")) - - JOBZ = ord('V') - UPLO = ord('L') - - def eigh_impl(a): - n = a.shape[-1] - - if a.shape[-2] != n: - msg = "Last 2 dimensions of the array must be square." 
- raise np.linalg.LinAlgError(msg) - - _check_finite_matrix(a) - - if F_layout: - acpy = np.copy(a) - else: - acpy = np.asfortranarray(a) - - w = np.empty(n, dtype=w_dtype) - - if n == 0: - return (w, acpy) - - r = numba_ez_xxxevd(kind, # kind - JOBZ, # jobz - UPLO, # uplo - n, # n - acpy.ctypes, # a - n, # lda - w.ctypes # w - ) - _handle_err_maybe_convergence_problem(r) - - # help liveness analysis - _dummy_liveness_func([acpy.size, w.size]) - return (w, acpy) - - return eigh_impl - - @overload(np.linalg.eigvalsh) - def eigvalsh_impl(a): - ensure_lapack() - - _check_linalg_matrix(a, "eigvalsh") - - F_layout = a.layout == 'F' - - # convert typing floats to numpy floats for use in the impl - w_type = getattr(a.dtype, "underlying_float", a.dtype) - w_dtype = np_support.as_dtype(w_type) - - numba_ez_xxxevd = _LAPACK().numba_ez_xxxevd(a.dtype) - - kind = ord(get_blas_kind(a.dtype, "eigvalsh")) - - JOBZ = ord('N') - UPLO = ord('L') - - def eigvalsh_impl(a): - n = a.shape[-1] - - if a.shape[-2] != n: - msg = "Last 2 dimensions of the array must be square." 
- raise np.linalg.LinAlgError(msg) - - _check_finite_matrix(a) - - if F_layout: - acpy = np.copy(a) - else: - acpy = np.asfortranarray(a) - - w = np.empty(n, dtype=w_dtype) - - if n == 0: - return w - - r = numba_ez_xxxevd(kind, # kind - JOBZ, # jobz - UPLO, # uplo - n, # n - acpy.ctypes, # a - n, # lda - w.ctypes # w - ) - _handle_err_maybe_convergence_problem(r) - - # help liveness analysis - _dummy_liveness_func([acpy.size, w.size]) - return w - - return eigvalsh_impl - - @overload(np.linalg.svd) - def svd_impl(a, full_matrices=1): - ensure_lapack() - - _check_linalg_matrix(a, "svd") - - F_layout = a.layout == 'F' - - # convert typing floats to numpy floats for use in the impl - s_type = getattr(a.dtype, "underlying_float", a.dtype) - s_dtype = np_support.as_dtype(s_type) - - numba_ez_gesdd = _LAPACK().numba_ez_gesdd(a.dtype) - - kind = ord(get_blas_kind(a.dtype, "svd")) - - JOBZ_A = ord('A') - JOBZ_S = ord('S') - - def svd_impl(a, full_matrices=1): - n = a.shape[-1] - m = a.shape[-2] - - if n == 0 or m == 0: - raise np.linalg.LinAlgError("Arrays cannot be empty") - - _check_finite_matrix(a) - - if F_layout: - acpy = np.copy(a) - else: - acpy = np.asfortranarray(a) - - ldu = m - minmn = min(m, n) - - if full_matrices: - JOBZ = JOBZ_A - ucol = m - ldvt = n - else: - JOBZ = JOBZ_S - ucol = minmn - ldvt = minmn - - u = np.empty((ucol, ldu), dtype=a.dtype) - s = np.empty(minmn, dtype=s_dtype) - vt = np.empty((n, ldvt), dtype=a.dtype) - - r = numba_ez_gesdd( - kind, # kind - JOBZ, # jobz - m, # m - n, # n - acpy.ctypes, # a - m, # lda - s.ctypes, # s - u.ctypes, # u - ldu, # ldu - vt.ctypes, # vt - ldvt # ldvt - ) - _handle_err_maybe_convergence_problem(r) - - # help liveness analysis - _dummy_liveness_func([acpy.size, vt.size, u.size, s.size]) - return (u.T, s, vt.T) - - return svd_impl - - -@overload(np.linalg.qr) -def qr_impl(a): - ensure_lapack() - - _check_linalg_matrix(a, "qr") - - # Need two functions, the first computes R, storing it in the upper - # triangle 
of A with the below diagonal part of A containing elementary - # reflectors needed to construct Q. The second turns the below diagonal - # entries of A into Q, storing Q in A (creates orthonormal columns from - # the elementary reflectors). - - numba_ez_geqrf = _LAPACK().numba_ez_geqrf(a.dtype) - numba_ez_xxgqr = _LAPACK().numba_ez_xxgqr(a.dtype) - - kind = ord(get_blas_kind(a.dtype, "qr")) - - F_layout = a.layout == 'F' - - def qr_impl(a): - n = a.shape[-1] - m = a.shape[-2] - - if n == 0 or m == 0: - raise np.linalg.LinAlgError("Arrays cannot be empty") - - _check_finite_matrix(a) - - # copy A as it will be destroyed - if F_layout: - q = np.copy(a) - else: - q = np.asfortranarray(a) - - lda = m - - minmn = min(m, n) - tau = np.empty((minmn), dtype=a.dtype) - - ret = numba_ez_geqrf( - kind, # kind - m, # m - n, # n - q.ctypes, # a - m, # lda - tau.ctypes # tau - ) - if ret < 0: - fatal_error_func() - assert 0 # unreachable - - # pull out R, this is transposed because of Fortran - r = np.zeros((n, minmn), dtype=a.dtype).T - - # the triangle in R - for i in range(minmn): - for j in range(i + 1): - r[j, i] = q[j, i] - - # and the possible square in R - for i in range(minmn, n): - for j in range(minmn): - r[j, i] = q[j, i] - - ret = numba_ez_xxgqr( - kind, # kind - m, # m - minmn, # n - minmn, # k - q.ctypes, # a - m, # lda - tau.ctypes # tau - ) - _handle_err_maybe_convergence_problem(ret) - - # help liveness analysis - _dummy_liveness_func([tau.size, q.size]) - return (q[:, :minmn], r) - - return qr_impl - - -# helpers and jitted specialisations required for np.linalg.lstsq -# and np.linalg.solve. These functions have "system" in their name -# as a differentiator. - -def _system_copy_in_b(bcpy, b, nrhs): - """ - Correctly copy 'b' into the 'bcpy' scratch space. 
- """ - raise NotImplementedError - - -@overload(_system_copy_in_b) -def _system_copy_in_b_impl(bcpy, b, nrhs): - if b.ndim == 1: - def oneD_impl(bcpy, b, nrhs): - bcpy[:b.shape[-1], 0] = b - return oneD_impl - else: - def twoD_impl(bcpy, b, nrhs): - bcpy[:b.shape[-2], :nrhs] = b - return twoD_impl - - -def _system_compute_nrhs(b): - """ - Compute the number of right hand sides in the system of equations - """ - raise NotImplementedError - - -@overload(_system_compute_nrhs) -def _system_compute_nrhs_impl(b): - if b.ndim == 1: - def oneD_impl(b): - return 1 - return oneD_impl - else: - def twoD_impl(b): - return b.shape[-1] - return twoD_impl - - -def _system_check_dimensionally_valid(a, b): - """ - Check that AX=B style system input is dimensionally valid. - """ - raise NotImplementedError - - -@overload(_system_check_dimensionally_valid) -def _system_check_dimensionally_valid_impl(a, b): - ndim = b.ndim - if ndim == 1: - def oneD_impl(a, b): - am = a.shape[-2] - bm = b.shape[-1] - if am != bm: - raise np.linalg.LinAlgError( - "Incompatible array sizes, system is not dimensionally valid.") - return oneD_impl - else: - def twoD_impl(a, b): - am = a.shape[-2] - bm = b.shape[-2] - if am != bm: - raise np.linalg.LinAlgError( - "Incompatible array sizes, system is not dimensionally valid.") - return twoD_impl - - -def _system_check_non_empty(a, b): - """ - Check that AX=B style system input is not empty. 
- """ - raise NotImplementedError - - -@overload(_system_check_non_empty) -def _system_check_non_empty_impl(a, b): - ndim = b.ndim - if ndim == 1: - def oneD_impl(a, b): - am = a.shape[-2] - an = a.shape[-1] - bm = b.shape[-1] - if am == 0 or bm == 0 or an == 0: - raise np.linalg.LinAlgError('Arrays cannot be empty') - return oneD_impl - else: - def twoD_impl(a, b): - am = a.shape[-2] - an = a.shape[-1] - bm = b.shape[-2] - bn = b.shape[-1] - if am == 0 or bm == 0 or an == 0 or bn == 0: - raise np.linalg.LinAlgError('Arrays cannot be empty') - return twoD_impl - - -def _lstsq_residual(b, n, rhs): - """ - Compute the residual from the 'b' scratch space. - """ - raise NotImplementedError - - -@overload(_lstsq_residual) -def _lstsq_residual_impl(b, n, rhs): - ndim = b.ndim - dtype = b.dtype - real_dtype = np_support.as_dtype(getattr(dtype, "underlying_float", dtype)) - - if ndim == 1: - if isinstance(dtype, (types.Complex)): - def cmplx_impl(b, n, nrhs): - res = np.empty((1,), dtype=real_dtype) - res[0] = np.sum(np.abs(b[n:, 0])**2) - return res - return cmplx_impl - else: - def real_impl(b, n, nrhs): - res = np.empty((1,), dtype=real_dtype) - res[0] = np.sum(b[n:, 0]**2) - return res - return real_impl - else: - assert ndim == 2 - if isinstance(dtype, (types.Complex)): - def cmplx_impl(b, n, nrhs): - res = np.empty((nrhs), dtype=real_dtype) - for k in range(nrhs): - res[k] = np.sum(np.abs(b[n:, k])**2) - return res - return cmplx_impl - else: - def real_impl(b, n, nrhs): - res = np.empty((nrhs), dtype=real_dtype) - for k in range(nrhs): - res[k] = np.sum(b[n:, k]**2) - return res - return real_impl - - -def _lstsq_solution(b, bcpy, n): - """ - Extract 'x' (the lstsq solution) from the 'bcpy' scratch space. - Note 'b' is only used to check the system input dimension... 
- """ - raise NotImplementedError - - -@overload(_lstsq_solution) -def _lstsq_solution_impl(b, bcpy, n): - if b.ndim == 1: - def oneD_impl(b, bcpy, n): - return bcpy.T.ravel()[:n] - return oneD_impl - else: - def twoD_impl(b, bcpy, n): - return bcpy[:n, :].copy() - return twoD_impl - - -@overload(np.linalg.lstsq) -def lstsq_impl(a, b, rcond=-1.0): - ensure_lapack() - - _check_linalg_matrix(a, "lstsq") - - # B can be 1D or 2D. - _check_linalg_1_or_2d_matrix(b, "lstsq") - - a_F_layout = a.layout == 'F' - b_F_layout = b.layout == 'F' - - _check_homogeneous_types("lstsq", a, b) - - np_dt = np_support.as_dtype(a.dtype) - nb_dt = a.dtype - - # convert typing floats to np floats for use in the impl - r_type = getattr(nb_dt, "underlying_float", nb_dt) - real_dtype = np_support.as_dtype(r_type) - - # lapack solver - numba_ez_gelsd = _LAPACK().numba_ez_gelsd(a.dtype) - - kind = ord(get_blas_kind(nb_dt, "lstsq")) - - # The following functions select specialisations based on - # information around 'b', a lot of this effort is required - # as 'b' can be either 1D or 2D, and then there are - # some optimisations available depending on real or complex - # space. 
- - def lstsq_impl(a, b, rcond=-1.0): - n = a.shape[-1] - m = a.shape[-2] - nrhs = _system_compute_nrhs(b) - - # check the systems have no inf or NaN - _check_finite_matrix(a) - _check_finite_matrix(b) - - # check the system is not empty - _system_check_non_empty(a, b) - - # check the systems are dimensionally valid - _system_check_dimensionally_valid(a, b) - - minmn = min(m, n) - maxmn = max(m, n) - - # a is destroyed on exit, copy it - if a_F_layout: - acpy = np.copy(a) - else: - acpy = np.asfortranarray(a) - - # b is overwritten on exit with the solution, copy allocate - bcpy = np.empty((nrhs, maxmn), dtype=np_dt).T - # specialised copy in due to b being 1 or 2D - _system_copy_in_b(bcpy, b, nrhs) - - # Allocate returns - s = np.empty(minmn, dtype=real_dtype) - rank_ptr = np.empty(1, dtype=np.int32) - - r = numba_ez_gelsd( - kind, # kind - m, # m - n, # n - nrhs, # nrhs - acpy.ctypes, # a - m, # lda - bcpy.ctypes, # a - maxmn, # ldb - s.ctypes, # s - rcond, # rcond - rank_ptr.ctypes # rank - ) - _handle_err_maybe_convergence_problem(r) - - # set rank to that which was computed - rank = rank_ptr[0] - - # compute residuals - if rank < n or m <= n: - res = np.empty((0), dtype=real_dtype) - else: - # this requires additional dispatch as there's a faster - # impl if the result is in the real domain (no abs() required) - res = _lstsq_residual(bcpy, n, nrhs) - - # extract 'x', the solution - x = _lstsq_solution(b, bcpy, n) - - # help liveness analysis - _dummy_liveness_func([acpy.size, bcpy.size, s.size, rank_ptr.size]) - return (x, res, rank, s[:minmn]) - - return lstsq_impl - - -def _solve_compute_return(b, bcpy): - """ - Extract 'x' (the solution) from the 'bcpy' scratch space. - Note 'b' is only used to check the system input dimension... 
- """ - raise NotImplementedError - - -@overload(_solve_compute_return) -def _solve_compute_return_impl(b, bcpy): - if b.ndim == 1: - def oneD_impl(b, bcpy): - return bcpy.T.ravel() - return oneD_impl - else: - def twoD_impl(b, bcpy): - return bcpy - return twoD_impl - - -@overload(np.linalg.solve) -def solve_impl(a, b): - ensure_lapack() - - _check_linalg_matrix(a, "solve") - _check_linalg_1_or_2d_matrix(b, "solve") - - a_F_layout = a.layout == 'F' - b_F_layout = b.layout == 'F' - - _check_homogeneous_types("solve", a, b) - - np_dt = np_support.as_dtype(a.dtype) - nb_dt = a.dtype - - # the lapack solver - numba_xgesv = _LAPACK().numba_xgesv(a.dtype) - - kind = ord(get_blas_kind(nb_dt, "solve")) - - def solve_impl(a, b): - n = a.shape[-1] - nrhs = _system_compute_nrhs(b) - - # check the systems have no inf or NaN - _check_finite_matrix(a) - _check_finite_matrix(b) - - # check the systems are dimensionally valid - _system_check_dimensionally_valid(a, b) - - # a is destroyed on exit, copy it - if a_F_layout: - acpy = np.copy(a) - else: - acpy = np.asfortranarray(a) - - # b is overwritten on exit with the solution, copy allocate - bcpy = np.empty((nrhs, n), dtype=np_dt).T - if n == 0: - return _solve_compute_return(b, bcpy) - - # specialised copy in due to b being 1 or 2D - _system_copy_in_b(bcpy, b, nrhs) - - # allocate pivot array (needs to be fortran int size) - ipiv = np.empty(n, dtype=F_INT_nptype) - - r = numba_xgesv( - kind, # kind - n, # n - nrhs, # nhrs - acpy.ctypes, # a - n, # lda - ipiv.ctypes, # ipiv - bcpy.ctypes, # b - n # ldb - ) - _inv_err_handler(r) - - # help liveness analysis - _dummy_liveness_func([acpy.size, bcpy.size, ipiv.size]) - return _solve_compute_return(b, bcpy) - - return solve_impl - - -@overload(np.linalg.pinv) -def pinv_impl(a, rcond=1.e-15): - ensure_lapack() - - _check_linalg_matrix(a, "pinv") - - # convert typing floats to numpy floats for use in the impl - s_type = getattr(a.dtype, "underlying_float", a.dtype) - s_dtype = 
np_support.as_dtype(s_type) - - numba_ez_gesdd = _LAPACK().numba_ez_gesdd(a.dtype) - - numba_xxgemm = _BLAS().numba_xxgemm(a.dtype) - - F_layout = a.layout == 'F' - - kind = ord(get_blas_kind(a.dtype, "pinv")) - JOB = ord('S') - - # need conjugate transposes - TRANSA = ord('C') - TRANSB = ord('C') - - # scalar constants - dt = np_support.as_dtype(a.dtype) - zero = np.array([0.], dtype=dt) - one = np.array([1.], dtype=dt) - - def pinv_impl(a, rcond=1.e-15): - - # The idea is to build the pseudo-inverse via inverting the singular - # value decomposition of a matrix `A`. Mathematically, this is roughly - # A = U*S*V^H [The SV decomposition of A] - # A^+ = V*(S^+)*U^H [The inverted SV decomposition of A] - # where ^+ is pseudo inversion and ^H is Hermitian transpose. - # As V and U are unitary, their inverses are simply their Hermitian - # transpose. S has singular values on its diagonal and zero elsewhere, - # it is inverted trivially by reciprocal of the diagonal values with - # the exception that zero singular values remain as zero. - # - # The practical implementation can take advantage of a few things to - # gain a few % performance increase: - # * A is destroyed by the SVD algorithm from LAPACK so a copy is - # required, this memory is exactly the right size in which to return - # the pseudo-inverse and so can be resued for this purpose. - # * The pseudo-inverse of S can be applied to either V or U^H, this - # then leaves a GEMM operation to compute the inverse via either: - # A^+ = (V*(S^+))*U^H - # or - # A^+ = V*((S^+)*U^H) - # however application of S^+ to V^H or U is more convenient as they - # are the result of the SVD algorithm. The application of the - # diagonal system is just a matrix multiplication which results in a - # row/column scaling (direction depending). To save effort, this - # "matrix multiplication" is applied to the smallest of U or V^H and - # only up to the point of "cut-off" (see next note) just as a direct - # scaling. 
- # * The cut-off level for application of S^+ can be used to reduce - # total effort, this cut-off can come via rcond or may just naturally - # be present as a result of zeros in the singular values. Regardless - # there's no need to multiply by zeros in the application of S^+ to - # V^H or U as above. Further, the GEMM operation can be shrunk in - # effort by noting that the possible zero block generated by the - # presence of zeros in S^+ has no effect apart from wasting cycles as - # it is all fmadd()s where one operand is zero. The inner dimension - # of the GEMM operation can therefore be set as shrunk accordingly! - - n = a.shape[-1] - m = a.shape[-2] - - _check_finite_matrix(a) - - if F_layout: - acpy = np.copy(a) - else: - acpy = np.asfortranarray(a) - - if m == 0 or n == 0: - return acpy.T.ravel().reshape(a.shape).T - - minmn = min(m, n) - - u = np.empty((minmn, m), dtype=a.dtype) - s = np.empty(minmn, dtype=s_dtype) - vt = np.empty((n, minmn), dtype=a.dtype) - - r = numba_ez_gesdd( - kind, # kind - JOB, # job - m, # m - n, # n - acpy.ctypes, # a - m, # lda - s.ctypes, # s - u.ctypes, # u - m, # ldu - vt.ctypes, # vt - minmn # ldvt - ) - _handle_err_maybe_convergence_problem(r) - - # Invert singular values under threshold. Also find the index of - # the threshold value as this is the upper limit for the application - # of the inverted singular values. Finding this value saves - # multiplication by a block of zeros that would be created by the - # application of these values to either U or V^H ahead of multiplying - # them together. This is done by simply in BLAS parlance via - # restricting the `k` dimension to `cut_idx` in `xgemm` whilst keeping - # the leading dimensions correct. - - cut_at = s[0] * rcond - cut_idx = 0 - for k in range(minmn): - if s[k] > cut_at: - s[k] = 1. / s[k] - cut_idx = k - cut_idx += 1 - - # Use cut_idx so there's no scaling by 0. - if m >= n: - # U is largest so apply S^+ to V^H. 
- for i in range(n): - for j in range(cut_idx): - vt[i, j] = vt[i, j] * s[j] - else: - # V^H is largest so apply S^+ to U. - for i in range(cut_idx): - s_local = s[i] - for j in range(minmn): - u[i, j] = u[i, j] * s_local - - # Do (v^H)^H*U^H (obviously one of the matrices includes the S^+ - # scaling) and write back to acpy. Note the innner dimension of cut_idx - # taking account of the possible zero block. - # We can store the result in acpy, given we had to create it - # for use in the SVD, and it is now redundant and the right size - # but wrong shape. - - r = numba_xxgemm( - kind, - TRANSA, # TRANSA - TRANSB, # TRANSB - n, # M - m, # N - cut_idx, # K - one.ctypes, # ALPHA - vt.ctypes, # A - minmn, # LDA - u.ctypes, # B - m, # LDB - zero.ctypes, # BETA - acpy.ctypes, # C - n # LDC - ) - - # help liveness analysis - #acpy.size - #vt.size - #u.size - #s.size - #one.size - #zero.size - _dummy_liveness_func([acpy.size, vt.size, u.size, s.size, one.size, - zero.size]) - return acpy.T.ravel().reshape(a.shape).T - - return pinv_impl - - -def _get_slogdet_diag_walker(a): - """ - Walks the diag of a LUP decomposed matrix - uses that det(A) = prod(diag(lup(A))) - and also that log(a)+log(b) = log(a*b) - The return sign is adjusted based on the values found - such that the log(value) stays in the real domain. - """ - if isinstance(a.dtype, types.Complex): - @register_jitable - def cmplx_diag_walker(n, a, sgn): - # walk diagonal - csgn = sgn + 0.j - acc = 0. - for k in range(n): - absel = np.abs(a[k, k]) - csgn = csgn * (a[k, k] / absel) - acc = acc + np.log(absel) - return (csgn, acc) - return cmplx_diag_walker - else: - @register_jitable - def real_diag_walker(n, a, sgn): - # walk diagonal - acc = 0. 
- for k in range(n): - v = a[k, k] - if v < 0.: - sgn = -sgn - v = -v - acc = acc + np.log(v) - # sgn is a float dtype - return (sgn + 0., acc) - return real_diag_walker - - -@overload(np.linalg.slogdet) -def slogdet_impl(a): - ensure_lapack() - - _check_linalg_matrix(a, "slogdet") - - numba_xxgetrf = _LAPACK().numba_xxgetrf(a.dtype) - - kind = ord(get_blas_kind(a.dtype, "slogdet")) - - F_layout = a.layout == 'F' - - diag_walker = _get_slogdet_diag_walker(a) - - ONE = a.dtype(1) - ZERO = getattr(a.dtype, "underlying_float", a.dtype)(0) - - def slogdet_impl(a): - n = a.shape[-1] - if a.shape[-2] != n: - msg = "Last 2 dimensions of the array must be square." - raise np.linalg.LinAlgError(msg) - - if n == 0: - return (ONE, ZERO) - - _check_finite_matrix(a) - - if F_layout: - acpy = np.copy(a) - else: - acpy = np.asfortranarray(a) - - ipiv = np.empty(n, dtype=F_INT_nptype) - - r = numba_xxgetrf(kind, n, n, acpy.ctypes, n, ipiv.ctypes) - - if r > 0: - # factorisation failed, return same defaults as np - return (0., -np.inf) - _inv_err_handler(r) # catch input-to-lapack problem - - # The following, prior to the call to diag_walker, is present - # to account for the effect of possible permutations to the - # sign of the determinant. - # This is the same idea as in numpy: - # File name `umath_linalg.c.src` e.g. - # https://github.com/numpy/numpy/blob/master/numpy/linalg/umath_linalg.c.src - # in function `@TYPE@_slogdet_single_element`. - sgn = 1 - for k in range(n): - sgn = sgn + (ipiv[k] != (k + 1)) - - sgn = sgn & 1 - if sgn == 0: - sgn = -1 - - # help liveness analysis - _dummy_liveness_func([ipiv.size]) - return diag_walker(n, acpy, sgn) - - return slogdet_impl - - -@overload(np.linalg.det) -def det_impl(a): - - ensure_lapack() - - _check_linalg_matrix(a, "det") - - def det_impl(a): - (sgn, slogdet) = np.linalg.slogdet(a) - return sgn * np.exp(slogdet) - - return det_impl - - -def _compute_singular_values(a): - """ - Compute singular values of *a*. 
- """ - raise NotImplementedError - - -@overload(_compute_singular_values) -def _compute_singular_values_impl(a): - """ - Returns a function to compute singular values of `a` - """ - numba_ez_gesdd = _LAPACK().numba_ez_gesdd(a.dtype) - - kind = ord(get_blas_kind(a.dtype, "svd")) - - # Flag for "only compute `S`" to give to xgesdd - JOBZ_N = ord('N') - - nb_ret_type = getattr(a.dtype, "underlying_float", a.dtype) - np_ret_type = np_support.as_dtype(nb_ret_type) - np_dtype = np_support.as_dtype(a.dtype) - - # These are not referenced in the computation but must be set - # for MKL. - u = np.empty((1, 1), dtype=np_dtype) - vt = np.empty((1, 1), dtype=np_dtype) - - F_layout = a.layout == 'F' - - def sv_function(a): - """ - Computes singular values. - """ - # Don't use the np.linalg.svd impl instead - # call LAPACK to shortcut doing the "reconstruct - # singular vectors from reflectors" step and just - # get back the singular values. - n = a.shape[-1] - m = a.shape[-2] - if m == 0 or n == 0: - raise np.linalg.LinAlgError('Arrays cannot be empty') - _check_finite_matrix(a) - - ldu = m - minmn = min(m, n) - - # need to be >=1 but aren't referenced - ucol = 1 - ldvt = 1 - - if F_layout: - acpy = np.copy(a) - else: - acpy = np.asfortranarray(a) - - # u and vt are not referenced however need to be - # allocated (as done above) for MKL as it - # checks for ref is nullptr. - s = np.empty(minmn, dtype=np_ret_type) - - r = numba_ez_gesdd( - kind, # kind - JOBZ_N, # jobz - m, # m - n, # n - acpy.ctypes, # a - m, # lda - s.ctypes, # s - u.ctypes, # u - ldu, # ldu - vt.ctypes, # vt - ldvt # ldvt - ) - _handle_err_maybe_convergence_problem(r) - - # help liveness analysis - _dummy_liveness_func([acpy.size, vt.size, u.size, s.size]) - return s - - return sv_function - - -def _oneD_norm_2(a): - """ - Compute the L2-norm of 1D-array *a*. 
- """ - raise NotImplementedError - - -@overload(_oneD_norm_2) -def _oneD_norm_2_impl(a): - nb_ret_type = getattr(a.dtype, "underlying_float", a.dtype) - np_ret_type = np_support.as_dtype(nb_ret_type) - - xxnrm2 = _BLAS().numba_xxnrm2(a.dtype) - - kind = ord(get_blas_kind(a.dtype, "norm")) - - def impl(a): - # Just ignore order, calls are guarded to only come - # from cases where order=None or order=2. - n = len(a) - # Call L2-norm routine from BLAS - ret = np.empty((1,), dtype=np_ret_type) - jmp = int(a.strides[0] / a.itemsize) - r = xxnrm2( - kind, # kind - n, # n - a.ctypes, # x - jmp, # incx - ret.ctypes # result - ) - if r < 0: - fatal_error_func() - assert 0 # unreachable - - # help liveness analysis - #ret.size - #a.size - _dummy_liveness_func([ret.size, a.size]) - return ret[0] - - return impl - - -def _get_norm_impl(a, ord_flag): - # This function is quite involved as norm supports a large - # range of values to select different norm types via kwarg `ord`. - # The implementation below branches on dimension of the input - # (1D or 2D). The default for `ord` is `None` which requires - # special handling in numba, this is dealt with first in each of - # the dimension branches. Following this the various norms are - # computed via code that is in most cases simply a loop version - # of a ufunc based version as found in numpy. - - # The following is common to both 1D and 2D cases. - # Convert typing floats to numpy floats for use in the impl. - # The return type is always a float, numba differs from numpy in - # that it returns an input precision specific value whereas numpy - # always returns np.float64. 
- nb_ret_type = getattr(a.dtype, "underlying_float", a.dtype) - np_ret_type = np_support.as_dtype(nb_ret_type) - - np_dtype = np_support.as_dtype(a.dtype) - - xxnrm2 = _BLAS().numba_xxnrm2(a.dtype) - - kind = ord(get_blas_kind(a.dtype, "norm")) - - if a.ndim == 1: - # 1D cases - - # handle "ord" being "None", must be done separately - if ord_flag in (None, types.none): - def oneD_impl(a, order=None): - return _oneD_norm_2(a) - else: - def oneD_impl(a, order=None): - n = len(a) - - # Shortcut to handle zero length arrays - # this differs slightly to numpy in that - # numpy raises a ValueError for kwarg ord= - # +/-np.inf as the reduction operations like - # max() and min() don't accept zero length - # arrays - if n == 0: - return 0.0 - - # Note: on order == 2 - # This is the same as for ord=="None" but because - # we have to handle "None" specially this condition - # is separated - if order == 2: - return _oneD_norm_2(a) - elif order == np.inf: - # max(abs(a)) - ret = abs(a[0]) - for k in range(1, n): - val = abs(a[k]) - if val > ret: - ret = val - return ret - - elif order == -np.inf: - # min(abs(a)) - ret = abs(a[0]) - for k in range(1, n): - val = abs(a[k]) - if val < ret: - ret = val - return ret - - elif order == 0: - # sum(a != 0) - ret = 0.0 - for k in range(n): - if a[k] != 0.: - ret += 1. - return ret - - elif order == 1: - # sum(abs(a)) - ret = 0.0 - for k in range(n): - ret += abs(a[k]) - return ret - - else: - # sum(abs(a)**ord)**(1./ord) - ret = 0.0 - for k in range(n): - ret += abs(a[k])**order - return ret**(1. / order) - return oneD_impl - - elif a.ndim == 2: - # 2D cases - - # handle "ord" being "None" - if ord_flag in (None, types.none): - # Force `a` to be C-order, so that we can take a contiguous - # 1D view. 
- if a.layout == 'C': - @register_jitable - def array_prepare(a): - return a - elif a.layout == 'F': - @register_jitable - def array_prepare(a): - # Legal since L2(a) == L2(a.T) - return a.T - else: - @register_jitable - def array_prepare(a): - return a.copy() - - # Compute the Frobenius norm, this is the L2,2 induced norm of `A` - # which is the L2-norm of A.ravel() and so can be computed via BLAS - def twoD_impl(a, order=None): - n = a.size - if n == 0: - # reshape() currently doesn't support zero-sized arrays - return 0.0 - a_c = array_prepare(a) - return _oneD_norm_2(a_c.reshape(n)) - else: - # max value for this dtype - max_val = np.finfo(np_ret_type.type).max - - def twoD_impl(a, order=None): - n = a.shape[-1] - m = a.shape[-2] - - # Shortcut to handle zero size arrays - # this differs slightly to numpy in that - # numpy raises errors for some ord values - # and in other cases returns zero. - if a.size == 0: - return 0.0 - - if order == np.inf: - # max of sum of abs across rows - # max(sum(abs(a)), axis=1) - global_max = 0. - for ii in range(m): - tmp = 0. - for jj in range(n): - tmp += abs(a[ii, jj]) - if tmp > global_max: - global_max = tmp - return global_max - - elif order == -np.inf: - # min of sum of abs across rows - # min(sum(abs(a)), axis=1) - global_min = max_val - for ii in range(m): - tmp = 0. - for jj in range(n): - tmp += abs(a[ii, jj]) - if tmp < global_min: - global_min = tmp - return global_min - elif order == 1: - # max of sum of abs across cols - # max(sum(abs(a)), axis=0) - global_max = 0. - for ii in range(n): - tmp = 0. - for jj in range(m): - tmp += abs(a[jj, ii]) - if tmp > global_max: - global_max = tmp - return global_max - - elif order == -1: - # min of sum of abs across cols - # min(sum(abs(a)), axis=0) - global_min = max_val - for ii in range(n): - tmp = 0. 
- for jj in range(m): - tmp += abs(a[jj, ii]) - if tmp < global_min: - global_min = tmp - return global_min - - # Results via SVD, singular values are sorted on return - # by definition. - elif order == 2: - # max SV - return _compute_singular_values(a)[0] - elif order == -2: - # min SV - return _compute_singular_values(a)[-1] - else: - # replicate numpy error - raise ValueError("Invalid norm order for matrices.") - return twoD_impl - else: - assert 0 # unreachable - - -@overload(np.linalg.norm) -def norm_impl(a, ord=None): - ensure_lapack() - - _check_linalg_1_or_2d_matrix(a, "norm") - - return _get_norm_impl(a, ord) - - -@overload(np.linalg.cond) -def cond_impl(a, p=None): - ensure_lapack() - - _check_linalg_matrix(a, "cond") - - def _get_cond_impl(a, p): - # handle the p==None case separately for type inference to work ok - if p in (None, types.none): - def cond_none_impl(a, p=None): - s = _compute_singular_values(a) - return s[0] / s[-1] - return cond_none_impl - else: - def cond_not_none_impl(a, p): - # This is extracted for performance, numpy does approximately: - # `condition = norm(a) * norm(inv(a))` - # in the cases of `p == 2` or `p ==-2` singular values are used - # for computing norms. This costs numpy an svd of `a` then an - # inversion of `a` and another svd of `a`. - # Below is a different approach, which also gives a more - # accurate answer as there is no inversion involved. - # Recall that the singular values of an inverted matrix are the - # reciprocal of singular values of the original matrix. - # Therefore calling `svd(a)` once yields all the information - # needed about both `a` and `inv(a)` without the cost or - # potential loss of accuracy incurred through inversion. - # For the case of `p == 2`, the result is just the ratio of - # `largest singular value/smallest singular value`, and for the - # case of `p==-2` the result is simply the - # `smallest singular value/largest singular value`. 
- # As a result of this, numba accepts non-square matrices as - # input when p==+/-2 as well as when p==None. - if p == 2 or p == -2: - s = _compute_singular_values(a) - if p == 2: - return s[0] / s[-1] - else: - return s[-1] / s[0] - else: # cases np.inf, -np.inf, 1, -1 - norm_a = np.linalg.norm(a, p) - norm_inv_a = np.linalg.norm(np.linalg.inv(a), p) - return norm_a * norm_inv_a - return cond_not_none_impl - - return _get_cond_impl(a, p) - - -@register_jitable -def _get_rank_from_singular_values(sv, t): - """ - Gets rank from singular values with cut-off at a given tolerance - """ - rank = 0 - for k in range(len(sv)): - if sv[k] > t: - rank = rank + 1 - else: # sv is ordered big->small so break on condition not met - break - return rank - - -@overload(np.linalg.matrix_rank) -def matrix_rank_impl(a, tol=None): - """ - Computes rank for matrices and vectors. - The only issue that may arise is that because numpy uses double - precision lapack calls whereas numba uses type specific lapack - calls, some singular values may differ and therefore counting the - number of them above a tolerance may lead to different counts, - and therefore rank, in some cases. 
- """ - ensure_lapack() - - _check_linalg_1_or_2d_matrix(a, "matrix_rank") - - def _2d_matrix_rank_impl(a, tol): - - # handle the tol==None case separately for type inference to work - if tol in (None, types.none): - nb_type = getattr(a.dtype, "underlying_float", a.dtype) - np_type = np_support.as_dtype(nb_type) - eps_val = np.finfo(np_type).eps - - def _2d_tol_none_impl(a, tol=None): - s = _compute_singular_values(a) - # replicate numpy default tolerance calculation - r = a.shape[0] - c = a.shape[1] - l = max(r, c) - t = s[0] * l * eps_val - return _get_rank_from_singular_values(s, t) - return _2d_tol_none_impl - else: - def _2d_tol_not_none_impl(a, tol): - s = _compute_singular_values(a) - return _get_rank_from_singular_values(s, tol) - return _2d_tol_not_none_impl - - def _get_matrix_rank_impl(a, tol): - ndim = a.ndim - if ndim == 1: - # NOTE: Technically, the numpy implementation could be argued as - # incorrect for the case of a vector (1D matrix). If a tolerance - # is provided and a vector with a singular value below tolerance is - # encountered this should report a rank of zero, the numpy - # implementation does not do this and instead elects to report that - # if any value in the vector is nonzero then the rank is 1. - # An example would be [0, 1e-15, 0, 2e-15] which numpy reports as - # rank 1 invariant of `tol`. The singular value for this vector is - # obviously sqrt(5)*1e-15 and so a tol of e.g. sqrt(6)*1e-15 should - # lead to a reported rank of 0 whereas a tol of 1e-15 should lead - # to a reported rank of 1, numpy reports 1 regardless. - # The code below replicates the numpy behaviour. - def _1d_matrix_rank_impl(a, tol): - for k in range(len(a)): - if a[k] != 0.: - return 1 - return 0 - return _1d_matrix_rank_impl - elif ndim == 2: - return _2d_matrix_rank_impl(a, tol) - else: - assert 0 # unreachable - - return _get_matrix_rank_impl(a, tol) - - -@overload(np.linalg.matrix_power) -def matrix_power_impl(a, n): - """ - Computes matrix power. 
Only integer powers are supported in numpy. - """ - - _check_linalg_matrix(a, "matrix_power") - np_dtype = np_support.as_dtype(a.dtype) - - nt = getattr(n, 'dtype', n) - if not isinstance(nt, types.Integer): - raise TypeError("Exponent must be an integer.") - - def matrix_power_impl(a, n): - - if n == 0: - # this should be eye() but it doesn't support - # the dtype kwarg yet so do it manually to save - # the copy required by eye(a.shape[0]).asdtype() - A = np.zeros(a.shape, dtype=np_dtype) - for k in range(a.shape[0]): - A[k, k] = 1. - return A - - am, an = a.shape[-1], a.shape[-2] - if am != an: - raise ValueError('input must be a square array') - - # empty, return a copy - if am == 0: - return a.copy() - - # note: to be consistent over contiguousness, C order is - # returned as that is what dot() produces and the most common - # paths through matrix_power will involve that. Therefore - # copies are made here to ensure the data ordering is - # correct for paths not going via dot(). - - if n < 0: - A = np.linalg.inv(a).copy() - if n == -1: # return now - return A - n = -n - else: - if n == 1: # return a copy now - return a.copy() - A = a # this is safe, `a` is only read - - if n < 4: - if n == 2: - return np.dot(A, A) - if n == 3: - return np.dot(np.dot(A, A), A) - else: - - acc = A - exp = n - - # tried a loop split and branchless using identity matrix as - # input but it seems like having a "first entry" flag is quicker - flag = True - while exp != 0: - if exp & 1: - if flag: - ret = acc - flag = False - else: - ret = np.dot(ret, acc) - acc = np.dot(acc, acc) - exp = exp >> 1 - - return ret - - return matrix_power_impl - -# This is documented under linalg despite not being in the module - - -@overload(np.trace) -def matrix_trace_impl(a, offset=types.int_): - """ - Computes the trace of an array. 
- """ - - _check_linalg_matrix(a, "trace", la_prefix=False) - - if not isinstance(offset, types.Integer): - raise TypeError("integer argument expected, got %s" % offset) - - def matrix_trace_impl(a, offset=0): - rows, cols = a.shape - k = offset - if k < 0: - rows = rows + k - if k > 0: - cols = cols - k - n = max(min(rows, cols), 0) - ret = 0 - if k >= 0: - for i in range(n): - ret += a[i, k + i] - else: - for i in range(n): - ret += a[i - k, i] - return ret - - return matrix_trace_impl - - -def _check_scalar_or_lt_2d_mat(a, func_name, la_prefix=True): - prefix = "np.linalg" if la_prefix else "np" - interp = (prefix, func_name) - # checks that a matrix is 1 or 2D - if isinstance(a, types.Array): - if not a.ndim <= 2: - raise TypingError("%s.%s() only supported on 1 and 2-D arrays " - % interp, highlighting=False) - - -def _get_as_array(x): - if not isinstance(x, types.Array): - @register_jitable - def asarray(x): - return np.array((x,)) - return asarray - else: - @register_jitable - def asarray(x): - return x - return asarray - - -def _get_outer_impl(a, b, out): - a_arr = _get_as_array(a) - b_arr = _get_as_array(b) - - if out in (None, types.none): - @register_jitable - def outer_impl(a, b, out): - aa = a_arr(a) - bb = b_arr(b) - return np.multiply(aa.ravel().reshape((aa.size, 1)), - bb.ravel().reshape((1, bb.size))) - return outer_impl - else: - @register_jitable - def outer_impl(a, b, out): - aa = a_arr(a) - bb = b_arr(b) - np.multiply(aa.ravel().reshape((aa.size, 1)), - bb.ravel().reshape((1, bb.size)), - out) - return out - return outer_impl - - -if numpy_version >= (1, 9): - @overload(np.outer) - def outer_impl(a, b, out=None): - - _check_scalar_or_lt_2d_mat(a, "outer", la_prefix=False) - _check_scalar_or_lt_2d_mat(b, "outer", la_prefix=False) - - impl = _get_outer_impl(a, b, out) - - def outer_impl(a, b, out=None): - return impl(a, b, out) - - return outer_impl -else: - @overload(np.outer) - def outer_impl(a, b): - - _check_scalar_or_lt_2d_mat(a, "outer", 
la_prefix=False) - _check_scalar_or_lt_2d_mat(b, "outer", la_prefix=False) - - impl = _get_outer_impl(a, b, None) - - def outer_impl(a, b): - return impl(a, b, None) - - return outer_impl - - -def _kron_normaliser_impl(x): - # makes x into a 2d array - if isinstance(x, types.Array): - if x.ndim == 2: - @register_jitable - def nrm_shape(x): - xn = x.shape[-1] - xm = x.shape[-2] - return x.reshape(xm, xn) - return nrm_shape - else: - @register_jitable - def nrm_shape(x): - xn = x.shape[-1] - return x.reshape(1, xn) - return nrm_shape - else: # assume its a scalar - @register_jitable - def nrm_shape(x): - a = np.empty((1, 1), type(x)) - a[0] = x - return a - return nrm_shape - - -def _kron_return(a, b): - # transforms c into something that kron would return - # based on the shapes of a and b - a_is_arr = isinstance(a, types.Array) - b_is_arr = isinstance(b, types.Array) - if a_is_arr and b_is_arr: - if a.ndim == 2 or b.ndim == 2: - @register_jitable - def ret(a, b, c): - return c - return ret - else: - @register_jitable - def ret(a, b, c): - return c.reshape(c.size) - return ret - else: # at least one of (a, b) is a scalar - if a_is_arr: - @register_jitable - def ret(a, b, c): - return c.reshape(a.shape) - return ret - elif b_is_arr: - @register_jitable - def ret(a, b, c): - return c.reshape(b.shape) - return ret - else: # both scalars - @register_jitable - def ret(a, b, c): - return c[0] - return ret - - -@overload(np.kron) -def kron_impl(a, b): - - _check_scalar_or_lt_2d_mat(a, "kron", la_prefix=False) - _check_scalar_or_lt_2d_mat(b, "kron", la_prefix=False) - - fix_a = _kron_normaliser_impl(a) - fix_b = _kron_normaliser_impl(b) - ret_c = _kron_return(a, b) - - # this is fine because the ufunc for the Hadamard product - # will reject differing dtypes in a and b. 
- dt = getattr(a, 'dtype', a) - - def kron_impl(a, b): - - aa = fix_a(a) - bb = fix_b(b) - - am = aa.shape[-2] - an = aa.shape[-1] - bm = bb.shape[-2] - bn = bb.shape[-1] - - cm = am * bm - cn = an * bn - - # allocate c - C = np.empty((cm, cn), dtype=dt) - - # In practice this is runs quicker than the more obvious - # `each element of A multiplied by B and assigned to - # a block in C` like alg. - - # loop over rows of A - for i in range(am): - # compute the column offset into C - rjmp = i * bm - # loop over rows of B - for k in range(bm): - # compute row the offset into C - irjmp = rjmp + k - # slice a given row of B - slc = bb[k, :] - # loop over columns of A - for j in range(an): - # vectorized assignment of an element of A - # multiplied by the current row of B into - # a slice of a row of C - cjmp = j * bn - C[irjmp, cjmp:cjmp + bn] = aa[i, j] * slc - - return ret_c(a, b, C) - - return kron_impl diff --git a/numba/numba/targets/listobj.py b/numba/numba/targets/listobj.py deleted file mode 100644 index a18d6304b..000000000 --- a/numba/numba/targets/listobj.py +++ /dev/null @@ -1,1091 +0,0 @@ -""" -Support for native homogeneous lists. -""" - -from __future__ import print_function, absolute_import, division - -import math - -from llvmlite import ir -from numba import types, cgutils, typing -from numba.targets.imputils import (lower_builtin, lower_cast, - iternext_impl, impl_ret_borrowed, - impl_ret_new_ref, impl_ret_untracked) -from numba.utils import cached_property -from . import quicksort, slicing - - -def get_list_payload(context, builder, list_type, value): - """ - Given a list value and type, get its payload structure (as a - reference, so that mutations are seen by all). 
- """ - payload_type = types.ListPayload(list_type) - payload = context.nrt.meminfo_data(builder, value.meminfo) - ptrty = context.get_data_type(payload_type).as_pointer() - payload = builder.bitcast(payload, ptrty) - return context.make_data_helper(builder, payload_type, ref=payload) - - -def get_itemsize(context, list_type): - """ - Return the item size for the given list type. - """ - llty = context.get_data_type(list_type.dtype) - return context.get_abi_sizeof(llty) - - -class _ListPayloadMixin(object): - - @property - def size(self): - return self._payload.size - - @size.setter - def size(self, value): - self._payload.size = value - - @property - def dirty(self): - return self._payload.dirty - - @property - def data(self): - return self._payload._get_ptr_by_name('data') - - def _gep(self, idx): - return cgutils.gep(self._builder, self.data, idx) - - def getitem(self, idx): - ptr = self._gep(idx) - data_item = self._builder.load(ptr) - return self._datamodel.from_data(self._builder, data_item) - - def fix_index(self, idx): - """ - Fix negative indices by adding the size to them. Positive - indices are left untouched. - """ - is_negative = self._builder.icmp_signed('<', idx, - ir.Constant(idx.type, 0)) - wrapped_index = self._builder.add(idx, self.size) - return self._builder.select(is_negative, wrapped_index, idx) - - def is_out_of_bounds(self, idx): - """ - Return whether the index is out of bounds. - """ - underflow = self._builder.icmp_signed('<', idx, - ir.Constant(idx.type, 0)) - overflow = self._builder.icmp_signed('>=', idx, self.size) - return self._builder.or_(underflow, overflow) - - def clamp_index(self, idx): - """ - Clamp the index in [0, size]. 
- """ - builder = self._builder - idxptr = cgutils.alloca_once_value(builder, idx) - - zero = ir.Constant(idx.type, 0) - size = self.size - - underflow = self._builder.icmp_signed('<', idx, zero) - with builder.if_then(underflow, likely=False): - builder.store(zero, idxptr) - overflow = self._builder.icmp_signed('>=', idx, size) - with builder.if_then(overflow, likely=False): - builder.store(size, idxptr) - - return builder.load(idxptr) - - def guard_index(self, idx, msg): - """ - Raise an error if the index is out of bounds. - """ - with self._builder.if_then(self.is_out_of_bounds(idx), likely=False): - self._context.call_conv.return_user_exc(self._builder, - IndexError, (msg,)) - - def fix_slice(self, slice): - """ - Fix slice start and stop to be valid (inclusive and exclusive, resp) - indexing bounds. - """ - return slicing.fix_slice(self._builder, slice, self.size) - - def incref_value(self, val): - "Incref an element value" - self._context.nrt.incref(self._builder, self.dtype, val) - - def decref_value(self, val): - "Decref an element value" - self._context.nrt.decref(self._builder, self.dtype, val) - - -class ListPayloadAccessor(_ListPayloadMixin): - """ - A helper object to access the list attributes given the pointer to the - payload type. 
- """ - def __init__(self, context, builder, list_type, payload_ptr): - self._context = context - self._builder = builder - self._ty = list_type - self._datamodel = context.data_model_manager[list_type.dtype] - payload_type = types.ListPayload(list_type) - ptrty = context.get_data_type(payload_type).as_pointer() - payload_ptr = builder.bitcast(payload_ptr, ptrty) - payload = context.make_data_helper(builder, payload_type, - ref=payload_ptr) - self._payload = payload - - -class ListInstance(_ListPayloadMixin): - - def __init__(self, context, builder, list_type, list_val): - self._context = context - self._builder = builder - self._ty = list_type - self._list = context.make_helper(builder, list_type, list_val) - self._itemsize = get_itemsize(context, list_type) - self._datamodel = context.data_model_manager[list_type.dtype] - - @property - def dtype(self): - return self._ty.dtype - - @property - def _payload(self): - # This cannot be cached as it can be reallocated - return get_list_payload(self._context, self._builder, self._ty, self._list) - - @property - def parent(self): - return self._list.parent - - @parent.setter - def parent(self, value): - self._list.parent = value - - @property - def value(self): - return self._list._getvalue() - - @property - def meminfo(self): - return self._list.meminfo - - def set_dirty(self, val): - if self._ty.reflected: - self._payload.dirty = cgutils.true_bit if val else cgutils.false_bit - - def clear_value(self, idx): - """Remove the value at the location - """ - self.decref_value(self.getitem(idx)) - # it's necessary for the dtor which just decref every slot on it. 
- self.zfill(idx, self._builder.add(idx, idx.type(1))) - - def setitem(self, idx, val, incref): - # Decref old data - self.decref_value(self.getitem(idx)) - - ptr = self._gep(idx) - data_item = self._datamodel.as_data(self._builder, val) - self._builder.store(data_item, ptr) - self.set_dirty(True) - if incref: - # Incref the underlying data - self.incref_value(val) - - def inititem(self, idx, val, incref=True): - ptr = self._gep(idx) - data_item = self._datamodel.as_data(self._builder, val) - self._builder.store(data_item, ptr) - if incref: - self.incref_value(val) - - def zfill(self, start, stop): - """Zero-fill the memory at index *start* to *stop* - - *stop* MUST not be smaller than *start*. - """ - builder = self._builder - base = self._gep(start) - end = self._gep(stop) - intaddr_t = self._context.get_value_type(types.intp) - size = builder.sub(builder.ptrtoint(end, intaddr_t), - builder.ptrtoint(base, intaddr_t)) - cgutils.memset(builder, base, size, ir.IntType(8)(0)) - - @classmethod - def allocate_ex(cls, context, builder, list_type, nitems): - """ - Allocate a ListInstance with its storage. - Return a (ok, instance) tuple where *ok* is a LLVM boolean and - *instance* is a ListInstance object (the object's contents are - only valid when *ok* is true). 
- """ - intp_t = context.get_value_type(types.intp) - - if isinstance(nitems, int): - nitems = ir.Constant(intp_t, nitems) - - payload_type = context.get_data_type(types.ListPayload(list_type)) - payload_size = context.get_abi_sizeof(payload_type) - - itemsize = get_itemsize(context, list_type) - # Account for the fact that the payload struct contains one entry - payload_size -= itemsize - - ok = cgutils.alloca_once_value(builder, cgutils.true_bit) - self = cls(context, builder, list_type, None) - - # Total allocation size = + nitems * itemsize - allocsize, ovf = cgutils.muladd_with_overflow(builder, nitems, - ir.Constant(intp_t, itemsize), - ir.Constant(intp_t, payload_size)) - with builder.if_then(ovf, likely=False): - builder.store(cgutils.false_bit, ok) - - with builder.if_then(builder.load(ok), likely=True): - meminfo = context.nrt.meminfo_new_varsize_dtor( - builder, size=allocsize, dtor=self.get_dtor()) - with builder.if_else(cgutils.is_null(builder, meminfo), - likely=False) as (if_error, if_ok): - with if_error: - builder.store(cgutils.false_bit, ok) - with if_ok: - self._list.meminfo = meminfo - self._list.parent = context.get_constant_null(types.pyobject) - self._payload.allocated = nitems - self._payload.size = ir.Constant(intp_t, 0) # for safety - self._payload.dirty = cgutils.false_bit - # Zero the allocated region - self.zfill(self.size.type(0), nitems) - - return builder.load(ok), self - - def define_dtor(self): - "Define the destructor if not already defined" - context = self._context - builder = self._builder - mod = builder.module - # Declare dtor - fnty = ir.FunctionType(ir.VoidType(), [cgutils.voidptr_t]) - fn = mod.get_or_insert_function(fnty, name='.dtor.list.{}'.format(self.dtype)) - if not fn.is_declaration: - # End early if the dtor is already defined - return fn - fn.linkage = 'internal' - # Populate the dtor - builder = ir.IRBuilder(fn.append_basic_block()) - base_ptr = fn.args[0] # void* - - # get payload - payload = 
ListPayloadAccessor(context, builder, self._ty, base_ptr) - - # Loop over all data to decref - intp = payload.size.type - with cgutils.for_range_slice( - builder, start=intp(0), stop=payload.size, step=intp(1), - intp=intp) as (idx, _): - val = payload.getitem(idx) - context.nrt.decref(builder, self.dtype, val) - builder.ret_void() - return fn - - def get_dtor(self): - """"Get the element dtor function pointer as void pointer. - - It's safe to be called multiple times. - """ - # Define and set the Dtor - dtor = self.define_dtor() - dtor_fnptr = self._builder.bitcast(dtor, cgutils.voidptr_t) - return dtor_fnptr - - @classmethod - def allocate(cls, context, builder, list_type, nitems): - """ - Allocate a ListInstance with its storage. Same as allocate_ex(), - but return an initialized *instance*. If allocation failed, - control is transferred to the caller using the target's current - call convention. - """ - ok, self = cls.allocate_ex(context, builder, list_type, nitems) - with builder.if_then(builder.not_(ok), likely=False): - context.call_conv.return_user_exc(builder, MemoryError, - ("cannot allocate list",)) - return self - - @classmethod - def from_meminfo(cls, context, builder, list_type, meminfo): - """ - Allocate a new list instance pointing to an existing payload - (a meminfo pointer). - Note the parent field has to be filled by the caller. - """ - self = cls(context, builder, list_type, None) - self._list.meminfo = meminfo - self._list.parent = context.get_constant_null(types.pyobject) - context.nrt.incref(builder, list_type, self.value) - # Payload is part of the meminfo, no need to touch it - return self - - def resize(self, new_size): - """ - Ensure the list is properly sized for the new size. 
- """ - def _payload_realloc(new_allocated): - payload_type = context.get_data_type(types.ListPayload(self._ty)) - payload_size = context.get_abi_sizeof(payload_type) - # Account for the fact that the payload struct contains one entry - payload_size -= itemsize - - allocsize, ovf = cgutils.muladd_with_overflow( - builder, new_allocated, - ir.Constant(intp_t, itemsize), - ir.Constant(intp_t, payload_size)) - with builder.if_then(ovf, likely=False): - context.call_conv.return_user_exc(builder, MemoryError, - ("cannot resize list",)) - - ptr = context.nrt.meminfo_varsize_realloc(builder, self._list.meminfo, - size=allocsize) - cgutils.guard_memory_error(context, builder, ptr, - "cannot resize list") - self._payload.allocated = new_allocated - - context = self._context - builder = self._builder - intp_t = new_size.type - - itemsize = get_itemsize(context, self._ty) - allocated = self._payload.allocated - - two = ir.Constant(intp_t, 2) - eight = ir.Constant(intp_t, 8) - - # allocated < new_size - is_too_small = builder.icmp_signed('<', allocated, new_size) - # (allocated >> 2) > new_size - is_too_large = builder.icmp_signed('>', builder.ashr(allocated, two), new_size) - - with builder.if_then(is_too_large, likely=False): - # Exact downsize to requested size - # NOTE: is_too_large must be aggressive enough to avoid repeated - # upsizes and downsizes when growing a list. - _payload_realloc(new_size) - - with builder.if_then(is_too_small, likely=False): - # Upsize with moderate over-allocation (size + size >> 2 + 8) - new_allocated = builder.add(eight, - builder.add(new_size, - builder.ashr(new_size, two))) - _payload_realloc(new_allocated) - self.zfill(self.size, new_allocated) - - self._payload.size = new_size - self.set_dirty(True) - - def move(self, dest_idx, src_idx, count): - """ - Move `count` elements from `src_idx` to `dest_idx`. 
- """ - dest_ptr = self._gep(dest_idx) - src_ptr = self._gep(src_idx) - cgutils.raw_memmove(self._builder, dest_ptr, src_ptr, - count, itemsize=self._itemsize) - - self.set_dirty(True) - -class ListIterInstance(_ListPayloadMixin): - - def __init__(self, context, builder, iter_type, iter_val): - self._context = context - self._builder = builder - self._ty = iter_type - self._iter = context.make_helper(builder, iter_type, iter_val) - self._datamodel = context.data_model_manager[iter_type.yield_type] - - @classmethod - def from_list(cls, context, builder, iter_type, list_val): - list_inst = ListInstance(context, builder, iter_type.container, list_val) - self = cls(context, builder, iter_type, None) - index = context.get_constant(types.intp, 0) - self._iter.index = cgutils.alloca_once_value(builder, index) - self._iter.meminfo = list_inst.meminfo - return self - - @property - def _payload(self): - # This cannot be cached as it can be reallocated - return get_list_payload(self._context, self._builder, - self._ty.container, self._iter) - - @property - def value(self): - return self._iter._getvalue() - - @property - def index(self): - return self._builder.load(self._iter.index) - - @index.setter - def index(self, value): - self._builder.store(value, self._iter.index) - - -#------------------------------------------------------------------------------- -# Constructors - -def build_list(context, builder, list_type, items): - """ - Build a list of the given type, containing the given items. 
- """ - nitems = len(items) - inst = ListInstance.allocate(context, builder, list_type, nitems) - # Populate list - inst.size = context.get_constant(types.intp, nitems) - for i, val in enumerate(items): - inst.setitem(context.get_constant(types.intp, i), val, incref=True) - - return impl_ret_new_ref(context, builder, list_type, inst.value) - - -@lower_builtin(list, types.IterableType) -def list_constructor(context, builder, sig, args): - - def list_impl(iterable): - res = [] - res.extend(iterable) - return res - - return context.compile_internal(builder, list_impl, sig, args) - -@lower_builtin(list) -def list_constructor(context, builder, sig, args): - list_type = sig.return_type - list_len = 0 - inst = ListInstance.allocate(context, builder, list_type, list_len) - return impl_ret_new_ref(context, builder, list_type, inst.value) - -#------------------------------------------------------------------------------- -# Various operations - -@lower_builtin(len, types.List) -def list_len(context, builder, sig, args): - inst = ListInstance(context, builder, sig.args[0], args[0]) - return inst.size - -@lower_builtin('getiter', types.List) -def getiter_list(context, builder, sig, args): - inst = ListIterInstance.from_list(context, builder, sig.return_type, args[0]) - return impl_ret_borrowed(context, builder, sig.return_type, inst.value) - -@lower_builtin('iternext', types.ListIter) -@iternext_impl -def iternext_listiter(context, builder, sig, args, result): - inst = ListIterInstance(context, builder, sig.args[0], args[0]) - - index = inst.index - nitems = inst.size - is_valid = builder.icmp_signed('<', index, nitems) - result.set_valid(is_valid) - - with builder.if_then(is_valid): - result.yield_(inst.getitem(index)) - inst.index = builder.add(index, context.get_constant(types.intp, 1)) - - -@lower_builtin('getitem', types.List, types.Integer) -def getitem_list(context, builder, sig, args): - inst = ListInstance(context, builder, sig.args[0], args[0]) - index = args[1] - - 
index = inst.fix_index(index) - inst.guard_index(index, msg="getitem out of range") - result = inst.getitem(index) - - return impl_ret_borrowed(context, builder, sig.return_type, result) - -@lower_builtin('setitem', types.List, types.Integer, types.Any) -def setitem_list(context, builder, sig, args): - inst = ListInstance(context, builder, sig.args[0], args[0]) - index = args[1] - value = args[2] - - index = inst.fix_index(index) - inst.guard_index(index, msg="setitem out of range") - inst.setitem(index, value, incref=True) - return context.get_dummy_value() - - -@lower_builtin('getitem', types.List, types.SliceType) -def getslice_list(context, builder, sig, args): - inst = ListInstance(context, builder, sig.args[0], args[0]) - slice = context.make_helper(builder, sig.args[1], args[1]) - slicing.guard_invalid_slice(context, builder, sig.args[1], slice) - inst.fix_slice(slice) - - # Allocate result and populate it - result_size = slicing.get_slice_length(builder, slice) - result = ListInstance.allocate(context, builder, sig.return_type, - result_size) - result.size = result_size - with cgutils.for_range_slice_generic(builder, slice.start, slice.stop, - slice.step) as (pos_range, neg_range): - with pos_range as (idx, count): - value = inst.getitem(idx) - result.inititem(count, value, incref=True) - with neg_range as (idx, count): - value = inst.getitem(idx) - result.inititem(count, value, incref=True) - - return impl_ret_new_ref(context, builder, sig.return_type, result.value) - -@lower_builtin('setitem', types.List, types.SliceType, types.Any) -def setitem_list(context, builder, sig, args): - dest = ListInstance(context, builder, sig.args[0], args[0]) - src = ListInstance(context, builder, sig.args[2], args[2]) - - slice = context.make_helper(builder, sig.args[1], args[1]) - slicing.guard_invalid_slice(context, builder, sig.args[1], slice) - dest.fix_slice(slice) - - src_size = src.size - avail_size = slicing.get_slice_length(builder, slice) - size_delta = 
builder.sub(src.size, avail_size) - - zero = ir.Constant(size_delta.type, 0) - one = ir.Constant(size_delta.type, 1) - - with builder.if_else(builder.icmp_signed('==', slice.step, one)) as (then, otherwise): - with then: - # Slice step == 1 => we can resize - - # Compute the real stop, e.g. for dest[2:0] = [...] - real_stop = builder.add(slice.start, avail_size) - # Size of the list tail, after the end of slice - tail_size = builder.sub(dest.size, real_stop) - - with builder.if_then(builder.icmp_signed('>', size_delta, zero)): - # Grow list then move list tail - dest.resize(builder.add(dest.size, size_delta)) - dest.move(builder.add(real_stop, size_delta), real_stop, - tail_size) - - with builder.if_then(builder.icmp_signed('<', size_delta, zero)): - # Move list tail then shrink list - dest.move(builder.add(real_stop, size_delta), real_stop, - tail_size) - dest.resize(builder.add(dest.size, size_delta)) - - dest_offset = slice.start - - with cgutils.for_range(builder, src_size) as loop: - value = src.getitem(loop.index) - dest.setitem(builder.add(loop.index, dest_offset), value, incref=True) - - with otherwise: - with builder.if_then(builder.icmp_signed('!=', size_delta, zero)): - msg = "cannot resize extended list slice with step != 1" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) - - with cgutils.for_range_slice_generic( - builder, slice.start, slice.stop, slice.step) as (pos_range, neg_range): - with pos_range as (index, count): - value = src.getitem(count) - dest.setitem(index, value, incref=True) - with neg_range as (index, count): - value = src.getitem(count) - dest.setitem(index, value, incref=True) - - return context.get_dummy_value() - - - -@lower_builtin('delitem', types.List, types.Integer) -def delitem_list_index(context, builder, sig, args): - - def list_delitem_impl(lst, i): - lst.pop(i) - - return context.compile_internal(builder, list_delitem_impl, sig, args) - - -@lower_builtin('delitem', types.List, types.SliceType) -def 
delitem_list(context, builder, sig, args): - inst = ListInstance(context, builder, sig.args[0], args[0]) - slice = context.make_helper(builder, sig.args[1], args[1]) - - slicing.guard_invalid_slice(context, builder, sig.args[1], slice) - inst.fix_slice(slice) - - slice_len = slicing.get_slice_length(builder, slice) - - one = ir.Constant(slice_len.type, 1) - - with builder.if_then(builder.icmp_signed('!=', slice.step, one), likely=False): - msg = "unsupported del list[start:stop:step] with step != 1" - context.call_conv.return_user_exc(builder, NotImplementedError, (msg,)) - - # Compute the real stop, e.g. for dest[2:0] - start = slice.start - real_stop = builder.add(start, slice_len) - # Decref the removed range - with cgutils.for_range_slice( - builder, start, real_stop, start.type(1) - ) as (idx, _): - inst.decref_value(inst.getitem(idx)) - - # Size of the list tail, after the end of slice - tail_size = builder.sub(inst.size, real_stop) - inst.move(start, real_stop, tail_size) - inst.resize(builder.sub(inst.size, slice_len)) - - return context.get_dummy_value() - - -# XXX should there be a specific module for Sequence or collection base classes? 
- -@lower_builtin("in", types.Any, types.Sequence) -def in_seq(context, builder, sig, args): - def seq_contains_impl(value, lst): - for elem in lst: - if elem == value: - return True - return False - - return context.compile_internal(builder, seq_contains_impl, sig, args) - -@lower_builtin(bool, types.Sequence) -def sequence_bool(context, builder, sig, args): - def sequence_bool_impl(seq): - return len(seq) != 0 - - return context.compile_internal(builder, sequence_bool_impl, sig, args) - - -@lower_builtin("+", types.List, types.List) -def list_add(context, builder, sig, args): - a = ListInstance(context, builder, sig.args[0], args[0]) - b = ListInstance(context, builder, sig.args[1], args[1]) - - a_size = a.size - b_size = b.size - nitems = builder.add(a_size, b_size) - dest = ListInstance.allocate(context, builder, sig.return_type, nitems) - dest.size = nitems - - with cgutils.for_range(builder, a_size) as loop: - value = a.getitem(loop.index) - value = context.cast(builder, value, a.dtype, dest.dtype) - dest.setitem(loop.index, value, incref=True) - with cgutils.for_range(builder, b_size) as loop: - value = b.getitem(loop.index) - value = context.cast(builder, value, b.dtype, dest.dtype) - dest.setitem(builder.add(loop.index, a_size), value, incref=True) - - return impl_ret_new_ref(context, builder, sig.return_type, dest.value) - -@lower_builtin("+=", types.List, types.List) -def list_add_inplace(context, builder, sig, args): - assert sig.args[0].dtype == sig.return_type.dtype - dest = _list_extend_list(context, builder, sig, args) - - return impl_ret_borrowed(context, builder, sig.return_type, dest.value) - - -@lower_builtin("*", types.List, types.Integer) -def list_mul(context, builder, sig, args): - src = ListInstance(context, builder, sig.args[0], args[0]) - src_size = src.size - - mult = args[1] - zero = ir.Constant(mult.type, 0) - mult = builder.select(cgutils.is_neg_int(builder, mult), zero, mult) - nitems = builder.mul(mult, src_size) - - dest = 
ListInstance.allocate(context, builder, sig.return_type, nitems) - dest.size = nitems - - with cgutils.for_range_slice(builder, zero, nitems, src_size, inc=True) as (dest_offset, _): - with cgutils.for_range(builder, src_size) as loop: - value = src.getitem(loop.index) - dest.setitem(builder.add(loop.index, dest_offset), value, incref=True) - - return impl_ret_new_ref(context, builder, sig.return_type, dest.value) - -@lower_builtin("*=", types.List, types.Integer) -def list_mul_inplace(context, builder, sig, args): - inst = ListInstance(context, builder, sig.args[0], args[0]) - src_size = inst.size - - mult = args[1] - zero = ir.Constant(mult.type, 0) - mult = builder.select(cgutils.is_neg_int(builder, mult), zero, mult) - nitems = builder.mul(mult, src_size) - - inst.resize(nitems) - - with cgutils.for_range_slice(builder, src_size, nitems, src_size, inc=True) as (dest_offset, _): - with cgutils.for_range(builder, src_size) as loop: - value = inst.getitem(loop.index) - inst.setitem(builder.add(loop.index, dest_offset), value, incref=True) - - return impl_ret_borrowed(context, builder, sig.return_type, inst.value) - - -#------------------------------------------------------------------------------- -# Comparisons - -@lower_builtin('is', types.List, types.List) -def list_is(context, builder, sig, args): - a = ListInstance(context, builder, sig.args[0], args[0]) - b = ListInstance(context, builder, sig.args[1], args[1]) - ma = builder.ptrtoint(a.meminfo, cgutils.intp_t) - mb = builder.ptrtoint(b.meminfo, cgutils.intp_t) - return builder.icmp_signed('==', ma, mb) - -@lower_builtin('==', types.List, types.List) -def list_eq(context, builder, sig, args): - aty, bty = sig.args - a = ListInstance(context, builder, aty, args[0]) - b = ListInstance(context, builder, bty, args[1]) - - a_size = a.size - same_size = builder.icmp_signed('==', a_size, b.size) - - res = cgutils.alloca_once_value(builder, same_size) - - with builder.if_then(same_size): - with 
cgutils.for_range(builder, a_size) as loop: - v = a.getitem(loop.index) - w = b.getitem(loop.index) - itemres = context.generic_compare(builder, '==', - (aty.dtype, bty.dtype), (v, w)) - with builder.if_then(builder.not_(itemres)): - # Exit early - builder.store(cgutils.false_bit, res) - loop.do_break() - - return builder.load(res) - -@lower_builtin('!=', types.List, types.List) -def list_ne(context, builder, sig, args): - - def list_ne_impl(a, b): - return not (a == b) - - return context.compile_internal(builder, list_ne_impl, sig, args) - -@lower_builtin('<=', types.List, types.List) -def list_le(context, builder, sig, args): - - def list_le_impl(a, b): - m = len(a) - n = len(b) - for i in range(min(m, n)): - if a[i] < b[i]: - return True - elif a[i] > b[i]: - return False - return m <= n - - return context.compile_internal(builder, list_le_impl, sig, args) - -@lower_builtin('<', types.List, types.List) -def list_lt(context, builder, sig, args): - - def list_lt_impl(a, b): - m = len(a) - n = len(b) - for i in range(min(m, n)): - if a[i] < b[i]: - return True - elif a[i] > b[i]: - return False - return m < n - - return context.compile_internal(builder, list_lt_impl, sig, args) - -@lower_builtin('>=', types.List, types.List) -def list_ge(context, builder, sig, args): - - def list_ge_impl(a, b): - return b <= a - - return context.compile_internal(builder, list_ge_impl, sig, args) - -@lower_builtin('>', types.List, types.List) -def list_gt(context, builder, sig, args): - - def list_gt_impl(a, b): - return b < a - - return context.compile_internal(builder, list_gt_impl, sig, args) - -#------------------------------------------------------------------------------- -# Methods - -@lower_builtin("list.append", types.List, types.Any) -def list_append(context, builder, sig, args): - inst = ListInstance(context, builder, sig.args[0], args[0]) - item = args[1] - - n = inst.size - new_size = builder.add(n, ir.Constant(n.type, 1)) - inst.resize(new_size) - inst.setitem(n, item, 
incref=True) - - return context.get_dummy_value() - -@lower_builtin("list.clear", types.List) -def list_clear(context, builder, sig, args): - inst = ListInstance(context, builder, sig.args[0], args[0]) - inst.resize(context.get_constant(types.intp, 0)) - - return context.get_dummy_value() - -@lower_builtin("list.copy", types.List) -def list_copy(context, builder, sig, args): - def list_copy_impl(lst): - return list(lst) - - return context.compile_internal(builder, list_copy_impl, sig, args) - -@lower_builtin("list.count", types.List, types.Any) -def list_count(context, builder, sig, args): - - def list_count_impl(lst, value): - res = 0 - for elem in lst: - if elem == value: - res += 1 - return res - - return context.compile_internal(builder, list_count_impl, sig, args) - -def _list_extend_list(context, builder, sig, args): - src = ListInstance(context, builder, sig.args[1], args[1]) - dest = ListInstance(context, builder, sig.args[0], args[0]) - - src_size = src.size - dest_size = dest.size - nitems = builder.add(src_size, dest_size) - dest.resize(nitems) - dest.size = nitems - - with cgutils.for_range(builder, src_size) as loop: - value = src.getitem(loop.index) - value = context.cast(builder, value, src.dtype, dest.dtype) - dest.setitem(builder.add(loop.index, dest_size), value, incref=True) - - return dest - -@lower_builtin("list.extend", types.List, types.IterableType) -def list_extend(context, builder, sig, args): - if isinstance(sig.args[1], types.List): - # Specialize for list operands, for speed. 
- _list_extend_list(context, builder, sig, args) - return context.get_dummy_value() - - def list_extend(lst, iterable): - # Speed hack to avoid NRT refcount operations inside the loop - meth = lst.append - for v in iterable: - meth(v) - - return context.compile_internal(builder, list_extend, sig, args) - -@lower_builtin("list.index", types.List, types.Any) -def list_index(context, builder, sig, args): - - def list_index_impl(lst, value): - for i in range(len(lst)): - if lst[i] == value: - return i - # XXX references are leaked when raising - raise ValueError("value not in list") - - return context.compile_internal(builder, list_index_impl, sig, args) - -@lower_builtin("list.index", types.List, types.Any, - types.Integer) -def list_index(context, builder, sig, args): - - def list_index_impl(lst, value, start): - n = len(lst) - if start < 0: - start += n - if start < 0: - start = 0 - for i in range(start, len(lst)): - if lst[i] == value: - return i - # XXX references are leaked when raising - raise ValueError("value not in list") - - return context.compile_internal(builder, list_index_impl, sig, args) - -@lower_builtin("list.index", types.List, types.Any, - types.Integer, types.Integer) -def list_index(context, builder, sig, args): - - def list_index_impl(lst, value, start, stop): - n = len(lst) - if start < 0: - start += n - if start < 0: - start = 0 - if stop < 0: - stop += n - if stop > n: - stop = n - for i in range(start, stop): - if lst[i] == value: - return i - # XXX references are leaked when raising - raise ValueError("value not in list") - - return context.compile_internal(builder, list_index_impl, sig, args) - -@lower_builtin("list.insert", types.List, types.Integer, - types.Any) -def list_insert(context, builder, sig, args): - inst = ListInstance(context, builder, sig.args[0], args[0]) - index = inst.fix_index(args[1]) - index = inst.clamp_index(index) - value = args[2] - - n = inst.size - one = ir.Constant(n.type, 1) - new_size = builder.add(n, one) - 
inst.resize(new_size) - inst.move(builder.add(index, one), index, builder.sub(n, index)) - inst.setitem(index, value, incref=True) - - return context.get_dummy_value() - -@lower_builtin("list.pop", types.List) -def list_pop(context, builder, sig, args): - inst = ListInstance(context, builder, sig.args[0], args[0]) - - n = inst.size - cgutils.guard_zero(context, builder, n, - (IndexError, "pop from empty list")) - n = builder.sub(n, ir.Constant(n.type, 1)) - res = inst.getitem(n) - inst.incref_value(res) # incref the pop'ed element - inst.clear_value(n) # clear the storage space - inst.resize(n) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin("list.pop", types.List, types.Integer) -def list_pop(context, builder, sig, args): - inst = ListInstance(context, builder, sig.args[0], args[0]) - idx = inst.fix_index(args[1]) - - n = inst.size - cgutils.guard_zero(context, builder, n, - (IndexError, "pop from empty list")) - inst.guard_index(idx, "pop index out of range") - - res = inst.getitem(idx) - - one = ir.Constant(n.type, 1) - n = builder.sub(n, ir.Constant(n.type, 1)) - inst.move(idx, builder.add(idx, one), builder.sub(n, idx)) - inst.resize(n) - return impl_ret_new_ref(context, builder, sig.return_type, res) - -@lower_builtin("list.remove", types.List, types.Any) -def list_remove(context, builder, sig, args): - - def list_remove_impl(lst, value): - for i in range(len(lst)): - if lst[i] == value: - lst.pop(i) - return - # XXX references are leaked when raising - raise ValueError("list.remove(x): x not in list") - - return context.compile_internal(builder, list_remove_impl, sig, args) - -@lower_builtin("list.reverse", types.List) -def list_reverse(context, builder, sig, args): - - def list_reverse_impl(lst): - for a in range(0, len(lst) // 2): - b = -a - 1 - lst[a], lst[b] = lst[b], lst[a] - - return context.compile_internal(builder, list_reverse_impl, sig, args) - - -# 
----------------------------------------------------------------------------- -# Sorting - -_sorting_init = False - -def load_sorts(): - """ - Load quicksort lazily, to avoid circular imports accross the jit() global. - """ - g = globals() - if g['_sorting_init']: - return - - def gt(a, b): - return a > b - - default_sort = quicksort.make_jit_quicksort() - reversed_sort = quicksort.make_jit_quicksort(lt=gt) - g['run_default_sort'] = default_sort.run_quicksort - g['run_reversed_sort'] = reversed_sort.run_quicksort - g['_sorting_init'] = True - - -@lower_builtin("list.sort", types.List) -@lower_builtin("list.sort", types.List, types.Boolean) -def list_sort(context, builder, sig, args): - load_sorts() - - if len(args) == 1: - sig = typing.signature(sig.return_type, *sig.args + (types.boolean,)) - args = tuple(args) + (cgutils.false_bit,) - - def list_sort_impl(lst, reverse): - if reverse: - run_reversed_sort(lst) - else: - run_default_sort(lst) - - return context.compile_internal(builder, list_sort_impl, sig, args) - -@lower_builtin(sorted, types.IterableType) -@lower_builtin(sorted, types.IterableType, types.Boolean) -def sorted_impl(context, builder, sig, args): - if len(args) == 1: - sig = typing.signature(sig.return_type, *sig.args + (types.boolean,)) - args = tuple(args) + (cgutils.false_bit,) - - def sorted_impl(it, reverse): - lst = list(it) - lst.sort(reverse=reverse) - return lst - - return context.compile_internal(builder, sorted_impl, sig, args) - - -# ----------------------------------------------------------------------------- -# Implicit casting - -@lower_cast(types.List, types.List) -def list_to_list(context, builder, fromty, toty, val): - # Casting from non-reflected to reflected - assert fromty.dtype == toty.dtype - return val diff --git a/numba/numba/targets/mathimpl.py b/numba/numba/targets/mathimpl.py deleted file mode 100644 index d08748968..000000000 --- a/numba/numba/targets/mathimpl.py +++ /dev/null @@ -1,405 +0,0 @@ -""" -Provide math calls 
that uses intrinsics or libc math functions. -""" - -from __future__ import print_function, absolute_import, division -import math -import sys - -import llvmlite.llvmpy.core as lc -from llvmlite.llvmpy.core import Type - -from numba.targets.imputils import Registry, impl_ret_untracked -from numba import types, cgutils, utils, config -from numba.typing import signature - - -registry = Registry() -lower = registry.lower - - -# Helpers, shared with cmathimpl. - -FLT_MAX = 3.402823466E+38 -FLT_MIN = 1.175494351E-38 - -FLOAT_ABS_MASK = 0x7fffffff -FLOAT_SIGN_MASK = 0x80000000 -DOUBLE_ABS_MASK = 0x7fffffffffffffff -DOUBLE_SIGN_MASK = 0x8000000000000000 - -def is_nan(builder, val): - """ - Return a condition testing whether *val* is a NaN. - """ - return builder.fcmp_unordered('uno', val, val) - -def is_inf(builder, val): - """ - Return a condition testing whether *val* is an infinite. - """ - pos_inf = lc.Constant.real(val.type, float("+inf")) - neg_inf = lc.Constant.real(val.type, float("-inf")) - isposinf = builder.fcmp(lc.FCMP_OEQ, val, pos_inf) - isneginf = builder.fcmp(lc.FCMP_OEQ, val, neg_inf) - return builder.or_(isposinf, isneginf) - -def is_finite(builder, val): - """ - Return a condition testing whether *val* is a finite. - """ - # is_finite(x) <=> x - x != NaN - val_minus_val = builder.fsub(val, val) - return builder.fcmp_ordered('ord', val_minus_val, val_minus_val) - -def f64_as_int64(builder, val): - """ - Bitcast a double into a 64-bit integer. - """ - assert val.type == Type.double() - return builder.bitcast(val, Type.int(64)) - -def int64_as_f64(builder, val): - """ - Bitcast a 64-bit integer into a double. - """ - assert val.type == Type.int(64) - return builder.bitcast(val, Type.double()) - -def f32_as_int32(builder, val): - """ - Bitcast a float into a 32-bit integer. - """ - assert val.type == Type.float() - return builder.bitcast(val, Type.int(32)) - -def int32_as_f32(builder, val): - """ - Bitcast a 32-bit integer into a float. 
- """ - assert val.type == Type.int(32) - return builder.bitcast(val, Type.float()) - -def negate_real(builder, val): - """ - Negate real number *val*, with proper handling of zeros. - """ - # The negative zero forces LLVM to handle signed zeros properly. - return builder.fsub(lc.Constant.real(val.type, -0.0), val) - -def call_fp_intrinsic(builder, name, args): - """ - Call a LLVM intrinsic floating-point operation. - """ - mod = builder.module - intr = lc.Function.intrinsic(mod, name, [a.type for a in args]) - return builder.call(intr, args) - - -def _unary_int_input_wrapper_impl(wrapped_impl): - """ - Return an implementation factory to convert the single integral input - argument to a float64, then defer to the *wrapped_impl*. - """ - def implementer(context, builder, sig, args): - val, = args - input_type = sig.args[0] - fpval = context.cast(builder, val, input_type, types.float64) - inner_sig = signature(types.float64, types.float64) - res = wrapped_impl(context, builder, inner_sig, (fpval,)) - return context.cast(builder, res, types.float64, sig.return_type) - - return implementer - -def unary_math_int_impl(fn, float_impl): - impl = _unary_int_input_wrapper_impl(float_impl) - lower(fn, types.Integer)(impl) - -def unary_math_intr(fn, intrcode): - """ - Implement the math function *fn* using the LLVM intrinsic *intrcode*. - """ - @lower(fn, types.Float) - def float_impl(context, builder, sig, args): - res = call_fp_intrinsic(builder, intrcode, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - unary_math_int_impl(fn, float_impl) - return float_impl - -def unary_math_extern(fn, f32extern, f64extern, int_restype=False): - """ - Register implementations of Python function *fn* using the - external function named *f32extern* and *f64extern* (for float32 - and float64 inputs, respectively). - If *int_restype* is true, then the function's return value should be - integral, otherwise floating-point. 
- """ - f_restype = types.int64 if int_restype else None - - def float_impl(context, builder, sig, args): - """ - Implement *fn* for a types.Float input. - """ - [val] = args - mod = builder.module - input_type = sig.args[0] - lty = context.get_value_type(input_type) - func_name = { - types.float32: f32extern, - types.float64: f64extern, - }[input_type] - fnty = Type.function(lty, [lty]) - fn = cgutils.insert_pure_function(builder.module, fnty, name=func_name) - res = builder.call(fn, (val,)) - res = context.cast(builder, res, input_type, sig.return_type) - return impl_ret_untracked(context, builder, sig.return_type, res) - - lower(fn, types.Float)(float_impl) - - # Implement wrapper for integer inputs - unary_math_int_impl(fn, float_impl) - - return float_impl - - -unary_math_intr(math.fabs, lc.INTR_FABS) -#unary_math_intr(math.sqrt, lc.INTR_SQRT) -exp_impl = unary_math_intr(math.exp, lc.INTR_EXP) -log_impl = unary_math_intr(math.log, lc.INTR_LOG) -log10_impl = unary_math_intr(math.log10, lc.INTR_LOG10) -sin_impl = unary_math_intr(math.sin, lc.INTR_SIN) -cos_impl = unary_math_intr(math.cos, lc.INTR_COS) -#unary_math_intr(math.floor, lc.INTR_FLOOR) -#unary_math_intr(math.ceil, lc.INTR_CEIL) -#unary_math_intr(math.trunc, lc.INTR_TRUNC) - -log1p_impl = unary_math_extern(math.log1p, "log1pf", "log1p") -expm1_impl = unary_math_extern(math.expm1, "expm1f", "expm1") -erf_impl = unary_math_extern(math.erf, "erff", "erf") -erfc_impl = unary_math_extern(math.erfc, "erfcf", "erfc") - -tan_impl = unary_math_extern(math.tan, "tanf", "tan") -asin_impl = unary_math_extern(math.asin, "asinf", "asin") -acos_impl = unary_math_extern(math.acos, "acosf", "acos") -atan_impl = unary_math_extern(math.atan, "atanf", "atan") - -asinh_impl = unary_math_extern(math.asinh, "asinhf", "asinh") -acosh_impl = unary_math_extern(math.acosh, "acoshf", "acosh") -atanh_impl = unary_math_extern(math.atanh, "atanhf", "atanh") -sinh_impl = unary_math_extern(math.sinh, "sinhf", "sinh") -cosh_impl = 
unary_math_extern(math.cosh, "coshf", "cosh") -tanh_impl = unary_math_extern(math.tanh, "tanhf", "tanh") - -# math.floor and math.ceil return float on 2.x, int on 3.x -if utils.PYVERSION > (3, 0): - log2_impl = unary_math_extern(math.log2, "log2f", "log2") - ceil_impl = unary_math_extern(math.ceil, "ceilf", "ceil", True) - floor_impl = unary_math_extern(math.floor, "floorf", "floor", True) -else: - ceil_impl = unary_math_extern(math.ceil, "ceilf", "ceil") - floor_impl = unary_math_extern(math.floor, "floorf", "floor") -gamma_impl = unary_math_extern(math.gamma, "numba_gammaf", "numba_gamma") # work-around -sqrt_impl = unary_math_extern(math.sqrt, "sqrtf", "sqrt") -trunc_impl = unary_math_extern(math.trunc, "truncf", "trunc", True) -lgamma_impl = unary_math_extern(math.lgamma, "lgammaf", "lgamma") - - -@lower(math.isnan, types.Float) -def isnan_float_impl(context, builder, sig, args): - [val] = args - res = is_nan(builder, val) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower(math.isnan, types.Integer) -def isnan_int_impl(context, builder, sig, args): - res = cgutils.false_bit - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower(math.isinf, types.Float) -def isinf_float_impl(context, builder, sig, args): - [val] = args - res = is_inf(builder, val) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower(math.isinf, types.Integer) -def isinf_int_impl(context, builder, sig, args): - res = cgutils.false_bit - return impl_ret_untracked(context, builder, sig.return_type, res) - - -if utils.PYVERSION >= (3, 2): - @lower(math.isfinite, types.Float) - def isfinite_float_impl(context, builder, sig, args): - [val] = args - res = is_finite(builder, val) - return impl_ret_untracked(context, builder, sig.return_type, res) - - @lower(math.isfinite, types.Integer) - def isfinite_int_impl(context, builder, sig, args): - res = cgutils.true_bit - return impl_ret_untracked(context, builder, sig.return_type, 
res) - - -@lower(math.copysign, types.Float, types.Float) -def copysign_float_impl(context, builder, sig, args): - lty = args[0].type - mod = builder.module - fn = mod.get_or_insert_function(lc.Type.function(lty, (lty, lty)), - 'llvm.copysign.%s' % lty.intrinsic_name) - res = builder.call(fn, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -# ----------------------------------------------------------------------------- - - -@lower(math.frexp, types.Float) -def frexp_impl(context, builder, sig, args): - val, = args - fltty = context.get_data_type(sig.args[0]) - intty = context.get_data_type(sig.return_type[1]) - expptr = cgutils.alloca_once(builder, intty, name='exp') - fnty = Type.function(fltty, (fltty, Type.pointer(intty))) - fname = { - "float": "numba_frexpf", - "double": "numba_frexp", - }[str(fltty)] - fn = builder.module.get_or_insert_function(fnty, name=fname) - res = builder.call(fn, (val, expptr)) - res = cgutils.make_anonymous_struct(builder, (res, builder.load(expptr))) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower(math.ldexp, types.Float, types.intc) -def ldexp_impl(context, builder, sig, args): - val, exp = args - fltty, intty = map(context.get_data_type, sig.args) - fnty = Type.function(fltty, (fltty, intty)) - fname = { - "float": "numba_ldexpf", - "double": "numba_ldexp", - }[str(fltty)] - fn = cgutils.insert_pure_function(builder.module, fnty, name=fname) - res = builder.call(fn, (val, exp)) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -# ----------------------------------------------------------------------------- - - -@lower(math.atan2, types.int64, types.int64) -def atan2_s64_impl(context, builder, sig, args): - [y, x] = args - y = builder.sitofp(y, Type.double()) - x = builder.sitofp(x, Type.double()) - fsig = signature(types.float64, types.float64, types.float64) - return atan2_float_impl(context, builder, fsig, (y, x)) - -@lower(math.atan2, 
types.uint64, types.uint64) -def atan2_u64_impl(context, builder, sig, args): - [y, x] = args - y = builder.uitofp(y, Type.double()) - x = builder.uitofp(x, Type.double()) - fsig = signature(types.float64, types.float64, types.float64) - return atan2_float_impl(context, builder, fsig, (y, x)) - -@lower(math.atan2, types.Float, types.Float) -def atan2_float_impl(context, builder, sig, args): - assert len(args) == 2 - mod = builder.module - ty = sig.args[0] - lty = context.get_value_type(ty) - func_name = { - types.float32: "atan2f", - # Workaround atan2() issues under Windows - types.float64: "atan2_fixed" if sys.platform == "win32" else "atan2" - }[ty] - fnty = Type.function(lty, (lty, lty)) - fn = cgutils.insert_pure_function(builder.module, fnty, name=func_name) - res = builder.call(fn, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -# ----------------------------------------------------------------------------- - - -@lower(math.hypot, types.int64, types.int64) -def hypot_s64_impl(context, builder, sig, args): - [x, y] = args - y = builder.sitofp(y, Type.double()) - x = builder.sitofp(x, Type.double()) - fsig = signature(types.float64, types.float64, types.float64) - res = hypot_float_impl(context, builder, fsig, (x, y)) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower(math.hypot, types.uint64, types.uint64) -def hypot_u64_impl(context, builder, sig, args): - [x, y] = args - y = builder.sitofp(y, Type.double()) - x = builder.sitofp(x, Type.double()) - fsig = signature(types.float64, types.float64, types.float64) - res = hypot_float_impl(context, builder, fsig, (x, y)) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower(math.hypot, types.Float, types.Float) -def hypot_float_impl(context, builder, sig, args): - xty, yty = sig.args - assert xty == yty == sig.return_type - x, y = args - - # Windows has alternate names for hypot/hypotf, see - # 
https://msdn.microsoft.com/fr-fr/library/a9yb3dbt%28v=vs.80%29.aspx - fname = { - types.float32: "_hypotf" if sys.platform == 'win32' else "hypotf", - types.float64: "_hypot" if sys.platform == 'win32' else "hypot", - }[xty] - plat_hypot = types.ExternalFunction(fname, sig) - - if sys.platform == 'win32' and config.MACHINE_BITS == 32: - inf = xty(float('inf')) - - def hypot_impl(x, y): - if math.isinf(x) or math.isinf(y): - return inf - return plat_hypot(x, y) - else: - def hypot_impl(x, y): - return plat_hypot(x, y) - - res = context.compile_internal(builder, hypot_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -# ----------------------------------------------------------------------------- - -@lower(math.radians, types.Float) -def radians_float_impl(context, builder, sig, args): - [x] = args - coef = context.get_constant(sig.return_type, math.pi / 180) - res = builder.fmul(x, coef) - return impl_ret_untracked(context, builder, sig.return_type, res) - -unary_math_int_impl(math.radians, radians_float_impl) - -# ----------------------------------------------------------------------------- - -@lower(math.degrees, types.Float) -def degrees_float_impl(context, builder, sig, args): - [x] = args - coef = context.get_constant(sig.return_type, 180 / math.pi) - res = builder.fmul(x, coef) - return impl_ret_untracked(context, builder, sig.return_type, res) - -unary_math_int_impl(math.degrees, degrees_float_impl) - -# ----------------------------------------------------------------------------- - -@lower(math.pow, types.Float, types.Float) -@lower(math.pow, types.Float, types.Integer) -def pow_impl(context, builder, sig, args): - impl = context.get_function("**", sig) - return impl(builder, args) diff --git a/numba/numba/targets/mergesort.py b/numba/numba/targets/mergesort.py deleted file mode 100644 index cba5f3318..000000000 --- a/numba/numba/targets/mergesort.py +++ /dev/null @@ -1,126 +0,0 @@ -""" -The same algorithm as translated 
from numpy. -See numpy/core/src/npysort/mergesort.c.src. -The high-level numba code is adding a little overhead comparing to -the pure-C implementation in numpy. -""" -import numpy as np -from collections import namedtuple - -# Array size smaller than this will be sorted by insertion sort -SMALL_MERGESORT = 20 - - -MergesortImplementation = namedtuple('MergesortImplementation', [ - 'run_mergesort', -]) - - -def make_mergesort_impl(wrap, lt=None, is_argsort=False): - kwargs_lite = dict(no_cpython_wrapper=True, _nrt=False) - - # The less than - if lt is None: - @wrap(**kwargs_lite) - def lt(a, b): - return a < b - else: - lt = wrap(**kwargs_lite)(lt) - - if is_argsort: - @wrap(**kwargs_lite) - def lessthan(a, b, vals): - return lt(vals[a], vals[b]) - else: - @wrap(**kwargs_lite) - def lessthan(a, b, vals): - return lt(a, b) - - @wrap(**kwargs_lite) - def argmergesort_inner(arr, vals, ws): - """The actual mergesort function - - Parameters - ---------- - arr : array [read+write] - The values being sorted inplace. For argsort, this is the - indices. - vals : array [readonly] - ``None`` for normal sort. In argsort, this is the actual array values. - ws : array [write] - The workspace. 
Must be of size ``arr.size // 2`` - """ - if arr.size > SMALL_MERGESORT: - # Merge sort - mid = arr.size // 2 - - argmergesort_inner(arr[:mid], vals, ws) - argmergesort_inner(arr[mid:], vals, ws) - - # Copy left half into workspace so we don't overwrite it - for i in range(mid): - ws[i] = arr[i] - - # Merge - left = ws[:mid] - right = arr[mid:] - out = arr - - i = j = k = 0 - while i < left.size and j < right.size: - if not lessthan(right[j], left[i], vals): - out[k] = left[i] - i += 1 - else: - out[k] = right[j] - j += 1 - k += 1 - - # Leftovers - while i < left.size: - out[k] = left[i] - i += 1 - k += 1 - - while j < right.size: - out[k] = right[j] - j += 1 - k += 1 - else: - # Insertion sort - i = 1 - while i < arr.size: - j = i - while j > 0 and lessthan(arr[j], arr[j - 1], vals): - arr[j - 1], arr[j] = arr[j], arr[j - 1] - j -= 1 - i += 1 - - # The top-level entry points - - @wrap(no_cpython_wrapper=True) - def mergesort(arr): - "Inplace" - ws = np.empty(arr.size // 2, dtype=arr.dtype) - argmergesort_inner(arr, None, ws) - return arr - - - @wrap(no_cpython_wrapper=True) - def argmergesort(arr): - "Out-of-place" - idxs = np.arange(arr.size) - ws = np.empty(arr.size // 2, dtype=idxs.dtype) - argmergesort_inner(idxs, arr, ws) - return idxs - - return MergesortImplementation( - run_mergesort=(argmergesort if is_argsort else mergesort) - ) - - -def make_jit_mergesort(*args, **kwargs): - from numba import njit - # NOTE: wrap with njit to allow recursion - # because @register_jitable => @overload doesn't support recursion - return make_mergesort_impl(njit, *args, **kwargs) diff --git a/numba/numba/targets/npdatetime.py b/numba/numba/targets/npdatetime.py deleted file mode 100644 index fbab3067b..000000000 --- a/numba/numba/targets/npdatetime.py +++ /dev/null @@ -1,628 +0,0 @@ -""" -Implementation of operations on numpy timedelta64. 
-""" - -import numpy as np - -from llvmlite.llvmpy.core import Type, Constant -import llvmlite.llvmpy.core as lc - -from numba import npdatetime, types, cgutils -from .imputils import lower_builtin, lower_constant, impl_ret_untracked - - -# datetime64 and timedelta64 use the same internal representation -DATETIME64 = TIMEDELTA64 = Type.int(64) -NAT = Constant.int(TIMEDELTA64, npdatetime.NAT) - -TIMEDELTA_BINOP_SIG = (types.NPTimedelta,) * 2 - - -def scale_by_constant(builder, val, factor): - """ - Multiply *val* by the constant *factor*. - """ - return builder.mul(val, Constant.int(TIMEDELTA64, factor)) - -def unscale_by_constant(builder, val, factor): - """ - Divide *val* by the constant *factor*. - """ - return builder.sdiv(val, Constant.int(TIMEDELTA64, factor)) - -def add_constant(builder, val, const): - """ - Add constant *const* to *val*. - """ - return builder.add(val, Constant.int(TIMEDELTA64, const)) - -def scale_timedelta(context, builder, val, srcty, destty): - """ - Scale the timedelta64 *val* from *srcty* to *destty* - (both numba.types.NPTimedelta instances) - """ - factor = npdatetime.get_timedelta_conversion_factor(srcty.unit, destty.unit) - if factor is None: - # This can happen when using explicit output in a ufunc. - raise NotImplementedError("cannot convert timedelta64 from %r to %r" - % (srcty.unit, destty.unit)) - return scale_by_constant(builder, val, factor) - -def normalize_timedeltas(context, builder, left, right, leftty, rightty): - """ - Scale either *left* or *right* to the other's unit, in order to have - homogeneous units. 
- """ - factor = npdatetime.get_timedelta_conversion_factor(leftty.unit, rightty.unit) - if factor is not None: - return scale_by_constant(builder, left, factor), right - factor = npdatetime.get_timedelta_conversion_factor(rightty.unit, leftty.unit) - if factor is not None: - return left, scale_by_constant(builder, right, factor) - # Typing should not let this happen, except on == and != operators - raise RuntimeError("cannot normalize %r and %r" % (leftty, rightty)) - -def alloc_timedelta_result(builder, name='ret'): - """ - Allocate a NaT-initialized datetime64 (or timedelta64) result slot. - """ - ret = cgutils.alloca_once(builder, TIMEDELTA64, name=name) - builder.store(NAT, ret) - return ret - -def alloc_boolean_result(builder, name='ret'): - """ - Allocate an uninitialized boolean result slot. - """ - ret = cgutils.alloca_once(builder, Type.int(1), name=name) - return ret - -def is_not_nat(builder, val): - """ - Return a predicate which is true if *val* is not NaT. - """ - return builder.icmp(lc.ICMP_NE, val, NAT) - -def are_not_nat(builder, vals): - """ - Return a predicate which is true if all of *vals* are not NaT. 
- """ - assert len(vals) >= 1 - pred = is_not_nat(builder, vals[0]) - for val in vals[1:]: - pred = builder.and_(pred, is_not_nat(builder, val)) - return pred - -def make_constant_array(vals): - consts = [Constant.int(TIMEDELTA64, v) for v in vals] - return Constant.array(TIMEDELTA64, consts) - - -normal_year_months = make_constant_array([31, 28, 31, 30, 31, 30, - 31, 31, 30, 31, 30, 31]) -leap_year_months = make_constant_array([31, 29, 31, 30, 31, 30, - 31, 31, 30, 31, 30, 31]) -normal_year_months_acc = make_constant_array( - [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]) -leap_year_months_acc = make_constant_array( - [0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335]) - - -@lower_constant(types.NPDatetime) -@lower_constant(types.NPTimedelta) -def datetime_constant(context, builder, ty, pyval): - return DATETIME64(pyval.astype(np.int64)) - - -# Arithmetic operators on timedelta64 - -@lower_builtin('+', types.NPTimedelta) -def timedelta_pos_impl(context, builder, sig, args): - res =args[0] - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin('-', types.NPTimedelta) -def timedelta_neg_impl(context, builder, sig, args): - res = builder.neg(args[0]) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin(abs, types.NPTimedelta) -def timedelta_abs_impl(context, builder, sig, args): - val, = args - ret = alloc_timedelta_result(builder) - with builder.if_else(cgutils.is_scalar_neg(builder, val)) as (then, otherwise): - with then: - builder.store(builder.neg(val), ret) - with otherwise: - builder.store(val, ret) - res = builder.load(ret) - return impl_ret_untracked(context, builder, sig.return_type, res) - -def timedelta_sign_impl(context, builder, sig, args): - """ - np.sign(timedelta64) - """ - val, = args - ret = alloc_timedelta_result(builder) - zero = Constant.int(TIMEDELTA64, 0) - with builder.if_else(builder.icmp(lc.ICMP_SGT, val, zero) - ) as (gt_zero, le_zero): - with gt_zero: - 
builder.store(Constant.int(TIMEDELTA64, 1), ret) - with le_zero: - with builder.if_else(builder.icmp(lc.ICMP_EQ, val, zero) - ) as (eq_zero, lt_zero): - with eq_zero: - builder.store(Constant.int(TIMEDELTA64, 0), ret) - with lt_zero: - builder.store(Constant.int(TIMEDELTA64, -1), ret) - res = builder.load(ret) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin('+', *TIMEDELTA_BINOP_SIG) -def timedelta_add_impl(context, builder, sig, args): - [va, vb] = args - [ta, tb] = sig.args - ret = alloc_timedelta_result(builder) - with cgutils.if_likely(builder, are_not_nat(builder, [va, vb])): - va = scale_timedelta(context, builder, va, ta, sig.return_type) - vb = scale_timedelta(context, builder, vb, tb, sig.return_type) - builder.store(builder.add(va, vb), ret) - res = builder.load(ret) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin('-', *TIMEDELTA_BINOP_SIG) -def timedelta_sub_impl(context, builder, sig, args): - [va, vb] = args - [ta, tb] = sig.args - ret = alloc_timedelta_result(builder) - with cgutils.if_likely(builder, are_not_nat(builder, [va, vb])): - va = scale_timedelta(context, builder, va, ta, sig.return_type) - vb = scale_timedelta(context, builder, vb, tb, sig.return_type) - builder.store(builder.sub(va, vb), ret) - res = builder.load(ret) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def _timedelta_times_number(context, builder, td_arg, td_type, - number_arg, number_type, return_type): - ret = alloc_timedelta_result(builder) - with cgutils.if_likely(builder, is_not_nat(builder, td_arg)): - if isinstance(number_type, types.Float): - val = builder.sitofp(td_arg, number_arg.type) - val = builder.fmul(val, number_arg) - val = builder.fptosi(val, TIMEDELTA64) - else: - val = builder.mul(td_arg, number_arg) - # The scaling is required for ufunc np.multiply() with an explicit - # output in a different unit. 
- val = scale_timedelta(context, builder, val, td_type, return_type) - builder.store(val, ret) - return builder.load(ret) - - -@lower_builtin('*', types.NPTimedelta, types.Integer) -@lower_builtin('*', types.NPTimedelta, types.Float) -def timedelta_times_number(context, builder, sig, args): - res = _timedelta_times_number(context, builder, - args[0], sig.args[0], args[1], sig.args[1], - sig.return_type) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin('*', types.Integer, types.NPTimedelta) -@lower_builtin('*', types.Float, types.NPTimedelta) -def number_times_timedelta(context, builder, sig, args): - res = _timedelta_times_number(context, builder, - args[1], sig.args[1], args[0], sig.args[0], - sig.return_type) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin('/', types.NPTimedelta, types.Integer) -@lower_builtin('//', types.NPTimedelta, types.Integer) -@lower_builtin('/?', types.NPTimedelta, types.Integer) -@lower_builtin('/', types.NPTimedelta, types.Float) -@lower_builtin('//', types.NPTimedelta, types.Float) -@lower_builtin('/?', types.NPTimedelta, types.Float) -def timedelta_over_number(context, builder, sig, args): - td_arg, number_arg = args - number_type = sig.args[1] - ret = alloc_timedelta_result(builder) - ok = builder.and_(is_not_nat(builder, td_arg), - builder.not_(cgutils.is_scalar_zero_or_nan(builder, number_arg))) - with cgutils.if_likely(builder, ok): - # Denominator is non-zero, non-NaN - if isinstance(number_type, types.Float): - val = builder.sitofp(td_arg, number_arg.type) - val = builder.fdiv(val, number_arg) - val = builder.fptosi(val, TIMEDELTA64) - else: - val = builder.sdiv(td_arg, number_arg) - # The scaling is required for ufuncs np.*divide() with an explicit - # output in a different unit. 
- val = scale_timedelta(context, builder, val, sig.args[0], sig.return_type) - builder.store(val, ret) - res = builder.load(ret) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin('/', *TIMEDELTA_BINOP_SIG) -@lower_builtin('/?', *TIMEDELTA_BINOP_SIG) -def timedelta_over_timedelta(context, builder, sig, args): - [va, vb] = args - [ta, tb] = sig.args - not_nan = are_not_nat(builder, [va, vb]) - ll_ret_type = context.get_value_type(sig.return_type) - ret = cgutils.alloca_once(builder, ll_ret_type, name='ret') - builder.store(Constant.real(ll_ret_type, float('nan')), ret) - with cgutils.if_likely(builder, not_nan): - va, vb = normalize_timedeltas(context, builder, va, vb, ta, tb) - va = builder.sitofp(va, ll_ret_type) - vb = builder.sitofp(vb, ll_ret_type) - builder.store(builder.fdiv(va, vb), ret) - res = builder.load(ret) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -# Comparison operators on timedelta64 - -def _create_timedelta_comparison_impl(ll_op, default_value): - def impl(context, builder, sig, args): - [va, vb] = args - [ta, tb] = sig.args - ret = alloc_boolean_result(builder) - with builder.if_else(are_not_nat(builder, [va, vb])) as (then, otherwise): - with then: - try: - norm_a, norm_b = normalize_timedeltas(context, builder, va, vb, ta, tb) - except RuntimeError: - # Cannot normalize units => the values are unequal (except if NaT) - builder.store(default_value, ret) - else: - builder.store(builder.icmp(ll_op, norm_a, norm_b), ret) - with otherwise: - # No scaling when comparing NaTs - builder.store(builder.icmp(ll_op, va, vb), ret) - res = builder.load(ret) - return impl_ret_untracked(context, builder, sig.return_type, res) - - return impl - - -def _create_timedelta_ordering_impl(ll_op): - def impl(context, builder, sig, args): - [va, vb] = args - [ta, tb] = sig.args - ret = alloc_boolean_result(builder) - with builder.if_else(are_not_nat(builder, [va, vb])) as (then, otherwise): - with then: - 
norm_a, norm_b = normalize_timedeltas(context, builder, va, vb, ta, tb) - builder.store(builder.icmp(ll_op, norm_a, norm_b), ret) - with otherwise: - # No scaling when comparing NaT with something else - # (i.e. NaT is <= everything else, since it's the smallest - # int64 value) - builder.store(builder.icmp(ll_op, va, vb), ret) - res = builder.load(ret) - return impl_ret_untracked(context, builder, sig.return_type, res) - - return impl - - -timedelta_eq_timedelta_impl = _create_timedelta_comparison_impl(lc.ICMP_EQ, cgutils.false_bit) -timedelta_ne_timedelta_impl = _create_timedelta_comparison_impl(lc.ICMP_NE, cgutils.true_bit) -timedelta_lt_timedelta_impl = _create_timedelta_ordering_impl(lc.ICMP_SLT) -timedelta_le_timedelta_impl = _create_timedelta_ordering_impl(lc.ICMP_SLE) -timedelta_gt_timedelta_impl = _create_timedelta_ordering_impl(lc.ICMP_SGT) -timedelta_ge_timedelta_impl = _create_timedelta_ordering_impl(lc.ICMP_SGE) - -for op, func in [('==', timedelta_eq_timedelta_impl), - ('!=', timedelta_ne_timedelta_impl), - ('<', timedelta_lt_timedelta_impl), - ('<=', timedelta_le_timedelta_impl), - ('>', timedelta_gt_timedelta_impl), - ('>=', timedelta_ge_timedelta_impl)]: - lower_builtin(op, *TIMEDELTA_BINOP_SIG)(func) - - -# Arithmetic on datetime64 - -def is_leap_year(builder, year_val): - """ - Return a predicate indicating whether *year_val* (offset by 1970) is a - leap year. 
- """ - actual_year = builder.add(year_val, Constant.int(DATETIME64, 1970)) - multiple_of_4 = cgutils.is_null( - builder, builder.and_(actual_year, Constant.int(DATETIME64, 3))) - not_multiple_of_100 = cgutils.is_not_null( - builder, builder.srem(actual_year, Constant.int(DATETIME64, 100))) - multiple_of_400 = cgutils.is_null( - builder, builder.srem(actual_year, Constant.int(DATETIME64, 400))) - return builder.and_(multiple_of_4, - builder.or_(not_multiple_of_100, multiple_of_400)) - -def year_to_days(builder, year_val): - """ - Given a year *year_val* (offset to 1970), return the number of days - since the 1970 epoch. - """ - # The algorithm below is copied from Numpy's get_datetimestruct_days() - # (src/multiarray/datetime.c) - ret = cgutils.alloca_once(builder, TIMEDELTA64) - # First approximation - days = scale_by_constant(builder, year_val, 365) - # Adjust for leap years - with builder.if_else(cgutils.is_neg_int(builder, year_val)) \ - as (if_neg, if_pos): - with if_pos: - # At or after 1970: - # 1968 is the closest leap year before 1970. - # Exclude the current year, so add 1. - from_1968 = add_constant(builder, year_val, 1) - # Add one day for each 4 years - p_days = builder.add(days, - unscale_by_constant(builder, from_1968, 4)) - # 1900 is the closest previous year divisible by 100 - from_1900 = add_constant(builder, from_1968, 68) - # Subtract one day for each 100 years - p_days = builder.sub(p_days, - unscale_by_constant(builder, from_1900, 100)) - # 1600 is the closest previous year divisible by 400 - from_1600 = add_constant(builder, from_1900, 300) - # Add one day for each 400 years - p_days = builder.add(p_days, - unscale_by_constant(builder, from_1600, 400)) - builder.store(p_days, ret) - with if_neg: - # Before 1970: - # NOTE `year_val` is negative, and so will be `from_1972` and `from_2000`. - # 1972 is the closest later year after 1970. - # Include the current year, so subtract 2. 
- from_1972 = add_constant(builder, year_val, -2) - # Subtract one day for each 4 years (`from_1972` is negative) - n_days = builder.add(days, - unscale_by_constant(builder, from_1972, 4)) - # 2000 is the closest later year divisible by 100 - from_2000 = add_constant(builder, from_1972, -28) - # Add one day for each 100 years - n_days = builder.sub(n_days, - unscale_by_constant(builder, from_2000, 100)) - # 2000 is also the closest later year divisible by 400 - # Subtract one day for each 400 years - n_days = builder.add(n_days, - unscale_by_constant(builder, from_2000, 400)) - builder.store(n_days, ret) - return builder.load(ret) - - -def reduce_datetime_for_unit(builder, dt_val, src_unit, dest_unit): - dest_unit_code = npdatetime.DATETIME_UNITS[dest_unit] - src_unit_code = npdatetime.DATETIME_UNITS[src_unit] - if dest_unit_code < 2 or src_unit_code >= 2: - return dt_val, src_unit - # Need to compute the day ordinal for *dt_val* - if src_unit_code == 0: - # Years to days - year_val = dt_val - days_val = year_to_days(builder, year_val) - - else: - # Months to days - leap_array = cgutils.global_constant(builder, "leap_year_months_acc", - leap_year_months_acc) - normal_array = cgutils.global_constant(builder, "normal_year_months_acc", - normal_year_months_acc) - - days = cgutils.alloca_once(builder, TIMEDELTA64) - - # First compute year number and month number - year, month = cgutils.divmod_by_constant(builder, dt_val, 12) - - # Then deduce the number of days - with builder.if_else(is_leap_year(builder, year)) as (then, otherwise): - with then: - addend = builder.load(cgutils.gep(builder, leap_array, - 0, month, inbounds=True)) - builder.store(addend, days) - with otherwise: - addend = builder.load(cgutils.gep(builder, normal_array, - 0, month, inbounds=True)) - builder.store(addend, days) - - days_val = year_to_days(builder, year) - days_val = builder.add(days_val, builder.load(days)) - - if dest_unit_code == 2: - # Need to scale back to weeks - weeks, _ = 
cgutils.divmod_by_constant(builder, days_val, 7) - return weeks, 'W' - else: - return days_val, 'D' - - -def convert_datetime_for_arith(builder, dt_val, src_unit, dest_unit): - """ - Convert datetime *dt_val* from *src_unit* to *dest_unit*. - """ - # First partial conversion to days or weeks, if necessary. - dt_val, dt_unit = reduce_datetime_for_unit(builder, dt_val, src_unit, dest_unit) - # Then multiply by the remaining constant factor. - dt_factor = npdatetime.get_timedelta_conversion_factor(dt_unit, dest_unit) - if dt_factor is None: - # This can happen when using explicit output in a ufunc. - raise NotImplementedError("cannot convert datetime64 from %r to %r" - % (src_unit, dest_unit)) - return scale_by_constant(builder, dt_val, dt_factor) - - -def _datetime_timedelta_arith(ll_op_name): - def impl(context, builder, dt_arg, dt_unit, - td_arg, td_unit, ret_unit): - ret = alloc_timedelta_result(builder) - with cgutils.if_likely(builder, are_not_nat(builder, [dt_arg, td_arg])): - dt_arg = convert_datetime_for_arith(builder, dt_arg, - dt_unit, ret_unit) - td_factor = npdatetime.get_timedelta_conversion_factor(td_unit, ret_unit) - td_arg = scale_by_constant(builder, td_arg, td_factor) - ret_val = getattr(builder, ll_op_name)(dt_arg, td_arg) - builder.store(ret_val, ret) - return builder.load(ret) - return impl - -_datetime_plus_timedelta = _datetime_timedelta_arith('add') -_datetime_minus_timedelta = _datetime_timedelta_arith('sub') - -# datetime64 + timedelta64 - -@lower_builtin('+', types.NPDatetime, types.NPTimedelta) -def datetime_plus_timedelta(context, builder, sig, args): - dt_arg, td_arg = args - dt_type, td_type = sig.args - res = _datetime_plus_timedelta(context, builder, - dt_arg, dt_type.unit, - td_arg, td_type.unit, - sig.return_type.unit) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin('+', types.NPTimedelta, types.NPDatetime) -def timedelta_plus_datetime(context, builder, sig, args): - td_arg, dt_arg = args - 
td_type, dt_type = sig.args - res = _datetime_plus_timedelta(context, builder, - dt_arg, dt_type.unit, - td_arg, td_type.unit, - sig.return_type.unit) - return impl_ret_untracked(context, builder, sig.return_type, res) - -# datetime64 - timedelta64 - -@lower_builtin('-', types.NPDatetime, types.NPTimedelta) -def datetime_minus_timedelta(context, builder, sig, args): - dt_arg, td_arg = args - dt_type, td_type = sig.args - res = _datetime_minus_timedelta(context, builder, - dt_arg, dt_type.unit, - td_arg, td_type.unit, - sig.return_type.unit) - return impl_ret_untracked(context, builder, sig.return_type, res) - -# datetime64 - datetime64 - -@lower_builtin('-', types.NPDatetime, types.NPDatetime) -def datetime_minus_datetime(context, builder, sig, args): - va, vb = args - ta, tb = sig.args - unit_a = ta.unit - unit_b = tb.unit - ret_unit = sig.return_type.unit - ret = alloc_timedelta_result(builder) - with cgutils.if_likely(builder, are_not_nat(builder, [va, vb])): - va = convert_datetime_for_arith(builder, va, unit_a, ret_unit) - vb = convert_datetime_for_arith(builder, vb, unit_b, ret_unit) - ret_val = builder.sub(va, vb) - builder.store(ret_val, ret) - res = builder.load(ret) - return impl_ret_untracked(context, builder, sig.return_type, res) - -# datetime64 comparisons - -def _create_datetime_comparison_impl(ll_op): - def impl(context, builder, sig, args): - va, vb = args - ta, tb = sig.args - unit_a = ta.unit - unit_b = tb.unit - ret_unit = npdatetime.get_best_unit(unit_a, unit_b) - ret = alloc_boolean_result(builder) - with builder.if_else(are_not_nat(builder, [va, vb])) as (then, otherwise): - with then: - norm_a = convert_datetime_for_arith(builder, va, unit_a, ret_unit) - norm_b = convert_datetime_for_arith(builder, vb, unit_b, ret_unit) - ret_val = builder.icmp(ll_op, norm_a, norm_b) - builder.store(ret_val, ret) - with otherwise: - # No scaling when comparing NaTs - ret_val = builder.icmp(ll_op, va, vb) - builder.store(ret_val, ret) - res = 
builder.load(ret) - return impl_ret_untracked(context, builder, sig.return_type, res) - - return impl - - -datetime_eq_datetime_impl = _create_datetime_comparison_impl(lc.ICMP_EQ) -datetime_ne_datetime_impl = _create_datetime_comparison_impl(lc.ICMP_NE) -datetime_lt_datetime_impl = _create_datetime_comparison_impl(lc.ICMP_SLT) -datetime_le_datetime_impl = _create_datetime_comparison_impl(lc.ICMP_SLE) -datetime_gt_datetime_impl = _create_datetime_comparison_impl(lc.ICMP_SGT) -datetime_ge_datetime_impl = _create_datetime_comparison_impl(lc.ICMP_SGE) - -for op, func in [('==', datetime_eq_datetime_impl), - ('!=', datetime_ne_datetime_impl), - ('<', datetime_lt_datetime_impl), - ('<=', datetime_le_datetime_impl), - ('>', datetime_gt_datetime_impl), - ('>=', datetime_ge_datetime_impl)]: - lower_builtin(op, *[types.NPDatetime]*2)(func) - - -######################################################################## -# datetime/timedelta fmax/fmin maximum/minimum support - -def datetime_max_impl(context, builder, sig, args): - # just a regular int64 max avoiding nats. - # note this could be optimizing relying on the actual value of NAT - # but as NumPy doesn't rely on this, this seems more resilient - in1, in2 = args - in1_not_nat = is_not_nat(builder, in1) - in2_not_nat = is_not_nat(builder, in2) - in1_ge_in2 = builder.icmp(lc.ICMP_SGE, in1, in2) - res = builder.select(in1_ge_in2, in1, in2) - res = builder.select(in1_not_nat, res, in2) - res = builder.select(in2_not_nat, res, in1) - - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def datetime_min_impl(context, builder, sig, args): - # just a regular int64 min avoiding nats. 
- # note this could be optimizing relying on the actual value of NAT - # but as NumPy doesn't rely on this, this seems more resilient - in1, in2 = args - in1_not_nat = is_not_nat(builder, in1) - in2_not_nat = is_not_nat(builder, in2) - in1_le_in2 = builder.icmp(lc.ICMP_SLE, in1, in2) - res = builder.select(in1_le_in2, in1, in2) - res = builder.select(in1_not_nat, res, in2) - res = builder.select(in2_not_nat, res, in1) - - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def timedelta_max_impl(context, builder, sig, args): - # just a regular int64 max avoiding nats. - # note this could be optimizing relying on the actual value of NAT - # but as NumPy doesn't rely on this, this seems more resilient - in1, in2 = args - in1_not_nat = is_not_nat(builder, in1) - in2_not_nat = is_not_nat(builder, in2) - in1_ge_in2 = builder.icmp(lc.ICMP_SGE, in1, in2) - res = builder.select(in1_ge_in2, in1, in2) - res = builder.select(in1_not_nat, res, in2) - res = builder.select(in2_not_nat, res, in1) - - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def timedelta_min_impl(context, builder, sig, args): - # just a regular int64 min avoiding nats. 
- # note this could be optimizing relying on the actual value of NAT - # but as NumPy doesn't rely on this, this seems more resilient - in1, in2 = args - in1_not_nat = is_not_nat(builder, in1) - in2_not_nat = is_not_nat(builder, in2) - in1_le_in2 = builder.icmp(lc.ICMP_SLE, in1, in2) - res = builder.select(in1_le_in2, in1, in2) - res = builder.select(in1_not_nat, res, in2) - res = builder.select(in2_not_nat, res, in1) - - return impl_ret_untracked(context, builder, sig.return_type, res) diff --git a/numba/numba/targets/npyfuncs.py b/numba/numba/targets/npyfuncs.py deleted file mode 100644 index d7f66901e..000000000 --- a/numba/numba/targets/npyfuncs.py +++ /dev/null @@ -1,1794 +0,0 @@ -"""Codegen for functions used as kernels in NumPy functions - -Typically, the kernels of several ufuncs that can't map directly to -Python builtins -""" - -from __future__ import print_function, absolute_import, division - -import math - -from llvmlite.llvmpy import core as lc - -from .. import cgutils, typing, types, lowering, errors -from . import cmathimpl, mathimpl, numbers - -# some NumPy constants. Note that we could generate some of them using -# the math library, but having the values copied from npy_math seems to -# yield more accurate results -_NPY_LOG2E = 1.442695040888963407359924681001892137 # math.log(math.e, 2) -_NPY_LOG10E = 0.434294481903251827651128918916605082 # math.log(math.e, 10) -_NPY_LOGE2 = 0.693147180559945309417232121458176568 # math.log(2) - - -def _check_arity_and_homogeneity(sig, args, arity, return_type = None): - """checks that the following are true: - - args and sig.args have arg_count elements - - all input types are homogeneous - - return type is 'return_type' if provided, otherwise it must be - homogeneous with the input types. 
- """ - assert len(args) == arity - assert len(sig.args) == arity - ty = sig.args[0] - if return_type is None: - return_type = ty - # must have homogeneous args - if not( all(arg==ty for arg in sig.args) and sig.return_type == return_type): - import inspect - fname = inspect.currentframe().f_back.f_code.co_name - msg = '{0} called with invalid types: {1}'.format(fname, sig) - assert False, msg - - -def _call_func_by_name_with_cast(context, builder, sig, args, - func_name, ty=types.float64): - # it is quite common in NumPy to have loops implemented as a call - # to the double version of the function, wrapped in casts. This - # helper function facilitates that. - mod = builder.module - lty = context.get_argument_type(ty) - fnty = lc.Type.function(lty, [lty]*len(sig.args)) - fn = cgutils.insert_pure_function(mod, fnty, name=func_name) - cast_args = [context.cast(builder, arg, argty, ty) - for arg, argty in zip(args, sig.args) ] - - result = builder.call(fn, cast_args) - return context.cast(builder, result, types.float64, sig.return_type) - - -def _dispatch_func_by_name_type(context, builder, sig, args, table, user_name): - # for most cases the functions are homogeneous on all their types. - # this code dispatches on the first argument type as it is the most useful - # for our uses (all cases but ldexp are homogeneous in all types, and - # dispatching on the first argument type works of ldexp as well) - # - # assumes that the function pointed by func_name has the type - # signature sig (but needs translation to llvm types). - - ty = sig.args[0] - try: - func_name = table[ty] - except KeyError as e: - msg = "No {0} function for real type {1}".format(user_name, str(e)) - raise errors.LoweringError(msg) - - mod = builder.module - if ty in types.complex_domain: - # In numba struct types are always passed by pointer. So the call has to - # be transformed from "result = func(ops...)" to "func(&result, ops...). 
- # note that the result value pointer as first argument is the convention - # used by numba. - - # First, prepare the return value - out = context.make_complex(builder, ty) - ptrargs = [cgutils.alloca_once_value(builder, arg) - for arg in args] - call_args = [out._getpointer()] + ptrargs - # get_value_as_argument for struct types like complex allocate stack space - # and initialize with the value, the return value is the pointer to that - # allocated space (ie: pointer to a copy of the value in the stack). - # get_argument_type returns a pointer to the struct type in consonance. - call_argtys = [ty] + list(sig.args) - call_argltys = [context.get_value_type(ty).as_pointer() - for ty in call_argtys] - fnty = lc.Type.function(lc.Type.void(), call_argltys) - # Note: the function isn't pure here (it writes to its pointer args) - fn = mod.get_or_insert_function(fnty, name=func_name) - builder.call(fn, call_args) - retval = builder.load(call_args[0]) - else: - argtypes = [context.get_argument_type(aty) for aty in sig.args] - restype = context.get_argument_type(sig.return_type) - fnty = lc.Type.function(restype, argtypes) - fn = cgutils.insert_pure_function(mod, fnty, name=func_name) - retval = context.call_external_function(builder, fn, sig.args, args) - return retval - - - -######################################################################## -# Division kernels inspired by NumPy loops.c.src code -# -# The builtins are not applicable as they rely on a test for zero in the -# denominator. If it is zero the appropriate exception is raised. -# In NumPy, a division by zero does not raise an exception, but instead -# generated a known value. Note that a division by zero in any of the -# operations of a vector may raise an exception or issue a warning -# depending on the np.seterr configuration. 
This is not supported -# right now (and in any case, it won't be handled by these functions -# either) - -def np_int_sdiv_impl(context, builder, sig, args): - # based on the actual code in NumPy loops.c.src for signed integer types - num, den = args - lltype = num.type - assert all(i.type==lltype for i in args), "must have homogeneous types" - - ZERO = lc.Constant.int(lltype, 0) - MINUS_ONE = lc.Constant.int(lltype, -1) - MIN_INT = lc.Constant.int(lltype, 1 << (den.type.width-1)) - den_is_zero = builder.icmp(lc.ICMP_EQ, ZERO, den) - den_is_minus_one = builder.icmp(lc.ICMP_EQ, MINUS_ONE, den) - num_is_min_int = builder.icmp(lc.ICMP_EQ, MIN_INT, num) - could_cause_sigfpe = builder.and_(den_is_minus_one, num_is_min_int) - force_zero = builder.or_(den_is_zero, could_cause_sigfpe) - with builder.if_else(force_zero, likely=False) as (then, otherwise): - with then: - bb_then = builder.basic_block - with otherwise: - bb_otherwise = builder.basic_block - div = builder.sdiv(num, den) - mod = builder.srem(num, den) - num_gt_zero = builder.icmp(lc.ICMP_SGT, num, ZERO) - den_gt_zero = builder.icmp(lc.ICMP_SGT, den, ZERO) - not_same_sign = builder.xor(num_gt_zero, den_gt_zero) - mod_not_zero = builder.icmp(lc.ICMP_NE, mod, ZERO) - needs_fixing = builder.and_(not_same_sign, mod_not_zero) - fix_value = builder.select(needs_fixing, MINUS_ONE, ZERO) - result_otherwise = builder.add(div, fix_value) - - result = builder.phi(lltype) - result.add_incoming(ZERO, bb_then) - result.add_incoming(result_otherwise, bb_otherwise) - - return result - - -def np_int_srem_impl(context, builder, sig, args): - # based on the actual code in NumPy loops.c.src for signed integers - _check_arity_and_homogeneity(sig, args, 2) - - num, den = args - ty = sig.args[0] # any arg type will do, homogeneous - lty = num.type - - ZERO = context.get_constant(ty, 0) - den_not_zero = builder.icmp(lc.ICMP_NE, ZERO, den) - bb_no_if = builder.basic_block - with cgutils.if_unlikely(builder, den_not_zero): - bb_if = 
builder.basic_block - mod = builder.srem(num,den) - num_gt_zero = builder.icmp(lc.ICMP_SGT, num, ZERO) - den_gt_zero = builder.icmp(lc.ICMP_SGT, den, ZERO) - not_same_sign = builder.xor(num_gt_zero, den_gt_zero) - mod_not_zero = builder.icmp(lc.ICMP_NE, mod, ZERO) - needs_fixing = builder.and_(not_same_sign, mod_not_zero) - fix_value = builder.select(needs_fixing, den, ZERO) - final_mod = builder.add(fix_value, mod) - - result = builder.phi(lty) - result.add_incoming(ZERO, bb_no_if) - result.add_incoming(final_mod, bb_if) - - return result - - -def np_int_udiv_impl(context, builder, sig, args): - num, den = args - lltype = num.type - assert all(i.type==lltype for i in args), "must have homogeneous types" - - ZERO = lc.Constant.int(lltype, 0) - div_by_zero = builder.icmp(lc.ICMP_EQ, ZERO, den) - with builder.if_else(div_by_zero, likely=False) as (then, otherwise): - with then: - # division by zero - bb_then = builder.basic_block - with otherwise: - # divide! - div = builder.udiv(num, den) - bb_otherwise = builder.basic_block - - result = builder.phi(lltype) - result.add_incoming(ZERO, bb_then) - result.add_incoming(div, bb_otherwise) - return result - - -def np_int_urem_impl(context, builder, sig, args): - # based on the actual code in NumPy loops.c.src for signed integers - _check_arity_and_homogeneity(sig, args, 2) - - num, den = args - ty = sig.args[0] # any arg type will do, homogeneous - lty = num.type - - ZERO = context.get_constant(ty, 0) - den_not_zero = builder.icmp(lc.ICMP_NE, ZERO, den) - bb_no_if = builder.basic_block - with cgutils.if_unlikely(builder, den_not_zero): - bb_if = builder.basic_block - mod = builder.srem(num,den) - - result = builder.phi(lty) - result.add_incoming(ZERO, bb_no_if) - result.add_incoming(mod, bb_if) - - return result - - -# implementation of int_fmod is in fact the same as the unsigned remainder, -# that is: srem with a special case returning 0 when the denominator is 0. 
-np_int_fmod_impl = np_int_urem_impl - - -def np_real_div_impl(context, builder, sig, args): - # in NumPy real div has the same semantics as an fdiv for generating - # NANs, INF and NINF - _check_arity_and_homogeneity(sig, args, 2) - return builder.fdiv(*args) - - -def np_real_mod_impl(context, builder, sig, args): - # note: this maps to NumPy remainder, which has the same semantics as Python - # based on code in loops.c.src - _check_arity_and_homogeneity(sig, args, 2) - in1, in2 = args - ty = sig.args[0] - - ZERO = context.get_constant(ty, 0.0) - res = builder.frem(in1, in2) - res_ne_zero = builder.fcmp(lc.FCMP_ONE, res, ZERO) - den_lt_zero = builder.fcmp(lc.FCMP_OLT, in2, ZERO) - res_lt_zero = builder.fcmp(lc.FCMP_OLT, res, ZERO) - needs_fixing = builder.and_(res_ne_zero, - builder.xor(den_lt_zero, res_lt_zero)) - fix_value = builder.select(needs_fixing, in2, ZERO) - - return builder.fadd(res, fix_value) - - -def np_real_fmod_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2) - return builder.frem(*args) - - -def _fabs(context, builder, arg): - ZERO = lc.Constant.real(arg.type, 0.0) - arg_negated = builder.fsub(ZERO, arg) - arg_is_negative = builder.fcmp(lc.FCMP_OLT, arg, ZERO) - return builder.select(arg_is_negative, arg_negated, arg) - - -def np_complex_div_impl(context, builder, sig, args): - # Extracted from numpy/core/src/umath/loops.c.src, - # inspired by complex_div_impl - # variables named coherent with loops.c.src - # This is implemented using the approach described in - # R.L. Smith. Algorithm 116: Complex division. 
- # Communications of the ACM, 5(8):435, 1962 - - in1, in2 = [context.make_complex(builder, sig.args[0], value=arg) - for arg in args] - - in1r = in1.real # numerator.real - in1i = in1.imag # numerator.imag - in2r = in2.real # denominator.real - in2i = in2.imag # denominator.imag - ftype = in1r.type - assert all([i.type==ftype for i in [in1r, in1i, in2r, in2i]]), "mismatched types" - out = context.make_helper(builder, sig.return_type) - - ZERO = lc.Constant.real(ftype, 0.0) - ONE = lc.Constant.real(ftype, 1.0) - - # if abs(denominator.real) >= abs(denominator.imag) - in2r_abs = _fabs(context, builder, in2r) - in2i_abs = _fabs(context, builder, in2i) - in2r_abs_ge_in2i_abs = builder.fcmp(lc.FCMP_OGE, in2r_abs, in2i_abs) - with builder.if_else(in2r_abs_ge_in2i_abs) as (then, otherwise): - with then: - # if abs(denominator.real) == 0 and abs(denominator.imag) == 0 - in2r_is_zero = builder.fcmp(lc.FCMP_OEQ, in2r_abs, ZERO) - in2i_is_zero = builder.fcmp(lc.FCMP_OEQ, in2i_abs, ZERO) - in2_is_zero = builder.and_(in2r_is_zero, in2i_is_zero) - with builder.if_else(in2_is_zero) as (inn_then, inn_otherwise): - with inn_then: - # division by 0. 
- # fdiv generates the appropriate NAN/INF/NINF - out.real = builder.fdiv(in1r, in2r_abs) - out.imag = builder.fdiv(in1i, in2i_abs) - with inn_otherwise: - # general case for: - # abs(denominator.real) > abs(denominator.imag) - rat = builder.fdiv(in2i, in2r) - # scl = 1.0/(in2r + in2i*rat) - tmp1 = builder.fmul(in2i, rat) - tmp2 = builder.fadd(in2r, tmp1) - scl = builder.fdiv(ONE, tmp2) - # out.real = (in1r + in1i*rat)*scl - # out.imag = (in1i - in1r*rat)*scl - tmp3 = builder.fmul(in1i, rat) - tmp4 = builder.fmul(in1r, rat) - tmp5 = builder.fadd(in1r, tmp3) - tmp6 = builder.fsub(in1i, tmp4) - out.real = builder.fmul(tmp5, scl) - out.imag = builder.fmul(tmp6, scl) - with otherwise: - # general case for: - # abs(denominator.imag) > abs(denominator.real) - rat = builder.fdiv(in2r, in2i) - # scl = 1.0/(in2i + in2r*rat) - tmp1 = builder.fmul(in2r, rat) - tmp2 = builder.fadd(in2i, tmp1) - scl = builder.fdiv(ONE, tmp2) - # out.real = (in1r*rat + in1i)*scl - # out.imag = (in1i*rat - in1r)*scl - tmp3 = builder.fmul(in1r, rat) - tmp4 = builder.fmul(in1i, rat) - tmp5 = builder.fadd(tmp3, in1i) - tmp6 = builder.fsub(tmp4, in1r) - out.real = builder.fmul(tmp5, scl) - out.imag = builder.fmul(tmp6, scl) - - return out._getvalue() - - -######################################################################## -# NumPy logaddexp - -def np_real_logaddexp_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2) - - dispatch_table = { - types.float32: 'npy_logaddexpf', - types.float64: 'npy_logaddexp', - } - - return _dispatch_func_by_name_type(context, builder, sig, args, - dispatch_table, 'logaddexp') - -######################################################################## -# NumPy logaddexp2 - -def np_real_logaddexp2_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2) - - dispatch_table = { - types.float32: 'npy_logaddexp2f', - types.float64: 'npy_logaddexp2', - } - - return _dispatch_func_by_name_type(context, builder, sig, args, 
- dispatch_table, 'logaddexp2') - - -######################################################################## -# true div kernels - -def np_int_truediv_impl(context, builder, sig, args): - # in NumPy we don't check for 0 denominator... fdiv handles div by - # 0 in the way NumPy expects.. - # integer truediv always yields double - num, den = args - lltype = num.type - assert all(i.type==lltype for i in args), "must have homogeneous types" - numty, denty = sig.args - - num = context.cast(builder, num, numty, types.float64) - den = context.cast(builder, den, denty, types.float64) - - return builder.fdiv(num,den) - - -######################################################################## -# floor div kernels - -def np_real_floor_div_impl(context, builder, sig, args): - res = np_real_div_impl(context, builder, sig, args) - s = typing.signature(sig.return_type, sig.return_type) - return np_real_floor_impl(context, builder, s, (res,)) - - -def np_complex_floor_div_impl(context, builder, sig, args): - # this is based on the complex floor divide in Numpy's loops.c.src - # This is basically a full complex division with a complex floor - # applied. - # The complex floor seems to be defined as the real floor applied - # with the real part and zero in the imaginary part. Fully developed - # so it avoids computing anything related to the imaginary result. 
- float_kind = sig.args[0].underlying_float - floor_sig = typing.signature(float_kind, float_kind) - - in1, in2 = [context.make_complex(builder, sig.args[0], value=arg) - for arg in args] - - in1r = in1.real - in1i = in1.imag - in2r = in2.real - in2i = in2.imag - ftype = in1r.type - assert all([i.type==ftype for i in [in1r, in1i, in2r, in2i]]), "mismatched types" - - ZERO = lc.Constant.real(ftype, 0.0) - - out = context.make_helper(builder, sig.return_type) - out.imag = ZERO - - in2r_abs = _fabs(context, builder, in2r) - in2i_abs = _fabs(context, builder, in2i) - in2r_abs_ge_in2i_abs = builder.fcmp(lc.FCMP_OGE, in2r_abs, in2i_abs) - - with builder.if_else(in2r_abs_ge_in2i_abs) as (then, otherwise): - with then: - rat = builder.fdiv(in2i, in2r) - # out.real = floor((in1r+in1i*rat)/(in2r + in2i*rat)) - tmp1 = builder.fmul(in1i, rat) - tmp2 = builder.fmul(in2i, rat) - tmp3 = builder.fadd(in1r, tmp1) - tmp4 = builder.fadd(in2r, tmp2) - tmp5 = builder.fdiv(tmp3, tmp4) - out.real = np_real_floor_impl(context, builder, floor_sig, (tmp5,)) - with otherwise: - rat = builder.fdiv(in2r, in2i) - # out.real = floor((in1i + in1r*rat)/(in2i + in2r*rat)) - tmp1 = builder.fmul(in1r, rat) - tmp2 = builder.fmul(in2r, rat) - tmp3 = builder.fadd(in1i, tmp1) - tmp4 = builder.fadd(in2i, tmp2) - tmp5 = builder.fdiv(tmp3, tmp4) - out.real = np_real_floor_impl(context, builder, floor_sig, (tmp5,)) - return out._getvalue() - - -######################################################################## -# numpy power funcs - -def np_complex_power_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2) - - return numbers.complex_power_impl(context, builder, sig, args) - - -######################################################################## -# Numpy style complex sign - -def np_complex_sign_impl(context, builder, sig, args): - # equivalent to complex sign in NumPy's sign - # but implemented via selects, balancing the 4 cases. 
- _check_arity_and_homogeneity(sig, args, 1) - op = args[0] - ty = sig.args[0] - float_ty = ty.underlying_float - - ZERO = context.get_constant(float_ty, 0.0) - ONE = context.get_constant(float_ty, 1.0) - MINUS_ONE = context.get_constant(float_ty, -1.0) - NAN = context.get_constant(float_ty, float('nan')) - result = context.make_complex(builder, ty) - result.real = ZERO - result.imag = ZERO - - cmp_sig = typing.signature(types.boolean, *[ty] * 2) - cmp_args = [op, result._getvalue()] - arg1_ge_arg2 = np_complex_ge_impl(context, builder, cmp_sig, cmp_args) - arg1_eq_arg2 = np_complex_eq_impl(context, builder, cmp_sig, cmp_args) - arg1_lt_arg2 = np_complex_lt_impl(context, builder, cmp_sig, cmp_args) - - real_when_ge = builder.select(arg1_eq_arg2, ZERO, ONE) - real_when_nge = builder.select(arg1_lt_arg2, MINUS_ONE, NAN) - result.real = builder.select(arg1_ge_arg2, real_when_ge, real_when_nge) - - return result._getvalue() - - -######################################################################## -# Numpy rint - -def np_real_rint_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - - return mathimpl.call_fp_intrinsic(builder, 'llvm.rint', args) - - -def np_complex_rint_impl(context, builder, sig, args): - # based on code in NumPy's funcs.inc.src - # rint of a complex number defined as rint of its real and imag - # parts - _check_arity_and_homogeneity(sig, args, 1) - ty = sig.args[0] - float_ty = ty.underlying_float - in1 = context.make_complex(builder, ty, value=args[0]) - out = context.make_complex(builder, ty) - - inner_sig = typing.signature(*[float_ty]*2) - out.real = np_real_rint_impl(context, builder, inner_sig, [in1.real]) - out.imag = np_real_rint_impl(context, builder, inner_sig, [in1.imag]) - return out._getvalue() - - -######################################################################## -# NumPy exp - -def np_real_exp_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return 
mathimpl.exp_impl(context, builder, sig, args) - - -def np_complex_exp_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return cmathimpl.exp_impl(context, builder, sig, args) - -######################################################################## -# NumPy exp2 - -def np_real_exp2_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - - dispatch_table = { - types.float32: 'npy_exp2f', - types.float64: 'npy_exp2', - } - - return _dispatch_func_by_name_type(context, builder, sig, args, - dispatch_table, 'exp2') - - -def np_complex_exp2_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - ty = sig.args[0] - float_ty = ty.underlying_float - in1 = context.make_complex(builder, ty, value=args[0]) - tmp = context.make_complex(builder, ty) - loge2 = context.get_constant(float_ty, _NPY_LOGE2) - tmp.real = builder.fmul(loge2, in1.real) - tmp.imag = builder.fmul(loge2, in1.imag) - return np_complex_exp_impl(context, builder, sig, [tmp._getvalue()]) - - -######################################################################## -# NumPy log - -def np_real_log_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.log_impl(context, builder, sig, args) - - -def np_complex_log_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return cmathimpl.log_impl(context, builder, sig, args) - -######################################################################## -# NumPy log2 - -def np_real_log2_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - - dispatch_table = { - types.float32: 'npy_log2f', - types.float64: 'npy_log2', - } - - return _dispatch_func_by_name_type(context, builder, sig, args, - dispatch_table, 'log2') - -def np_complex_log2_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - - ty = sig.args[0] - float_ty = ty.underlying_float - tmp = 
np_complex_log_impl(context, builder, sig, args) - tmp = context.make_complex(builder, ty, value=tmp) - log2e = context.get_constant(float_ty, _NPY_LOG2E) - tmp.real = builder.fmul(log2e, tmp.real) - tmp.imag = builder.fmul(log2e, tmp.imag) - return tmp._getvalue() - - -######################################################################## -# NumPy log10 - -def np_real_log10_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.log10_impl(context, builder, sig, args) - - -def np_complex_log10_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - - ty = sig.args[0] - float_ty = ty.underlying_float - tmp = np_complex_log_impl(context, builder, sig, args) - tmp = context.make_complex(builder, ty, value=tmp) - log10e = context.get_constant(float_ty, _NPY_LOG10E) - tmp.real = builder.fmul(log10e, tmp.real) - tmp.imag = builder.fmul(log10e, tmp.imag) - return tmp._getvalue() - - -######################################################################## -# NumPy expm1 - -def np_real_expm1_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.expm1_impl(context, builder, sig, args) - -def np_complex_expm1_impl(context, builder, sig, args): - # this is based on nc_expm1 in funcs.inc.src - _check_arity_and_homogeneity(sig, args, 1) - - ty = sig.args[0] - float_ty = ty.underlying_float - float_unary_sig = typing.signature(*[float_ty]*2) - - MINUS_ONE = context.get_constant(float_ty, -1.0) - in1 = context.make_complex(builder, ty, value=args[0]) - a = np_real_exp_impl(context, builder, float_unary_sig, [in1.real]) - out = context.make_complex(builder, ty) - cos_imag = np_real_cos_impl(context, builder, float_unary_sig, [in1.imag]) - sin_imag = np_real_sin_impl(context, builder, float_unary_sig, [in1.imag]) - tmp = builder.fmul(a, cos_imag) - out.imag = builder.fmul(a, sin_imag) - out.real = builder.fadd(tmp, MINUS_ONE) - - return out._getvalue() - - 
-######################################################################## -# NumPy log1p - -def np_real_log1p_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.log1p_impl(context, builder, sig, args) - -def np_complex_log1p_impl(context, builder, sig, args): - # base on NumPy's nc_log1p in funcs.inc.src - _check_arity_and_homogeneity(sig, args, 1) - - ty = sig.args[0] - float_ty = ty.underlying_float - float_unary_sig = typing.signature(*[float_ty]*2) - float_binary_sig = typing.signature(*[float_ty]*3) - - ONE = context.get_constant(float_ty, 1.0) - in1 = context.make_complex(builder, ty, value=args[0]) - out = context.make_complex(builder, ty) - real_plus_one = builder.fadd(in1.real, ONE) - l = np_real_hypot_impl(context, builder, float_binary_sig, - [real_plus_one, in1.imag]) - out.imag = np_real_atan2_impl(context, builder, float_binary_sig, - [in1.imag, real_plus_one]) - out.real = np_real_log_impl(context, builder, float_unary_sig, [l]) - - return out._getvalue() - - -######################################################################## -# NumPy sqrt - -def np_real_sqrt_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.sqrt_impl(context, builder, sig, args) - - -def np_complex_sqrt_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return cmathimpl.sqrt_impl(context, builder, sig, args) - - -######################################################################## -# NumPy square - -def np_int_square_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return builder.mul(args[0], args[0]) - - -def np_real_square_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return builder.fmul(args[0], args[0]) - -def np_complex_square_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - binary_sig = typing.signature(*[sig.return_type]*3) - return 
numbers.complex_mul_impl(context, builder, binary_sig, - [args[0], args[0]]) - - -######################################################################## -# NumPy reciprocal - -def np_int_reciprocal_impl(context, builder, sig, args): - # based on the implementation in loops.c.src - # integer versions for reciprocal are performed via promotion - # using double, and then converted back to the type - _check_arity_and_homogeneity(sig, args, 1) - ty = sig.return_type - - binary_sig = typing.signature(*[ty]*3) - in_as_float = context.cast(builder, args[0], ty, types.float64) - ONE = context.get_constant(types.float64, 1) - result_as_float = builder.fdiv(ONE, in_as_float) - return context.cast(builder, result_as_float, types.float64, ty) - - -def np_real_reciprocal_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - ONE = context.get_constant(sig.return_type, 1.0) - return builder.fdiv(ONE, args[0]) - - -def np_complex_reciprocal_impl(context, builder, sig, args): - # based on the implementation in loops.c.src - # Basically the same Smith method used for division, but with - # the numerator substitued by 1.0 - _check_arity_and_homogeneity(sig, args, 1) - - ty = sig.args[0] - float_ty = ty.underlying_float - - ZERO = context.get_constant(float_ty, 0.0) - ONE = context.get_constant(float_ty, 1.0) - in1 = context.make_complex(builder, ty, value=args[0]) - out = context.make_complex(builder, ty) - in1r = in1.real - in1i = in1.imag - in1r_abs = _fabs(context, builder, in1r) - in1i_abs = _fabs(context, builder, in1i) - in1i_abs_le_in1r_abs = builder.fcmp(lc.FCMP_OLE, in1i_abs, in1r_abs) - - with builder.if_else(in1i_abs_le_in1r_abs) as (then, otherwise): - with then: - r = builder.fdiv(in1i, in1r) - tmp0 = builder.fmul(in1i, r) - d = builder.fadd(in1r, tmp0) - inv_d = builder.fdiv(ONE, d) - minus_r = builder.fsub(ZERO, r) - out.real = inv_d - out.imag = builder.fmul(minus_r, inv_d) - with otherwise: - r = builder.fdiv(in1r, in1i) - tmp0 = 
builder.fmul(in1r, r) - d = builder.fadd(tmp0, in1i) - inv_d = builder.fdiv(ONE, d) - out.real = builder.fmul(r, inv_d) - out.imag = builder.fsub(ZERO, inv_d) - - return out._getvalue() - - -######################################################################## -# NumPy sin - -def np_real_sin_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.sin_impl(context, builder, sig, args) - - -def np_complex_sin_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return cmathimpl.sin_impl(context, builder, sig, args) - - -######################################################################## -# NumPy cos - -def np_real_cos_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.cos_impl(context, builder, sig, args) - - -def np_complex_cos_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return cmathimpl.cos_impl(context, builder, sig, args) - - -######################################################################## -# NumPy tan - -def np_real_tan_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.tan_impl(context, builder, sig, args) - - -def np_complex_tan_impl(context, builder, sig, args): - # npymath does not provide complex tan functions. The code - # in funcs.inc.src for tan is translated here... 
- _check_arity_and_homogeneity(sig, args, 1) - - ty = sig.args[0] - float_ty = ty.underlying_float - float_unary_sig = typing.signature(*[float_ty]*2) - ONE = context.get_constant(float_ty, 1.0) - x = context.make_complex(builder, ty, args[0]) - out = context.make_complex(builder, ty) - - xr = x.real - xi = x.imag - sr = np_real_sin_impl(context, builder, float_unary_sig, [xr]) - cr = np_real_cos_impl(context, builder, float_unary_sig, [xr]) - shi = np_real_sinh_impl(context, builder, float_unary_sig, [xi]) - chi = np_real_cosh_impl(context, builder, float_unary_sig, [xi]) - rs = builder.fmul(sr, chi) - is_ = builder.fmul(cr, shi) - rc = builder.fmul(cr, chi) - ic = builder.fmul(sr, shi) # note: opposite sign from code in funcs.inc.src - sqr_rc = builder.fmul(rc, rc) - sqr_ic = builder.fmul(ic, ic) - d = builder.fadd(sqr_rc, sqr_ic) - inv_d = builder.fdiv(ONE, d) - rs_rc = builder.fmul(rs, rc) - is_ic = builder.fmul(is_, ic) - is_rc = builder.fmul(is_, rc) - rs_ic = builder.fmul(rs, ic) - numr = builder.fsub(rs_rc, is_ic) - numi = builder.fadd(is_rc, rs_ic) - out.real = builder.fmul(numr, inv_d) - out.imag = builder.fmul(numi, inv_d) - - return out._getvalue() - - -######################################################################## -# NumPy asin - -def np_real_asin_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.asin_impl(context, builder, sig, args) - - -def _complex_expand_series(context, builder, ty, initial, x, coefs): - """this is used to implement approximations using series that are - quite common in NumPy's source code to improve precision when the - magnitude of the arguments is small. 
In funcs.inc.src this is - implemented by repeated use of the macro "SERIES_HORNER_TERM - """ - assert ty in types.complex_domain - binary_sig = typing.signature(*[ty]*3) - accum = context.make_complex(builder, ty, value=initial) - ONE = context.get_constant(ty.underlying_float, 1.0) - for coef in reversed(coefs): - constant = context.get_constant(ty.underlying_float, coef) - value = numbers.complex_mul_impl(context, builder, binary_sig, - [x, accum._getvalue()]) - accum._setvalue(value) - accum.real = builder.fadd(ONE, builder.fmul(accum.real, constant)) - accum.imag = builder.fmul(accum.imag, constant) - - return accum._getvalue() - - -def np_complex_asin_impl(context, builder, sig, args): - # npymath does not provide a complex asin. The code in funcs.inc.src - # is translated here... - _check_arity_and_homogeneity(sig, args, 1) - - ty = sig.args[0] - float_ty = ty.underlying_float - epsilon = context.get_constant(float_ty, 1e-3) - - # if real or imag has magnitude over 1e-3... - x = context.make_complex(builder, ty, value=args[0]) - out = context.make_complex(builder, ty) - abs_r = _fabs(context, builder, x.real) - abs_i = _fabs(context, builder, x.imag) - abs_r_gt_epsilon = builder.fcmp(lc.FCMP_OGT, abs_r, epsilon) - abs_i_gt_epsilon = builder.fcmp(lc.FCMP_OGT, abs_i, epsilon) - any_gt_epsilon = builder.or_(abs_r_gt_epsilon, abs_i_gt_epsilon) - complex_binary_sig = typing.signature(*[ty]*3) - with builder.if_else(any_gt_epsilon) as (then, otherwise): - with then: - # ... 
then use formula: - # - j * log(j * x + sqrt(1 - sqr(x))) - I = context.get_constant_generic(builder, ty, 1.0j) - ONE = context.get_constant_generic(builder, ty, 1.0 + 0.0j) - ZERO = context.get_constant_generic(builder, ty, 0.0 + 0.0j) - xx = np_complex_square_impl(context, builder, sig, args) - one_minus_xx = numbers.complex_sub_impl(context, builder, - complex_binary_sig, - [ONE, xx]) - sqrt_one_minus_xx = np_complex_sqrt_impl(context, builder, sig, - [one_minus_xx]) - ix = numbers.complex_mul_impl(context, builder, - complex_binary_sig, - [I, args[0]]) - log_arg = numbers.complex_add_impl(context, builder, sig, - [ix, sqrt_one_minus_xx]) - log = np_complex_log_impl(context, builder, sig, [log_arg]) - ilog = numbers.complex_mul_impl(context, builder, - complex_binary_sig, - [I, log]) - out._setvalue(numbers.complex_sub_impl(context, builder, - complex_binary_sig, - [ZERO, ilog])) - with otherwise: - # ... else use series expansion (to avoid loss of precision) - coef_dict = { - types.complex64: [1.0/6.0, 9.0/20.0], - types.complex128: [1.0/6.0, 9.0/20.0, 25.0/42.0], - # types.complex256: [1.0/6.0, 9.0/20.0, 25.0/42.0, 49.0/72.0, 81.0/110.0] - } - - xx = np_complex_square_impl(context, builder, sig, args) - ONE = context.get_constant_generic(builder, ty, 1.0 + 0.0j) - tmp = _complex_expand_series(context, builder, ty, - ONE, xx, coef_dict[ty]) - out._setvalue(numbers.complex_mul_impl(context, builder, - complex_binary_sig, - [args[0], tmp])) - - return out._getvalue() - - -######################################################################## -# NumPy acos - -def np_real_acos_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.acos_impl(context, builder, sig, args) - - -######################################################################## -# NumPy atan - -def np_real_atan_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.atan_impl(context, builder, sig, args) - - 
-def np_complex_atan_impl(context, builder, sig, args): - # npymath does not provide a complex atan. The code in funcs.inc.src - # is translated here... - _check_arity_and_homogeneity(sig, args, 1) - - ty = sig.args[0] - float_ty = ty.underlying_float - epsilon = context.get_constant(float_ty, 1e-3) - - # if real or imag has magnitude over 1e-3... - x = context.make_complex(builder, ty, value=args[0]) - out = context.make_complex(builder, ty) - abs_r = _fabs(context, builder, x.real) - abs_i = _fabs(context, builder, x.imag) - abs_r_gt_epsilon = builder.fcmp(lc.FCMP_OGT, abs_r, epsilon) - abs_i_gt_epsilon = builder.fcmp(lc.FCMP_OGT, abs_i, epsilon) - any_gt_epsilon = builder.or_(abs_r_gt_epsilon, abs_i_gt_epsilon) - binary_sig = typing.signature(*[ty]*3) - with builder.if_else(any_gt_epsilon) as (then, otherwise): - with then: - # ... then use formula - # 0.5j * log((j + x)/(j - x)) - I = context.get_constant_generic(builder, ty, 0.0 + 1.0j) - I2 = context.get_constant_generic(builder, ty, 0.0 + 0.5j) - den = numbers.complex_sub_impl(context, builder, binary_sig, - [I, args[0]]) - num = numbers.complex_add_impl(context, builder, binary_sig, - [I, args[0]]) - div = np_complex_div_impl(context, builder, binary_sig, - [num, den]) - log = np_complex_log_impl(context, builder, sig, [div]) - res = numbers.complex_mul_impl(context, builder, binary_sig, - [I2, log]) - - out._setvalue(res) - with otherwise: - # else use series expansion (to avoid loss of precision) - coef_dict = { - types.complex64: [-1.0/3.0, -3.0/5.0], - types.complex128: [-1.0/3.0, -3.0/5.0, -5.0/7.0], - # types.complex256: [-1.0/3.0, -3.0/5.0, -5.0/7.0, -7.0/9.0, -9.0/11.0] - } - - xx = np_complex_square_impl(context, builder, sig, args) - ONE = context.get_constant_generic(builder, ty, 1.0 + 0.0j) - tmp = _complex_expand_series(context, builder, ty, - ONE, xx, coef_dict[ty]) - out._setvalue(numbers.complex_mul_impl(context, builder, - binary_sig, - [args[0], tmp])) - - return out._getvalue() - - 
-######################################################################## -# NumPy atan2 - -def np_real_atan2_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2) - return mathimpl.atan2_float_impl(context, builder, sig, args) - - -######################################################################## -# NumPy hypot - -def np_real_hypot_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2) - return mathimpl.hypot_float_impl(context, builder, sig, args) - - -######################################################################## -# NumPy sinh - -def np_real_sinh_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.sinh_impl(context, builder, sig, args) - - -def np_complex_sinh_impl(context, builder, sig, args): - # npymath does not provide a complex sinh. The code in funcs.inc.src - # is translated here... - _check_arity_and_homogeneity(sig, args, 1) - - - ty = sig.args[0] - fty = ty.underlying_float - fsig1 = typing.signature(*[fty]*2) - x = context.make_complex(builder, ty, args[0]) - out = context.make_complex(builder, ty) - xr = x.real - xi = x.imag - - sxi = np_real_sin_impl(context, builder, fsig1, [xi]) - shxr = np_real_sinh_impl(context, builder, fsig1, [xr]) - cxi = np_real_cos_impl(context, builder, fsig1, [xi]) - chxr = np_real_cosh_impl(context, builder, fsig1, [xr]) - - out.real = builder.fmul(cxi, shxr) - out.imag = builder.fmul(sxi, chxr) - - return out._getvalue() - - -######################################################################## -# NumPy cosh - -def np_real_cosh_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.cosh_impl(context, builder, sig, args) - - -def np_complex_cosh_impl(context, builder, sig, args): - # npymath does not provide a complex cosh. The code in funcs.inc.src - # is translated here... 
- _check_arity_and_homogeneity(sig, args, 1) - - ty = sig.args[0] - fty = ty.underlying_float - fsig1 = typing.signature(*[fty]*2) - x = context.make_complex(builder, ty, args[0]) - out = context.make_complex(builder, ty) - xr = x.real - xi = x.imag - - cxi = np_real_cos_impl(context, builder, fsig1, [xi]) - chxr = np_real_cosh_impl(context, builder, fsig1, [xr]) - sxi = np_real_sin_impl(context, builder, fsig1, [xi]) - shxr = np_real_sinh_impl(context, builder, fsig1, [xr]) - - out.real = builder.fmul(cxi, chxr) - out.imag = builder.fmul(sxi, shxr) - - return out._getvalue() - - -######################################################################## -# NumPy tanh - -def np_real_tanh_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.tanh_impl(context, builder, sig, args) - - -def np_complex_tanh_impl(context, builder, sig, args): - # npymath does not provide complex tan functions. The code - # in funcs.inc.src for tanh is translated here... 
- _check_arity_and_homogeneity(sig, args, 1) - - ty = sig.args[0] - fty = ty.underlying_float - fsig1 = typing.signature(*[fty]*2) - ONE = context.get_constant(fty, 1.0) - x = context.make_complex(builder, ty, args[0]) - out = context.make_complex(builder, ty) - - xr = x.real - xi = x.imag - si = np_real_sin_impl(context, builder, fsig1, [xi]) - ci = np_real_cos_impl(context, builder, fsig1, [xi]) - shr = np_real_sinh_impl(context, builder, fsig1, [xr]) - chr_ = np_real_cosh_impl(context, builder, fsig1, [xr]) - rs = builder.fmul(ci, shr) - is_ = builder.fmul(si, chr_) - rc = builder.fmul(ci, chr_) - ic = builder.fmul(si, shr) # note: opposite sign from code in funcs.inc.src - sqr_rc = builder.fmul(rc, rc) - sqr_ic = builder.fmul(ic, ic) - d = builder.fadd(sqr_rc, sqr_ic) - inv_d = builder.fdiv(ONE, d) - rs_rc = builder.fmul(rs, rc) - is_ic = builder.fmul(is_, ic) - is_rc = builder.fmul(is_, rc) - rs_ic = builder.fmul(rs, ic) - numr = builder.fadd(rs_rc, is_ic) - numi = builder.fsub(is_rc, rs_ic) - out.real = builder.fmul(numr, inv_d) - out.imag = builder.fmul(numi, inv_d) - - return out._getvalue() - - -######################################################################## -# NumPy asinh - -def np_real_asinh_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.asinh_impl(context, builder, sig, args) - - -def np_complex_asinh_impl(context, builder, sig, args): - # npymath does not provide a complex atan. The code in funcs.inc.src - # is translated here... - _check_arity_and_homogeneity(sig, args, 1) - - ty = sig.args[0] - float_ty = ty.underlying_float - epsilon = context.get_constant(float_ty, 1e-3) - - # if real or imag has magnitude over 1e-3... 
- x = context.make_complex(builder, ty, value=args[0]) - out = context.make_complex(builder, ty) - abs_r = _fabs(context, builder, x.real) - abs_i = _fabs(context, builder, x.imag) - abs_r_gt_epsilon = builder.fcmp(lc.FCMP_OGT, abs_r, epsilon) - abs_i_gt_epsilon = builder.fcmp(lc.FCMP_OGT, abs_i, epsilon) - any_gt_epsilon = builder.or_(abs_r_gt_epsilon, abs_i_gt_epsilon) - binary_sig = typing.signature(*[ty]*3) - with builder.if_else(any_gt_epsilon) as (then, otherwise): - with then: - # ... then use formula - # log(sqrt(1+sqr(x)) + x) - ONE = context.get_constant_generic(builder, ty, 1.0 + 0.0j) - xx = np_complex_square_impl(context, builder, sig, args) - one_plus_xx = numbers.complex_add_impl(context, builder, - binary_sig, [ONE, xx]) - sqrt_res = np_complex_sqrt_impl(context, builder, sig, - [one_plus_xx]) - log_arg = numbers.complex_add_impl(context, builder, - binary_sig, [sqrt_res, args[0]]) - res = np_complex_log_impl(context, builder, sig, [log_arg]) - out._setvalue(res) - with otherwise: - # else use series expansion (to avoid loss of precision) - coef_dict = { - types.complex64: [-1.0/6.0, -9.0/20.0], - types.complex128: [-1.0/6.0, -9.0/20.0, -25.0/42.0], - # types.complex256: [-1.0/6.0, -9.0/20.0, -25.0/42.0, -49.0/72.0, -81.0/110.0] - } - - xx = np_complex_square_impl(context, builder, sig, args) - ONE = context.get_constant_generic(builder, ty, 1.0 + 0.0j) - tmp = _complex_expand_series(context, builder, ty, - ONE, xx, coef_dict[ty]) - out._setvalue(numbers.complex_mul_impl(context, builder, - binary_sig, - [args[0], tmp])) - - return out._getvalue() - - -######################################################################## -# NumPy acosh - -def np_real_acosh_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.acosh_impl(context, builder, sig, args) - - -def np_complex_acosh_impl(context, builder, sig, args): - # npymath does not provide a complex acosh. 
The code in funcs.inc.src - # is translated here... - # log(x + sqrt(x+1) * sqrt(x-1)) - _check_arity_and_homogeneity(sig, args, 1) - - ty = sig.args[0] - csig2 = typing.signature(*[ty]*3) - - ONE = context.get_constant_generic(builder, ty, 1.0 + 0.0j) - x = args[0] - - x_plus_one = numbers.complex_add_impl(context, builder, csig2, [x, - ONE]) - x_minus_one = numbers.complex_sub_impl(context, builder, csig2, [x, - ONE]) - sqrt_x_plus_one = np_complex_sqrt_impl(context, builder, sig, [x_plus_one]) - sqrt_x_minus_one = np_complex_sqrt_impl(context, builder, sig, [x_minus_one]) - prod_sqrt = numbers.complex_mul_impl(context, builder, csig2, - [sqrt_x_plus_one, - sqrt_x_minus_one]) - log_arg = numbers.complex_add_impl(context, builder, csig2, [x, - prod_sqrt]) - - return np_complex_log_impl(context, builder, sig, [log_arg]) - - -######################################################################## -# NumPy atanh - -def np_real_atanh_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - return mathimpl.atanh_impl(context, builder, sig, args) - - -def np_complex_atanh_impl(context, builder, sig, args): - # npymath does not provide a complex atanh. The code in funcs.inc.src - # is translated here... - _check_arity_and_homogeneity(sig, args, 1) - - ty = sig.args[0] - float_ty = ty.underlying_float - epsilon = context.get_constant(float_ty, 1e-3) - - # if real or imag has magnitude over 1e-3... - x = context.make_complex(builder, ty, value=args[0]) - out = context.make_complex(builder, ty) - abs_r = _fabs(context, builder, x.real) - abs_i = _fabs(context, builder, x.imag) - abs_r_gt_epsilon = builder.fcmp(lc.FCMP_OGT, abs_r, epsilon) - abs_i_gt_epsilon = builder.fcmp(lc.FCMP_OGT, abs_i, epsilon) - any_gt_epsilon = builder.or_(abs_r_gt_epsilon, abs_i_gt_epsilon) - binary_sig = typing.signature(*[ty]*3) - with builder.if_else(any_gt_epsilon) as (then, otherwise): - with then: - # ... 
then use formula - # 0.5 * log((1 + x)/(1 - x)) - ONE = context.get_constant_generic(builder, ty, 1.0 + 0.0j) - HALF = context.get_constant_generic(builder, ty, 0.5 + 0.0j) - den = numbers.complex_sub_impl(context, builder, binary_sig, - [ONE, args[0]]) - num = numbers.complex_add_impl(context, builder, binary_sig, - [ONE, args[0]]) - div = np_complex_div_impl(context, builder, binary_sig, - [num, den]) - log = np_complex_log_impl(context, builder, sig, [div]) - res = numbers.complex_mul_impl(context, builder, binary_sig, - [HALF, log]) - - out._setvalue(res) - with otherwise: - # else use series expansion (to avoid loss of precision) - coef_dict = { - types.complex64: [1.0/3.0, 3.0/5.0], - types.complex128: [1.0/3.0, 3.0/5.0, 5.0/7.0], - # types.complex256: [1.0/3.0, 3.0/5.0, 5.0/7.0, 7.0/9.0, 9.0/11.0] - } - - xx = np_complex_square_impl(context, builder, sig, args) - ONE = context.get_constant_generic(builder, ty, 1.0 + 0.0j) - tmp = _complex_expand_series(context, builder, ty, - ONE, xx, coef_dict[ty]) - out._setvalue(numbers.complex_mul_impl(context, builder, - binary_sig, - [args[0], tmp])) - - return out._getvalue() - - - -######################################################################## -# NumPy floor - -def np_real_floor_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - - return mathimpl.call_fp_intrinsic(builder, 'llvm.floor', args) - - -######################################################################## -# NumPy ceil - -def np_real_ceil_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - - return mathimpl.call_fp_intrinsic(builder, 'llvm.ceil', args) - - -######################################################################## -# NumPy trunc - -def np_real_trunc_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - - return mathimpl.call_fp_intrinsic(builder, 'llvm.trunc', args) - - 
-######################################################################## -# NumPy fabs - -def np_real_fabs_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1) - - return mathimpl.call_fp_intrinsic(builder, 'llvm.fabs', args) - - -######################################################################## -# NumPy style predicates - -# For real and integer types rely on numbers... but complex ordering in -# NumPy is lexicographic (while Python does not provide ordering). -def np_complex_ge_impl(context, builder, sig, args): - # equivalent to macro CGE in NumPy's loops.c.src - # ((xr > yr && !npy_isnan(xi) && !npy_isnan(yi)) || (xr == yr && xi >= yi)) - _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean) - - ty = sig.args[0] - in1, in2 = [context.make_complex(builder, ty, value=arg) for arg in args] - xr = in1.real - xi = in1.imag - yr = in2.real - yi = in2.imag - - xr_gt_yr = builder.fcmp(lc.FCMP_OGT, xr, yr) - no_nan_xi_yi = builder.fcmp(lc.FCMP_ORD, xi, yi) - xr_eq_yr = builder.fcmp(lc.FCMP_OEQ, xr, yr) - xi_ge_yi = builder.fcmp(lc.FCMP_OGE, xi, yi) - first_term = builder.and_(xr_gt_yr, no_nan_xi_yi) - second_term = builder.and_(xr_eq_yr, xi_ge_yi) - return builder.or_(first_term, second_term) - - -def np_complex_le_impl(context, builder, sig, args): - # equivalent to macro CLE in NumPy's loops.c.src - # ((xr < yr && !npy_isnan(xi) && !npy_isnan(yi)) || (xr == yr && xi <= yi)) - _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean) - - ty = sig.args[0] - in1, in2 = [context.make_complex(builder, ty, value=arg) for arg in args] - xr = in1.real - xi = in1.imag - yr = in2.real - yi = in2.imag - - xr_lt_yr = builder.fcmp(lc.FCMP_OLT, xr, yr) - no_nan_xi_yi = builder.fcmp(lc.FCMP_ORD, xi, yi) - xr_eq_yr = builder.fcmp(lc.FCMP_OEQ, xr, yr) - xi_le_yi = builder.fcmp(lc.FCMP_OLE, xi, yi) - first_term = builder.and_(xr_lt_yr, no_nan_xi_yi) - second_term = builder.and_(xr_eq_yr, xi_le_yi) - return 
builder.or_(first_term, second_term) - - -def np_complex_gt_impl(context, builder, sig, args): - # equivalent to macro CGT in NumPy's loops.c.src - # ((xr > yr && !npy_isnan(xi) && !npy_isnan(yi)) || (xr == yr && xi > yi)) - _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean) - - ty = sig.args[0] - in1, in2 = [context.make_complex(builder, ty, value=arg) for arg in args] - xr = in1.real - xi = in1.imag - yr = in2.real - yi = in2.imag - - xr_gt_yr = builder.fcmp(lc.FCMP_OGT, xr, yr) - no_nan_xi_yi = builder.fcmp(lc.FCMP_ORD, xi, yi) - xr_eq_yr = builder.fcmp(lc.FCMP_OEQ, xr, yr) - xi_gt_yi = builder.fcmp(lc.FCMP_OGT, xi, yi) - first_term = builder.and_(xr_gt_yr, no_nan_xi_yi) - second_term = builder.and_(xr_eq_yr, xi_gt_yi) - return builder.or_(first_term, second_term) - - -def np_complex_lt_impl(context, builder, sig, args): - # equivalent to macro CLT in NumPy's loops.c.src - # ((xr < yr && !npy_isnan(xi) && !npy_isnan(yi)) || (xr == yr && xi < yi)) - _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean) - - ty = sig.args[0] - in1, in2 = [context.make_complex(builder, ty, value=arg) for arg in args] - xr = in1.real - xi = in1.imag - yr = in2.real - yi = in2.imag - - xr_lt_yr = builder.fcmp(lc.FCMP_OLT, xr, yr) - no_nan_xi_yi = builder.fcmp(lc.FCMP_ORD, xi, yi) - xr_eq_yr = builder.fcmp(lc.FCMP_OEQ, xr, yr) - xi_lt_yi = builder.fcmp(lc.FCMP_OLT, xi, yi) - first_term = builder.and_(xr_lt_yr, no_nan_xi_yi) - second_term = builder.and_(xr_eq_yr, xi_lt_yi) - return builder.or_(first_term, second_term) - - -def np_complex_eq_impl(context, builder, sig, args): - # equivalent to macro CEQ in NumPy's loops.c.src - # (xr == yr && xi == yi) - _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean) - - ty = sig.args[0] - in1, in2 = [context.make_complex(builder, ty, value=arg) for arg in args] - xr = in1.real - xi = in1.imag - yr = in2.real - yi = in2.imag - - xr_eq_yr = builder.fcmp(lc.FCMP_OEQ, xr, yr) - xi_eq_yi = 
builder.fcmp(lc.FCMP_OEQ, xi, yi) - return builder.and_(xr_eq_yr, xi_eq_yi) - - -def np_complex_ne_impl(context, builder, sig, args): - # equivalent to macro CNE in NumPy's loops.c.src - # (xr != yr || xi != yi) - _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean) - - ty = sig.args[0] - in1, in2 = [context.make_complex(builder, ty, value=arg) for arg in args] - xr = in1.real - xi = in1.imag - yr = in2.real - yi = in2.imag - - xr_ne_yr = builder.fcmp(lc.FCMP_UNE, xr, yr) - xi_ne_yi = builder.fcmp(lc.FCMP_UNE, xi, yi) - return builder.or_(xr_ne_yr, xi_ne_yi) - - -######################################################################## -# NumPy logical algebra - -# these are made generic for all types for now, assuming that -# cgutils.is_true works in the underlying types. - -def _complex_is_true(context, builder, ty, val): - complex_val = context.make_complex(builder, ty, value=val) - re_true = cgutils.is_true(builder, complex_val.real) - im_true = cgutils.is_true(builder, complex_val.imag) - return builder.or_(re_true, im_true) - - -def np_logical_and_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean) - a = cgutils.is_true(builder, args[0]) - b = cgutils.is_true(builder, args[1]) - return builder.and_(a, b) - - -def np_complex_logical_and_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean) - a = _complex_is_true(context, builder, sig.args[0], args[0]) - b = _complex_is_true(context, builder, sig.args[1], args[1]) - return builder.and_(a, b) - - -def np_logical_or_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean) - a = cgutils.is_true(builder, args[0]) - b = cgutils.is_true(builder, args[1]) - return builder.or_(a, b) - - -def np_complex_logical_or_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean) - a = 
_complex_is_true(context, builder, sig.args[0], args[0]) - b = _complex_is_true(context, builder, sig.args[1], args[1]) - return builder.or_(a, b) - - -def np_logical_xor_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean) - a = cgutils.is_true(builder, args[0]) - b = cgutils.is_true(builder, args[1]) - return builder.xor(a, b) - - -def np_complex_logical_xor_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean) - a = _complex_is_true(context, builder, sig.args[0], args[0]) - b = _complex_is_true(context, builder, sig.args[1], args[1]) - return builder.xor(a, b) - - -def np_logical_not_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean) - return cgutils.is_false(builder, args[0]) - - -def np_complex_logical_not_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean) - a = _complex_is_true(context, builder, sig.args[0], args[0]) - return builder.not_(a) - -######################################################################## -# NumPy style max/min -# -# There are 2 different sets of functions to perform max and min in -# NumPy: maximum/minimum and fmax/fmin. -# Both differ in the way NaNs are handled, so the actual differences -# come in action only on float/complex numbers. The functions used for -# integers is shared. For booleans maximum is equivalent to or, and -# minimum is equivalent to and. Datetime support will go elsewhere. 
- -def np_int_smax_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2) - arg1, arg2 = args - arg1_sge_arg2 = builder.icmp(lc.ICMP_SGE, arg1, arg2) - return builder.select(arg1_sge_arg2, arg1, arg2) - - -def np_int_umax_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2) - arg1, arg2 = args - arg1_uge_arg2 = builder.icmp(lc.ICMP_UGE, arg1, arg2) - return builder.select(arg1_uge_arg2, arg1, arg2) - - -def np_real_maximum_impl(context, builder, sig, args): - # maximum prefers nan (tries to return a nan). - _check_arity_and_homogeneity(sig, args, 2) - - arg1, arg2 = args - arg1_nan = builder.fcmp(lc.FCMP_UNO, arg1, arg1) - any_nan = builder.fcmp(lc.FCMP_UNO, arg1, arg2) - nan_result = builder.select(arg1_nan, arg1, arg2) - - arg1_ge_arg2 = builder.fcmp(lc.FCMP_OGE, arg1, arg2) - non_nan_result = builder.select(arg1_ge_arg2, arg1, arg2) - - return builder.select(any_nan, nan_result, non_nan_result) - - -def np_real_fmax_impl(context, builder, sig, args): - # fmax prefers non-nan (tries to return a non-nan). - _check_arity_and_homogeneity(sig, args, 2) - - arg1, arg2 = args - arg2_nan = builder.fcmp(lc.FCMP_UNO, arg2, arg2) - any_nan = builder.fcmp(lc.FCMP_UNO, arg1, arg2) - nan_result = builder.select(arg2_nan, arg1, arg2) - - arg1_ge_arg2 = builder.fcmp(lc.FCMP_OGE, arg1, arg2) - non_nan_result = builder.select(arg1_ge_arg2, arg1, arg2) - - return builder.select(any_nan, nan_result, non_nan_result) - - -def np_complex_maximum_impl(context, builder, sig, args): - # maximum prefers nan (tries to return a nan). - # There is an extra caveat with complex numbers, as there is more - # than one type of nan. NumPy's docs state that the nan in the - # first argument is returned when both arguments are nans. - # If only one nan is found, that nan is returned. 
- _check_arity_and_homogeneity(sig, args, 2) - ty = sig.args[0] - bc_sig = typing.signature(types.boolean, ty) - bcc_sig = typing.signature(types.boolean, *[ty]*2) - arg1, arg2 = args - arg1_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg1]) - arg2_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg2]) - any_nan = builder.or_(arg1_nan, arg2_nan) - nan_result = builder.select(arg1_nan, arg1, arg2) - - arg1_ge_arg2 = np_complex_ge_impl(context, builder, bcc_sig, args) - non_nan_result = builder.select(arg1_ge_arg2, arg1, arg2) - - return builder.select(any_nan, nan_result, non_nan_result) - - -def np_complex_fmax_impl(context, builder, sig, args): - # fmax prefers non-nan (tries to return a non-nan). - # There is an extra caveat with complex numbers, as there is more - # than one type of nan. NumPy's docs state that the nan in the - # first argument is returned when both arguments are nans. - _check_arity_and_homogeneity(sig, args, 2) - ty = sig.args[0] - bc_sig = typing.signature(types.boolean, ty) - bcc_sig = typing.signature(types.boolean, *[ty]*2) - arg1, arg2 = args - arg1_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg1]) - arg2_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg2]) - any_nan = builder.or_(arg1_nan, arg2_nan) - nan_result = builder.select(arg2_nan, arg1, arg2) - - arg1_ge_arg2 = np_complex_ge_impl(context, builder, bcc_sig, args) - non_nan_result = builder.select(arg1_ge_arg2, arg1, arg2) - - return builder.select(any_nan, nan_result, non_nan_result) - - -def np_int_smin_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2) - arg1, arg2 = args - arg1_sle_arg2 = builder.icmp(lc.ICMP_SLE, arg1, arg2) - return builder.select(arg1_sle_arg2, arg1, arg2) - - -def np_int_umin_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2) - arg1, arg2 = args - arg1_ule_arg2 = builder.icmp(lc.ICMP_ULE, arg1, arg2) - return builder.select(arg1_ule_arg2, arg1, arg2) - - -def 
np_real_minimum_impl(context, builder, sig, args): - # minimum prefers nan (tries to return a nan). - _check_arity_and_homogeneity(sig, args, 2) - - arg1, arg2 = args - arg1_nan = builder.fcmp(lc.FCMP_UNO, arg1, arg1) - any_nan = builder.fcmp(lc.FCMP_UNO, arg1, arg2) - nan_result = builder.select(arg1_nan, arg1, arg2) - - arg1_le_arg2 = builder.fcmp(lc.FCMP_OLE, arg1, arg2) - non_nan_result = builder.select(arg1_le_arg2, arg1, arg2) - - return builder.select(any_nan, nan_result, non_nan_result) - - -def np_real_fmin_impl(context, builder, sig, args): - # fmin prefers non-nan (tries to return a non-nan). - _check_arity_and_homogeneity(sig, args, 2) - - arg1, arg2 = args - arg1_nan = builder.fcmp(lc.FCMP_UNO, arg1, arg1) - any_nan = builder.fcmp(lc.FCMP_UNO, arg1, arg2) - nan_result = builder.select(arg1_nan, arg2, arg1) - - arg1_le_arg2 = builder.fcmp(lc.FCMP_OLE, arg1, arg2) - non_nan_result = builder.select(arg1_le_arg2, arg1, arg2) - - return builder.select(any_nan, nan_result, non_nan_result) - - -def np_complex_minimum_impl(context, builder, sig, args): - # minimum prefers nan (tries to return a nan). - # There is an extra caveat with complex numbers, as there is more - # than one type of nan. NumPy's docs state that the nan in the - # first argument is returned when both arguments are nans. - # If only one nan is found, that nan is returned. 
- _check_arity_and_homogeneity(sig, args, 2) - ty = sig.args[0] - bc_sig = typing.signature(types.boolean, ty) - bcc_sig = typing.signature(types.boolean, *[ty]*2) - arg1, arg2 = args - arg1_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg1]) - arg2_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg2]) - any_nan = builder.or_(arg1_nan, arg2_nan) - nan_result = builder.select(arg1_nan, arg1, arg2) - - arg1_le_arg2 = np_complex_le_impl(context, builder, bcc_sig, args) - non_nan_result = builder.select(arg1_le_arg2, arg1, arg2) - - return builder.select(any_nan, nan_result, non_nan_result) - - -def np_complex_fmin_impl(context, builder, sig, args): - # fmin prefers non-nan (tries to return a non-nan). - # There is an extra caveat with complex numbers, as there is more - # than one type of nan. NumPy's docs state that the nan in the - # first argument is returned when both arguments are nans. - _check_arity_and_homogeneity(sig, args, 2) - ty = sig.args[0] - bc_sig = typing.signature(types.boolean, ty) - bcc_sig = typing.signature(types.boolean, *[ty]*2) - arg1, arg2 = args - arg1_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg1]) - arg2_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg2]) - any_nan = builder.or_(arg1_nan, arg2_nan) - nan_result = builder.select(arg2_nan, arg1, arg2) - - arg1_le_arg2 = np_complex_le_impl(context, builder, bcc_sig, args) - non_nan_result = builder.select(arg1_le_arg2, arg1, arg2) - - return builder.select(any_nan, nan_result, non_nan_result) - - -######################################################################## -# NumPy floating point misc - -def np_real_isnan_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean) - return mathimpl.is_nan(builder, args[0]) - - -def np_complex_isnan_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean) - - x, = args - ty, = sig.args - complex_val = 
context.make_complex(builder, ty, value=x) - return cmathimpl.is_nan(builder, complex_val) - - -def np_real_isfinite_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean) - return mathimpl.is_finite(builder, args[0]) - - -def np_complex_isfinite_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean) - x, = args - ty, = sig.args - complex_val = context.make_complex(builder, ty, value=x) - return cmathimpl.is_finite(builder, complex_val) - - -def np_real_isinf_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean) - return mathimpl.is_inf(builder, args[0]) - - -def np_complex_isinf_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean) - x, = args - ty, = sig.args - complex_val = context.make_complex(builder, ty, value=x) - return cmathimpl.is_inf(builder, complex_val) - - -def np_real_signbit_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean) - - dispatch_table = { - types.float32: 'numba_signbitf', - types.float64: 'numba_signbit', - } - inner_sig = typing.signature(types.intc, *sig.args) - - int_res = _dispatch_func_by_name_type(context, builder, inner_sig, args, - dispatch_table, 'signbit') - bool_res = builder.icmp_unsigned('!=', int_res, int_res.type(0)) - return bool_res - - -def np_real_copysign_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2) - - return mathimpl.copysign_float_impl(context, builder, sig, args) - -def np_real_nextafter_impl(context, builder, sig, args): - _check_arity_and_homogeneity(sig, args, 2) - - dispatch_table = { - types.float32: 'npy_nextafterf', - types.float64: 'npy_nextafter', - } - - return _dispatch_func_by_name_type(context, builder, sig, args, - dispatch_table, 'nextafter') - -def np_real_spacing_impl(context, builder, sig, args): - 
_check_arity_and_homogeneity(sig, args, 1) - - dispatch_table = { - types.float32: 'npy_spacingf', - types.float64: 'npy_spacing', - } - - return _dispatch_func_by_name_type(context, builder, sig, args, - dispatch_table, 'spacing') - - -def np_real_ldexp_impl(context, builder, sig, args): - # this one is slightly different to other ufuncs. - # arguments are not homogeneous and second arg may come as - # an 'i' or an 'l'. - - # the function expects the second argument to be have a C int type - x1, x2 = args - ty1, ty2 = sig.args - # note that types.intc should be equivalent to int_ that is - # 'NumPy's default int') - x2 = context.cast(builder, x2, ty2, types.intc) - f_fi_sig = typing.signature(ty1, ty1, types.intc) - return mathimpl.ldexp_impl(context, builder, f_fi_sig, (x1, x2)) diff --git a/numba/numba/targets/npyimpl.py b/numba/numba/targets/npyimpl.py deleted file mode 100644 index 846b7496d..000000000 --- a/numba/numba/targets/npyimpl.py +++ /dev/null @@ -1,542 +0,0 @@ -""" -Implementation of functions in the Numpy package. -""" - -from __future__ import print_function, division, absolute_import - -import math -import sys -import itertools -from collections import namedtuple - -from llvmlite.llvmpy import core as lc - -import numpy as np - -from . import builtins, callconv, ufunc_db, arrayobj -from .imputils import Registry, impl_ret_new_ref, force_error_model -from .. import typing, types, cgutils, numpy_support, utils -from ..config import PYVERSION -from ..numpy_support import ufunc_find_matching_loop, select_array_wrapper -from ..typing import npydecl - -from .. import errors - -registry = Registry() -lower = registry.lower - - -######################################################################## - -# In the way we generate code, ufuncs work with scalar as well as -# with array arguments. The following helper classes help dealing -# with scalar and array arguments in a regular way. -# -# In short, the classes provide a uniform interface. 
The interface -# handles the indexing of as many dimensions as the array may have. -# For scalars, all indexing is ignored and when the value is read, -# the scalar is returned. For arrays code for actual indexing is -# generated and reading performs the appropriate indirection. - -class _ScalarIndexingHelper(object): - def update_indices(self, loop_indices, name): - pass - - def as_values(self): - pass - - -class _ScalarHelper(object): - """Helper class to handle scalar arguments (and result). - Note that store_data is only used when generating code for - a scalar ufunc and to write the output value. - - For loading, the value is directly used without having any - kind of indexing nor memory backing it up. This is the use - for input arguments. - - For storing, a variable is created in the stack where the - value will be written. - - Note that it is not supported (as it is unneeded for our - current use-cases) reading back a stored value. This class - will always "load" the original value it got at its creation. 
- """ - def __init__(self, ctxt, bld, val, ty): - self.context = ctxt - self.builder = bld - self.val = val - self.base_type = ty - intpty = ctxt.get_value_type(types.intp) - self.shape = [lc.Constant.int(intpty, 1)] - - lty = ctxt.get_data_type(ty) if ty != types.boolean else lc.Type.int(1) - self._ptr = cgutils.alloca_once(bld, lty) - - def create_iter_indices(self): - return _ScalarIndexingHelper() - - def load_data(self, indices): - return self.val - - def store_data(self, indices, val): - self.builder.store(val, self._ptr) - - @property - def return_val(self): - return self.builder.load(self._ptr) - - -class _ArrayIndexingHelper(namedtuple('_ArrayIndexingHelper', - ('array', 'indices'))): - def update_indices(self, loop_indices, name): - bld = self.array.builder - intpty = self.array.context.get_value_type(types.intp) - ONE = lc.Constant.int(lc.Type.int(intpty.width), 1) - - # we are only interested in as many inner dimensions as dimensions - # the indexed array has (the outer dimensions are broadcast, so - # ignoring the outer indices produces the desired result. - indices = loop_indices[len(loop_indices) - len(self.indices):] - for src, dst, dim in zip(indices, self.indices, self.array.shape): - cond = bld.icmp(lc.ICMP_UGT, dim, ONE) - with bld.if_then(cond): - bld.store(src, dst) - - def as_values(self): - """ - The indexing helper is built using alloca for each value, so it - actually contains pointers to the actual indices to load. Note - that update_indices assumes the same. This method returns the - indices as values - """ - bld = self.array.builder - return [bld.load(index) for index in self.indices] - - -class _ArrayHelper(namedtuple('_ArrayHelper', ('context', 'builder', - 'shape', 'strides', 'data', - 'layout', 'base_type', 'ndim', - 'return_val'))): - """Helper class to handle array arguments/result. - It provides methods to generate code loading/storing specific - items as well as support code for handling indices. 
- """ - def create_iter_indices(self): - intpty = self.context.get_value_type(types.intp) - ZERO = lc.Constant.int(lc.Type.int(intpty.width), 0) - - indices = [] - for i in range(self.ndim): - x = cgutils.alloca_once(self.builder, lc.Type.int(intpty.width)) - self.builder.store(ZERO, x) - indices.append(x) - return _ArrayIndexingHelper(self, indices) - - def _load_effective_address(self, indices): - return cgutils.get_item_pointer2(self.builder, - data=self.data, - shape=self.shape, - strides=self.strides, - layout=self.layout, - inds=indices) - - def load_data(self, indices): - model = self.context.data_model_manager[self.base_type] - ptr = self._load_effective_address(indices) - return model.load_from_data_pointer(self.builder, ptr) - - def store_data(self, indices, value): - ctx = self.context - bld = self.builder - store_value = ctx.get_value_as_data(bld, self.base_type, value) - assert ctx.get_data_type(self.base_type) == store_value.type - bld.store(store_value, self._load_effective_address(indices)) - - -def _prepare_argument(ctxt, bld, inp, tyinp, where='input operand'): - """returns an instance of the appropriate Helper (either - _ScalarHelper or _ArrayHelper) class to handle the argument. 
- using the polymorphic interface of the Helper classes, scalar - and array cases can be handled with the same code""" - if isinstance(tyinp, types.ArrayCompatible): - ary = ctxt.make_array(tyinp)(ctxt, bld, inp) - shape = cgutils.unpack_tuple(bld, ary.shape, tyinp.ndim) - strides = cgutils.unpack_tuple(bld, ary.strides, tyinp.ndim) - return _ArrayHelper(ctxt, bld, shape, strides, ary.data, - tyinp.layout, tyinp.dtype, tyinp.ndim, inp) - elif tyinp in types.number_domain | set([types.boolean]): - return _ScalarHelper(ctxt, bld, inp, tyinp) - else: - raise NotImplementedError('unsupported type for {0}: {1}'.format(where, str(tyinp))) - - -_broadcast_onto_sig = types.intp(types.intp, types.CPointer(types.intp), - types.intp, types.CPointer(types.intp)) -def _broadcast_onto(src_ndim, src_shape, dest_ndim, dest_shape): - '''Low-level utility function used in calculating a shape for - an implicit output array. This function assumes that the - destination shape is an LLVM pointer to a C-style array that was - already initialized to a size of one along all axes. - - Returns an integer value: - >= 1 : Succeeded. Return value should equal the number of dimensions in - the destination shape. - 0 : Failed to broadcast because source shape is larger than the - destination shape (this case should be weeded out at type - checking). - < 0 : Failed to broadcast onto destination axis, at axis number == - -(return_value + 1). - ''' - if src_ndim > dest_ndim: - # This check should have been done during type checking, but - # let's be defensive anyway... - return 0 - else: - src_index = 0 - dest_index = dest_ndim - src_ndim - while src_index < src_ndim: - src_dim_size = src_shape[src_index] - dest_dim_size = dest_shape[dest_index] - # Check to see if we've already mutated the destination - # shape along this axis. - if dest_dim_size != 1: - # If we have mutated the destination shape already, - # then the source axis size must either be one, - # or the destination axis size. 
- if src_dim_size != dest_dim_size and src_dim_size != 1: - return -(dest_index + 1) - elif src_dim_size != 1: - # If the destination size is still its initial - dest_shape[dest_index] = src_dim_size - src_index += 1 - dest_index += 1 - return dest_index - -def _build_array(context, builder, array_ty, input_types, inputs): - """Utility function to handle allocation of an implicit output array - given the target context, builder, output array type, and a list of - _ArrayHelper instances. - """ - intp_ty = context.get_value_type(types.intp) - def make_intp_const(val): - return context.get_constant(types.intp, val) - - ZERO = make_intp_const(0) - ONE = make_intp_const(1) - - src_shape = cgutils.alloca_once(builder, intp_ty, array_ty.ndim, - "src_shape") - dest_ndim = make_intp_const(array_ty.ndim) - dest_shape = cgutils.alloca_once(builder, intp_ty, array_ty.ndim, - "dest_shape") - dest_shape_addrs = tuple(cgutils.gep_inbounds(builder, dest_shape, index) - for index in range(array_ty.ndim)) - - # Initialize the destination shape with all ones. - for dest_shape_addr in dest_shape_addrs: - builder.store(ONE, dest_shape_addr) - - # For each argument, try to broadcast onto the destination shape, - # mutating along any axis where the argument shape is not one and - # the destination shape is one. 
- for arg_number, arg in enumerate(inputs): - if not hasattr(arg, "ndim"): # Skip scalar arguments - continue - arg_ndim = make_intp_const(arg.ndim) - for index in range(arg.ndim): - builder.store(arg.shape[index], - cgutils.gep_inbounds(builder, src_shape, index)) - arg_result = context.compile_internal( - builder, _broadcast_onto, _broadcast_onto_sig, - [arg_ndim, src_shape, dest_ndim, dest_shape]) - with cgutils.if_unlikely(builder, - builder.icmp(lc.ICMP_SLT, arg_result, ONE)): - msg = "unable to broadcast argument %d to output array" % ( - arg_number,) - - loc = errors.loc_info.get('loc', None) - if loc is not None: - msg += '\nFile "%s", line %d, ' % (loc.filename, loc.line) - - context.call_conv.return_user_exc(builder, ValueError, (msg,)) - - real_array_ty = array_ty.as_array - - dest_shape_tup = tuple(builder.load(dest_shape_addr) - for dest_shape_addr in dest_shape_addrs) - array_val = arrayobj._empty_nd_impl(context, builder, real_array_ty, - dest_shape_tup) - - # Get the best argument to call __array_wrap__ on - array_wrapper_index = select_array_wrapper(input_types) - array_wrapper_ty = input_types[array_wrapper_index] - try: - # __array_wrap__(source wrapped array, out array) -> out wrapped array - array_wrap = context.get_function('__array_wrap__', - array_ty(array_wrapper_ty, real_array_ty)) - except NotImplementedError: - # If it's the same priority as a regular array, assume we - # should use the allocated array unchanged. 
- if array_wrapper_ty.array_priority != types.Array.array_priority: - raise - out_val = array_val._getvalue() - else: - wrap_args = (inputs[array_wrapper_index].return_val, array_val._getvalue()) - out_val = array_wrap(builder, wrap_args) - - ndim = array_ty.ndim - shape = cgutils.unpack_tuple(builder, array_val.shape, ndim) - strides = cgutils.unpack_tuple(builder, array_val.strides, ndim) - return _ArrayHelper(context, builder, shape, strides, array_val.data, - array_ty.layout, array_ty.dtype, ndim, - out_val) - - -def numpy_ufunc_kernel(context, builder, sig, args, kernel_class, - explicit_output=True): - # This is the code generator that builds all the looping needed - # to execute a numpy functions over several dimensions (including - # scalar cases). - # - # context - the code generation context - # builder - the code emitter - # sig - signature of the ufunc - # args - the args to the ufunc - # kernel_class - a code generating subclass of _Kernel that provides - # explicit_output - if the output was explicit in the call - # (ie: np.add(x,y,r)) - - arguments = [_prepare_argument(context, builder, arg, tyarg) - for arg, tyarg in zip(args, sig.args)] - if not explicit_output: - ret_ty = sig.return_type - if isinstance(ret_ty, types.ArrayCompatible): - output = _build_array(context, builder, ret_ty, sig.args, arguments) - else: - output = _prepare_argument( - context, builder, - lc.Constant.null(context.get_value_type(ret_ty)), ret_ty) - arguments.append(output) - elif context.enable_nrt: - # Incref the output - context.nrt.incref(builder, sig.return_type, args[-1]) - - inputs = arguments[0:-1] - output = arguments[-1] - - outer_sig = [a.base_type for a in arguments] - #signature expects return type first, while we have it last: - outer_sig = outer_sig[-1:] + outer_sig[:-1] - outer_sig = typing.signature(*outer_sig) - kernel = kernel_class(context, builder, outer_sig) - intpty = context.get_value_type(types.intp) - - indices = [inp.create_iter_indices() for inp 
in inputs] - - loopshape = output.shape - with cgutils.loop_nest(builder, loopshape, intp=intpty) as loop_indices: - vals_in = [] - for i, (index, arg) in enumerate(zip(indices, inputs)): - index.update_indices(loop_indices, i) - vals_in.append(arg.load_data(index.as_values())) - - val_out = kernel.generate(*vals_in) - output.store_data(loop_indices, val_out) - out = arguments[-1].return_val - return impl_ret_new_ref(context, builder, sig.return_type, out) - - -# Kernels are the code to be executed inside the multidimensional loop. -class _Kernel(object): - def __init__(self, context, builder, outer_sig): - self.context = context - self.builder = builder - self.outer_sig = outer_sig - - def cast(self, val, fromty, toty): - """Numpy uses cast semantics that are different from standard Python - (for example, it does allow casting from complex to float). - - This method acts as a patch to context.cast so that it allows - complex to real/int casts. - - """ - if (isinstance(fromty, types.Complex) and - not isinstance(toty, types.Complex)): - # attempt conversion of the real part to the specified type. - # note that NumPy issues a warning in this kind of conversions - newty = fromty.underlying_float - attr = self.context.get_getattr(fromty, 'real') - val = attr(self.context, self.builder, fromty, val, 'real') - fromty = newty - # let the regular cast do the rest... - - return self.context.cast(self.builder, val, fromty, toty) - - -def _ufunc_db_function(ufunc): - """Use the ufunc loop type information to select the code generation - function from the table provided by the dict_of_kernels. The dict - of kernels maps the loop identifier to a function with the - following signature: (context, builder, signature, args). - - The loop type information has the form 'AB->C'. The letters to the - left of '->' are the input types (specified as NumPy letter - types). The letters to the right of '->' are the output - types. 
There must be 'ufunc.nin' letters to the left of '->', and - 'ufunc.nout' letters to the right. - - For example, a binary float loop resulting in a float, will have - the following signature: 'ff->f'. - - A given ufunc implements many loops. The list of loops implemented - for a given ufunc can be accessed using the 'types' attribute in - the ufunc object. The NumPy machinery selects the first loop that - fits a given calling signature (in our case, what we call the - outer_sig). This logic is mimicked by 'ufunc_find_matching_loop'. - """ - - class _KernelImpl(_Kernel): - def __init__(self, context, builder, outer_sig): - super(_KernelImpl, self).__init__(context, builder, outer_sig) - loop = ufunc_find_matching_loop( - ufunc, outer_sig.args + (outer_sig.return_type,)) - self.fn = ufunc_db.get_ufunc_info(ufunc).get(loop.ufunc_sig) - self.inner_sig = typing.signature( - *(loop.outputs + loop.inputs)) - - if self.fn is None: - msg = "Don't know how to lower ufunc '{0}' for loop '{1}'" - raise NotImplementedError(msg.format(ufunc.__name__, loop)) - - def generate(self, *args): - isig = self.inner_sig - osig = self.outer_sig - - cast_args = [self.cast(val, inty, outty) - for val, inty, outty in zip(args, osig.args, - isig.args)] - with force_error_model(self.context, 'numpy'): - res = self.fn(self.context, self.builder, isig, cast_args) - dmm = self.context.data_model_manager - res = dmm[isig.return_type].from_return(self.builder, res) - return self.cast(res, isig.return_type, osig.return_type) - - return _KernelImpl - - -################################################################################ -# Helper functions that register the ufuncs - -_kernels = {} # Temporary map from ufunc's to their kernel implementation class - -def register_unary_ufunc_kernel(ufunc, kernel): - def unary_ufunc(context, builder, sig, args): - return numpy_ufunc_kernel(context, builder, sig, args, kernel) - - def unary_ufunc_no_explicit_output(context, builder, sig, args): - return 
numpy_ufunc_kernel(context, builder, sig, args, kernel, - explicit_output=False) - - _any = types.Any - - # (array or scalar, out=array) - lower(ufunc, _any, types.Array)(unary_ufunc) - # (array or scalar) - lower(ufunc, _any)(unary_ufunc_no_explicit_output) - - _kernels[ufunc] = kernel - - -def register_binary_ufunc_kernel(ufunc, kernel): - def binary_ufunc(context, builder, sig, args): - return numpy_ufunc_kernel(context, builder, sig, args, kernel) - - def binary_ufunc_no_explicit_output(context, builder, sig, args): - return numpy_ufunc_kernel(context, builder, sig, args, kernel, - explicit_output=False) - - _any = types.Any - - # (array or scalar, array o scalar, out=array) - lower(ufunc, _any, _any, types.Array)(binary_ufunc) - # (scalar, scalar) - lower(ufunc, _any, _any)(binary_ufunc_no_explicit_output) - - _kernels[ufunc] = kernel - - -def register_unary_operator_kernel(operator, kernel): - def lower_unary_operator(context, builder, sig, args): - return numpy_ufunc_kernel(context, builder, sig, args, kernel, - explicit_output=False) - _arr_kind = types.Array - lower(operator, _arr_kind)(lower_unary_operator) - - -def register_binary_operator_kernel(operator, kernel): - def lower_binary_operator(context, builder, sig, args): - return numpy_ufunc_kernel(context, builder, sig, args, kernel, - explicit_output=False) - - def lower_inplace_operator(context, builder, sig, args): - # The visible signature is (A, B) -> A - # The implementation's signature (with explicit output) - # is (A, B, A) -> A - args = args + (args[0],) - sig = typing.signature(sig.return_type, *sig.args + (sig.args[0],)) - return numpy_ufunc_kernel(context, builder, sig, args, kernel, - explicit_output=True) - - _any = types.Any - _arr_kind = types.Array - formal_sigs = [(_arr_kind, _arr_kind), (_any, _arr_kind), (_arr_kind, _any)] - for sig in formal_sigs: - lower(operator, *sig)(lower_binary_operator) - inplace = operator + '=' - if inplace in utils.inplace_map: - lower(inplace, 
*sig)(lower_inplace_operator) - - -################################################################################ -# Use the contents of ufunc_db to initialize the supported ufuncs - -for ufunc in ufunc_db.get_ufuncs(): - if ufunc.nin == 1: - register_unary_ufunc_kernel(ufunc, _ufunc_db_function(ufunc)) - elif ufunc.nin == 2: - register_binary_ufunc_kernel(ufunc, _ufunc_db_function(ufunc)) - else: - raise RuntimeError("Don't know how to register ufuncs from ufunc_db with arity > 2") - - -@lower('+', types.Array) -def array_positive_impl(context, builder, sig, args): - '''Lowering function for +(array) expressions. Defined here - (numba.targets.npyimpl) since the remaining array-operator - lowering functions are also registered in this module. - ''' - class _UnaryPositiveKernel(_Kernel): - def generate(self, *args): - [val] = args - return val - - return numpy_ufunc_kernel(context, builder, sig, args, - _UnaryPositiveKernel, explicit_output=False) - - -for _op_map in (npydecl.NumpyRulesUnaryArrayOperator._op_map, - npydecl.NumpyRulesArrayOperator._op_map): - for operator, ufunc_name in _op_map.items(): - ufunc = getattr(np, ufunc_name) - kernel = _kernels[ufunc] - if ufunc.nin == 1: - register_unary_operator_kernel(operator, kernel) - elif ufunc.nin == 2: - register_binary_operator_kernel(operator, kernel) - else: - raise RuntimeError("There shouldn't be any non-unary or binary operators") - - -del _kernels diff --git a/numba/numba/targets/numbers.py b/numba/numba/targets/numbers.py deleted file mode 100644 index 96e815391..000000000 --- a/numba/numba/targets/numbers.py +++ /dev/null @@ -1,1389 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import math -import numbers - -import numpy as np - -from llvmlite import ir -from llvmlite.llvmpy.core import Type, Constant -import llvmlite.llvmpy.core as lc - -from .imputils import (lower_builtin, lower_getattr, lower_getattr_generic, - lower_cast, lower_constant, - impl_ret_borrowed, 
impl_ret_untracked) -from . import optional -from .. import typing, types, cgutils, utils - - -def _int_arith_flags(rettype): - """ - Return the modifier flags for integer arithmetic. - """ - if rettype.signed: - # Ignore the effects of signed overflow. This is important for - # optimization of some indexing operations. For example - # array[i+1] could see `i+1` trigger a signed overflow and - # give a negative number. With Python's indexing, a negative - # index is treated differently: its resolution has a runtime cost. - # Telling LLVM to ignore signed overflows allows it to optimize - # away the check for a negative `i+1` if it knows `i` is positive. - return ['nsw'] - else: - return [] - - -def int_add_impl(context, builder, sig, args): - [va, vb] = args - [ta, tb] = sig.args - a = context.cast(builder, va, ta, sig.return_type) - b = context.cast(builder, vb, tb, sig.return_type) - res = builder.add(a, b, flags=_int_arith_flags(sig.return_type)) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_sub_impl(context, builder, sig, args): - [va, vb] = args - [ta, tb] = sig.args - a = context.cast(builder, va, ta, sig.return_type) - b = context.cast(builder, vb, tb, sig.return_type) - res = builder.sub(a, b, flags=_int_arith_flags(sig.return_type)) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_mul_impl(context, builder, sig, args): - [va, vb] = args - [ta, tb] = sig.args - a = context.cast(builder, va, ta, sig.return_type) - b = context.cast(builder, vb, tb, sig.return_type) - res = builder.mul(a, b, flags=_int_arith_flags(sig.return_type)) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_divmod_signed(context, builder, ty, x, y): - """ - Reference Objects/intobject.c - xdivy = x / y; - xmody = (long)(x - (unsigned long)xdivy * y); - /* If the signs of x and y differ, and the remainder is non-0, - * C89 doesn't define whether xdivy is now the floor or the - * ceiling of 
the infinitely precise quotient. We want the floor, - * and we have it iff the remainder's sign matches y's. - */ - if (xmody && ((y ^ xmody) < 0) /* i.e. and signs differ */) { - xmody += y; - --xdivy; - assert(xmody && ((y ^ xmody) >= 0)); - } - *p_xdivy = xdivy; - *p_xmody = xmody; - """ - assert x.type == y.type - - ZERO = y.type(0) - ONE = y.type(1) - - # NOTE: On x86 at least, dividing the lowest representable integer - # (e.g. 0x80000000 for int32) by -1 causes a SIFGPE (division overflow), - # causing the process to crash. - # We return 0, 0 instead (more or less like Numpy). - - resdiv = cgutils.alloca_once_value(builder, ZERO) - resmod = cgutils.alloca_once_value(builder, ZERO) - - is_overflow = builder.and_( - builder.icmp_signed('==', x, x.type(ty.minval)), - builder.icmp_signed('==', y, y.type(-1))) - - with builder.if_then(builder.not_(is_overflow), likely=True): - # Note LLVM will optimize this to a single divmod instruction, - # if available on the target CPU (e.g. x86). - xdivy = builder.sdiv(x, y) - xmody = builder.srem(x, y) - - y_xor_xmody_ltz = builder.icmp_signed('<', builder.xor(y, xmody), ZERO) - xmody_istrue = builder.icmp_signed('!=', xmody, ZERO) - cond = builder.and_(xmody_istrue, y_xor_xmody_ltz) - - with builder.if_else(cond) as (if_different_signs, if_same_signs): - with if_same_signs: - builder.store(xdivy, resdiv) - builder.store(xmody, resmod) - - with if_different_signs: - builder.store(builder.sub(xdivy, ONE), resdiv) - builder.store(builder.add(xmody, y), resmod) - - return builder.load(resdiv), builder.load(resmod) - - -def int_divmod(context, builder, ty, x, y): - """ - Integer divmod(x, y). The caller must ensure that y != 0. 
- """ - if ty.signed: - return int_divmod_signed(context, builder, ty, x, y) - else: - return builder.udiv(x, y), builder.urem(x, y) - - -def _int_divmod_impl(context, builder, sig, args, zerodiv_message): - va, vb = args - ta, tb = sig.args - - ty = sig.return_type - if isinstance(ty, types.UniTuple): - ty = ty.dtype - a = context.cast(builder, va, ta, ty) - b = context.cast(builder, vb, tb, ty) - quot = cgutils.alloca_once(builder, a.type, name="quot") - rem = cgutils.alloca_once(builder, a.type, name="rem") - - with builder.if_else(cgutils.is_scalar_zero(builder, b), likely=False - ) as (if_zero, if_non_zero): - with if_zero: - if not context.error_model.fp_zero_division( - builder, (zerodiv_message,)): - # No exception raised => return 0 - # XXX We should also set the FPU exception status, but - # there's no easy way to do that from LLVM. - builder.store(b, quot) - builder.store(b, rem) - with if_non_zero: - q, r = int_divmod(context, builder, ty, a, b) - builder.store(q, quot) - builder.store(r, rem) - - return quot, rem - - -@lower_builtin(divmod, types.Integer, types.Integer) -def int_divmod_impl(context, builder, sig, args): - quot, rem = _int_divmod_impl(context, builder, sig, args, - "integer divmod by zero") - - return cgutils.pack_array(builder, - (builder.load(quot), builder.load(rem))) - - -@lower_builtin('/?', types.Integer, types.Integer) -@lower_builtin('//', types.Integer, types.Integer) -def int_floordiv_impl(context, builder, sig, args): - quot, rem = _int_divmod_impl(context, builder, sig, args, - "integer division by zero") - return builder.load(quot) - - -@lower_builtin('/', types.Integer, types.Integer) -def int_truediv_impl(context, builder, sig, args): - [va, vb] = args - [ta, tb] = sig.args - a = context.cast(builder, va, ta, sig.return_type) - b = context.cast(builder, vb, tb, sig.return_type) - with cgutils.if_zero(builder, b): - context.error_model.fp_zero_division(builder, ("division by zero",)) - res = builder.fdiv(a, b) - return 
impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin('%', types.Integer, types.Integer) -def int_rem_impl(context, builder, sig, args): - quot, rem = _int_divmod_impl(context, builder, sig, args, - "integer modulo by zero") - return builder.load(rem) - - -def _get_power_zerodiv_return(context, return_type): - if (isinstance(return_type, types.Integer) - and not context.error_model.raise_on_fp_zero_division): - # If not raising, return 0x8000... when computing 0 ** - return -1 << (return_type.bitwidth - 1) - else: - return False - - -def int_power_impl(context, builder, sig, args): - """ - a ^ b, where a is an integer or real, and b an integer - """ - is_integer = isinstance(sig.args[0], types.Integer) - tp = sig.return_type - zerodiv_return = _get_power_zerodiv_return(context, tp) - - def int_power(a, b): - # Ensure computations are done with a large enough width - r = tp(1) - a = tp(a) - if b < 0: - invert = True - exp = -b - if exp < 0: - raise OverflowError - if is_integer: - if a == 0: - if zerodiv_return: - return zerodiv_return - else: - raise ZeroDivisionError("0 cannot be raised to a negative power") - if a != 1 and a != -1: - return 0 - else: - invert = False - exp = b - if exp > 0x10000: - # Optimization cutoff: fallback on the generic algorithm - return math.pow(a, float(b)) - while exp != 0: - if exp & 1: - r *= a - exp >>= 1 - a *= a - - return 1.0 / r if invert else r - - res = context.compile_internal(builder, int_power, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin('**', types.Integer, types.Const) -@lower_builtin('**', types.Float, types.Const) -def static_power_impl(context, builder, sig, args): - """ - a ^ b, where a is an integer or real, and b a constant integer - """ - exp = sig.args[1].value - if not isinstance(exp, numbers.Integral): - raise NotImplementedError - if abs(exp) > 0x10000: - # Optimization cutoff: fallback on the generic algorithm above - raise 
NotImplementedError - invert = exp < 0 - exp = abs(exp) - - tp = sig.return_type - is_integer = isinstance(tp, types.Integer) - zerodiv_return = _get_power_zerodiv_return(context, tp) - - val = context.cast(builder, args[0], sig.args[0], tp) - lty = val.type - - def mul(a, b): - if is_integer: - return builder.mul(a, b) - else: - return builder.fmul(a, b) - - # Unroll the exponentiation loop - res = lty(1) - a = val - while exp != 0: - if exp & 1: - res = mul(res, val) - exp >>= 1 - val = mul(val, val) - - if invert: - # If the exponent was negative, fix the result by inverting it - if is_integer: - # Integer inversion - def invert_impl(a): - if a == 0: - if zerodiv_return: - return zerodiv_return - else: - raise ZeroDivisionError("0 cannot be raised to a negative power") - if a != 1 and a != -1: - return 0 - else: - return a - - else: - # Real inversion - def invert_impl(a): - return 1.0 / a - - res = context.compile_internal(builder, invert_impl, - typing.signature(tp, tp), (res,)) - - return res - - -def int_slt_impl(context, builder, sig, args): - res = builder.icmp(lc.ICMP_SLT, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_sle_impl(context, builder, sig, args): - res = builder.icmp(lc.ICMP_SLE, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_sgt_impl(context, builder, sig, args): - res = builder.icmp(lc.ICMP_SGT, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_sge_impl(context, builder, sig, args): - res = builder.icmp(lc.ICMP_SGE, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_ult_impl(context, builder, sig, args): - res = builder.icmp(lc.ICMP_ULT, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_ule_impl(context, builder, sig, args): - res = builder.icmp(lc.ICMP_ULE, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def 
int_ugt_impl(context, builder, sig, args): - res = builder.icmp(lc.ICMP_UGT, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_uge_impl(context, builder, sig, args): - res = builder.icmp(lc.ICMP_UGE, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_eq_impl(context, builder, sig, args): - res = builder.icmp(lc.ICMP_EQ, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_ne_impl(context, builder, sig, args): - res = builder.icmp(lc.ICMP_NE, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_abs_impl(context, builder, sig, args): - [x] = args - ZERO = Constant.null(x.type) - ltz = builder.icmp(lc.ICMP_SLT, x, ZERO) - negated = builder.neg(x) - res = builder.select(ltz, negated, x) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def uint_abs_impl(context, builder, sig, args): - [x] = args - return impl_ret_untracked(context, builder, sig.return_type, x) - - -def int_shl_impl(context, builder, sig, args): - [valty, amtty] = sig.args - [val, amt] = args - val = context.cast(builder, val, valty, sig.return_type) - amt = context.cast(builder, amt, amtty, sig.return_type) - res = builder.shl(val, amt) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_shr_impl(context, builder, sig, args): - [valty, amtty] = sig.args - [val, amt] = args - val = context.cast(builder, val, valty, sig.return_type) - amt = context.cast(builder, amt, amtty, sig.return_type) - if sig.return_type.signed: - res = builder.ashr(val, amt) - else: - res = builder.lshr(val, amt) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_and_impl(context, builder, sig, args): - [at, bt] = sig.args - [av, bv] = args - cav = context.cast(builder, av, at, sig.return_type) - cbc = context.cast(builder, bv, bt, sig.return_type) - res = builder.and_(cav, cbc) - return 
impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_or_impl(context, builder, sig, args): - [at, bt] = sig.args - [av, bv] = args - cav = context.cast(builder, av, at, sig.return_type) - cbc = context.cast(builder, bv, bt, sig.return_type) - res = builder.or_(cav, cbc) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_xor_impl(context, builder, sig, args): - [at, bt] = sig.args - [av, bv] = args - cav = context.cast(builder, av, at, sig.return_type) - cbc = context.cast(builder, bv, bt, sig.return_type) - res = builder.xor(cav, cbc) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_negate_impl(context, builder, sig, args): - [typ] = sig.args - [val] = args - # Negate before upcasting, for unsigned numbers - res = builder.neg(val) - res = context.cast(builder, res, typ, sig.return_type) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_positive_impl(context, builder, sig, args): - [typ] = sig.args - [val] = args - res = context.cast(builder, val, typ, sig.return_type) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_invert_impl(context, builder, sig, args): - [typ] = sig.args - [val] = args - # Invert before upcasting, for unsigned numbers - res = builder.xor(val, Constant.all_ones(val.type)) - res = context.cast(builder, res, typ, sig.return_type) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def int_sign_impl(context, builder, sig, args): - """ - np.sign(int) - """ - [x] = args - POS = Constant.int(x.type, 1) - NEG = Constant.int(x.type, -1) - ZERO = Constant.int(x.type, 0) - - cmp_zero = builder.icmp(lc.ICMP_EQ, x, ZERO) - cmp_pos = builder.icmp(lc.ICMP_SGT, x, ZERO) - - presult = cgutils.alloca_once(builder, x.type) - - bb_zero = builder.append_basic_block(".zero") - bb_postest = builder.append_basic_block(".postest") - bb_pos = builder.append_basic_block(".pos") - bb_neg = 
builder.append_basic_block(".neg") - bb_exit = builder.append_basic_block(".exit") - - builder.cbranch(cmp_zero, bb_zero, bb_postest) - - with builder.goto_block(bb_zero): - builder.store(ZERO, presult) - builder.branch(bb_exit) - - with builder.goto_block(bb_postest): - builder.cbranch(cmp_pos, bb_pos, bb_neg) - - with builder.goto_block(bb_pos): - builder.store(POS, presult) - builder.branch(bb_exit) - - with builder.goto_block(bb_neg): - builder.store(NEG, presult) - builder.branch(bb_exit) - - builder.position_at_end(bb_exit) - res = builder.load(presult) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def bool_negate_impl(context, builder, sig, args): - [typ] = sig.args - [val] = args - res = context.cast(builder, val, typ, sig.return_type) - res = builder.neg(res) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def bool_unary_positive_impl(context, builder, sig, args): - [typ] = sig.args - [val] = args - res = context.cast(builder, val, typ, sig.return_type) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -lower_builtin('==', types.boolean, types.boolean)(int_eq_impl) -lower_builtin('!=', types.boolean, types.boolean)(int_ne_impl) -lower_builtin('<', types.boolean, types.boolean)(int_ult_impl) -lower_builtin('<=', types.boolean, types.boolean)(int_ule_impl) -lower_builtin('>', types.boolean, types.boolean)(int_ugt_impl) -lower_builtin('>=', types.boolean, types.boolean)(int_uge_impl) -lower_builtin('-', types.boolean)(bool_negate_impl) -lower_builtin('+', types.boolean)(bool_unary_positive_impl) - - -@lower_builtin('==', types.Const, types.Const) -def const_eq_impl(context, builder, sig, args): - arg1, arg2 = sig.args - val = 0 - if arg1.value==arg2.value: - val = 1 - res = ir.Constant(ir.IntType(1), val) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin('!=', types.Const, types.Const) -def const_eq_impl(context, builder, sig, args): - arg1, arg2 = 
sig.args - val = 0 - if arg1.value!=arg2.value: - val = 1 - res = ir.Constant(ir.IntType(1), val) - return impl_ret_untracked(context, builder, sig.return_type, res) - -def _implement_integer_operators(): - ty = types.Integer - - lower_builtin('+', ty, ty)(int_add_impl) - lower_builtin('-', ty, ty)(int_sub_impl) - lower_builtin('*', ty, ty)(int_mul_impl) - lower_builtin('==', ty, ty)(int_eq_impl) - lower_builtin('!=', ty, ty)(int_ne_impl) - - lower_builtin('<<', ty, ty)(int_shl_impl) - lower_builtin('>>', ty, ty)(int_shr_impl) - - lower_builtin('-', ty)(int_negate_impl) - lower_builtin('+', ty)(int_positive_impl) - - lower_builtin('**', ty, ty)(int_power_impl) - lower_builtin(pow, ty, ty)(int_power_impl) - - for ty in types.unsigned_domain: - lower_builtin('<', ty, ty)(int_ult_impl) - lower_builtin('<=', ty, ty)(int_ule_impl) - lower_builtin('>', ty, ty)(int_ugt_impl) - lower_builtin('>=', ty, ty)(int_uge_impl) - lower_builtin('**', types.Float, ty)(int_power_impl) - lower_builtin(pow, types.Float, ty)(int_power_impl) - lower_builtin(abs, ty)(uint_abs_impl) - - for ty in types.signed_domain: - lower_builtin('<', ty, ty)(int_slt_impl) - lower_builtin('<=', ty, ty)(int_sle_impl) - lower_builtin('>', ty, ty)(int_sgt_impl) - lower_builtin('>=', ty, ty)(int_sge_impl) - lower_builtin('**', types.Float, ty)(int_power_impl) - lower_builtin(pow, types.Float, ty)(int_power_impl) - lower_builtin(abs, ty)(int_abs_impl) - -def _implement_bitwise_operators(): - for ty in (types.Boolean, types.Integer): - lower_builtin('&', ty, ty)(int_and_impl) - lower_builtin('|', ty, ty)(int_or_impl) - lower_builtin('^', ty, ty)(int_xor_impl) - - lower_builtin('~', ty)(int_invert_impl) - -_implement_integer_operators() - -_implement_bitwise_operators() - - -def real_add_impl(context, builder, sig, args): - res = builder.fadd(*args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def real_sub_impl(context, builder, sig, args): - res = builder.fsub(*args) - return 
impl_ret_untracked(context, builder, sig.return_type, res) - - -def real_mul_impl(context, builder, sig, args): - res = builder.fmul(*args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def real_div_impl(context, builder, sig, args): - with cgutils.if_zero(builder, args[1]): - context.error_model.fp_zero_division(builder, ("division by zero",)) - res = builder.fdiv(*args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def real_divmod(context, builder, x, y): - assert x.type == y.type - floatty = x.type - - module = builder.module - fname = context.mangler(".numba.python.rem", [x.type]) - fnty = Type.function(floatty, (floatty, floatty, Type.pointer(floatty))) - fn = module.get_or_insert_function(fnty, fname) - - if fn.is_declaration: - fn.linkage = lc.LINKAGE_LINKONCE_ODR - fnbuilder = lc.Builder(fn.append_basic_block('entry')) - fx, fy, pmod = fn.args - div, mod = real_divmod_func_body(context, fnbuilder, fx, fy) - fnbuilder.store(mod, pmod) - fnbuilder.ret(div) - - pmod = cgutils.alloca_once(builder, floatty) - quotient = builder.call(fn, (x, y, pmod)) - return quotient, builder.load(pmod) - - -def real_divmod_func_body(context, builder, vx, wx): - # Reference Objects/floatobject.c - # - # float_divmod(PyObject *v, PyObject *w) - # { - # double vx, wx; - # double div, mod, floordiv; - # CONVERT_TO_DOUBLE(v, vx); - # CONVERT_TO_DOUBLE(w, wx); - # mod = fmod(vx, wx); - # /* fmod is typically exact, so vx-mod is *mathematically* an - # exact multiple of wx. But this is fp arithmetic, and fp - # vx - mod is an approximation; the result is that div may - # not be an exact integral value after the division, although - # it will always be very close to one. 
- # */ - # div = (vx - mod) / wx; - # if (mod) { - # /* ensure the remainder has the same sign as the denominator */ - # if ((wx < 0) != (mod < 0)) { - # mod += wx; - # div -= 1.0; - # } - # } - # else { - # /* the remainder is zero, and in the presence of signed zeroes - # fmod returns different results across platforms; ensure - # it has the same sign as the denominator; we'd like to do - # "mod = wx * 0.0", but that may get optimized away */ - # mod *= mod; /* hide "mod = +0" from optimizer */ - # if (wx < 0.0) - # mod = -mod; - # } - # /* snap quotient to nearest integral value */ - # if (div) { - # floordiv = floor(div); - # if (div - floordiv > 0.5) - # floordiv += 1.0; - # } - # else { - # /* div is zero - get the same sign as the true quotient */ - # div *= div; /* hide "div = +0" from optimizers */ - # floordiv = div * vx / wx; /* zero w/ sign of vx/wx */ - # } - # return Py_BuildValue("(dd)", floordiv, mod); - # } - pmod = cgutils.alloca_once(builder, vx.type) - pdiv = cgutils.alloca_once(builder, vx.type) - pfloordiv = cgutils.alloca_once(builder, vx.type) - - mod = builder.frem(vx, wx) - div = builder.fdiv(builder.fsub(vx, mod), wx) - - builder.store(mod, pmod) - builder.store(div, pdiv) - - # Note the use of negative zero for proper negating with `ZERO - x` - ZERO = vx.type(0.0) - NZERO = vx.type(-0.0) - ONE = vx.type(1.0) - mod_istrue = builder.fcmp_unordered('!=', mod, ZERO) - wx_ltz = builder.fcmp_ordered('<', wx, ZERO) - mod_ltz = builder.fcmp_ordered('<', mod, ZERO) - - with builder.if_else(mod_istrue, likely=True) as (if_nonzero_mod, if_zero_mod): - with if_nonzero_mod: - # `mod` is non-zero or NaN - # Ensure the remainder has the same sign as the denominator - wx_ltz_ne_mod_ltz = builder.icmp(lc.ICMP_NE, wx_ltz, mod_ltz) - - with builder.if_then(wx_ltz_ne_mod_ltz): - builder.store(builder.fsub(div, ONE), pdiv) - builder.store(builder.fadd(mod, wx), pmod) - - with if_zero_mod: - # `mod` is zero, select the proper sign depending on - # the 
denominator's sign - mod = builder.select(wx_ltz, NZERO, ZERO) - builder.store(mod, pmod) - - del mod, div - - div = builder.load(pdiv) - div_istrue = builder.fcmp(lc.FCMP_ONE, div, ZERO) - - with builder.if_then(div_istrue): - realtypemap = {'float': types.float32, - 'double': types.float64} - realtype = realtypemap[str(wx.type)] - floorfn = context.get_function(math.floor, - typing.signature(realtype, realtype)) - floordiv = floorfn(builder, [div]) - floordivdiff = builder.fsub(div, floordiv) - floordivincr = builder.fadd(floordiv, ONE) - HALF = Constant.real(wx.type, 0.5) - pred = builder.fcmp(lc.FCMP_OGT, floordivdiff, HALF) - floordiv = builder.select(pred, floordivincr, floordiv) - builder.store(floordiv, pfloordiv) - - with cgutils.ifnot(builder, div_istrue): - div = builder.fmul(div, div) - builder.store(div, pdiv) - floordiv = builder.fdiv(builder.fmul(div, vx), wx) - builder.store(floordiv, pfloordiv) - - return builder.load(pfloordiv), builder.load(pmod) - - -@lower_builtin(divmod, types.Float, types.Float) -def real_divmod_impl(context, builder, sig, args): - x, y = args - quot = cgutils.alloca_once(builder, x.type, name="quot") - rem = cgutils.alloca_once(builder, x.type, name="rem") - - with builder.if_else(cgutils.is_scalar_zero(builder, y), likely=False - ) as (if_zero, if_non_zero): - with if_zero: - if not context.error_model.fp_zero_division( - builder, ("modulo by zero",)): - # No exception raised => compute the nan result, - # and set the FP exception word for Numpy warnings. 
- q = builder.fdiv(x, y) - r = builder.frem(x, y) - builder.store(q, quot) - builder.store(r, rem) - with if_non_zero: - q, r = real_divmod(context, builder, x, y) - builder.store(q, quot) - builder.store(r, rem) - - return cgutils.pack_array(builder, - (builder.load(quot), builder.load(rem))) - - -def real_mod_impl(context, builder, sig, args): - x, y = args - res = cgutils.alloca_once(builder, x.type) - with builder.if_else(cgutils.is_scalar_zero(builder, y), likely=False - ) as (if_zero, if_non_zero): - with if_zero: - if not context.error_model.fp_zero_division( - builder, ("modulo by zero",)): - # No exception raised => compute the nan result, - # and set the FP exception word for Numpy warnings. - rem = builder.frem(x, y) - builder.store(rem, res) - with if_non_zero: - _, rem = real_divmod(context, builder, x, y) - builder.store(rem, res) - return impl_ret_untracked(context, builder, sig.return_type, - builder.load(res)) - - -def real_floordiv_impl(context, builder, sig, args): - x, y = args - res = cgutils.alloca_once(builder, x.type) - with builder.if_else(cgutils.is_scalar_zero(builder, y), likely=False - ) as (if_zero, if_non_zero): - with if_zero: - if not context.error_model.fp_zero_division( - builder, ("division by zero",)): - # No exception raised => compute the +/-inf or nan result, - # and set the FP exception word for Numpy warnings. 
- quot = builder.fdiv(x, y) - builder.store(quot, res) - with if_non_zero: - quot, _ = real_divmod(context, builder, x, y) - builder.store(quot, res) - return impl_ret_untracked(context, builder, sig.return_type, - builder.load(res)) - - -def real_power_impl(context, builder, sig, args): - x, y = args - module = builder.module - if context.implement_powi_as_math_call: - imp = context.get_function(math.pow, sig) - res = imp(builder, args) - else: - fn = lc.Function.intrinsic(module, lc.INTR_POW, [y.type]) - res = builder.call(fn, (x, y)) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def real_lt_impl(context, builder, sig, args): - res = builder.fcmp(lc.FCMP_OLT, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def real_le_impl(context, builder, sig, args): - res = builder.fcmp(lc.FCMP_OLE, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def real_gt_impl(context, builder, sig, args): - res = builder.fcmp(lc.FCMP_OGT, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def real_ge_impl(context, builder, sig, args): - res = builder.fcmp(lc.FCMP_OGE, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def real_eq_impl(context, builder, sig, args): - res = builder.fcmp(lc.FCMP_OEQ, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def real_ne_impl(context, builder, sig, args): - res = builder.fcmp(lc.FCMP_UNE, *args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def real_abs_impl(context, builder, sig, args): - [ty] = sig.args - sig = typing.signature(ty, ty) - impl = context.get_function(math.fabs, sig) - return impl(builder, args) - - -def real_negate_impl(context, builder, sig, args): - from . 
import mathimpl - res = mathimpl.negate_real(builder, args[0]) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def real_positive_impl(context, builder, sig, args): - [typ] = sig.args - [val] = args - res = context.cast(builder, val, typ, sig.return_type) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def real_sign_impl(context, builder, sig, args): - """ - np.sign(float) - """ - [x] = args - POS = Constant.real(x.type, 1) - NEG = Constant.real(x.type, -1) - ZERO = Constant.real(x.type, 0) - - presult = cgutils.alloca_once(builder, x.type) - - is_pos = builder.fcmp(lc.FCMP_OGT, x, ZERO) - is_neg = builder.fcmp(lc.FCMP_OLT, x, ZERO) - - with builder.if_else(is_pos) as (gt_zero, not_gt_zero): - with gt_zero: - builder.store(POS, presult) - with not_gt_zero: - with builder.if_else(is_neg) as (lt_zero, not_lt_zero): - with lt_zero: - builder.store(NEG, presult) - with not_lt_zero: - # For both NaN and 0, the result of sign() is simply - # the input value. 
- builder.store(x, presult) - - res = builder.load(presult) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -ty = types.Float - -lower_builtin('+', ty, ty)(real_add_impl) -lower_builtin('-', ty, ty)(real_sub_impl) -lower_builtin('*', ty, ty)(real_mul_impl) -lower_builtin('/?', ty, ty)(real_div_impl) -lower_builtin('//', ty, ty)(real_floordiv_impl) -lower_builtin('/', ty, ty)(real_div_impl) -lower_builtin('%', ty, ty)(real_mod_impl) -lower_builtin('**', ty, ty)(real_power_impl) -lower_builtin(pow, ty, ty)(real_power_impl) - -lower_builtin('==', ty, ty)(real_eq_impl) -lower_builtin('!=', ty, ty)(real_ne_impl) -lower_builtin('<', ty, ty)(real_lt_impl) -lower_builtin('<=', ty, ty)(real_le_impl) -lower_builtin('>', ty, ty)(real_gt_impl) -lower_builtin('>=', ty, ty)(real_ge_impl) - -lower_builtin(abs, ty)(real_abs_impl) - -lower_builtin('-', ty)(real_negate_impl) -lower_builtin('+', ty)(real_positive_impl) - -del ty - - -@lower_getattr(types.Complex, "real") -def complex_real_impl(context, builder, typ, value): - cplx = context.make_complex(builder, typ, value=value) - res = cplx.real - return impl_ret_untracked(context, builder, typ, res) - -@lower_getattr(types.Complex, "imag") -def complex_imag_impl(context, builder, typ, value): - cplx = context.make_complex(builder, typ, value=value) - res = cplx.imag - return impl_ret_untracked(context, builder, typ, res) - -@lower_builtin("complex.conjugate", types.Complex) -def complex_conjugate_impl(context, builder, sig, args): - from . 
import mathimpl - z = context.make_complex(builder, sig.args[0], args[0]) - z.imag = mathimpl.negate_real(builder, z.imag) - res = z._getvalue() - return impl_ret_untracked(context, builder, sig.return_type, res) - -def real_real_impl(context, builder, typ, value): - return impl_ret_untracked(context, builder, typ, value) - -def real_imag_impl(context, builder, typ, value): - res = cgutils.get_null_value(value.type) - return impl_ret_untracked(context, builder, typ, res) - -def real_conjugate_impl(context, builder, sig, args): - return impl_ret_untracked(context, builder, sig.return_type, args[0]) - -for cls in (types.Float, types.Integer): - lower_getattr(cls, "real")(real_real_impl) - lower_getattr(cls, "imag")(real_imag_impl) - lower_builtin("complex.conjugate", cls)(real_conjugate_impl) - - -@lower_builtin("**", types.Complex, types.Complex) -@lower_builtin(pow, types.Complex, types.Complex) -def complex_power_impl(context, builder, sig, args): - [ca, cb] = args - ty = sig.args[0] - fty = ty.underlying_float - a = context.make_helper(builder, ty, value=ca) - b = context.make_helper(builder, ty, value=cb) - c = context.make_helper(builder, ty) - module = builder.module - pa = a._getpointer() - pb = b._getpointer() - pc = c._getpointer() - - # Optimize for square because cpow loses a lot of precision - TWO = context.get_constant(fty, 2) - ZERO = context.get_constant(fty, 0) - - b_real_is_two = builder.fcmp_ordered('==', b.real, TWO) - b_imag_is_zero = builder.fcmp_ordered('==', b.imag, ZERO) - b_is_two = builder.and_(b_real_is_two, b_imag_is_zero) - - with builder.if_else(b_is_two) as (then, otherwise): - with then: - # Lower as multiplication - res = complex_mul_impl(context, builder, sig, (ca, ca)) - cres = context.make_helper(builder, ty, value=res) - c.real = cres.real - c.imag = cres.imag - - with otherwise: - # Lower with call to external function - func_name = { - types.complex64: "numba_cpowf", - types.complex128: "numba_cpow", - }[ty] - fnty = 
Type.function(Type.void(), [pa.type] * 3) - cpow = module.get_or_insert_function(fnty, name=func_name) - builder.call(cpow, (pa, pb, pc)) - - res = builder.load(pc) - return impl_ret_untracked(context, builder, sig.return_type, res) - -def complex_add_impl(context, builder, sig, args): - [cx, cy] = args - ty = sig.args[0] - x = context.make_complex(builder, ty, value=cx) - y = context.make_complex(builder, ty, value=cy) - z = context.make_complex(builder, ty) - a = x.real - b = x.imag - c = y.real - d = y.imag - z.real = builder.fadd(a, c) - z.imag = builder.fadd(b, d) - res = z._getvalue() - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def complex_sub_impl(context, builder, sig, args): - [cx, cy] = args - ty = sig.args[0] - x = context.make_complex(builder, ty, value=cx) - y = context.make_complex(builder, ty, value=cy) - z = context.make_complex(builder, ty) - a = x.real - b = x.imag - c = y.real - d = y.imag - z.real = builder.fsub(a, c) - z.imag = builder.fsub(b, d) - res = z._getvalue() - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def complex_mul_impl(context, builder, sig, args): - """ - (a+bi)(c+di)=(ac-bd)+i(ad+bc) - """ - [cx, cy] = args - ty = sig.args[0] - x = context.make_complex(builder, ty, value=cx) - y = context.make_complex(builder, ty, value=cy) - z = context.make_complex(builder, ty) - a = x.real - b = x.imag - c = y.real - d = y.imag - ac = builder.fmul(a, c) - bd = builder.fmul(b, d) - ad = builder.fmul(a, d) - bc = builder.fmul(b, c) - z.real = builder.fsub(ac, bd) - z.imag = builder.fadd(ad, bc) - res = z._getvalue() - return impl_ret_untracked(context, builder, sig.return_type, res) - - -NAN = float('nan') - -def complex_div_impl(context, builder, sig, args): - def complex_div(a, b): - # This is CPython's algorithm (in _Py_c_quot()). 
- areal = a.real - aimag = a.imag - breal = b.real - bimag = b.imag - if not breal and not bimag: - raise ZeroDivisionError("complex division by zero") - if abs(breal) >= abs(bimag): - # Divide tops and bottom by b.real - if not breal: - return complex(NAN, NAN) - ratio = bimag / breal - denom = breal + bimag * ratio - return complex( - (areal + aimag * ratio) / denom, - (aimag - areal * ratio) / denom) - else: - # Divide tops and bottom by b.imag - if not bimag: - return complex(NAN, NAN) - ratio = breal / bimag - denom = breal * ratio + bimag - return complex( - (a.real * ratio + a.imag) / denom, - (a.imag * ratio - a.real) / denom) - - res = context.compile_internal(builder, complex_div, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def complex_negate_impl(context, builder, sig, args): - from . import mathimpl - [typ] = sig.args - [val] = args - cmplx = context.make_complex(builder, typ, value=val) - res = context.make_complex(builder, typ) - res.real = mathimpl.negate_real(builder, cmplx.real) - res.imag = mathimpl.negate_real(builder, cmplx.imag) - res = res._getvalue() - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def complex_positive_impl(context, builder, sig, args): - [val] = args - return impl_ret_untracked(context, builder, sig.return_type, val) - - -def complex_eq_impl(context, builder, sig, args): - [cx, cy] = args - typ = sig.args[0] - x = context.make_complex(builder, typ, value=cx) - y = context.make_complex(builder, typ, value=cy) - - reals_are_eq = builder.fcmp(lc.FCMP_OEQ, x.real, y.real) - imags_are_eq = builder.fcmp(lc.FCMP_OEQ, x.imag, y.imag) - res = builder.and_(reals_are_eq, imags_are_eq) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def complex_ne_impl(context, builder, sig, args): - [cx, cy] = args - typ = sig.args[0] - x = context.make_complex(builder, typ, value=cx) - y = context.make_complex(builder, typ, value=cy) - - reals_are_ne = 
builder.fcmp(lc.FCMP_UNE, x.real, y.real) - imags_are_ne = builder.fcmp(lc.FCMP_UNE, x.imag, y.imag) - res = builder.or_(reals_are_ne, imags_are_ne) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def complex_abs_impl(context, builder, sig, args): - """ - abs(z) := hypot(z.real, z.imag) - """ - def complex_abs(z): - return math.hypot(z.real, z.imag) - - res = context.compile_internal(builder, complex_abs, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -ty = types.Complex - -lower_builtin("+", ty, ty)(complex_add_impl) -lower_builtin("-", ty, ty)(complex_sub_impl) -lower_builtin("*", ty, ty)(complex_mul_impl) -lower_builtin("/?", ty, ty)(complex_div_impl) -lower_builtin("/", ty, ty)(complex_div_impl) -lower_builtin("-", ty)(complex_negate_impl) -lower_builtin("+", ty)(complex_positive_impl) -# Complex modulo is deprecated in python3 - -lower_builtin('==', ty, ty)(complex_eq_impl) -lower_builtin('!=', ty, ty)(complex_ne_impl) - -lower_builtin(abs, ty)(complex_abs_impl) - -del ty - - -@lower_builtin("number.item", types.Boolean) -@lower_builtin("number.item", types.Number) -def number_item_impl(context, builder, sig, args): - """ - The no-op .item() method on booleans and numbers. 
- """ - return args[0] - - -#------------------------------------------------------------------------------ - - -def number_not_impl(context, builder, sig, args): - [typ] = sig.args - [val] = args - istrue = context.cast(builder, val, typ, sig.return_type) - res = builder.not_(istrue) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_builtin(bool, types.boolean) -def bool_as_bool(context, builder, sig, args): - [val] = args - return val - -@lower_builtin(bool, types.Integer) -def int_as_bool(context, builder, sig, args): - [val] = args - return builder.icmp_unsigned('!=', val, ir.Constant(val.type, 0)) - -@lower_builtin(bool, types.Float) -def float_as_bool(context, builder, sig, args): - [val] = args - return builder.fcmp(lc.FCMP_UNE, val, ir.Constant(val.type, 0.0)) - -@lower_builtin(bool, types.Complex) -def complex_as_bool(context, builder, sig, args): - [typ] = sig.args - [val] = args - cmplx = context.make_complex(builder, typ, val) - real, imag = cmplx.real, cmplx.imag - zero = ir.Constant(real.type, 0.0) - real_istrue = builder.fcmp(lc.FCMP_UNE, real, zero) - imag_istrue = builder.fcmp(lc.FCMP_UNE, imag, zero) - return builder.or_(real_istrue, imag_istrue) - - -for ty in (types.Integer, types.Float, types.Complex): - lower_builtin('not', ty)(number_not_impl) - -lower_builtin('not', types.boolean)(number_not_impl) - - -#------------------------------------------------------------------------------ -# Hashing numbers - -@lower_builtin(hash, types.Integer) -@lower_builtin(hash, types.Boolean) -def hash_int(context, builder, sig, args): - ty, = sig.args - retty = sig.return_type - val, = args - - if isinstance(ty, types.Integer) and ty.bitwidth > retty.bitwidth: - # Value is wider than hash => fold MSB into LSB - nbits = ty.bitwidth - retty.bitwidth - val = builder.add(val, - builder.lshr(val, ir.Constant(val.type, nbits))) - - return context.cast(builder, val, ty, retty) - -@lower_builtin(hash, types.Float) -def 
hash_float(context, builder, sig, args): - ty, = sig.args - retty = sig.return_type - val, = args - - # NOTE: CPython's algorithm is more involved as it seeks to maintain - # the invariant that hash(float(x)) == hash(x) for every integer x - # exactly representable as a float. - # Numba doesn't care as it doesn't support heterogeneous associative - # containers. - - intty = types.Integer("int%d" % ty.bitwidth) - ll_intty = ir.IntType(ty.bitwidth) - - # XXX Disabled as llvm.canonicalize doesn't work: - # http://lists.llvm.org/pipermail/llvm-dev/2016-February/095746.html - #func_name = "llvm.canonicalize.f%d" % (ty.bitwidth,) - #fnty = ir.FunctionType(val.type, (val.type,)) - #fn = builder.module.get_or_insert_function(fnty, func_name) - #val = builder.call(fn, (val,)) - - # Take the float's binary representation as an int - val_p = cgutils.alloca_once_value(builder, val) - # y = *(int *)(&val) - y = builder.load(builder.bitcast(val_p, ll_intty.as_pointer())) - - if intty.bitwidth > retty.bitwidth: - # Value is wider than hash => fold MSB into LSB - nbits = intty.bitwidth - retty.bitwidth - y = builder.add(y, - builder.lshr(y, ir.Constant(y.type, nbits))) - - return context.cast(builder, y, intty, retty) - -@lower_builtin(hash, types.Complex) -def hash_complex(context, builder, sig, args): - ty, = sig.args - val, = args - fltty = ty.underlying_float - - z = context.make_complex(builder, ty, val) - float_hash_sig = typing.signature(sig.return_type, fltty) - h_real = hash_float(context, builder, float_hash_sig, (z.real,)) - h_imag = hash_float(context, builder, float_hash_sig, (z.imag,)) - mult = ir.Constant(h_imag.type, 1000003) - - return builder.add(h_real, builder.mul(h_imag, mult)) - - -#------------------------------------------------------------------------------- -# Implicit casts between numerics - -@lower_cast(types.Integer, types.Const) -def integer_to_constant(context, builder, fromty, toty, val): - # Perform runtime check to ensure that the runtime value - 
# matches the expected constant. - # The violation would imply an internal error. - # The runtime checking logic cannot be tested automatically. - # The easiest way to test is to change the comparison from `!=` to `==` - # so that the exception will raise when the expection is met. - const = context.get_constant(fromty, toty.value) - matches = builder.icmp_unsigned('!=', val, const) - with cgutils.if_unlikely(builder, matches): - # Raise RuntimeError about the assumption violation - usermsg = "numba constant integer assumption violated" - errmsg = "{}: expecting {}".format(usermsg, toty.value) - context.call_conv.return_user_exc(builder, RuntimeError, (errmsg,)) - return cgutils.get_null_value(context.get_value_type(toty)) - - -@lower_cast(types.Integer, types.Integer) -def integer_to_integer(context, builder, fromty, toty, val): - if toty.bitwidth == fromty.bitwidth: - # Just a change of signedness - return val - elif toty.bitwidth < fromty.bitwidth: - # Downcast - return builder.trunc(val, context.get_value_type(toty)) - elif fromty.signed: - # Signed upcast - return builder.sext(val, context.get_value_type(toty)) - else: - # Unsigned upcast - return builder.zext(val, context.get_value_type(toty)) - -@lower_cast(types.Integer, types.voidptr) -def integer_to_voidptr(context, builder, fromty, toty, val): - return builder.inttoptr(val, context.get_value_type(toty)) - -@lower_cast(types.Float, types.Float) -def float_to_float(context, builder, fromty, toty, val): - lty = context.get_value_type(toty) - if fromty.bitwidth < toty.bitwidth: - return builder.fpext(val, lty) - else: - return builder.fptrunc(val, lty) - -@lower_cast(types.Integer, types.Float) -def integer_to_float(context, builder, fromty, toty, val): - lty = context.get_value_type(toty) - if fromty.signed: - return builder.sitofp(val, lty) - else: - return builder.uitofp(val, lty) - -@lower_cast(types.Float, types.Integer) -def float_to_integer(context, builder, fromty, toty, val): - lty = 
context.get_value_type(toty) - if toty.signed: - return builder.fptosi(val, lty) - else: - return builder.fptoui(val, lty) - -@lower_cast(types.Float, types.Complex) -@lower_cast(types.Integer, types.Complex) -def non_complex_to_complex(context, builder, fromty, toty, val): - real = context.cast(builder, val, fromty, toty.underlying_float) - imag = context.get_constant(toty.underlying_float, 0) - - cmplx = context.make_complex(builder, toty) - cmplx.real = real - cmplx.imag = imag - return cmplx._getvalue() - -@lower_cast(types.Complex, types.Complex) -def complex_to_complex(context, builder, fromty, toty, val): - srcty = fromty.underlying_float - dstty = toty.underlying_float - - src = context.make_complex(builder, fromty, value=val) - dst = context.make_complex(builder, toty) - dst.real = context.cast(builder, src.real, srcty, dstty) - dst.imag = context.cast(builder, src.imag, srcty, dstty) - return dst._getvalue() - -@lower_cast(types.Any, types.Boolean) -def any_to_boolean(context, builder, fromty, toty, val): - return context.is_true(builder, fromty, val) - -@lower_cast(types.Boolean, types.Number) -def boolean_to_any(context, builder, fromty, toty, val): - # Casting from boolean to anything first casts to int32 - asint = builder.zext(val, Type.int()) - return context.cast(builder, asint, types.int32, toty) - - -#------------------------------------------------------------------------------- -# Constants - -@lower_constant(types.Complex) -def constant_complex(context, builder, ty, pyval): - fty = ty.underlying_float - real = context.get_constant_generic(builder, fty, pyval.real) - imag = context.get_constant_generic(builder, fty, pyval.imag) - return ir.Constant.literal_struct((real, imag)) - -@lower_constant(types.Integer) -@lower_constant(types.Float) -@lower_constant(types.Boolean) -def constant_integer(context, builder, ty, pyval): - lty = context.get_value_type(ty) - return lty(pyval) diff --git a/numba/numba/targets/operatorimpl.py 
b/numba/numba/targets/operatorimpl.py deleted file mode 100644 index d7bcf4e05..000000000 --- a/numba/numba/targets/operatorimpl.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -Definition of implementations for the `operator` module. -""" - -import operator - -from numba.targets.imputils import Registry -from numba.targets import builtins -from numba import types, utils, typing - -registry = Registry() -lower = registry.lower - - -# Redirect the implementation of operator module functions to the -# the corresponding built-in operators. - -def map_operator(name, inplace_name, op): - op_func = getattr(operator, name) - - reverse_args = (op == 'in') - - @lower(op_func, types.VarArg(types.Any)) - def binop_impl(context, builder, sig, args): - if reverse_args: - args = args[::-1] - sig = typing.signature(sig.return_type, *sig.args[::-1]) - impl = context.get_function(op, sig) - return impl(builder, args) - - if inplace_name: - op_func = getattr(operator, inplace_name) - - @lower(op_func, types.VarArg(types.Any)) - def binop_inplace_impl(context, builder, sig, args): - first = sig.args[0] - if first.mutable: - impl = context.get_function(op + '=', sig) - else: - impl = context.get_function(op, sig) - return impl(builder, args) - - -for name, inplace_name, op in utils.operator_map: - map_operator(name, inplace_name, op) diff --git a/numba/numba/targets/optional.py b/numba/numba/targets/optional.py deleted file mode 100644 index bf011d1fe..000000000 --- a/numba/numba/targets/optional.py +++ /dev/null @@ -1,120 +0,0 @@ -from __future__ import print_function, absolute_import, division - -from numba import types, cgutils, typing - -from .imputils import (lower_cast, lower_builtin, lower_getattr_generic, - impl_ret_untracked, lower_setattr_generic) - - -def always_return_true_impl(context, builder, sig, args): - return cgutils.true_bit - - -def always_return_false_impl(context, builder, sig, args): - return cgutils.false_bit - - -def optional_is_none(context, builder, sig, args): - 
""" - Check if an Optional value is invalid - """ - [lty, rty] = sig.args - [lval, rval] = args - - # Make sure None is on the right - if lty == types.none: - lty, rty = rty, lty - lval, rval = rval, lval - - opt_type = lty - opt_val = lval - - opt = context.make_helper(builder, opt_type, opt_val) - res = builder.not_(cgutils.as_bool_bit(builder, opt.valid)) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -# None is/not None -lower_builtin('is', types.none, types.none)(always_return_true_impl) - -# Optional is None -lower_builtin('is', types.Optional, types.none)(optional_is_none) -lower_builtin('is', types.none, types.Optional)(optional_is_none) - - -@lower_getattr_generic(types.Optional) -def optional_getattr(context, builder, typ, value, attr): - """ - Optional.__getattr__ => redirect to the wrapped type. - """ - inner_type = typ.type - val = context.cast(builder, value, typ, inner_type) - imp = context.get_getattr(inner_type, attr) - return imp(context, builder, inner_type, val, attr) - - -@lower_setattr_generic(types.Optional) -def optional_setattr(context, builder, sig, args, attr): - """ - Optional.__setattr__ => redirect to the wrapped type. - """ - basety, valty = sig.args - target, val = args - target_type = basety.type - target = context.cast(builder, target, basety, target_type) - - newsig = typing.signature(sig.return_type, target_type, valty) - imp = context.get_setattr(attr, newsig) - return imp(builder, (target, val)) - - -@lower_cast(types.Optional, types.Optional) -def optional_to_optional(context, builder, fromty, toty, val): - """ - The handling of optional->optional cast must be special cased for - correct propagation of None value. Given type T and U. casting of - T? to U? (? denotes optional) should always succeed. If the from-value - is None, the None value the casted value (U?) should be None; otherwise, - the from-value is casted to U. This is different from casting T? 
to U, - which requires the from-value must not be None. - """ - optval = context.make_helper(builder, fromty, value=val) - validbit = cgutils.as_bool_bit(builder, optval.valid) - # Create uninitialized optional value - outoptval = context.make_helper(builder, toty) - - with builder.if_else(validbit) as (is_valid, is_not_valid): - with is_valid: - # Cast internal value - outoptval.valid = cgutils.true_bit - outoptval.data = context.cast(builder, optval.data, - fromty.type, toty.type) - - with is_not_valid: - # Store None to result - outoptval.valid = cgutils.false_bit - outoptval.data = cgutils.get_null_value( - outoptval.data.type) - - return outoptval._getvalue() - - -@lower_cast(types.Any, types.Optional) -def any_to_optional(context, builder, fromty, toty, val): - if fromty == types.none: - return context.make_optional_none(builder, toty.type) - else: - val = context.cast(builder, val, fromty, toty.type) - return context.make_optional_value(builder, toty.type, val) - - -@lower_cast(types.Optional, types.Any) -@lower_cast(types.Optional, types.Boolean) -def optional_to_any(context, builder, fromty, toty, val): - optval = context.make_helper(builder, fromty, value=val) - validbit = cgutils.as_bool_bit(builder, optval.valid) - with builder.if_then(builder.not_(validbit), likely=False): - msg = "expected %s, got None" % (fromty.type,) - context.call_conv.return_user_exc(builder, TypeError, (msg,)) - - return context.cast(builder, optval.data, fromty.type, toty) diff --git a/numba/numba/targets/options.py b/numba/numba/targets/options.py deleted file mode 100644 index e06b3513e..000000000 --- a/numba/numba/targets/options.py +++ /dev/null @@ -1,80 +0,0 @@ -""" -Target Options -""" -from __future__ import print_function, division, absolute_import - -from .. 
import config - -class TargetOptions(object): - OPTIONS = {} - - def __init__(self): - self.values = {} - - def from_dict(self, dic): - for k, v in dic.items(): - try: - ctor = self.OPTIONS[k] - except KeyError: - fmt = "%r does not support option: '%s'" - raise KeyError(fmt % (self.__class__, k)) - else: - self.values[k] = ctor(v) - - @classmethod - def parse_as_flags(cls, flags, options): - opt = cls() - opt.from_dict(options) - opt.set_flags(flags) - return flags - - def set_flags(self, flags): - """ - Provide default flags setting logic. - Subclass can override. - """ - kws = self.values.copy() - - if kws.pop('nopython', False) == False: - flags.set("enable_pyobject") - - if kws.pop("forceobj", False): - flags.set("force_pyobject") - - if kws.pop('looplift', True): - flags.set("enable_looplift") - - if kws.pop('boundcheck', False): - flags.set("boundcheck") - - if kws.pop('_nrt', True): - flags.set("nrt") - - if kws.pop('debug', config.DEBUGINFO_DEFAULT): - flags.set("debuginfo") - flags.set("boundcheck") - - if kws.pop('nogil', False): - flags.set("release_gil") - - if kws.pop('no_rewrites', False): - flags.set('no_rewrites') - - if kws.pop('no_cpython_wrapper', False): - flags.set('no_cpython_wrapper') - - if 'parallel' in kws: - flags.set('auto_parallel', kws.pop('parallel')) - - if kws.pop('fastmath', False): - flags.set('fastmath') - - if 'error_model' in kws: - flags.set('error_model', kws.pop('error_model')) - - flags.set("enable_pyobject_looplift") - - if kws: - # Unread options? - raise NameError("Unrecognized options: %s" % kws.keys()) - diff --git a/numba/numba/targets/polynomial.py b/numba/numba/targets/polynomial.py deleted file mode 100644 index 13a03bfb8..000000000 --- a/numba/numba/targets/polynomial.py +++ /dev/null @@ -1,59 +0,0 @@ -""" -Implementation of operations involving polynomials. 
-""" - -from __future__ import print_function, absolute_import, division - -import numpy as np - -from numba import types, jit -from numba.extending import overload -from numba import numpy_support as np_support - - -@overload(np.roots) -def roots_impl(p): - - # cast int vectors to float cf. numpy, this is a bit dicey as - # the roots could be complex which will fail anyway - ty = getattr(p, 'dtype', p) - if isinstance(ty, types.Integer): - cast_t = np.float64 - else: - cast_t = np_support.as_dtype(ty) - - def roots_impl(p): - # impl based on numpy: - # https://github.com/numpy/numpy/blob/master/numpy/lib/polynomial.py - - if len(p.shape) != 1: - raise ValueError("Input must be a 1d array.") - - non_zero = np.nonzero(p)[0] - - if len(non_zero) == 0: - return np.zeros(0, dtype=cast_t) - - tz = len(p) - non_zero[-1] - 1 - - # pull out the coeffs selecting between possible zero pads - p = p[int(non_zero[0]):int(non_zero[-1]) + 1] - - n = len(p) - if n > 1: - # construct companion matrix, ensure fortran order - # to give to eigvals, write to upper diag and then - # transpose. - A = np.diag(np.ones((n - 2,), cast_t), 1).T - A[0, :] = -p[1:] / p[0] # normalize - roots = np.linalg.eigvals(A) - else: - roots = np.zeros(0, dtype=cast_t) - - # add in additional zeros on the end if needed - if tz > 0: - return np.hstack((roots, np.zeros(tz, dtype=cast_t))) - else: - return roots - - return roots_impl diff --git a/numba/numba/targets/printimpl.py b/numba/numba/targets/printimpl.py deleted file mode 100644 index adf8913b6..000000000 --- a/numba/numba/targets/printimpl.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -This file implements print functionality for the CPU. 
-""" -from __future__ import print_function, absolute_import, division -from llvmlite.llvmpy.core import Type -from numba import types, typing, cgutils -from numba.targets.imputils import Registry, impl_ret_untracked - -registry = Registry() -lower = registry.lower - - -# NOTE: the current implementation relies on CPython API even in -# nopython mode. - - -@lower("print_item", types.Const) -def print_item_impl(context, builder, sig, args): - """ - Print a single constant value. - """ - ty, = sig.args - val = ty.value - - pyapi = context.get_python_api(builder) - - strobj = pyapi.unserialize(pyapi.serialize_object(val)) - pyapi.print_object(strobj) - pyapi.decref(strobj) - - res = context.get_dummy_value() - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("print_item", types.Any) -def print_item_impl(context, builder, sig, args): - """ - Print a single native value by boxing it in a Python object and - invoking the Python interpreter's print routine. - """ - ty, = sig.args - val, = args - - pyapi = context.get_python_api(builder) - env_manager = context.get_env_manager(builder) - - if context.enable_nrt: - context.nrt.incref(builder, ty, val) - - obj = pyapi.from_native_value(ty, val, env_manager) - with builder.if_else(cgutils.is_not_null(builder, obj), likely=True) as (if_ok, if_error): - with if_ok: - pyapi.print_object(obj) - pyapi.decref(obj) - with if_error: - cstr = context.insert_const_string(builder.module, - "the print() function") - strobj = pyapi.string_from_string(cstr) - pyapi.err_write_unraisable(strobj) - pyapi.decref(strobj) - - res = context.get_dummy_value() - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower(print, types.VarArg(types.Any)) -def print_varargs_impl(context, builder, sig, args): - """ - A entire print() call. 
- """ - pyapi = context.get_python_api(builder) - gil = pyapi.gil_ensure() - - for i, (argtype, argval) in enumerate(zip(sig.args, args)): - signature = typing.signature(types.none, argtype) - imp = context.get_function("print_item", signature) - imp(builder, [argval]) - if i < len(args) - 1: - pyapi.print_string(' ') - pyapi.print_string('\n') - - pyapi.gil_release(gil) - res = context.get_dummy_value() - return impl_ret_untracked(context, builder, sig.return_type, res) diff --git a/numba/numba/targets/quicksort.py b/numba/numba/targets/quicksort.py deleted file mode 100644 index 1c845f9cf..000000000 --- a/numba/numba/targets/quicksort.py +++ /dev/null @@ -1,242 +0,0 @@ - -from __future__ import print_function, absolute_import, division - -import collections - -import numpy as np - -from numba import types - - -QuicksortImplementation = collections.namedtuple( - 'QuicksortImplementation', - (# The compile function itself - 'compile', - # All subroutines exercised by test_sort - 'partition', 'partition3', 'insertion_sort', - # The top-level function - 'run_quicksort', - )) - - -Partition = collections.namedtuple('Partition', ('start', 'stop')) - -# Under this size, switch to a simple insertion sort -SMALL_QUICKSORT = 15 - -MAX_STACK = 100 - - -def make_quicksort_impl(wrap, lt=None, is_argsort=False): - - intp = types.intp - zero = intp(0) - - # Two subroutines to make the core algorithm generic wrt. argsort - # or normal sorting. Note the genericity may make basic sort() - # slightly slower (~5%) - if is_argsort: - @wrap - def make_res(A): - return np.arange(A.size) - - @wrap - def GET(A, idx_or_val): - return A[idx_or_val] - - else: - @wrap - def make_res(A): - return A - - @wrap - def GET(A, idx_or_val): - return idx_or_val - - def default_lt(a, b): - """ - Trivial comparison function between two keys. - """ - return a < b - - LT = wrap(lt if lt is not None else default_lt) - - @wrap - def insertion_sort(A, R, low, high): - """ - Insertion sort A[low:high + 1]. 
Note the inclusive bounds. - """ - assert low >= 0 - if high <= low: - return - - for i in range(low + 1, high + 1): - k = R[i] - v = GET(A, k) - # Insert v into A[low:i] - j = i - while j > low and LT(v, GET(A, R[j - 1])): - # Make place for moving A[i] downwards - R[j] = R[j - 1] - j -= 1 - R[j] = k - - @wrap - def partition(A, R, low, high): - """ - Partition A[low:high + 1] around a chosen pivot. The pivot's index - is returned. - """ - assert low >= 0 - assert high > low - - mid = (low + high) >> 1 - # NOTE: the pattern of swaps below for the pivot choice and the - # partitioning gives good results (i.e. regular O(n log n)) - # on sorted, reverse-sorted, and uniform arrays. Subtle changes - # risk breaking this property. - - # median of three {low, middle, high} - if LT(GET(A, R[mid]), GET(A, R[low])): - R[low], R[mid] = R[mid], R[low] - if LT(GET(A, R[high]), GET(A, R[mid])): - R[high], R[mid] = R[mid], R[high] - if LT(GET(A, R[mid]), GET(A, R[low])): - R[low], R[mid] = R[mid], R[low] - pivot = GET(A, R[mid]) - - # Temporarily stash the pivot at the end - R[high], R[mid] = R[mid], R[high] - i = low - j = high - 1 - while True: - while i < high and LT(GET(A, R[i]), pivot): - i += 1 - while j >= low and LT(pivot, GET(A, R[j])): - j -= 1 - if i >= j: - break - R[i], R[j] = R[j], R[i] - i += 1 - j -= 1 - # Put the pivot back in its final place (all items before `i` - # are smaller than the pivot, all items at/after `i` are larger) - R[i], R[high] = R[high], R[i] - return i - - @wrap - def partition3(A, low, high): - """ - Three-way partition [low, high) around a chosen pivot. 
- A tuple (lt, gt) is returned such that: - - all elements in [low, lt) are < pivot - - all elements in [lt, gt] are == pivot - - all elements in (gt, high] are > pivot - """ - mid = (low + high) >> 1 - # median of three {low, middle, high} - if LT(A[mid], A[low]): - A[low], A[mid] = A[mid], A[low] - if LT(A[high], A[mid]): - A[high], A[mid] = A[mid], A[high] - if LT(A[mid], A[low]): - A[low], A[mid] = A[mid], A[low] - pivot = A[mid] - - A[low], A[mid] = A[mid], A[low] - lt = low - gt = high - i = low + 1 - while i <= gt: - if LT(A[i], pivot): - A[lt], A[i] = A[i], A[lt] - lt += 1 - i += 1 - elif LT(pivot, A[i]): - A[gt], A[i] = A[i], A[gt] - gt -= 1 - else: - i += 1 - return lt, gt - - @wrap - def run_quicksort(A): - R = make_res(A) - - if len(A) < 2: - return R - - stack = [Partition(zero, zero)] * MAX_STACK - stack[0] = Partition(zero, len(A) - 1) - n = 1 - - while n > 0: - n -= 1 - low, high = stack[n] - # Partition until it becomes more efficient to do an insertion sort - while high - low >= SMALL_QUICKSORT: - assert n < MAX_STACK - i = partition(A, R, low, high) - # Push largest partition on the stack - if high - i > i - low: - # Right is larger - if high > i: - stack[n] = Partition(i + 1, high) - n += 1 - high = i - 1 - else: - if i > low: - stack[n] = Partition(low, i - 1) - n += 1 - low = i + 1 - - insertion_sort(A, R, low, high) - - return R - - # Unused quicksort implementation based on 3-way partitioning; the - # partitioning scheme turns out exhibiting bad behaviour on sorted arrays. 
- @wrap - def _run_quicksort(A): - stack = [Partition(zero, zero)] * 100 - stack[0] = Partition(zero, len(A) - 1) - n = 1 - - while n > 0: - n -= 1 - low, high = stack[n] - # Partition until it becomes more efficient to do an insertion sort - while high - low >= SMALL_QUICKSORT: - assert n < MAX_STACK - l, r = partition3(A, low, high) - # One trivial (empty) partition => iterate on the other - if r == high: - high = l - 1 - elif l == low: - low = r + 1 - # Push largest partition on the stack - elif high - r > l - low: - # Right is larger - stack[n] = Partition(r + 1, high) - n += 1 - high = l - 1 - else: - stack[n] = Partition(low, l - 1) - n += 1 - low = r + 1 - - insertion_sort(A, low, high) - - - return QuicksortImplementation(wrap, - partition, partition3, insertion_sort, - run_quicksort) - - -def make_py_quicksort(*args, **kwargs): - return make_quicksort_impl((lambda f: f), *args, **kwargs) - -def make_jit_quicksort(*args, **kwargs): - from numba.extending import register_jitable - return make_quicksort_impl((lambda f: register_jitable(f)), - *args, **kwargs) diff --git a/numba/numba/targets/randomimpl.py b/numba/numba/targets/randomimpl.py deleted file mode 100644 index d0fb914c0..000000000 --- a/numba/numba/targets/randomimpl.py +++ /dev/null @@ -1,1493 +0,0 @@ -""" -Implement the random and np.random module functions. 
-""" - -from __future__ import print_function, absolute_import, division - -import math -import os -import random - -import numpy as np - -from llvmlite import ir - -from numba.extending import overload, register_jitable -from numba.targets.imputils import (Registry, impl_ret_untracked, - impl_ret_new_ref) -from numba.typing import signature -from numba import _helperlib, cgutils, types - - -registry = Registry() -lower = registry.lower - -int32_t = ir.IntType(32) -int64_t = ir.IntType(64) -def const_int(x): - return ir.Constant(int32_t, x) -double = ir.DoubleType() - -N = 624 -N_const = ir.Constant(int32_t, N) - - -# This is the same struct as rnd_state_t in _random.c. -rnd_state_t = ir.LiteralStructType([ - # index - int32_t, - # mt[N] - ir.ArrayType(int32_t, N), - # has_gauss - int32_t, - # gauss - double, - # is_initialized - int32_t, - ]) -rnd_state_ptr_t = ir.PointerType(rnd_state_t) - -def get_state_ptr(context, builder, name): - """ - Get a pointer to the given thread-local random state - (depending on *name*: "py" or "np"). - If the state isn't initialized, it is lazily initialized with - system entropy. - """ - assert name in ('py', 'np') - func_name = "numba_get_%s_random_state" % name - fnty = ir.FunctionType(rnd_state_ptr_t, ()) - fn = builder.module.get_or_insert_function(fnty, func_name) - # These two attributes allow LLVM to hoist the function call - # outside of loops. - fn.attributes.add('readnone') - fn.attributes.add('nounwind') - return builder.call(fn, ()) - -def get_py_state_ptr(context, builder): - """ - Get a pointer to the thread-local Python random state. - """ - return get_state_ptr(context, builder, 'py') - -def get_np_state_ptr(context, builder): - """ - Get a pointer to the thread-local Numpy random state. 
- """ - return get_state_ptr(context, builder, 'np') - - -# Accessors -def get_index_ptr(builder, state_ptr): - return cgutils.gep_inbounds(builder, state_ptr, 0, 0) - -def get_array_ptr(builder, state_ptr): - return cgutils.gep_inbounds(builder, state_ptr, 0, 1) - -def get_has_gauss_ptr(builder, state_ptr): - return cgutils.gep_inbounds(builder, state_ptr, 0, 2) - -def get_gauss_ptr(builder, state_ptr): - return cgutils.gep_inbounds(builder, state_ptr, 0, 3) - -def get_rnd_shuffle(builder): - """ - Get the internal function to shuffle the MT taste. - """ - fnty = ir.FunctionType(ir.VoidType(), (rnd_state_ptr_t,)) - fn = builder.function.module.get_or_insert_function(fnty, "numba_rnd_shuffle") - fn.args[0].add_attribute("nocapture") - return fn - - -def get_next_int32(context, builder, state_ptr): - """ - Get the next int32 generated by the PRNG at *state_ptr*. - """ - idxptr = get_index_ptr(builder, state_ptr) - idx = builder.load(idxptr) - need_reshuffle = builder.icmp_unsigned('>=', idx, N_const) - with cgutils.if_unlikely(builder, need_reshuffle): - fn = get_rnd_shuffle(builder) - builder.call(fn, (state_ptr,)) - builder.store(const_int(0), idxptr) - idx = builder.load(idxptr) - array_ptr = get_array_ptr(builder, state_ptr) - y = builder.load(cgutils.gep_inbounds(builder, array_ptr, 0, idx)) - idx = builder.add(idx, const_int(1)) - builder.store(idx, idxptr) - # Tempering - y = builder.xor(y, builder.lshr(y, const_int(11))) - y = builder.xor(y, builder.and_(builder.shl(y, const_int(7)), - const_int(0x9d2c5680))) - y = builder.xor(y, builder.and_(builder.shl(y, const_int(15)), - const_int(0xefc60000))) - y = builder.xor(y, builder.lshr(y, const_int(18))) - return y - -def get_next_double(context, builder, state_ptr): - """ - Get the next double generated by the PRNG at *state_ptr*. 
- """ - # a = rk_random(state) >> 5, b = rk_random(state) >> 6; - a = builder.lshr(get_next_int32(context, builder, state_ptr), const_int(5)) - b = builder.lshr(get_next_int32(context, builder, state_ptr), const_int(6)) - - # return (a * 67108864.0 + b) / 9007199254740992.0; - a = builder.uitofp(a, double) - b = builder.uitofp(b, double) - return builder.fdiv( - builder.fadd(b, builder.fmul(a, ir.Constant(double, 67108864.0))), - ir.Constant(double, 9007199254740992.0)) - -def get_next_int(context, builder, state_ptr, nbits, is_numpy): - """ - Get the next integer with width *nbits*. - """ - c32 = ir.Constant(nbits.type, 32) - def get_shifted_int(nbits): - shift = builder.sub(c32, nbits) - y = get_next_int32(context, builder, state_ptr) - if is_numpy: - # Use the last N bits, to match np.random - mask = builder.not_(ir.Constant(y.type, 0)) - mask = builder.lshr(mask, builder.zext(shift, y.type)) - return builder.and_(y, mask) - else: - # Use the first N bits, to match CPython random - return builder.lshr(y, builder.zext(shift, y.type)) - - ret = cgutils.alloca_once_value(builder, ir.Constant(int64_t, 0)) - - is_32b = builder.icmp_unsigned('<=', nbits, c32) - with builder.if_else(is_32b) as (ifsmall, iflarge): - with ifsmall: - low = get_shifted_int(nbits) - builder.store(builder.zext(low, int64_t), ret) - with iflarge: - # XXX This assumes nbits <= 64 - if is_numpy: - # Get the high bits first to match np.random - high = get_shifted_int(builder.sub(nbits, c32)) - low = get_next_int32(context, builder, state_ptr) - if not is_numpy: - # Get the high bits second to match CPython random - high = get_shifted_int(builder.sub(nbits, c32)) - total = builder.add( - builder.zext(low, int64_t), - builder.shl(builder.zext(high, int64_t), ir.Constant(int64_t, 32))) - builder.store(total, ret) - - return builder.load(ret) - - -def _fill_defaults(context, builder, sig, args, defaults): - """ - Assuming a homogeneous signature (same type for result and all arguments), - fill in 
the *defaults* if missing from the arguments. - """ - ty = sig.return_type - llty = context.get_data_type(ty) - args = tuple(args) + tuple(ir.Constant(llty, d) for d in defaults[len(args):]) - sig = signature(*(ty,) * (len(args) + 1)) - return sig, args - - -@lower("random.seed", types.uint32) -def seed_impl(context, builder, sig, args): - res = _seed_impl(context, builder, sig, args, get_state_ptr(context, - builder, "py")) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.seed", types.uint32) -def seed_impl(context, builder, sig, args): - res = _seed_impl(context, builder, sig, args, get_state_ptr(context, - builder, "np")) - return impl_ret_untracked(context, builder, sig.return_type, res) - -def _seed_impl(context, builder, sig, args, state_ptr): - seed_value, = args - fnty = ir.FunctionType(ir.VoidType(), (rnd_state_ptr_t, int32_t)) - fn = builder.function.module.get_or_insert_function(fnty, "numba_rnd_init") - builder.call(fn, (state_ptr, seed_value)) - return context.get_constant(types.none, None) - -@lower("random.random") -def random_impl(context, builder, sig, args): - state_ptr = get_state_ptr(context, builder, "py") - res = get_next_double(context, builder, state_ptr) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.random") -def random_impl(context, builder, sig, args): - state_ptr = get_state_ptr(context, builder, "np") - res = get_next_double(context, builder, state_ptr) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("random.gauss", types.Float, types.Float) -@lower("random.normalvariate", types.Float, types.Float) -def gauss_impl(context, builder, sig, args): - res = _gauss_impl(context, builder, sig, args, "py") - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.standard_normal") -@lower("np.random.normal") -@lower("np.random.normal", types.Float) -@lower("np.random.normal", types.Float, 
types.Float) -def np_gauss_impl(context, builder, sig, args): - sig, args = _fill_defaults(context, builder, sig, args, (0.0, 1.0)) - res = _gauss_impl(context, builder, sig, args, "np") - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def _gauss_pair_impl(_random): - def compute_gauss_pair(): - """ - Compute a pair of numbers on the normal distribution. - """ - while True: - x1 = 2.0 * _random() - 1.0 - x2 = 2.0 * _random() - 1.0 - r2 = x1*x1 + x2*x2 - if r2 < 1.0 and r2 != 0.0: - break - - # Box-Muller transform - f = math.sqrt(-2.0 * math.log(r2) / r2) - return f * x1, f * x2 - return compute_gauss_pair - -def _gauss_impl(context, builder, sig, args, state): - # The type for all computations (either float or double) - ty = sig.return_type - llty = context.get_data_type(ty) - - state_ptr = get_state_ptr(context, builder, state) - _random = {"py": random.random, - "np": np.random.random}[state] - - ret = cgutils.alloca_once(builder, llty, name="result") - - gauss_ptr = get_gauss_ptr(builder, state_ptr) - has_gauss_ptr = get_has_gauss_ptr(builder, state_ptr) - has_gauss = cgutils.is_true(builder, builder.load(has_gauss_ptr)) - with builder.if_else(has_gauss) as (then, otherwise): - with then: - # if has_gauss: return it - builder.store(builder.load(gauss_ptr), ret) - builder.store(const_int(0), has_gauss_ptr) - with otherwise: - # if not has_gauss: compute a pair of numbers using the Box-Muller - # transform; keep one and return the other - pair = context.compile_internal(builder, - _gauss_pair_impl(_random), - signature(types.UniTuple(ty, 2)), - ()) - - first, second = cgutils.unpack_tuple(builder, pair, 2) - builder.store(first, gauss_ptr) - builder.store(second, ret) - builder.store(const_int(1), has_gauss_ptr) - - mu, sigma = args - return builder.fadd(mu, - builder.fmul(sigma, builder.load(ret))) - -@lower("random.getrandbits", types.Integer) -def getrandbits_impl(context, builder, sig, args): - nbits, = args - too_large = 
builder.icmp_unsigned(">=", nbits, const_int(65)) - too_small = builder.icmp_unsigned("==", nbits, const_int(0)) - with cgutils.if_unlikely(builder, builder.or_(too_large, too_small)): - msg = "getrandbits() limited to 64 bits" - context.call_conv.return_user_exc(builder, OverflowError, (msg,)) - state_ptr = get_state_ptr(context, builder, "py") - res = get_next_int(context, builder, state_ptr, nbits, False) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -def _randrange_impl(context, builder, start, stop, step, state): - state_ptr = get_state_ptr(context, builder, state) - ty = stop.type - zero = ir.Constant(ty, 0) - one = ir.Constant(ty, 1) - nptr = cgutils.alloca_once(builder, ty, name="n") - # n = stop - start - builder.store(builder.sub(stop, start), nptr) - - with builder.if_then(builder.icmp_signed('<', step, zero)): - # n = (n + step + 1) // step - w = builder.add(builder.add(builder.load(nptr), step), one) - n = builder.sdiv(w, step) - builder.store(n, nptr) - with builder.if_then(builder.icmp_signed('>', step, one)): - # n = (n + step - 1) // step - w = builder.sub(builder.add(builder.load(nptr), step), one) - n = builder.sdiv(w, step) - builder.store(n, nptr) - - n = builder.load(nptr) - with cgutils.if_unlikely(builder, builder.icmp_signed('<=', n, zero)): - # n <= 0 - msg = "empty range for randrange()" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) - - fnty = ir.FunctionType(ty, [ty, cgutils.true_bit.type]) - fn = builder.function.module.get_or_insert_function(fnty, "llvm.ctlz.%s" % ty) - # Since the upper bound is exclusive, we need to subtract one before - # calculating the number of bits. This leads to a special case when - # n == 1; there's only one possible result, so we don't need bits from - # the PRNG. This case is handled separately towards the end of this - # function. 
CPython's implementation is simpler and just runs another - # iteration of the while loop when the resulting number is too large - # instead of subtracting one, to avoid needing to handle a special - # case. Thus, we only perform this subtraction for the NumPy case. - nm1 = builder.sub(n, one) if state == "np" else n - nbits = builder.trunc(builder.call(fn, [nm1, cgutils.true_bit]), int32_t) - nbits = builder.sub(ir.Constant(int32_t, ty.width), nbits) - - rptr = cgutils.alloca_once(builder, ty, name="r") - - def get_num(): - bbwhile = builder.append_basic_block("while") - bbend = builder.append_basic_block("while.end") - builder.branch(bbwhile) - - builder.position_at_end(bbwhile) - r = get_next_int(context, builder, state_ptr, nbits, state == "np") - r = builder.trunc(r, ty) - too_large = builder.icmp_signed('>=', r, n) - builder.cbranch(too_large, bbwhile, bbend) - - builder.position_at_end(bbend) - builder.store(r, rptr) - - if state == "np": - # Handle n == 1 case, per previous comment. 
- with builder.if_else(builder.icmp_signed('==', n, one)) as (is_one, is_not_one): - with is_one: - builder.store(zero, rptr) - with is_not_one: - get_num() - else: - get_num() - - return builder.add(start, builder.mul(builder.load(rptr), step)) - - -@lower("random.randrange", types.Integer) -def randrange_impl_1(context, builder, sig, args): - stop, = args - start = ir.Constant(stop.type, 0) - step = ir.Constant(stop.type, 1) - res = _randrange_impl(context, builder, start, stop, step, "py") - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("random.randrange", types.Integer, types.Integer) -def randrange_impl_2(context, builder, sig, args): - start, stop = args - step = ir.Constant(start.type, 1) - res = _randrange_impl(context, builder, start, stop, step, "py") - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("random.randrange", types.Integer, - types.Integer, types.Integer) -def randrange_impl_3(context, builder, sig, args): - start, stop, step = args - res = _randrange_impl(context, builder, start, stop, step, "py") - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("random.randint", types.Integer, types.Integer) -def randint_impl_1(context, builder, sig, args): - start, stop = args - step = ir.Constant(start.type, 1) - stop = builder.add(stop, step) - res = _randrange_impl(context, builder, start, stop, step, "py") - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.randint", types.Integer) -def randint_impl_2(context, builder, sig, args): - stop, = args - start = ir.Constant(stop.type, 0) - step = ir.Constant(stop.type, 1) - res = _randrange_impl(context, builder, start, stop, step, "np") - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.randint", types.Integer, types.Integer) -def randrange_impl_2(context, builder, sig, args): - start, stop = args - step = ir.Constant(start.type, 1) - res = 
_randrange_impl(context, builder, start, stop, step, "np") - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("random.uniform", types.Float, types.Float) -def uniform_impl(context, builder, sig, args): - res = uniform_impl(context, builder, sig, args, "py") - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.uniform", types.Float, types.Float) -def uniform_impl(context, builder, sig, args): - res = uniform_impl(context, builder, sig, args, "np") - return impl_ret_untracked(context, builder, sig.return_type, res) - -def uniform_impl(context, builder, sig, args, state): - state_ptr = get_state_ptr(context, builder, state) - a, b = args - width = builder.fsub(b, a) - r = get_next_double(context, builder, state_ptr) - return builder.fadd(a, builder.fmul(width, r)) - -@lower("random.triangular", types.Float, types.Float) -def triangular_impl_2(context, builder, sig, args): - fltty = sig.return_type - low, high = args - state_ptr = get_state_ptr(context, builder, "py") - randval = get_next_double(context, builder, state_ptr) - - def triangular_impl_2(randval, low, high): - u = randval - c = 0.5 - if u > c: - u = 1.0 - u - low, high = high, low - return low + (high - low) * math.sqrt(u * c) - - res = context.compile_internal(builder, triangular_impl_2, - signature(*(fltty,) * 4), - (randval, low, high)) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("random.triangular", types.Float, - types.Float, types.Float) -def triangular_impl_3(context, builder, sig, args): - low, high, mode = args - res = _triangular_impl_3(context, builder, sig, low, high, mode, "py") - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.triangular", types.Float, - types.Float, types.Float) -def triangular_impl_3(context, builder, sig, args): - low, mode, high = args - res = _triangular_impl_3(context, builder, sig, low, high, mode, "np") - return 
impl_ret_untracked(context, builder, sig.return_type, res) - -def _triangular_impl_3(context, builder, sig, low, high, mode, state): - fltty = sig.return_type - state_ptr = get_state_ptr(context, builder, state) - randval = get_next_double(context, builder, state_ptr) - - def triangular_impl_3(randval, low, high, mode): - if high == low: - return low - u = randval - c = (mode - low) / (high - low) - if u > c: - u = 1.0 - u - c = 1.0 - c - low, high = high, low - return low + (high - low) * math.sqrt(u * c) - - return context.compile_internal(builder, triangular_impl_3, - signature(*(fltty,) * 5), - (randval, low, high, mode)) - - -@lower("random.gammavariate", - types.Float, types.Float) -def gammavariate_impl(context, builder, sig, args): - res = _gammavariate_impl(context, builder, sig, args, random.random) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.standard_gamma", types.Float) -@lower("np.random.gamma", types.Float) -@lower("np.random.gamma", types.Float, types.Float) -def gammavariate_impl(context, builder, sig, args): - sig, args = _fill_defaults(context, builder, sig, args, (None, 1.0)) - res = _gammavariate_impl(context, builder, sig, args, np.random.random) - return impl_ret_untracked(context, builder, sig.return_type, res) - -def _gammavariate_impl(context, builder, sig, args, _random): - _exp = math.exp - _log = math.log - _sqrt = math.sqrt - _e = math.e - - TWOPI = 2.0 * math.pi - LOG4 = _log(4.0) - SG_MAGICCONST = 1.0 + _log(4.5) - - def gammavariate_impl(alpha, beta): - """Gamma distribution. Taken from CPython. - """ - # alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2 - - # Warning: a few older sources define the gamma distribution in terms - # of alpha > -1.0 - if alpha <= 0.0 or beta <= 0.0: - raise ValueError('gammavariate: alpha and beta must be > 0.0') - - if alpha > 1.0: - # Uses R.C.H. 
Cheng, "The generation of Gamma - # variables with non-integral shape parameters", - # Applied Statistics, (1977), 26, No. 1, p71-74 - ainv = _sqrt(2.0 * alpha - 1.0) - bbb = alpha - LOG4 - ccc = alpha + ainv - - while 1: - u1 = _random() - if not 1e-7 < u1 < .9999999: - continue - u2 = 1.0 - _random() - v = _log(u1/(1.0-u1))/ainv - x = alpha*_exp(v) - z = u1*u1*u2 - r = bbb+ccc*v-x - if r + SG_MAGICCONST - 4.5*z >= 0.0 or r >= _log(z): - return x * beta - - elif alpha == 1.0: - # expovariate(1) - u = _random() - while u <= 1e-7: - u = _random() - return -_log(u) * beta - - else: # alpha is between 0 and 1 (exclusive) - # Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle - while 1: - u = _random() - b = (_e + alpha)/_e - p = b*u - if p <= 1.0: - x = p ** (1.0/alpha) - else: - x = -_log((b-p)/alpha) - u1 = _random() - if p > 1.0: - if u1 <= x ** (alpha - 1.0): - break - elif u1 <= _exp(-x): - break - return x * beta - - return context.compile_internal(builder, gammavariate_impl, - sig, args) - - -@lower("random.betavariate", - types.Float, types.Float) -def betavariate_impl(context, builder, sig, args): - res = _betavariate_impl(context, builder, sig, args, - random.gammavariate) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.beta", - types.Float, types.Float) -def betavariate_impl(context, builder, sig, args): - res = _betavariate_impl(context, builder, sig, args, - np.random.gamma) - return impl_ret_untracked(context, builder, sig.return_type, res) - -def _betavariate_impl(context, builder, sig, args, gamma): - - def betavariate_impl(alpha, beta): - """Beta distribution. Taken from CPython. - """ - # This version due to Janne Sinkkonen, and matches all the std - # texts (e.g., Knuth Vol 2 Ed 3 pg 134 "the beta distribution"). - y = gamma(alpha, 1.) 
- if y == 0.0: - return 0.0 - else: - return y / (y + gamma(beta, 1.)) - - return context.compile_internal(builder, betavariate_impl, - sig, args) - - -@lower("random.expovariate", - types.Float) -def expovariate_impl(context, builder, sig, args): - _random = random.random - _log = math.log - - def expovariate_impl(lambd): - """Exponential distribution. Taken from CPython. - """ - # lambd: rate lambd = 1/mean - # ('lambda' is a Python reserved word) - - # we use 1-random() instead of random() to preclude the - # possibility of taking the log of zero. - return -_log(1.0 - _random()) / lambd - - res = context.compile_internal(builder, expovariate_impl, - sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.exponential", types.Float) -def exponential_impl(context, builder, sig, args): - _random = np.random.random - _log = math.log - - def exponential_impl(scale): - return -_log(1.0 - _random()) * scale - - res = context.compile_internal(builder, exponential_impl, - sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.standard_exponential") -@lower("np.random.exponential") -def exponential_impl(context, builder, sig, args): - _random = np.random.random - _log = math.log - - def exponential_impl(): - return -_log(1.0 - _random()) - - res = context.compile_internal(builder, exponential_impl, - sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.lognormal") -@lower("np.random.lognormal", types.Float) -@lower("np.random.lognormal", types.Float, types.Float) -def np_lognormal_impl(context, builder, sig, args): - sig, args = _fill_defaults(context, builder, sig, args, (0.0, 1.0)) - res = _lognormvariate_impl(context, builder, sig, args, - np.random.normal) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("random.lognormvariate", - types.Float, types.Float) -def lognormvariate_impl(context, builder, sig, 
args): - res = _lognormvariate_impl(context, builder, sig, args, random.gauss) - return impl_ret_untracked(context, builder, sig.return_type, res) - -def _lognormvariate_impl(context, builder, sig, args, _gauss): - _exp = math.exp - - def lognormvariate_impl(mu, sigma): - return _exp(_gauss(mu, sigma)) - - return context.compile_internal(builder, lognormvariate_impl, - sig, args) - - -@lower("random.paretovariate", types.Float) -def paretovariate_impl(context, builder, sig, args): - _random = random.random - - def paretovariate_impl(alpha): - """Pareto distribution. Taken from CPython.""" - # Jain, pg. 495 - u = 1.0 - _random() - return 1.0 / u ** (1.0/alpha) - - res = context.compile_internal(builder, paretovariate_impl, - sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.pareto", types.Float) -def pareto_impl(context, builder, sig, args): - _random = np.random.random - - def pareto_impl(alpha): - # Same as paretovariate() - 1. - u = 1.0 - _random() - return 1.0 / u ** (1.0/alpha) - 1 - - res = context.compile_internal(builder, pareto_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("random.weibullvariate", - types.Float, types.Float) -def weibullvariate_impl(context, builder, sig, args): - _random = random.random - _log = math.log - - def weibullvariate_impl(alpha, beta): - """Weibull distribution. Taken from CPython.""" - # Jain, pg. 
499; bug fix courtesy Bill Arms - u = 1.0 - _random() - return alpha * (-_log(u)) ** (1.0/beta) - - res = context.compile_internal(builder, weibullvariate_impl, - sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.weibull", types.Float) -def weibull_impl(context, builder, sig, args): - _random = np.random.random - _log = math.log - - def weibull_impl(beta): - # Same as weibullvariate(1.0, beta) - u = 1.0 - _random() - return (-_log(u)) ** (1.0/beta) - - res = context.compile_internal(builder, weibull_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("random.vonmisesvariate", - types.Float, types.Float) -def vonmisesvariate_impl(context, builder, sig, args): - res = _vonmisesvariate_impl(context, builder, sig, args, random.random) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.vonmises", - types.Float, types.Float) -def vonmisesvariate_impl(context, builder, sig, args): - res = _vonmisesvariate_impl(context, builder, sig, args, np.random.random) - return impl_ret_untracked(context, builder, sig.return_type, res) - -def _vonmisesvariate_impl(context, builder, sig, args, _random): - _exp = math.exp - _sqrt = math.sqrt - _cos = math.cos - _acos = math.acos - _pi = math.pi - TWOPI = 2.0 * _pi - - def vonmisesvariate_impl(mu, kappa): - """Circular data distribution. Taken from CPython. - Note the algorithm in Python 2.6 and Numpy is different: - http://bugs.python.org/issue17141 - """ - # mu: mean angle (in radians between 0 and 2*pi) - # kappa: concentration parameter kappa (>= 0) - # if kappa = 0 generate uniform random angle - - # Based upon an algorithm published in: Fisher, N.I., - # "Statistical Analysis of Circular Data", Cambridge - # University Press, 1993. - - # Thanks to Magnus Kessler for a correction to the - # implementation of step 4. 
- if kappa <= 1e-6: - return TWOPI * _random() - - s = 0.5 / kappa - r = s + _sqrt(1.0 + s * s) - - while 1: - u1 = _random() - z = _cos(_pi * u1) - - d = z / (r + z) - u2 = _random() - if u2 < 1.0 - d * d or u2 <= (1.0 - d) * _exp(d): - break - - q = 1.0 / r - f = (q + z) / (1.0 + q * z) - u3 = _random() - if u3 > 0.5: - theta = (mu + _acos(f)) % TWOPI - else: - theta = (mu - _acos(f)) % TWOPI - - return theta - - return context.compile_internal(builder, vonmisesvariate_impl, - sig, args) - - -@lower("np.random.binomial", types.Integer, types.Float) -def binomial_impl(context, builder, sig, args): - intty = sig.return_type - _random = np.random.random - - def binomial_impl(n, p): - """ - Binomial distribution. Numpy's variant of the BINV algorithm - is used. - (Numpy uses BTPE for n*p >= 30, though) - """ - if n < 0: - raise ValueError("binomial(): n <= 0") - if not (0.0 <= p <= 1.0): - raise ValueError("binomial(): p outside of [0, 1]") - if p == 0.0: - return 0 - if p == 1.0: - return n - - flipped = p > 0.5 - if flipped: - p = 1.0 - p - q = 1.0 - p - - niters = 1 - qn = q ** n - while qn <= 1e-308: - # Underflow => split into several iterations - # Note this is much slower than Numpy's BTPE - niters <<= 2 - n >>= 2 - qn = q ** n - assert n > 0 - - np = n * p - bound = min(n, np + 10.0 * math.sqrt(np * q + 1)) - - finished = False - total = 0 - while niters > 0: - X = 0 - U = _random() - px = qn - while X <= bound: - if U <= px: - total += n - X if flipped else X - niters -= 1 - break - U -= px - X += 1 - px = ((n - X + 1) * p * px) / (X * q) - - return total - - res = context.compile_internal(builder, binomial_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.chisquare", types.Float) -def chisquare_impl(context, builder, sig, args): - - def chisquare_impl(df): - return 2.0 * np.random.standard_gamma(df / 2.0) - - res = context.compile_internal(builder, chisquare_impl, sig, args) - return 
impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.f", types.Float, types.Float) -def f_impl(context, builder, sig, args): - - def f_impl(num, denom): - return ((np.random.chisquare(num) * denom) / - (np.random.chisquare(denom) * num)) - - res = context.compile_internal(builder, f_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.geometric", types.Float) -def geometric_impl(context, builder, sig, args): - _random = np.random.random - intty = sig.return_type - - def geometric_impl(p): - # Numpy's algorithm. - if p <= 0.0 or p > 1.0: - raise ValueError("geometric(): p outside of (0, 1]") - q = 1.0 - p - if p >= 0.333333333333333333333333: - X = intty(1) - sum = prod = p - U = _random() - while U > sum: - prod *= q - sum += prod - X += 1 - return X - else: - return math.ceil(math.log(1.0 - _random()) / math.log(q)) - - res = context.compile_internal(builder, geometric_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.gumbel", types.Float, types.Float) -def gumbel_impl(context, builder, sig, args): - _random = np.random.random - _log = math.log - - def gumbel_impl(loc, scale): - U = 1.0 - _random() - return loc - scale * _log(-_log(U)) - - res = context.compile_internal(builder, gumbel_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.hypergeometric", types.Integer, - types.Integer, types.Integer) -def hypergeometric_impl(context, builder, sig, args): - _random = np.random.random - _floor = math.floor - - def hypergeometric_impl(ngood, nbad, nsamples): - """Numpy's algorithm for hypergeometric().""" - d1 = nbad + ngood - nsamples - d2 = float(min(nbad, ngood)) - - Y = d2 - K = nsamples - while Y > 0.0 and K > 0: - Y -= _floor(_random() + Y / (d1 + K)) - K -= 1 - Z = int(d2 - Y) - if ngood > nbad: - return nsamples - Z - else: - return Z - - res = 
context.compile_internal(builder, hypergeometric_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.laplace") -@lower("np.random.laplace", types.Float) -@lower("np.random.laplace", types.Float, types.Float) -def laplace_impl(context, builder, sig, args): - _random = np.random.random - _log = math.log - - def laplace_impl(loc, scale): - U = _random() - if U < 0.5: - return loc + scale * _log(U + U) - else: - return loc - scale * _log(2.0 - U - U) - - sig, args = _fill_defaults(context, builder, sig, args, (0.0, 1.0)) - res = context.compile_internal(builder, laplace_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.logistic") -@lower("np.random.logistic", types.Float) -@lower("np.random.logistic", types.Float, types.Float) -def logistic_impl(context, builder, sig, args): - _random = np.random.random - _log = math.log - - def logistic_impl(loc, scale): - U = _random() - return loc + scale * _log(U / (1.0 - U)) - - sig, args = _fill_defaults(context, builder, sig, args, (0.0, 1.0)) - res = context.compile_internal(builder, logistic_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower("np.random.logseries", types.Float) -def logseries_impl(context, builder, sig, args): - intty = sig.return_type - _random = np.random.random - _log = math.log - _exp = math.exp - - def logseries_impl(p): - """Numpy's algorithm for logseries().""" - if p <= 0.0 or p > 1.0: - raise ValueError("logseries(): p outside of (0, 1]") - r = _log(1.0 - p) - - while 1: - V = _random() - if V >= p: - return 1 - U = _random() - q = 1.0 - _exp(r * U) - if V <= q * q: - # XXX what if V == 0.0 ? 
- return intty(1.0 + _log(V) / _log(q)) - elif V >= q: - return 1 - else: - return 2 - - res = context.compile_internal(builder, logseries_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.negative_binomial", types.int64, types.Float) -def negative_binomial_impl(context, builder, sig, args): - _gamma = np.random.gamma - _poisson = np.random.poisson - - def negative_binomial_impl(n, p): - if n <= 0: - raise ValueError("negative_binomial(): n <= 0") - if p < 0.0 or p > 1.0: - raise ValueError("negative_binomial(): p outside of [0, 1]") - Y = _gamma(n, (1.0 - p) / p) - return _poisson(Y) - - res = context.compile_internal(builder, negative_binomial_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.poisson") -@lower("np.random.poisson", types.Float) -def poisson_impl(context, builder, sig, args): - state_ptr = get_np_state_ptr(context, builder) - - retptr = cgutils.alloca_once(builder, int64_t, name="ret") - bbcont = builder.append_basic_block("bbcont") - bbend = builder.append_basic_block("bbend") - - if len(args) == 1: - lam, = args - big_lam = builder.fcmp_ordered('>=', lam, ir.Constant(double, 10.0)) - with builder.if_then(big_lam): - # For lambda >= 10.0, we switch to a more accurate - # algorithm (see _random.c). - fnty = ir.FunctionType(int64_t, (rnd_state_ptr_t, double)) - fn = builder.function.module.get_or_insert_function(fnty, - "numba_poisson_ptrs") - ret = builder.call(fn, (state_ptr, lam)) - builder.store(ret, retptr) - builder.branch(bbend) - - builder.branch(bbcont) - builder.position_at_end(bbcont) - - _random = np.random.random - _exp = math.exp - - def poisson_impl(lam): - """Numpy's algorithm for poisson() on small *lam*. - - This method is invoked only if the parameter lambda of the - distribution is small ( < 10 ). The algorithm used is described - in "Knuth, D. 1969. 'Seminumerical Algorithms. 
The Art of - Computer Programming' vol 2. - """ - if lam < 0.0: - raise ValueError("poisson(): lambda < 0") - if lam == 0.0: - return 0 - enlam = _exp(-lam) - X = 0 - prod = 1.0 - while 1: - U = _random() - prod *= U - if prod <= enlam: - return X - X += 1 - - if len(args) == 0: - sig = signature(sig.return_type, types.float64) - args = (ir.Constant(double, 1.0),) - - ret = context.compile_internal(builder, poisson_impl, sig, args) - builder.store(ret, retptr) - builder.branch(bbend) - builder.position_at_end(bbend) - res = builder.load(retptr) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.power", types.Float) -def power_impl(context, builder, sig, args): - - def power_impl(a): - if a <= 0.0: - raise ValueError("power(): a <= 0") - return math.pow(1 - math.exp(-np.random.standard_exponential()), - 1./a) - - res = context.compile_internal(builder, power_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.rayleigh") -@lower("np.random.rayleigh", types.Float) -def rayleigh_impl(context, builder, sig, args): - _random = np.random.random - - def rayleigh_impl(mode): - if mode <= 0.0: - raise ValueError("rayleigh(): mode <= 0") - return mode * math.sqrt(-2.0 * math.log(1.0 - _random())) - - sig, args = _fill_defaults(context, builder, sig, args, (1.0,)) - res = context.compile_internal(builder, rayleigh_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.standard_cauchy") -def cauchy_impl(context, builder, sig, args): - _gauss = np.random.standard_normal - - def cauchy_impl(): - return _gauss() / _gauss() - - res = context.compile_internal(builder, cauchy_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.standard_t", types.Float) -def standard_t_impl(context, builder, sig, args): - - def standard_t_impl(df): - N = np.random.standard_normal() - G = 
np.random.standard_gamma(df / 2.0) - X = math.sqrt(df / 2.0) * N / math.sqrt(G) - return X - - res = context.compile_internal(builder, standard_t_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.wald", types.Float, types.Float) -def wald_impl(context, builder, sig, args): - - def wald_impl(mean, scale): - if mean <= 0.0: - raise ValueError("wald(): mean <= 0") - if scale <= 0.0: - raise ValueError("wald(): scale <= 0") - mu_2l = mean / (2.0 * scale) - Y = np.random.standard_normal() - Y = mean * Y * Y - X = mean + mu_2l * (Y - math.sqrt(4 * scale * Y + Y * Y)) - U = np.random.random() - if U <= mean / (mean + X): - return X - else: - return mean * mean / X - - res = context.compile_internal(builder, wald_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower("np.random.zipf", types.Float) -def zipf_impl(context, builder, sig, args): - _random = np.random.random - intty = sig.return_type - - def zipf_impl(a): - if a <= 1.0: - raise ValueError("zipf(): a <= 1") - am1 = a - 1.0 - b = 2.0 ** am1 - while 1: - U = 1.0 - _random() - V = _random() - X = intty(math.floor(U ** (-1.0 / am1))) - T = (1.0 + 1.0 / X) ** am1 - if X >= 1 and V * X * (T - 1.0) / (b - 1.0) <= (T / b): - return X - - res = context.compile_internal(builder, zipf_impl, sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - -def do_shuffle_impl(arr, rng): - - if not isinstance(arr, types.Buffer): - raise TypeError("The argument to shuffle() should be a buffer type") - - if rng == "np": - rand = np.random.randint - elif rng == "py": - rand = random.randrange - - if arr.ndim == 1: - def impl(arr): - i = arr.shape[0] - 1 - while i > 0: - j = rand(i + 1) - arr[i], arr[j] = arr[j], arr[i] - i -= 1 - else: - def impl(arr): - i = arr.shape[0] - 1 - while i > 0: - j = rand(i + 1) - arr[i], arr[j] = np.copy(arr[j]), np.copy(arr[i]) - i -= 1 - - return impl - -@overload(random.shuffle) 
-def shuffle_impl(arr): - return do_shuffle_impl(arr, "py") - -@overload(np.random.shuffle) -def shuffle_impl(arr): - return do_shuffle_impl(arr, "np") - -@overload(np.random.permutation) -def permutation_impl(arr): - if isinstance(arr, types.Integer): - def permutation_impl(n): - arr = np.arange(n) - np.random.shuffle(arr) - return arr - elif isinstance(arr, types.Array): - def permutation_impl(arr): - arr_copy = arr.copy() - np.random.shuffle(arr_copy) - return arr_copy - else: - permutation_impl = None - return permutation_impl - - -# ------------------------------------------------------------------------ -# Array-producing variants of scalar random functions - -for typing_key, arity in [ - ("np.random.beta", 3), - ("np.random.binomial", 3), - ("np.random.chisquare", 2), - ("np.random.exponential", 2), - ("np.random.f", 3), - ("np.random.gamma", 3), - ("np.random.geometric", 2), - ("np.random.gumbel", 3), - ("np.random.hypergeometric", 4), - ("np.random.laplace", 3), - ("np.random.logistic", 3), - ("np.random.lognormal", 3), - ("np.random.logseries", 2), - ("np.random.negative_binomial", 3), - ("np.random.normal", 3), - ("np.random.pareto", 2), - ("np.random.poisson", 2), - ("np.random.power", 2), - ("np.random.random", 1), - ("np.random.randint", 3), - ("np.random.rayleigh", 2), - ("np.random.standard_cauchy", 1), - ("np.random.standard_exponential", 1), - ("np.random.standard_gamma", 2), - ("np.random.standard_normal", 1), - ("np.random.standard_t", 2), - ("np.random.triangular", 4), - ("np.random.uniform", 3), - ("np.random.vonmises", 3), - ("np.random.wald", 3), - ("np.random.weibull", 2), - ("np.random.zipf", 2), - ]: - - @lower(typing_key, *(types.Any,) * arity) - def random_arr(context, builder, sig, args, typing_key=typing_key): - from . import arrayobj - - arrty = sig.return_type - dtype = arrty.dtype - scalar_sig = signature(dtype, *sig.args[:-1]) - scalar_args = args[:-1] - - # Allocate array... 
- shapes = arrayobj._parse_shape(context, builder, sig.args[-1], args[-1]) - arr = arrayobj._empty_nd_impl(context, builder, arrty, shapes) - - # ... and populate it in natural order - scalar_impl = context.get_function(typing_key, scalar_sig) - with cgutils.for_range(builder, arr.nitems) as loop: - val = scalar_impl(builder, scalar_args) - ptr = cgutils.gep(builder, arr.data, loop.index) - arrayobj.store_item(context, builder, arrty, val, ptr) - - return impl_ret_new_ref(context, builder, sig.return_type, arr._getvalue()) - - -# ------------------------------------------------------------------------ -# Irregular aliases: np.random.rand, np.random.randn - -@overload(np.random.rand) -def rand(*size): - if len(size) == 0: - # Scalar output - def rand_impl(): - return np.random.random() - - else: - # Array output - def rand_impl(*size): - return np.random.random(size) - - return rand_impl - -@overload(np.random.randn) -def randn(*size): - if len(size) == 0: - # Scalar output - def randn_impl(): - return np.random.standard_normal() - - else: - # Array output - def randn_impl(*size): - return np.random.standard_normal(size) - - return randn_impl - - -# ------------------------------------------------------------------------ -# np.random.choice - -@overload(np.random.choice) -def choice(a, size=None, replace=True): - - if isinstance(a, types.Array): - # choice() over an array population - assert a.ndim == 1 - dtype = a.dtype - - @register_jitable - def get_source_size(a): - return len(a) - - @register_jitable - def copy_source(a): - return a.copy() - - @register_jitable - def getitem(a, a_i): - return a[a_i] - - elif isinstance(a, types.Integer): - # choice() over an implied arange() population - dtype = np.intp - - @register_jitable - def get_source_size(a): - return a - - @register_jitable - def copy_source(a): - return np.arange(a) - - @register_jitable - def getitem(a, a_i): - return a_i - - else: - raise TypeError("np.random.choice() first argument should be " - 
"int or array, got %s" % (a,)) - - if size in (None, types.none): - def choice_impl(a, size=None, replace=True): - """ - choice() implementation returning a single sample - (note *replace* is ignored) - """ - n = get_source_size(a) - i = np.random.randint(0, n) - return getitem(a, i) - - else: - def choice_impl(a, size=None, replace=True): - """ - choice() implementation returning an array of samples - """ - n = get_source_size(a) - if replace: - out = np.empty(size, dtype) - fl = out.flat - for i in range(len(fl)): - j = np.random.randint(0, n) - fl[i] = getitem(a, j) - return out - else: - # Note we have to construct the array to compute out.size - # (`size` can be an arbitrary int or tuple of ints) - out = np.empty(size, dtype) - if out.size > n: - raise ValueError("Cannot take a larger sample than " - "population when 'replace=False'") - # Get a contiguous copy of the source so as to permute it - src = copy_source(a) - fl = out.flat - for i in range(len(fl)): - j = np.random.randint(i, n) - fl[i] = src[j] - # Move away selected element - src[j] = src[i] - return out - - return choice_impl - - -# ------------------------------------------------------------------------ -# np.random.multinomial - -@overload(np.random.multinomial) -def multinomial(n, pvals, size=None): - - dtype = np.intp - - @register_jitable - def multinomial_inner(n, pvals, out): - # Numpy's algorithm for multinomial() - fl = out.flat - sz = out.size - plen = len(pvals) - - for i in range(0, sz, plen): - # Loop body: take a set of n experiments and fill up - # fl[i:i + plen] with the distribution of results. - - # Current sum of outcome probabilities - p_sum = 1.0 - # Current remaining number of experiments - n_experiments = n - # For each possible outcome `j`, compute the number of results - # with this outcome. This is done by considering the - # conditional probability P(X=j | X>=j) and running a binomial - # distribution over the remaining number of experiments. 
- for j in range(0, plen - 1): - p_j = pvals[j] - n_j = fl[i + j] = np.random.binomial(n_experiments, p_j / p_sum) - n_experiments -= n_j - if n_experiments <= 0: - # Note the output was initialized to zero - break - p_sum -= p_j - if n_experiments > 0: - # The remaining experiments end up in the last bucket - fl[i + plen - 1] = n_experiments - - if not isinstance(n, types.Integer): - raise TypeError("np.random.multinomial(): n should be an " - "integer, got %s" % (n,)) - - if not isinstance(pvals, (types.Sequence, types.Array)): - raise TypeError("np.random.multinomial(): pvals should be an " - "array or sequence, got %s" % (pvals,)) - - if size in (None, types.none): - def multinomial_impl(n, pvals, size=None): - """ - multinomial(..., size=None) - """ - out = np.zeros(len(pvals), dtype) - multinomial_inner(n, pvals, out) - return out - - elif isinstance(size, types.Integer): - def multinomial_impl(n, pvals, size=None): - """ - multinomial(..., size=int) - """ - out = np.zeros((size, len(pvals)), dtype) - multinomial_inner(n, pvals, out) - return out - - elif isinstance(size, types.BaseTuple): - def multinomial_impl(n, pvals, size=None): - """ - multinomial(..., size=tuple) - """ - out = np.zeros(size + (len(pvals),), dtype) - multinomial_inner(n, pvals, out) - return out - - else: - raise TypeError("np.random.multinomial(): size should be int or " - "tuple or None, got %s" % (size,)) - - return multinomial_impl diff --git a/numba/numba/targets/rangeobj.py b/numba/numba/targets/rangeobj.py deleted file mode 100644 index 2e4b7dfc8..000000000 --- a/numba/numba/targets/rangeobj.py +++ /dev/null @@ -1,212 +0,0 @@ -""" -Implementation of the range object for fixed-size integers. 
-""" - -import llvmlite.llvmpy.core as lc - -from numba import types, cgutils, prange -from .listobj import ListIterInstance -from .arrayobj import make_array -from .imputils import (lower_builtin, lower_cast, - iterator_impl, impl_ret_untracked) -from numba.typing import signature -from numba.extending import intrinsic -from numba.parfor import internal_prange - -def make_range_iterator(typ): - """ - Return the Structure representation of the given *typ* (an - instance of types.RangeIteratorType). - """ - return cgutils.create_struct_proxy(typ) - - -def make_range_impl(int_type, range_state_type, range_iter_type): - RangeState = cgutils.create_struct_proxy(range_state_type) - - @lower_builtin(range, int_type) - @lower_builtin(prange, int_type) - @lower_builtin(internal_prange, int_type) - def range1_impl(context, builder, sig, args): - """ - range(stop: int) -> range object - """ - [stop] = args - state = RangeState(context, builder) - state.start = context.get_constant(int_type, 0) - state.stop = stop - state.step = context.get_constant(int_type, 1) - return impl_ret_untracked(context, - builder, - range_state_type, - state._getvalue()) - - @lower_builtin(range, int_type, int_type) - @lower_builtin(prange, int_type, int_type) - @lower_builtin(internal_prange, int_type, int_type) - def range2_impl(context, builder, sig, args): - """ - range(start: int, stop: int) -> range object - """ - start, stop = args - state = RangeState(context, builder) - state.start = start - state.stop = stop - state.step = context.get_constant(int_type, 1) - return impl_ret_untracked(context, - builder, - range_state_type, - state._getvalue()) - - @lower_builtin(range, int_type, int_type, int_type) - @lower_builtin(prange, int_type, int_type, int_type) - @lower_builtin(internal_prange, int_type, int_type, int_type) - def range3_impl(context, builder, sig, args): - """ - range(start: int, stop: int, step: int) -> range object - """ - [start, stop, step] = args - state = 
RangeState(context, builder) - state.start = start - state.stop = stop - state.step = step - return impl_ret_untracked(context, - builder, - range_state_type, - state._getvalue()) - - @lower_builtin(len, range_state_type) - def range_len(context, builder, sig, args): - """ - len(range) - """ - (value,) = args - state = RangeState(context, builder, value) - res = RangeIter.from_range_state(context, builder, state) - return impl_ret_untracked(context, builder, int_type, builder.load(res.count)) - - @lower_builtin('getiter', range_state_type) - def getiter_range32_impl(context, builder, sig, args): - """ - range.__iter__ - """ - (value,) = args - state = RangeState(context, builder, value) - res = RangeIter.from_range_state(context, builder, state)._getvalue() - return impl_ret_untracked(context, builder, range_iter_type, res) - - @iterator_impl(range_state_type, range_iter_type) - class RangeIter(make_range_iterator(range_iter_type)): - - @classmethod - def from_range_state(cls, context, builder, state): - """ - Create a RangeIter initialized from the given RangeState *state*. 
- """ - self = cls(context, builder) - start = state.start - stop = state.stop - step = state.step - - startptr = cgutils.alloca_once(builder, start.type) - builder.store(start, startptr) - - countptr = cgutils.alloca_once(builder, start.type) - - self.iter = startptr - self.stop = stop - self.step = step - self.count = countptr - - diff = builder.sub(stop, start) - zero = context.get_constant(int_type, 0) - one = context.get_constant(int_type, 1) - pos_diff = builder.icmp(lc.ICMP_SGT, diff, zero) - pos_step = builder.icmp(lc.ICMP_SGT, step, zero) - sign_differs = builder.xor(pos_diff, pos_step) - zero_step = builder.icmp(lc.ICMP_EQ, step, zero) - - with cgutils.if_unlikely(builder, zero_step): - # step shouldn't be zero - context.call_conv.return_user_exc(builder, ValueError, - ("range() arg 3 must not be zero",)) - - with builder.if_else(sign_differs) as (then, orelse): - with then: - builder.store(zero, self.count) - - with orelse: - rem = builder.srem(diff, step) - rem = builder.select(pos_diff, rem, builder.neg(rem)) - uneven = builder.icmp(lc.ICMP_SGT, rem, zero) - newcount = builder.add(builder.sdiv(diff, step), - builder.select(uneven, one, zero)) - builder.store(newcount, self.count) - - return self - - def iternext(self, context, builder, result): - zero = context.get_constant(int_type, 0) - countptr = self.count - count = builder.load(countptr) - is_valid = builder.icmp(lc.ICMP_SGT, count, zero) - result.set_valid(is_valid) - - with builder.if_then(is_valid): - value = builder.load(self.iter) - result.yield_(value) - one = context.get_constant(int_type, 1) - - builder.store(builder.sub(count, one, flags=["nsw"]), countptr) - builder.store(builder.add(value, self.step), self.iter) - - -range_impl_map = { - types.int32 : (types.range_state32_type, types.range_iter32_type), - types.int64 : (types.range_state64_type, types.range_iter64_type), - types.uint64 : (types.unsigned_range_state64_type, types.unsigned_range_iter64_type) -} - -for int_type, 
state_types in range_impl_map.items(): - make_range_impl(int_type, *state_types) - -@lower_cast(types.RangeType, types.RangeType) -def range_to_range(context, builder, fromty, toty, val): - olditems = cgutils.unpack_tuple(builder, val, 3) - items = [context.cast(builder, v, fromty.dtype, toty.dtype) - for v in olditems] - return cgutils.make_anonymous_struct(builder, items) - -@intrinsic -def range_iter_len(typingctx, val): - """ - An implementation of len(range_iter) for internal use. - """ - if isinstance(val, types.RangeIteratorType): - val_type = val.yield_type - def codegen(context, builder, sig, args): - (value,) = args - iter_type = range_impl_map[val_type][1] - iterobj = cgutils.create_struct_proxy(iter_type)(context, builder, value) - int_type = iterobj.count.type - return impl_ret_untracked(context, builder, int_type, builder.load(iterobj.count)) - return signature(val_type, val), codegen - elif isinstance(val, types.ListIter): - def codegen(context, builder, sig, args): - (value,) = args - intp_t = context.get_value_type(types.intp) - iterobj = ListIterInstance(context, builder, sig.args[0], value) - return impl_ret_untracked(context, builder, intp_t, iterobj.size) - return signature(types.intp, val), codegen - elif isinstance(val, types.ArrayIterator): - def codegen(context, builder, sig, args): - (iterty,) = sig.args - (value,) = args - intp_t = context.get_value_type(types.intp) - iterobj = context.make_helper(builder, iterty, value=value) - arrayty = iterty.array_type - ary = make_array(arrayty)(context, builder, value=iterobj.array) - shape = cgutils.unpack_tuple(builder, ary.shape) - # array iterates along the outer dimension - return impl_ret_untracked(context, builder, intp_t, shape[0]) - return signature(types.intp, val), codegen diff --git a/numba/numba/targets/registry.py b/numba/numba/targets/registry.py deleted file mode 100644 index 0434a565b..000000000 --- a/numba/numba/targets/registry.py +++ /dev/null @@ -1,107 +0,0 @@ -from __future__ 
import print_function, division, absolute_import - -import contextlib -import threading - -from . import cpu -from .descriptors import TargetDescriptor -from .. import dispatcher, utils, typing - -# ----------------------------------------------------------------------------- -# Default CPU target descriptors - -class _ThreadLocalContext(threading.local): - """ - Thread-local helper for CPUTarget. - """ - _nested_typing_context = None - _nested_target_context = None - - @contextlib.contextmanager - def nested(self, typing_context, target_context): - old_nested = self._nested_typing_context, self._nested_target_context - try: - self._nested_typing_context = typing_context - self._nested_target_context = target_context - yield - finally: - self._nested_typing_context, self._nested_target_context = old_nested - - -class CPUTarget(TargetDescriptor): - options = cpu.CPUTargetOptions - _tls = _ThreadLocalContext() - - @utils.cached_property - def _toplevel_target_context(self): - # Lazily-initialized top-level target context, for all threads - return cpu.CPUContext(self.typing_context) - - @utils.cached_property - def _toplevel_typing_context(self): - # Lazily-initialized top-level typing context, for all threads - return typing.Context() - - @property - def target_context(self): - """ - The target context for CPU targets. - """ - nested = self._tls._nested_target_context - if nested is not None: - return nested - else: - return self._toplevel_target_context - - @property - def typing_context(self): - """ - The typing context for CPU targets. - """ - nested = self._tls._nested_typing_context - if nested is not None: - return nested - else: - return self._toplevel_typing_context - - def nested_context(self, typing_context, target_context): - """ - A context manager temporarily replacing the contexts with the - given ones, for the current thread of execution. 
- """ - return self._tls.nested(typing_context, target_context) - - -# The global CPU target -cpu_target = CPUTarget() - - -class CPUDispatcher(dispatcher.Dispatcher): - targetdescr = cpu_target - - -class TargetRegistry(utils.UniqueDict): - """ - A registry of API implementations for various backends. - - Attributes - ---------- - ondemand: - - A dictionary of target-name -> function, where function is executed - the first time a target is used. It is used for deferred - initialization for some targets (e.g. gpu). - """ - def __init__(self, *args, **kws): - super(TargetRegistry, self).__init__(*args, **kws) - self.ondemand = utils.UniqueDict() - - def __getitem__(self, item): - if item in self.ondemand: - self[item] = self.ondemand[item]() - del self.ondemand[item] - return super(TargetRegistry, self).__getitem__(item) - - -dispatcher_registry = TargetRegistry() -dispatcher_registry['cpu'] = CPUDispatcher diff --git a/numba/numba/targets/removerefctpass.py b/numba/numba/targets/removerefctpass.py deleted file mode 100644 index ff5bc7ae3..000000000 --- a/numba/numba/targets/removerefctpass.py +++ /dev/null @@ -1,109 +0,0 @@ -""" -Implement a rewrite pass on LLVM module to remove unnecessary refcount -operation. -""" -from __future__ import absolute_import, print_function - -from llvmlite.ir.transforms import CallVisitor - -from numba import types - - -class _MarkNrtCallVisitor(CallVisitor): - """ - A pass to mark all NRT_incref and NRT_decref. 
- """ - def __init__(self): - self.marked = set() - - def visit_Call(self, instr): - if instr.callee.name in ('NRT_incref', 'NRT_decref'): - self.marked.add(instr) - - -def _rewrite_function(function): - # Mark NRT usage - markpass = _MarkNrtCallVisitor() - markpass.visit_Function(function) - marked = markpass.marked - # Remove NRT usage - for bb in function.basic_blocks: - for inst in list(bb.instructions): - if inst in marked: - bb.instructions.remove(inst) - - -_accepted_nrtfns = 'NRT_incref', 'NRT_decref' - - -def _legalize(module, dmm, fndesc): - """ - Legalize the code in the module. - Returns True if the module is legal for the rewrite pass that remove - unnecessary refcount. - """ - - def valid_output(ty): - """ - Valid output are any type that does not need refcount - """ - model = dmm[ty] - return not model.contains_nrt_meminfo() - - def valid_input(ty): - """ - Valid input are any type that does not need refcount except Array. - """ - return valid_output(ty) or isinstance(ty, types.Array) - - argtypes = fndesc.argtypes - restype = fndesc.restype - calltypes = fndesc.calltypes - - # Legalize function arguments - for argty in argtypes: - if not valid_input(argty): - return False - - # Legalize function return - if not valid_output(restype): - return False - - # Legalize all called functions - for callty in calltypes.values(): - if callty is not None and not valid_output(callty.return_type): - return False - - # Ensure no allocation - for fn in module.functions: - if fn.name.startswith("NRT_"): - if fn.name not in _accepted_nrtfns: - return False - - return True - - -def remove_unnecessary_nrt_usage(function, context, fndesc): - """ - Remove unnecessary NRT incref/decref in the given LLVM function. - It uses highlevel type info to determine if the function does not need NRT. - Such a function does not: - - - return array object; - - take arguments that need refcount except array; - - call function that return refcounted object. 
- - In effect, the function will not capture or create references that extend - the lifetime of any refcounted objects beyound the lifetime of the - function. - - The rewrite performs inplace. - If rewrite has happen, this function return True. Otherwise, return False. - """ - dmm = context.data_model_manager - if _legalize(function.module, dmm, fndesc): - _rewrite_function(function) - return True - else: - return False - diff --git a/numba/numba/targets/setobj.py b/numba/numba/targets/setobj.py deleted file mode 100644 index 738ea6889..000000000 --- a/numba/numba/targets/setobj.py +++ /dev/null @@ -1,1418 +0,0 @@ -""" -Support for native homogeneous sets. -""" - -from __future__ import print_function, absolute_import, division - -import collections -import contextlib -import math - -from llvmlite import ir -from numba import types, cgutils, typing -from numba.targets.imputils import (lower_builtin, lower_cast, - iternext_impl, impl_ret_borrowed, - impl_ret_new_ref, impl_ret_untracked, - for_iter, call_len) -from numba.utils import cached_property -from . import quicksort, slicing - - -def get_payload_struct(context, builder, set_type, ptr): - """ - Given a set value and type, get its payload structure (as a - reference, so that mutations are seen by all). - """ - payload_type = types.SetPayload(set_type) - ptrty = context.get_data_type(payload_type).as_pointer() - payload = builder.bitcast(ptr, ptrty) - return context.make_data_helper(builder, payload_type, ref=payload) - - -def get_entry_size(context, set_type): - """ - Return the entry size for the given set type. - """ - llty = context.get_data_type(types.SetEntry(set_type)) - return context.get_abi_sizeof(llty) - - -# Note these values are special: -# - EMPTY is obtained by issuing memset(..., 0xFF) -# - (unsigned) EMPTY > (unsigned) DELETED > any other hash value -EMPTY = -1 -DELETED = -2 -FALLBACK = -43 - -# Minimal size of entries table. Must be a power of 2! 
-MINSIZE = 16 - -# Number of cache-friendly linear probes before switching to non-linear probing -LINEAR_PROBES = 3 - -DEBUG_ALLOCS = False - - -def get_hash_value(context, builder, typ, value): - """ - Compute the hash of the given value. - """ - sig = typing.signature(types.intp, typ) - fn = context.get_function(hash, sig) - h = fn(builder, (value,)) - # Fixup reserved values - is_ok = is_hash_used(context, builder, h) - fallback = ir.Constant(h.type, FALLBACK) - return builder.select(is_ok, h, fallback) - -def is_hash_empty(context, builder, h): - """ - Whether the hash value denotes an empty entry. - """ - empty = ir.Constant(h.type, EMPTY) - return builder.icmp_unsigned('==', h, empty) - -def is_hash_deleted(context, builder, h): - """ - Whether the hash value denotes a deleted entry. - """ - deleted = ir.Constant(h.type, DELETED) - return builder.icmp_unsigned('==', h, deleted) - -def is_hash_used(context, builder, h): - """ - Whether the hash value denotes an active entry. - """ - # Everything below DELETED is an used entry - deleted = ir.Constant(h.type, DELETED) - return builder.icmp_unsigned('<', h, deleted) - - -SetLoop = collections.namedtuple('SetLoop', ('index', 'entry', 'do_break')) - - -class _SetPayload(object): - - def __init__(self, context, builder, set_type, ptr): - payload = get_payload_struct(context, builder, set_type, ptr) - self._context = context - self._builder = builder - self._ty = set_type - self._payload = payload - self._entries = payload._get_ptr_by_name('entries') - self._ptr = ptr - - @property - def mask(self): - return self._payload.mask - - @mask.setter - def mask(self, value): - # CAUTION: mask must be a power of 2 minus 1 - self._payload.mask = value - - @property - def used(self): - return self._payload.used - - @used.setter - def used(self, value): - self._payload.used = value - - @property - def fill(self): - return self._payload.fill - - @fill.setter - def fill(self, value): - self._payload.fill = value - - @property - 
def finger(self): - return self._payload.finger - - @finger.setter - def finger(self, value): - self._payload.finger = value - - @property - def dirty(self): - return self._payload.dirty - - @dirty.setter - def dirty(self, value): - self._payload.dirty = value - - @property - def entries(self): - """ - A pointer to the start of the entries array. - """ - return self._entries - - @property - def ptr(self): - """ - A pointer to the start of the NRT-allocated area. - """ - return self._ptr - - def get_entry(self, idx): - """ - Get entry number *idx*. - """ - entry_ptr = cgutils.gep(self._builder, self._entries, idx) - entry = self._context.make_data_helper(self._builder, - types.SetEntry(self._ty), - ref=entry_ptr) - return entry - - def _lookup(self, item, h, for_insert=False): - """ - Lookup the *item* with the given hash values in the entries. - - Return a (found, entry index) tuple: - - If found is true, points to the entry containing - the item. - - If found is false, points to the empty entry that - the item can be written to (only if *for_insert* is true) - """ - context = self._context - builder = self._builder - - intp_t = h.type - - mask = self.mask - dtype = self._ty.dtype - eqfn = context.get_function('==', - typing.signature(types.boolean, dtype, dtype)) - - one = ir.Constant(intp_t, 1) - five = ir.Constant(intp_t, 5) - - # The perturbation value for probing - perturb = cgutils.alloca_once_value(builder, h) - # The index of the entry being considered: start with (hash & mask) - index = cgutils.alloca_once_value(builder, - builder.and_(h, mask)) - if for_insert: - # The index of the first deleted entry in the lookup chain - free_index_sentinel = mask.type(-1) # highest unsigned index - free_index = cgutils.alloca_once_value(builder, free_index_sentinel) - - bb_body = builder.append_basic_block("lookup.body") - bb_found = builder.append_basic_block("lookup.found") - bb_not_found = builder.append_basic_block("lookup.not_found") - bb_end = 
builder.append_basic_block("lookup.end") - - def check_entry(i): - """ - Check entry *i* against the value being searched for. - """ - entry = self.get_entry(i) - entry_hash = entry.hash - - with builder.if_then(builder.icmp_unsigned('==', h, entry_hash)): - # Hashes are equal, compare values - # (note this also ensures the entry is used) - eq = eqfn(builder, (item, entry.key)) - with builder.if_then(eq): - builder.branch(bb_found) - - with builder.if_then(is_hash_empty(context, builder, entry_hash)): - builder.branch(bb_not_found) - - if for_insert: - # Memorize the index of the first deleted entry - with builder.if_then(is_hash_deleted(context, builder, entry_hash)): - j = builder.load(free_index) - j = builder.select(builder.icmp_unsigned('==', j, free_index_sentinel), - i, j) - builder.store(j, free_index) - - # First linear probing. When the number of collisions is small, - # the lineary probing loop achieves better cache locality and - # is also slightly cheaper computationally. - with cgutils.for_range(builder, ir.Constant(intp_t, LINEAR_PROBES)): - i = builder.load(index) - check_entry(i) - i = builder.add(i, one) - i = builder.and_(i, mask) - builder.store(i, index) - - # If not found after linear probing, switch to a non-linear - # perturbation keyed on the unmasked hash value. - # XXX how to tell LLVM this branch is unlikely? - builder.branch(bb_body) - with builder.goto_block(bb_body): - i = builder.load(index) - check_entry(i) - - # Perturb to go to next entry: - # perturb >>= 5 - # i = (i * 5 + 1 + perturb) & mask - p = builder.load(perturb) - p = builder.lshr(p, five) - i = builder.add(one, builder.mul(i, five)) - i = builder.and_(mask, builder.add(i, p)) - builder.store(i, index) - builder.store(p, perturb) - # Loop - builder.branch(bb_body) - - with builder.goto_block(bb_not_found): - if for_insert: - # Not found => for insertion, return the index of the first - # deleted entry (if any), to avoid creating an infinite - # lookup chain (issue #1913). 
- i = builder.load(index) - j = builder.load(free_index) - i = builder.select(builder.icmp_unsigned('==', j, free_index_sentinel), - i, j) - builder.store(i, index) - builder.branch(bb_end) - - with builder.goto_block(bb_found): - builder.branch(bb_end) - - builder.position_at_end(bb_end) - - found = builder.phi(ir.IntType(1), 'found') - found.add_incoming(cgutils.true_bit, bb_found) - found.add_incoming(cgutils.false_bit, bb_not_found) - - return found, builder.load(index) - - @contextlib.contextmanager - def _iterate(self, start=None): - """ - Iterate over the payload's entries. Yield a SetLoop. - """ - context = self._context - builder = self._builder - - intp_t = context.get_value_type(types.intp) - one = ir.Constant(intp_t, 1) - size = builder.add(self.mask, one) - - with cgutils.for_range(builder, size, start=start) as range_loop: - entry = self.get_entry(range_loop.index) - is_used = is_hash_used(context, builder, entry.hash) - with builder.if_then(is_used): - loop = SetLoop(index=range_loop.index, entry=entry, - do_break=range_loop.do_break) - yield loop - - @contextlib.contextmanager - def _next_entry(self): - """ - Yield a random entry from the payload. Caller must ensure the - set isn't empty, otherwise the function won't end. - """ - context = self._context - builder = self._builder - - intp_t = context.get_value_type(types.intp) - zero = ir.Constant(intp_t, 0) - one = ir.Constant(intp_t, 1) - mask = self.mask - - # Start walking the entries from the stored "search finger" and - # break as soon as we find a used entry. 
- - bb_body = builder.append_basic_block('next_entry_body') - bb_end = builder.append_basic_block('next_entry_end') - - index = cgutils.alloca_once_value(builder, self.finger) - builder.branch(bb_body) - - with builder.goto_block(bb_body): - i = builder.load(index) - # ANDing with mask ensures we stay inside the table boundaries - i = builder.and_(mask, builder.add(i, one)) - builder.store(i, index) - entry = self.get_entry(i) - is_used = is_hash_used(context, builder, entry.hash) - builder.cbranch(is_used, bb_end, bb_body) - - builder.position_at_end(bb_end) - - # Update the search finger with the next position. This avoids - # O(n**2) behaviour when pop() is called in a loop. - i = builder.load(index) - self.finger = i - yield self.get_entry(i) - - -class SetInstance(object): - - def __init__(self, context, builder, set_type, set_val): - self._context = context - self._builder = builder - self._ty = set_type - self._entrysize = get_entry_size(context, set_type) - self._set = context.make_helper(builder, set_type, set_val) - - @property - def dtype(self): - return self._ty.dtype - - @property - def payload(self): - """ - The _SetPayload for this set. - """ - # This cannot be cached as the pointer can move around! - context = self._context - builder = self._builder - - ptr = self._context.nrt.meminfo_data(builder, self.meminfo) - return _SetPayload(context, builder, self._ty, ptr) - - @property - def value(self): - return self._set._getvalue() - - @property - def meminfo(self): - return self._set.meminfo - - @property - def parent(self): - return self._set.parent - - @parent.setter - def parent(self, value): - self._set.parent = value - - def get_size(self): - """ - Return the number of elements in the size. 
- """ - return self.payload.used - - def set_dirty(self, val): - if self._ty.reflected: - self.payload.dirty = cgutils.true_bit if val else cgutils.false_bit - - def _add_entry(self, payload, entry, item, h, do_resize=True): - context = self._context - builder = self._builder - - old_hash = entry.hash - entry.hash = h - entry.key = item - # used++ - used = payload.used - one = ir.Constant(used.type, 1) - used = payload.used = builder.add(used, one) - # fill++ if entry wasn't a deleted one - with builder.if_then(is_hash_empty(context, builder, old_hash), - likely=True): - payload.fill = builder.add(payload.fill, one) - # Grow table if necessary - if do_resize: - self.upsize(used) - self.set_dirty(True) - - def _add_key(self, payload, item, h, do_resize=True): - context = self._context - builder = self._builder - - found, i = payload._lookup(item, h, for_insert=True) - not_found = builder.not_(found) - - with builder.if_then(not_found): - # Not found => add it - entry = payload.get_entry(i) - old_hash = entry.hash - entry.hash = h - entry.key = item - # used++ - used = payload.used - one = ir.Constant(used.type, 1) - used = payload.used = builder.add(used, one) - # fill++ if entry wasn't a deleted one - with builder.if_then(is_hash_empty(context, builder, old_hash), - likely=True): - payload.fill = builder.add(payload.fill, one) - # Grow table if necessary - if do_resize: - self.upsize(used) - self.set_dirty(True) - - def _remove_entry(self, payload, entry, do_resize=True): - # Mark entry deleted - entry.hash = ir.Constant(entry.hash.type, DELETED) - # used-- - used = payload.used - one = ir.Constant(used.type, 1) - used = payload.used = self._builder.sub(used, one) - # Shrink table if necessary - if do_resize: - self.downsize(used) - self.set_dirty(True) - - def _remove_key(self, payload, item, h, do_resize=True): - context = self._context - builder = self._builder - - found, i = payload._lookup(item, h) - - with builder.if_then(found): - entry = 
payload.get_entry(i) - self._remove_entry(payload, entry, do_resize) - - return found - - def add(self, item, do_resize=True): - context = self._context - builder = self._builder - - payload = self.payload - h = get_hash_value(context, builder, self._ty.dtype, item) - self._add_key(payload, item, h, do_resize) - - def contains(self, item): - context = self._context - builder = self._builder - - payload = self.payload - h = get_hash_value(context, builder, self._ty.dtype, item) - found, i = payload._lookup(item, h) - return found - - def discard(self, item): - context = self._context - builder = self._builder - - payload = self.payload - h = get_hash_value(context, builder, self._ty.dtype, item) - found = self._remove_key(payload, item, h) - return found - - def pop(self): - context = self._context - builder = self._builder - - lty = context.get_value_type(self._ty.dtype) - key = cgutils.alloca_once(builder, lty) - - payload = self.payload - with payload._next_entry() as entry: - builder.store(entry.key, key) - self._remove_entry(payload, entry) - - return builder.load(key) - - def clear(self): - context = self._context - builder = self._builder - - intp_t = context.get_value_type(types.intp) - minsize = ir.Constant(intp_t, MINSIZE) - self._replace_payload(minsize) - self.set_dirty(True) - - def copy(self): - """ - Return a copy of this set. 
- """ - context = self._context - builder = self._builder - - payload = self.payload - used = payload.used - fill = payload.fill - - other = type(self)(context, builder, self._ty, None) - - no_deleted_entries = builder.icmp_unsigned('==', used, fill) - with builder.if_else(no_deleted_entries, likely=True) \ - as (if_no_deleted, if_deleted): - with if_no_deleted: - # No deleted entries => raw copy the payload - ok = other._copy_payload(payload) - with builder.if_then(builder.not_(ok), likely=False): - context.call_conv.return_user_exc(builder, MemoryError, - ("cannot copy set",)) - - with if_deleted: - # Deleted entries => re-insert entries one by one - nentries = self.choose_alloc_size(context, builder, used) - ok = other._allocate_payload(nentries) - with builder.if_then(builder.not_(ok), likely=False): - context.call_conv.return_user_exc(builder, MemoryError, - ("cannot copy set",)) - - other_payload = other.payload - with payload._iterate() as loop: - entry = loop.entry - other._add_key(other_payload, entry.key, entry.hash, - do_resize=False) - - return other - - def intersect(self, other): - """ - In-place intersection with *other* set. - """ - context = self._context - builder = self._builder - payload = self.payload - other_payload = other.payload - - with payload._iterate() as loop: - entry = loop.entry - found, _ = other_payload._lookup(entry.key, entry.hash) - with builder.if_then(builder.not_(found)): - self._remove_entry(payload, entry, do_resize=False) - - # Final downsize - self.downsize(payload.used) - - def difference(self, other): - """ - In-place difference with *other* set. 
- """ - context = self._context - builder = self._builder - payload = self.payload - other_payload = other.payload - - with other_payload._iterate() as loop: - entry = loop.entry - self._remove_key(payload, entry.key, entry.hash, do_resize=False) - - # Final downsize - self.downsize(payload.used) - - def symmetric_difference(self, other): - """ - In-place symmetric difference with *other* set. - """ - context = self._context - builder = self._builder - other_payload = other.payload - - with other_payload._iterate() as loop: - key = loop.entry.key - h = loop.entry.hash - # We must reload our payload as it may be resized during the loop - payload = self.payload - found, i = payload._lookup(key, h, for_insert=True) - entry = payload.get_entry(i) - with builder.if_else(found) as (if_common, if_not_common): - with if_common: - self._remove_entry(payload, entry, do_resize=False) - with if_not_common: - self._add_entry(payload, entry, key, h) - - # Final downsize - self.downsize(self.payload.used) - - def issubset(self, other, strict=False): - context = self._context - builder = self._builder - payload = self.payload - other_payload = other.payload - - cmp_op = '<' if strict else '<=' - - res = cgutils.alloca_once_value(builder, cgutils.true_bit) - with builder.if_else( - builder.icmp_unsigned(cmp_op, payload.used, other_payload.used) - ) as (if_smaller, if_larger): - with if_larger: - # self larger than other => self cannot possibly a subset - builder.store(cgutils.false_bit, res) - with if_smaller: - # check whether each key of self is in other - with payload._iterate() as loop: - entry = loop.entry - found, _ = other_payload._lookup(entry.key, entry.hash) - with builder.if_then(builder.not_(found)): - builder.store(cgutils.false_bit, res) - loop.do_break() - - return builder.load(res) - - def isdisjoint(self, other): - context = self._context - builder = self._builder - payload = self.payload - other_payload = other.payload - - res = cgutils.alloca_once_value(builder, 
cgutils.true_bit) - - def check(smaller, larger): - # Loop over the smaller of the two, and search in the larger - with smaller._iterate() as loop: - entry = loop.entry - found, _ = larger._lookup(entry.key, entry.hash) - with builder.if_then(found): - builder.store(cgutils.false_bit, res) - loop.do_break() - - with builder.if_else( - builder.icmp_unsigned('>', payload.used, other_payload.used) - ) as (if_larger, otherwise): - - with if_larger: - # len(self) > len(other) - check(other_payload, payload) - - with otherwise: - # len(self) <= len(other) - check(payload, other_payload) - - return builder.load(res) - - def equals(self, other): - context = self._context - builder = self._builder - payload = self.payload - other_payload = other.payload - - res = cgutils.alloca_once_value(builder, cgutils.true_bit) - with builder.if_else( - builder.icmp_unsigned('==', payload.used, other_payload.used) - ) as (if_same_size, otherwise): - with if_same_size: - # same sizes => check whether each key of self is in other - with payload._iterate() as loop: - entry = loop.entry - found, _ = other_payload._lookup(entry.key, entry.hash) - with builder.if_then(builder.not_(found)): - builder.store(cgutils.false_bit, res) - loop.do_break() - with otherwise: - # different sizes => cannot possibly be equal - builder.store(cgutils.false_bit, res) - - return builder.load(res) - - @classmethod - def allocate_ex(cls, context, builder, set_type, nitems=None): - """ - Allocate a SetInstance with its storage. - Return a (ok, instance) tuple where *ok* is a LLVM boolean and - *instance* is a SetInstance object (the object's contents are - only valid when *ok* is true). 
- """ - intp_t = context.get_value_type(types.intp) - - if nitems is None: - nentries = ir.Constant(intp_t, MINSIZE) - else: - if isinstance(nitems, int): - nitems = ir.Constant(intp_t, nitems) - nentries = cls.choose_alloc_size(context, builder, nitems) - - self = cls(context, builder, set_type, None) - ok = self._allocate_payload(nentries) - return ok, self - - @classmethod - def allocate(cls, context, builder, set_type, nitems=None): - """ - Allocate a SetInstance with its storage. Same as allocate_ex(), - but return an initialized *instance*. If allocation failed, - control is transferred to the caller using the target's current - call convention. - """ - ok, self = cls.allocate_ex(context, builder, set_type, nitems) - with builder.if_then(builder.not_(ok), likely=False): - context.call_conv.return_user_exc(builder, MemoryError, - ("cannot allocate set",)) - return self - - @classmethod - def from_meminfo(cls, context, builder, set_type, meminfo): - """ - Allocate a new set instance pointing to an existing payload - (a meminfo pointer). - Note the parent field has to be filled by the caller. - """ - self = cls(context, builder, set_type, None) - self._set.meminfo = meminfo - self._set.parent = context.get_constant_null(types.pyobject) - context.nrt.incref(builder, set_type, self.value) - # Payload is part of the meminfo, no need to touch it - return self - - @classmethod - def choose_alloc_size(cls, context, builder, nitems): - """ - Choose a suitable number of entries for the given number of items. 
- """ - intp_t = nitems.type - one = ir.Constant(intp_t, 1) - minsize = ir.Constant(intp_t, MINSIZE) - - # Ensure number of entries >= 2 * used - min_entries = builder.shl(nitems, one) - # Find out first suitable power of 2, starting from MINSIZE - size_p = cgutils.alloca_once_value(builder, minsize) - - bb_body = builder.append_basic_block("calcsize.body") - bb_end = builder.append_basic_block("calcsize.end") - - builder.branch(bb_body) - - with builder.goto_block(bb_body): - size = builder.load(size_p) - is_large_enough = builder.icmp_unsigned('>=', size, min_entries) - with builder.if_then(is_large_enough, likely=False): - builder.branch(bb_end) - next_size = builder.shl(size, one) - builder.store(next_size, size_p) - builder.branch(bb_body) - - builder.position_at_end(bb_end) - return builder.load(size_p) - - def upsize(self, nitems): - """ - When adding to the set, ensure it is properly sized for the given - number of used entries. - """ - context = self._context - builder = self._builder - intp_t = nitems.type - - one = ir.Constant(intp_t, 1) - two = ir.Constant(intp_t, 2) - - payload = self.payload - - # Ensure number of entries >= 2 * used - min_entries = builder.shl(nitems, one) - size = builder.add(payload.mask, one) - need_resize = builder.icmp_unsigned('>=', min_entries, size) - - with builder.if_then(need_resize, likely=False): - # Find out next suitable size - new_size_p = cgutils.alloca_once_value(builder, size) - - bb_body = builder.append_basic_block("calcsize.body") - bb_end = builder.append_basic_block("calcsize.end") - - builder.branch(bb_body) - - with builder.goto_block(bb_body): - # Multiply by 4 (ensuring size remains a power of two) - new_size = builder.load(new_size_p) - new_size = builder.shl(new_size, two) - builder.store(new_size, new_size_p) - is_too_small = builder.icmp_unsigned('>=', min_entries, new_size) - builder.cbranch(is_too_small, bb_body, bb_end) - - builder.position_at_end(bb_end) - - new_size = builder.load(new_size_p) - if 
DEBUG_ALLOCS: - context.printf(builder, - "upsize to %zd items: current size = %zd, " - "min entries = %zd, new size = %zd\n", - nitems, size, min_entries, new_size) - self._resize(payload, new_size, "cannot grow set") - - def downsize(self, nitems): - """ - When removing from the set, ensure it is properly sized for the given - number of used entries. - """ - context = self._context - builder = self._builder - intp_t = nitems.type - - one = ir.Constant(intp_t, 1) - two = ir.Constant(intp_t, 2) - minsize = ir.Constant(intp_t, MINSIZE) - - payload = self.payload - - # Ensure entries >= max(2 * used, MINSIZE) - min_entries = builder.shl(nitems, one) - min_entries = builder.select(builder.icmp_unsigned('>=', min_entries, minsize), - min_entries, minsize) - # Shrink only if size >= 4 * min_entries && size > MINSIZE - max_size = builder.shl(min_entries, two) - size = builder.add(payload.mask, one) - need_resize = builder.and_( - builder.icmp_unsigned('<=', max_size, size), - builder.icmp_unsigned('<', minsize, size)) - - with builder.if_then(need_resize, likely=False): - # Find out next suitable size - new_size_p = cgutils.alloca_once_value(builder, size) - - bb_body = builder.append_basic_block("calcsize.body") - bb_end = builder.append_basic_block("calcsize.end") - - builder.branch(bb_body) - - with builder.goto_block(bb_body): - # Divide by 2 (ensuring size remains a power of two) - new_size = builder.load(new_size_p) - new_size = builder.lshr(new_size, one) - # Keep current size if new size would be < min_entries - is_too_small = builder.icmp_unsigned('>', min_entries, new_size) - with builder.if_then(is_too_small): - builder.branch(bb_end) - builder.store(new_size, new_size_p) - builder.branch(bb_body) - - builder.position_at_end(bb_end) - - # Ensure new_size >= MINSIZE - new_size = builder.load(new_size_p) - # At this point, new_size should be < size if the factors - # above were chosen carefully! 
- - if DEBUG_ALLOCS: - context.printf(builder, - "downsize to %zd items: current size = %zd, " - "min entries = %zd, new size = %zd\n", - nitems, size, min_entries, new_size) - self._resize(payload, new_size, "cannot shrink set") - - def _resize(self, payload, nentries, errmsg): - """ - Resize the payload to the given number of entries. - - CAUTION: *nentries* must be a power of 2! - """ - context = self._context - builder = self._builder - - # Allocate new entries - old_payload = payload - - ok = self._allocate_payload(nentries, realloc=True) - with builder.if_then(builder.not_(ok), likely=False): - context.call_conv.return_user_exc(builder, MemoryError, - (errmsg,)) - - # Re-insert old entries - payload = self.payload - with old_payload._iterate() as loop: - entry = loop.entry - self._add_key(payload, entry.key, entry.hash, - do_resize=False) - - self._free_payload(old_payload.ptr) - - def _replace_payload(self, nentries): - """ - Replace the payload with a new empty payload with the given number - of entries. - - CAUTION: *nentries* must be a power of 2! - """ - context = self._context - builder = self._builder - - # Free old payload - self._free_payload(self.payload.ptr) - - ok = self._allocate_payload(nentries, realloc=True) - with builder.if_then(builder.not_(ok), likely=False): - context.call_conv.return_user_exc(builder, MemoryError, - ("cannot reallocate set",)) - - def _allocate_payload(self, nentries, realloc=False): - """ - Allocate and initialize payload for the given number of entries. - If *realloc* is True, the existing meminfo is reused. - - CAUTION: *nentries* must be a power of 2! 
- """ - context = self._context - builder = self._builder - - ok = cgutils.alloca_once_value(builder, cgutils.true_bit) - - intp_t = context.get_value_type(types.intp) - zero = ir.Constant(intp_t, 0) - one = ir.Constant(intp_t, 1) - - payload_type = context.get_data_type(types.SetPayload(self._ty)) - payload_size = context.get_abi_sizeof(payload_type) - entry_size = self._entrysize - # Account for the fact that the payload struct already contains an entry - payload_size -= entry_size - - # Total allocation size = + nentries * entry_size - allocsize, ovf = cgutils.muladd_with_overflow(builder, nentries, - ir.Constant(intp_t, entry_size), - ir.Constant(intp_t, payload_size)) - with builder.if_then(ovf, likely=False): - builder.store(cgutils.false_bit, ok) - - with builder.if_then(builder.load(ok), likely=True): - if realloc: - meminfo = self._set.meminfo - ptr = context.nrt.meminfo_varsize_alloc(builder, meminfo, - size=allocsize) - alloc_ok = cgutils.is_null(builder, ptr) - else: - meminfo = context.nrt.meminfo_new_varsize(builder, size=allocsize) - alloc_ok = cgutils.is_null(builder, meminfo) - - with builder.if_else(cgutils.is_null(builder, meminfo), - likely=False) as (if_error, if_ok): - with if_error: - builder.store(cgutils.false_bit, ok) - with if_ok: - if not realloc: - self._set.meminfo = meminfo - self._set.parent = context.get_constant_null(types.pyobject) - payload = self.payload - # Initialize entries to 0xff (EMPTY) - cgutils.memset(builder, payload.ptr, allocsize, 0xFF) - payload.used = zero - payload.fill = zero - payload.finger = zero - new_mask = builder.sub(nentries, one) - payload.mask = new_mask - - if DEBUG_ALLOCS: - context.printf(builder, - "allocated %zd bytes for set at %p: mask = %zd\n", - allocsize, payload.ptr, new_mask) - - return builder.load(ok) - - def _free_payload(self, ptr): - """ - Free an allocated old payload at *ptr*. 
- """ - self._context.nrt.meminfo_varsize_free(self._builder, self.meminfo, ptr) - - def _copy_payload(self, src_payload): - """ - Raw-copy the given payload into self. - """ - context = self._context - builder = self._builder - - ok = cgutils.alloca_once_value(builder, cgutils.true_bit) - - intp_t = context.get_value_type(types.intp) - zero = ir.Constant(intp_t, 0) - one = ir.Constant(intp_t, 1) - - payload_type = context.get_data_type(types.SetPayload(self._ty)) - payload_size = context.get_abi_sizeof(payload_type) - entry_size = self._entrysize - # Account for the fact that the payload struct already contains an entry - payload_size -= entry_size - - mask = src_payload.mask - nentries = builder.add(one, mask) - - # Total allocation size = + nentries * entry_size - # (note there can't be any overflow since we're reusing an existing - # payload's parameters) - allocsize = builder.add(ir.Constant(intp_t, payload_size), - builder.mul(ir.Constant(intp_t, entry_size), - nentries)) - - with builder.if_then(builder.load(ok), likely=True): - meminfo = context.nrt.meminfo_new_varsize(builder, size=allocsize) - alloc_ok = cgutils.is_null(builder, meminfo) - - with builder.if_else(cgutils.is_null(builder, meminfo), - likely=False) as (if_error, if_ok): - with if_error: - builder.store(cgutils.false_bit, ok) - with if_ok: - self._set.meminfo = meminfo - payload = self.payload - payload.used = src_payload.used - payload.fill = src_payload.fill - payload.finger = zero - payload.mask = mask - cgutils.raw_memcpy(builder, payload.entries, - src_payload.entries, nentries, - entry_size) - - if DEBUG_ALLOCS: - context.printf(builder, - "allocated %zd bytes for set at %p: mask = %zd\n", - allocsize, payload.ptr, mask) - - return builder.load(ok) - - -class SetIterInstance(object): - - def __init__(self, context, builder, iter_type, iter_val): - self._context = context - self._builder = builder - self._ty = iter_type - self._iter = context.make_helper(builder, iter_type, iter_val) - 
ptr = self._context.nrt.meminfo_data(builder, self.meminfo) - self._payload = _SetPayload(context, builder, self._ty.container, ptr) - - @classmethod - def from_set(cls, context, builder, iter_type, set_val): - set_inst = SetInstance(context, builder, iter_type.container, set_val) - self = cls(context, builder, iter_type, None) - index = context.get_constant(types.intp, 0) - self._iter.index = cgutils.alloca_once_value(builder, index) - self._iter.meminfo = set_inst.meminfo - return self - - @property - def value(self): - return self._iter._getvalue() - - @property - def meminfo(self): - return self._iter.meminfo - - @property - def index(self): - return self._builder.load(self._iter.index) - - @index.setter - def index(self, value): - self._builder.store(value, self._iter.index) - - def iternext(self, result): - index = self.index - payload = self._payload - one = ir.Constant(index.type, 1) - - result.set_exhausted() - - with payload._iterate(start=index) as loop: - # An entry was found - entry = loop.entry - result.set_valid() - result.yield_(entry.key) - self.index = self._builder.add(loop.index, one) - loop.do_break() - - -#------------------------------------------------------------------------------- -# Constructors - -def build_set(context, builder, set_type, items): - """ - Build a set of the given type, containing the given items. - """ - nitems = len(items) - inst = SetInstance.allocate(context, builder, set_type, nitems) - - # Populate set. Inlining the insertion code for each item would be very - # costly, instead we create a LLVM array and iterate over it. 
- array = cgutils.pack_array(builder, items) - array_ptr = cgutils.alloca_once_value(builder, array) - - count = context.get_constant(types.intp, nitems) - with cgutils.for_range(builder, count) as loop: - item = builder.load(cgutils.gep(builder, array_ptr, 0, loop.index)) - inst.add(item) - - return impl_ret_new_ref(context, builder, set_type, inst.value) - - -@lower_builtin(set) -def set_empty_constructor(context, builder, sig, args): - set_type = sig.return_type - inst = SetInstance.allocate(context, builder, set_type) - return impl_ret_new_ref(context, builder, set_type, inst.value) - -@lower_builtin(set, types.IterableType) -def set_constructor(context, builder, sig, args): - set_type = sig.return_type - items_type, = sig.args - items, = args - - # If the argument has a len(), preallocate the set so as to - # avoid resizes. - n = call_len(context, builder, items_type, items) - inst = SetInstance.allocate(context, builder, set_type, n) - with for_iter(context, builder, items_type, items) as loop: - inst.add(loop.value) - - return impl_ret_new_ref(context, builder, set_type, inst.value) - - -#------------------------------------------------------------------------------- -# Various operations - -@lower_builtin(len, types.Set) -def set_len(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[0], args[0]) - return inst.get_size() - -@lower_builtin("in", types.Any, types.Set) -def in_set(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[1], args[1]) - return inst.contains(args[0]) - -@lower_builtin('getiter', types.Set) -def getiter_set(context, builder, sig, args): - inst = SetIterInstance.from_set(context, builder, sig.return_type, args[0]) - return impl_ret_borrowed(context, builder, sig.return_type, inst.value) - -@lower_builtin('iternext', types.SetIter) -@iternext_impl -def iternext_listiter(context, builder, sig, args, result): - inst = SetIterInstance(context, builder, sig.args[0], args[0]) - 
inst.iternext(result) - - -#------------------------------------------------------------------------------- -# Methods - -# One-item-at-a-time operations - -@lower_builtin("set.add", types.Set, types.Any) -def set_add(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[0], args[0]) - item = args[1] - inst.add(item) - - return context.get_dummy_value() - -@lower_builtin("set.discard", types.Set, types.Any) -def set_discard(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[0], args[0]) - item = args[1] - inst.discard(item) - - return context.get_dummy_value() - -@lower_builtin("set.pop", types.Set) -def set_pop(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[0], args[0]) - used = inst.payload.used - with builder.if_then(cgutils.is_null(builder, used), likely=False): - context.call_conv.return_user_exc(builder, KeyError, - ("set.pop(): empty set",)) - - return inst.pop() - -@lower_builtin("set.remove", types.Set, types.Any) -def set_remove(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[0], args[0]) - item = args[1] - found = inst.discard(item) - with builder.if_then(builder.not_(found), likely=False): - context.call_conv.return_user_exc(builder, KeyError, - ("set.remove(): key not in set",)) - - return context.get_dummy_value() - - -# Mutating set operations - -@lower_builtin("set.clear", types.Set) -def set_clear(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[0], args[0]) - inst.clear() - return context.get_dummy_value() - -@lower_builtin("set.copy", types.Set) -def set_copy(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[0], args[0]) - other = inst.copy() - return impl_ret_new_ref(context, builder, sig.return_type, other.value) - -@lower_builtin("set.difference_update", types.Set, types.IterableType) -def set_difference_update(context, builder, sig, args): - inst = SetInstance(context, 
builder, sig.args[0], args[0]) - other = SetInstance(context, builder, sig.args[1], args[1]) - - inst.difference(other) - - return context.get_dummy_value() - -@lower_builtin("set.intersection_update", types.Set, types.Set) -def set_intersection_update(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[0], args[0]) - other = SetInstance(context, builder, sig.args[1], args[1]) - - inst.intersect(other) - - return context.get_dummy_value() - -@lower_builtin("set.symmetric_difference_update", types.Set, types.Set) -def set_symmetric_difference_update(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[0], args[0]) - other = SetInstance(context, builder, sig.args[1], args[1]) - - inst.symmetric_difference(other) - - return context.get_dummy_value() - -@lower_builtin("set.update", types.Set, types.IterableType) -def set_update(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[0], args[0]) - items_type = sig.args[1] - items = args[1] - - # If the argument has a len(), assume there are few collisions and - # presize to len(set) + len(items) - n = call_len(context, builder, items_type, items) - if n is not None: - new_size = builder.add(inst.payload.used, n) - inst.upsize(new_size) - - with for_iter(context, builder, items_type, items) as loop: - inst.add(loop.value) - - if n is not None: - # If we pre-grew the set, downsize in case there were many collisions - inst.downsize(inst.payload.used) - - return context.get_dummy_value() - -for op, op_impl in [ - ('&=', set_intersection_update), - ('|=', set_update), - ('-=', set_difference_update), - ('^=', set_symmetric_difference_update), - ]: - @lower_builtin(op, types.Set, types.Set) - def set_inplace(context, builder, sig, args, op_impl=op_impl): - assert sig.return_type == sig.args[0] - op_impl(context, builder, sig, args) - return impl_ret_borrowed(context, builder, sig.args[0], args[0]) - - -# Set operations creating a new set - 
-@lower_builtin("-", types.Set, types.Set) -@lower_builtin("set.difference", types.Set, types.Set) -def set_difference(context, builder, sig, args): - def difference_impl(a, b): - s = a.copy() - s.difference_update(b) - return s - - return context.compile_internal(builder, difference_impl, sig, args) - -@lower_builtin("&", types.Set, types.Set) -@lower_builtin("set.intersection", types.Set, types.Set) -def set_intersection(context, builder, sig, args): - def intersection_impl(a, b): - if len(a) < len(b): - s = a.copy() - s.intersection_update(b) - return s - else: - s = b.copy() - s.intersection_update(a) - return s - - return context.compile_internal(builder, intersection_impl, sig, args) - -@lower_builtin("^", types.Set, types.Set) -@lower_builtin("set.symmetric_difference", types.Set, types.Set) -def set_symmetric_difference(context, builder, sig, args): - def symmetric_difference_impl(a, b): - if len(a) > len(b): - s = a.copy() - s.symmetric_difference_update(b) - return s - else: - s = b.copy() - s.symmetric_difference_update(a) - return s - - return context.compile_internal(builder, symmetric_difference_impl, - sig, args) - -@lower_builtin("|", types.Set, types.Set) -@lower_builtin("set.union", types.Set, types.Set) -def set_union(context, builder, sig, args): - def union_impl(a, b): - if len(a) > len(b): - s = a.copy() - s.update(b) - return s - else: - s = b.copy() - s.update(a) - return s - - return context.compile_internal(builder, union_impl, sig, args) - - -# Predicates - -@lower_builtin("set.isdisjoint", types.Set, types.Set) -def set_isdisjoint(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[0], args[0]) - other = SetInstance(context, builder, sig.args[1], args[1]) - - return inst.isdisjoint(other) - -@lower_builtin("<=", types.Set, types.Set) -@lower_builtin("set.issubset", types.Set, types.Set) -def set_issubset(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[0], args[0]) - other = 
SetInstance(context, builder, sig.args[1], args[1]) - - return inst.issubset(other) - -@lower_builtin(">=", types.Set, types.Set) -@lower_builtin("set.issuperset", types.Set, types.Set) -def set_issuperset(context, builder, sig, args): - def superset_impl(a, b): - return b.issubset(a) - - return context.compile_internal(builder, superset_impl, sig, args) - -@lower_builtin("==", types.Set, types.Set) -def set_isdisjoint(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[0], args[0]) - other = SetInstance(context, builder, sig.args[1], args[1]) - - return inst.equals(other) - -@lower_builtin("!=", types.Set, types.Set) -def set_ne(context, builder, sig, args): - def ne_impl(a, b): - return not a == b - - return context.compile_internal(builder, ne_impl, sig, args) - -@lower_builtin("<", types.Set, types.Set) -def set_lt(context, builder, sig, args): - inst = SetInstance(context, builder, sig.args[0], args[0]) - other = SetInstance(context, builder, sig.args[1], args[1]) - - return inst.issubset(other, strict=True) - -@lower_builtin(">", types.Set, types.Set) -def set_gt(context, builder, sig, args): - def gt_impl(a, b): - return b < a - - return context.compile_internal(builder, gt_impl, sig, args) - -@lower_builtin('is', types.Set, types.Set) -def set_is(context, builder, sig, args): - a = SetInstance(context, builder, sig.args[0], args[0]) - b = SetInstance(context, builder, sig.args[1], args[1]) - ma = builder.ptrtoint(a.meminfo, cgutils.intp_t) - mb = builder.ptrtoint(b.meminfo, cgutils.intp_t) - return builder.icmp_signed('==', ma, mb) - - -# ----------------------------------------------------------------------------- -# Implicit casting - -@lower_cast(types.Set, types.Set) -def set_to_set(context, builder, fromty, toty, val): - # Casting from non-reflected to reflected - assert fromty.dtype == toty.dtype - return val diff --git a/numba/numba/targets/slicing.py b/numba/numba/targets/slicing.py deleted file mode 100644 index 
cc81883a1..000000000 --- a/numba/numba/targets/slicing.py +++ /dev/null @@ -1,209 +0,0 @@ -""" -Implement slices and various slice computations. -""" - -import itertools - -from llvmlite import ir - -from numba.six.moves import zip_longest -from numba import cgutils, types, typing -from .imputils import (lower_builtin, lower_getattr, - iternext_impl, impl_ret_borrowed, - impl_ret_new_ref, impl_ret_untracked) - - -def fix_index(builder, idx, size): - """ - Fix negative index by adding *size* to it. Positive - indices are left untouched. - """ - is_negative = builder.icmp_signed('<', idx, ir.Constant(size.type, 0)) - wrapped_index = builder.add(idx, size) - return builder.select(is_negative, wrapped_index, idx) - - -def fix_slice(builder, slice, size): - """ - Fix *slice* start and stop to be valid (inclusive and exclusive, resp) - indexing bounds for a sequence of the given *size*. - """ - # See PySlice_GetIndicesEx() - zero = ir.Constant(size.type, 0) - minus_one = ir.Constant(size.type, -1) - - def fix_bound(bound_name, lower_repl, upper_repl): - bound = getattr(slice, bound_name) - bound = fix_index(builder, bound, size) - # Store value - setattr(slice, bound_name, bound) - # Still negative? => clamp to lower_repl - underflow = builder.icmp_signed('<', bound, zero) - with builder.if_then(underflow, likely=False): - setattr(slice, bound_name, lower_repl) - # Greater than size? 
=> clamp to upper_repl - overflow = builder.icmp_signed('>=', bound, size) - with builder.if_then(overflow, likely=False): - setattr(slice, bound_name, upper_repl) - - with builder.if_else(cgutils.is_neg_int(builder, slice.step)) as (if_neg_step, if_pos_step): - with if_pos_step: - # < 0 => 0; >= size => size - fix_bound('start', zero, size) - fix_bound('stop', zero, size) - with if_neg_step: - # < 0 => -1; >= size => size - 1 - lower = minus_one - upper = builder.add(size, minus_one) - fix_bound('start', lower, upper) - fix_bound('stop', lower, upper) - - -def get_slice_length(builder, slicestruct): - """ - Given a slice, compute the number of indices it spans, i.e. the - number of iterations that for_range_slice() will execute. - - Pseudo-code: - assert step != 0 - if step > 0: - if stop <= start: - return 0 - else: - return (stop - start - 1) // step + 1 - else: - if stop >= start: - return 0 - else: - return (stop - start + 1) // step + 1 - - (see PySlice_GetIndicesEx() in CPython) - """ - start = slicestruct.start - stop = slicestruct.stop - step = slicestruct.step - one = ir.Constant(start.type, 1) - zero = ir.Constant(start.type, 0) - - is_step_negative = cgutils.is_neg_int(builder, step) - delta = builder.sub(stop, start) - - # Nominal case - pos_dividend = builder.sub(delta, one) - neg_dividend = builder.add(delta, one) - dividend = builder.select(is_step_negative, neg_dividend, pos_dividend) - nominal_length = builder.add(one, builder.sdiv(dividend, step)) - - # Catch zero length - is_zero_length = builder.select(is_step_negative, - builder.icmp_signed('>=', delta, zero), - builder.icmp_signed('<=', delta, zero)) - - # Clamp to 0 if is_zero_length - return builder.select(is_zero_length, zero, nominal_length) - - -def get_slice_bounds(builder, slicestruct): - """ - Return the [lower, upper) indexing bounds of a slice. - """ - start = slicestruct.start - stop = slicestruct.stop - zero = start.type(0) - one = start.type(1) - # This is a bit pessimal, e.g. 
it will return [1, 5) instead - # of [1, 4) for `1:5:2` - is_step_negative = builder.icmp_signed('<', slicestruct.step, zero) - lower = builder.select(is_step_negative, - builder.add(stop, one), start) - upper = builder.select(is_step_negative, - builder.add(start, one), stop) - return lower, upper - - -def fix_stride(builder, slice, stride): - """ - Fix the given stride for the slice's step. - """ - return builder.mul(slice.step, stride) - -def guard_invalid_slice(context, builder, typ, slicestruct): - """ - Guard against *slicestruct* having a zero step (and raise ValueError). - """ - if typ.has_step: - cgutils.guard_null(context, builder, slicestruct.step, - (ValueError, "slice step cannot be zero")) - - -def get_defaults(context): - """ - Get the default values for a slice's members: - (start for positive step, start for negative step, - stop for positive step, stop for negative step, step) - """ - maxint = (1 << (context.address_size - 1)) - 1 - return (0, maxint, maxint, - maxint - 1, 1) - - -#--------------------------------------------------------------------------- -# The slice structure - -@lower_builtin(slice, types.VarArg(types.Any)) -def slice_constructor_impl(context, builder, sig, args): - default_start_pos, default_start_neg, default_stop_pos, default_stop_neg, default_step = \ - [context.get_constant(types.intp, x) for x in get_defaults(context)] - - # Fetch non-None arguments - slice_args = [None] * 3 - for i, (ty, val) in enumerate(zip(sig.args, args)): - if ty is types.none: - slice_args[i] = None - else: - slice_args[i] = val - - # Fill omitted arguments - def get_arg_value(i, default): - val = slice_args[i] - if val is None: - return default - else: - return val - - step = get_arg_value(2, default_step) - is_step_negative = builder.icmp_signed('<', step, - context.get_constant(types.intp, 0)) - default_stop = builder.select(is_step_negative, - default_stop_neg, default_stop_pos) - default_start = builder.select(is_step_negative, - 
default_start_neg, default_start_pos) - stop = get_arg_value(1, default_stop) - start = get_arg_value(0, default_start) - - ty = sig.return_type - sli = context.make_helper(builder, sig.return_type) - sli.start = start - sli.stop = stop - sli.step = step - - res = sli._getvalue() - return impl_ret_untracked(context, builder, sig.return_type, res) - - -@lower_getattr(types.SliceType, "start") -def slice_start_impl(context, builder, typ, value): - sli = context.make_helper(builder, typ, value) - return sli.start - -@lower_getattr(types.SliceType, "stop") -def slice_stop_impl(context, builder, typ, value): - sli = context.make_helper(builder, typ, value) - return sli.stop - -@lower_getattr(types.SliceType, "step") -def slice_step_impl(context, builder, typ, value): - if typ.has_step: - sli = context.make_helper(builder, typ, value) - return sli.step - else: - return context.get_constant(types.intp, 1) diff --git a/numba/numba/targets/smartarray.py b/numba/numba/targets/smartarray.py deleted file mode 100644 index 91684b917..000000000 --- a/numba/numba/targets/smartarray.py +++ /dev/null @@ -1,41 +0,0 @@ -from numba import cgutils, types -from numba.extending import ( - NativeValue, - box, - lower_builtin, - lower_cast, - make_attribute_wrapper, - models, - reflect, - register_model, - type_callable, - typeof_impl, - unbox, -) - -from .imputils import impl_ret_borrowed - - -@type_callable("__array_wrap__") -def type_array_wrap(context): - def typer(input_type, result): - if isinstance(input_type, types.SmartArrayType): - return input_type.copy( - dtype=result.dtype, ndim=result.ndim, layout=result.layout - ) - - return typer - - -@lower_builtin("__array__", types.SmartArrayType) -def array_as_array(context, builder, sig, args): - [argtype], [arg] = sig.args, args - val = context.make_helper(builder, argtype, ref=arg) - return val._get_ptr_by_name("data") - - -@lower_builtin("__array_wrap__", types.SmartArrayType, types.Array) -def array_wrap_array(context, builder, 
sig, args): - dest = context.make_helper(builder, sig.return_type) - dest.data = args[1] - return impl_ret_borrowed(context, builder, sig.return_type, dest._getvalue()) diff --git a/numba/numba/targets/tupleobj.py b/numba/numba/targets/tupleobj.py deleted file mode 100644 index 8cc8515c3..000000000 --- a/numba/numba/targets/tupleobj.py +++ /dev/null @@ -1,295 +0,0 @@ -""" -Implementation of tuple objects -""" - -from llvmlite import ir -import llvmlite.llvmpy.core as lc - -from .imputils import (lower_builtin, lower_getattr_generic, lower_cast, - lower_constant, - iternext_impl, impl_ret_borrowed, impl_ret_untracked) -from .. import typing, types, cgutils -from ..extending import overload_method - - -@lower_builtin(types.NamedTupleClass, types.VarArg(types.Any)) -def namedtuple_constructor(context, builder, sig, args): - # A namedtuple has the same representation as a regular tuple - res = context.make_tuple(builder, sig.return_type, args) - # The tuple's contents are borrowed - return impl_ret_borrowed(context, builder, sig.return_type, res) - -@lower_builtin('+', types.BaseTuple, types.BaseTuple) -def tuple_add(context, builder, sig, args): - left, right = [cgutils.unpack_tuple(builder, x) for x in args] - res = context.make_tuple(builder, sig.return_type, left + right) - # The tuple's contents are borrowed - return impl_ret_borrowed(context, builder, sig.return_type, res) - -def tuple_cmp_ordered(context, builder, op, sig, args): - tu, tv = sig.args - u, v = args - res = cgutils.alloca_once_value(builder, cgutils.true_bit) - bbend = builder.append_basic_block("cmp_end") - for i, (ta, tb) in enumerate(zip(tu.types, tv.types)): - a = builder.extract_value(u, i) - b = builder.extract_value(v, i) - not_equal = context.generic_compare(builder, '!=', (ta, tb), (a, b)) - with builder.if_then(not_equal): - pred = context.generic_compare(builder, op, (ta, tb), (a, b)) - builder.store(pred, res) - builder.branch(bbend) - # Everything matched equal => compare lengths - 
len_compare = eval("%d %s %d" % (len(tu.types), op, len(tv.types))) - pred = context.get_constant(types.boolean, len_compare) - builder.store(pred, res) - builder.branch(bbend) - builder.position_at_end(bbend) - return builder.load(res) - -@lower_builtin('==', types.BaseTuple, types.BaseTuple) -def tuple_eq(context, builder, sig, args): - tu, tv = sig.args - u, v = args - if len(tu.types) != len(tv.types): - res = context.get_constant(types.boolean, False) - return impl_ret_untracked(context, builder, sig.return_type, res) - res = context.get_constant(types.boolean, True) - for i, (ta, tb) in enumerate(zip(tu.types, tv.types)): - a = builder.extract_value(u, i) - b = builder.extract_value(v, i) - pred = context.generic_compare(builder, "==", (ta, tb), (a, b)) - res = builder.and_(res, pred) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin('!=', types.BaseTuple, types.BaseTuple) -def tuple_ne(context, builder, sig, args): - res = builder.not_(tuple_eq(context, builder, sig, args)) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin('<', types.BaseTuple, types.BaseTuple) -def tuple_lt(context, builder, sig, args): - res = tuple_cmp_ordered(context, builder, '<', sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin('<=', types.BaseTuple, types.BaseTuple) -def tuple_le(context, builder, sig, args): - res = tuple_cmp_ordered(context, builder, '<=', sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin('>', types.BaseTuple, types.BaseTuple) -def tuple_gt(context, builder, sig, args): - res = tuple_cmp_ordered(context, builder, '>', sig, args) - return impl_ret_untracked(context, builder, sig.return_type, res) - -@lower_builtin('>=', types.BaseTuple, types.BaseTuple) -def tuple_ge(context, builder, sig, args): - res = tuple_cmp_ordered(context, builder, '>=', sig, args) - return impl_ret_untracked(context, builder, 
sig.return_type, res) - -@lower_builtin(hash, types.BaseTuple) -def hash_tuple(context, builder, sig, args): - tupty, = sig.args - tup, = args - lty = context.get_value_type(sig.return_type) - - h = ir.Constant(lty, 0x345678) - mult = ir.Constant(lty, 1000003) - n = ir.Constant(lty, len(tupty)) - - for i, ty in enumerate(tupty.types): - # h = h * mult - h = builder.mul(h, mult) - val = builder.extract_value(tup, i) - hash_impl = context.get_function(hash, - typing.signature(sig.return_type, ty)) - h_val = hash_impl(builder, (val,)) - # h = h ^ hash(val) - h = builder.xor(h, h_val) - # Perturb: mult = mult + len(tup) - mult = builder.add(mult, n) - - return h - - -@lower_getattr_generic(types.BaseNamedTuple) -def namedtuple_getattr(context, builder, typ, value, attr): - """ - Fetch a namedtuple's field. - """ - index = typ.fields.index(attr) - res = builder.extract_value(value, index) - return impl_ret_borrowed(context, builder, typ[index], res) - - -@lower_constant(types.UniTuple) -@lower_constant(types.NamedUniTuple) -def unituple_constant(context, builder, ty, pyval): - """ - Create a homogeneous tuple constant. - """ - consts = [context.get_constant_generic(builder, ty.dtype, v) - for v in pyval] - return ir.ArrayType(consts[0].type, len(consts))(consts) - -@lower_constant(types.Tuple) -@lower_constant(types.NamedTuple) -def unituple_constant(context, builder, ty, pyval): - """ - Create a heterogeneous tuple constant. 
- """ - consts = [context.get_constant_generic(builder, ty.types[i], v) - for i, v in enumerate(pyval)] - return ir.Constant.literal_struct(consts) - - -#------------------------------------------------------------------------------ -# Tuple iterators - -@lower_builtin('getiter', types.UniTuple) -@lower_builtin('getiter', types.NamedUniTuple) -def getiter_unituple(context, builder, sig, args): - [tupty] = sig.args - [tup] = args - - iterval = context.make_helper(builder, types.UniTupleIter(tupty)) - - index0 = context.get_constant(types.intp, 0) - indexptr = cgutils.alloca_once(builder, index0.type) - builder.store(index0, indexptr) - - iterval.index = indexptr - iterval.tuple = tup - - res = iterval._getvalue() - return impl_ret_borrowed(context, builder, sig.return_type, res) - - -@lower_builtin('iternext', types.UniTupleIter) -@iternext_impl -def iternext_unituple(context, builder, sig, args, result): - [tupiterty] = sig.args - [tupiter] = args - - iterval = context.make_helper(builder, tupiterty, value=tupiter) - - tup = iterval.tuple - idxptr = iterval.index - idx = builder.load(idxptr) - count = context.get_constant(types.intp, tupiterty.container.count) - - is_valid = builder.icmp(lc.ICMP_SLT, idx, count) - result.set_valid(is_valid) - - with builder.if_then(is_valid): - getitem_sig = typing.signature(tupiterty.container.dtype, - tupiterty.container, - types.intp) - getitem_out = getitem_unituple(context, builder, getitem_sig, - [tup, idx]) - result.yield_(getitem_out) - nidx = builder.add(idx, context.get_constant(types.intp, 1)) - builder.store(nidx, iterval.index) - - -@lower_builtin('getitem', types.UniTuple, types.intp) -@lower_builtin('getitem', types.NamedUniTuple, types.intp) -def getitem_unituple(context, builder, sig, args): - tupty, _ = sig.args - tup, idx = args - - errmsg_oob = ("tuple index out of range",) - - if len(tupty) == 0: - # Empty tuple. 
- - # Always branch and raise IndexError - with builder.if_then(cgutils.true_bit): - context.call_conv.return_user_exc(builder, IndexError, - errmsg_oob) - # This is unreachable in runtime, - # but it exists to not terminate the current basicblock. - res = context.get_constant_null(sig.return_type) - return impl_ret_untracked(context, builder, - sig.return_type, res) - else: - # The tuple is not empty - bbelse = builder.append_basic_block("switch.else") - bbend = builder.append_basic_block("switch.end") - switch = builder.switch(idx, bbelse) - - with builder.goto_block(bbelse): - context.call_conv.return_user_exc(builder, IndexError, - errmsg_oob) - - lrtty = context.get_value_type(tupty.dtype) - with builder.goto_block(bbend): - phinode = builder.phi(lrtty) - - for i in range(tupty.count): - ki = context.get_constant(types.intp, i) - bbi = builder.append_basic_block("switch.%d" % i) - switch.add_case(ki, bbi) - with builder.goto_block(bbi): - value = builder.extract_value(tup, i) - builder.branch(bbend) - phinode.add_incoming(value, bbi) - - builder.position_at_end(bbend) - res = phinode - assert sig.return_type == tupty.dtype - return impl_ret_borrowed(context, builder, sig.return_type, res) - - -@lower_builtin('static_getitem', types.BaseTuple, types.Const) -def static_getitem_tuple(context, builder, sig, args): - tupty, _ = sig.args - tup, idx = args - if isinstance(idx, int): - if idx < 0: - idx += len(tupty) - if not 0 <= idx < len(tupty): - raise IndexError("cannot index at %d in %s" % (idx, tupty)) - res = builder.extract_value(tup, idx) - elif isinstance(idx, slice): - items = cgutils.unpack_tuple(builder, tup)[idx] - res = context.make_tuple(builder, sig.return_type, items) - else: - raise NotImplementedError("unexpected index %r for %s" - % (idx, sig.args[0])) - return impl_ret_borrowed(context, builder, sig.return_type, res) - - -#------------------------------------------------------------------------------ -# Implicit conversion - 
-@lower_cast(types.BaseTuple, types.BaseTuple) -def tuple_to_tuple(context, builder, fromty, toty, val): - if (isinstance(fromty, types.BaseNamedTuple) - or isinstance(toty, types.BaseNamedTuple)): - # Disallowed by typing layer - raise NotImplementedError - - if len(fromty) != len(toty): - # Disallowed by typing layer - raise NotImplementedError - - olditems = cgutils.unpack_tuple(builder, val, len(fromty)) - items = [context.cast(builder, v, f, t) - for v, f, t in zip(olditems, fromty, toty)] - return context.make_tuple(builder, toty, items) - - -#------------------------------------------------------------------------------ -# Methods - -@overload_method(types.BaseTuple, 'index') -def tuple_index(tup, value): - - def tuple_index_impl(tup, value): - for i in range(len(tup)): - if tup[i] == value: - return i - raise ValueError("tuple.index(x): x not in tuple") - - return tuple_index_impl diff --git a/numba/numba/targets/ufunc_db.py b/numba/numba/targets/ufunc_db.py deleted file mode 100644 index 1852ae6a9..000000000 --- a/numba/numba/targets/ufunc_db.py +++ /dev/null @@ -1,994 +0,0 @@ -"""This file contains information on how to translate different ufuncs -into numba. It is a database of different ufuncs and how each of its -loops maps to a function that implements the inner kernel of that ufunc -(the inner kernel being the per-element function). - -Use the function get_ufunc_info to get the information related to the -ufunc -""" - -from __future__ import print_function, division, absolute_import - -import numpy as np - - -# this is lazily initialized to avoid circular imports -_ufunc_db = None - -def _lazy_init_db(): - global _ufunc_db - - if _ufunc_db is None: - _ufunc_db = {} - _fill_ufunc_db(_ufunc_db) - - -def get_ufuncs(): - """obtain a list of supported ufuncs in the db""" - _lazy_init_db() - return _ufunc_db.keys() - - -def get_ufunc_info(ufunc_key): - """get the lowering information for the ufunc with key ufunc_key. 
- - The lowering information is a dictionary that maps from a numpy - loop string (as given by the ufunc types attribute) to a function - that handles code generation for a scalar version of the ufunc - (that is, generates the "per element" operation"). - - raises a KeyError if the ufunc is not in the ufunc_db - """ - _lazy_init_db() - return _ufunc_db[ufunc_key] - - -def _fill_ufunc_db(ufunc_db): - # some of these imports would cause a problem of circular - # imports if done at global scope when importing the numba - # module. - from . import numbers, npyfuncs, mathimpl, cmathimpl - from numba import numpy_support - - v = numpy_support.version - - ufunc_db[np.negative] = { - '?->?': numbers.int_invert_impl, - 'b->b': numbers.int_negate_impl, - 'B->B': numbers.int_negate_impl, - 'h->h': numbers.int_negate_impl, - 'H->H': numbers.int_negate_impl, - 'i->i': numbers.int_negate_impl, - 'I->I': numbers.int_negate_impl, - 'l->l': numbers.int_negate_impl, - 'L->L': numbers.int_negate_impl, - 'q->q': numbers.int_negate_impl, - 'Q->Q': numbers.int_negate_impl, - 'f->f': numbers.real_negate_impl, - 'd->d': numbers.real_negate_impl, - 'F->F': numbers.complex_negate_impl, - 'D->D': numbers.complex_negate_impl, - } - - ufunc_db[np.absolute] = { - '?->?': numbers.int_abs_impl, - 'b->b': numbers.int_abs_impl, - 'B->B': numbers.uint_abs_impl, - 'h->h': numbers.int_abs_impl, - 'H->H': numbers.uint_abs_impl, - 'i->i': numbers.int_abs_impl, - 'I->I': numbers.uint_abs_impl, - 'l->l': numbers.int_abs_impl, - 'L->L': numbers.uint_abs_impl, - 'q->q': numbers.int_abs_impl, - 'Q->Q': numbers.uint_abs_impl, - 'f->f': numbers.real_abs_impl, - 'd->d': numbers.real_abs_impl, - 'F->f': numbers.complex_abs_impl, - 'D->d': numbers.complex_abs_impl, - } - - ufunc_db[np.sign] = { - 'b->b': numbers.int_sign_impl, - 'B->B': numbers.int_sign_impl, - 'h->h': numbers.int_sign_impl, - 'H->H': numbers.int_sign_impl, - 'i->i': numbers.int_sign_impl, - 'I->I': numbers.int_sign_impl, - 'l->l': 
numbers.int_sign_impl, - 'L->L': numbers.int_sign_impl, - 'q->q': numbers.int_sign_impl, - 'Q->Q': numbers.int_sign_impl, - 'f->f': numbers.real_sign_impl, - 'd->d': numbers.real_sign_impl, - 'F->F': npyfuncs.np_complex_sign_impl, - 'D->D': npyfuncs.np_complex_sign_impl, - } - - ufunc_db[np.add] = { - '??->?': numbers.int_or_impl, - 'bb->b': numbers.int_add_impl, - 'BB->B': numbers.int_add_impl, - 'hh->h': numbers.int_add_impl, - 'HH->H': numbers.int_add_impl, - 'ii->i': numbers.int_add_impl, - 'II->I': numbers.int_add_impl, - 'll->l': numbers.int_add_impl, - 'LL->L': numbers.int_add_impl, - 'qq->q': numbers.int_add_impl, - 'QQ->Q': numbers.int_add_impl, - 'ff->f': numbers.real_add_impl, - 'dd->d': numbers.real_add_impl, - 'FF->F': numbers.complex_add_impl, - 'DD->D': numbers.complex_add_impl, - } - - ufunc_db[np.subtract] = { - '??->?': numbers.int_xor_impl, - 'bb->b': numbers.int_sub_impl, - 'BB->B': numbers.int_sub_impl, - 'hh->h': numbers.int_sub_impl, - 'HH->H': numbers.int_sub_impl, - 'ii->i': numbers.int_sub_impl, - 'II->I': numbers.int_sub_impl, - 'll->l': numbers.int_sub_impl, - 'LL->L': numbers.int_sub_impl, - 'qq->q': numbers.int_sub_impl, - 'QQ->Q': numbers.int_sub_impl, - 'ff->f': numbers.real_sub_impl, - 'dd->d': numbers.real_sub_impl, - 'FF->F': numbers.complex_sub_impl, - 'DD->D': numbers.complex_sub_impl, - } - - ufunc_db[np.multiply] = { - '??->?': numbers.int_and_impl, - 'bb->b': numbers.int_mul_impl, - 'BB->B': numbers.int_mul_impl, - 'hh->h': numbers.int_mul_impl, - 'HH->H': numbers.int_mul_impl, - 'ii->i': numbers.int_mul_impl, - 'II->I': numbers.int_mul_impl, - 'll->l': numbers.int_mul_impl, - 'LL->L': numbers.int_mul_impl, - 'qq->q': numbers.int_mul_impl, - 'QQ->Q': numbers.int_mul_impl, - 'ff->f': numbers.real_mul_impl, - 'dd->d': numbers.real_mul_impl, - 'FF->F': numbers.complex_mul_impl, - 'DD->D': numbers.complex_mul_impl, - } - - if np.divide != np.true_divide: - ufunc_db[np.divide] = { - 'bb->b': npyfuncs.np_int_sdiv_impl, - 'BB->B': 
npyfuncs.np_int_udiv_impl, - 'hh->h': npyfuncs.np_int_sdiv_impl, - 'HH->H': npyfuncs.np_int_udiv_impl, - 'ii->i': npyfuncs.np_int_sdiv_impl, - 'II->I': npyfuncs.np_int_udiv_impl, - 'll->l': npyfuncs.np_int_sdiv_impl, - 'LL->L': npyfuncs.np_int_udiv_impl, - 'qq->q': npyfuncs.np_int_sdiv_impl, - 'QQ->Q': npyfuncs.np_int_udiv_impl, - 'ff->f': npyfuncs.np_real_div_impl, - 'dd->d': npyfuncs.np_real_div_impl, - 'FF->F': npyfuncs.np_complex_div_impl, - 'DD->D': npyfuncs.np_complex_div_impl, - } - - ufunc_db[np.true_divide] = { - 'bb->d': npyfuncs.np_int_truediv_impl, - 'BB->d': npyfuncs.np_int_truediv_impl, - 'hh->d': npyfuncs.np_int_truediv_impl, - 'HH->d': npyfuncs.np_int_truediv_impl, - 'ii->d': npyfuncs.np_int_truediv_impl, - 'II->d': npyfuncs.np_int_truediv_impl, - 'll->d': npyfuncs.np_int_truediv_impl, - 'LL->d': npyfuncs.np_int_truediv_impl, - 'qq->d': npyfuncs.np_int_truediv_impl, - 'QQ->d': npyfuncs.np_int_truediv_impl, - 'ff->f': npyfuncs.np_real_div_impl, - 'dd->d': npyfuncs.np_real_div_impl, - 'FF->F': npyfuncs.np_complex_div_impl, - 'DD->D': npyfuncs.np_complex_div_impl, - } - - ufunc_db[np.floor_divide] = { - 'bb->b': npyfuncs.np_int_sdiv_impl, - 'BB->B': npyfuncs.np_int_udiv_impl, - 'hh->h': npyfuncs.np_int_sdiv_impl, - 'HH->H': npyfuncs.np_int_udiv_impl, - 'ii->i': npyfuncs.np_int_sdiv_impl, - 'II->I': npyfuncs.np_int_udiv_impl, - 'll->l': npyfuncs.np_int_sdiv_impl, - 'LL->L': npyfuncs.np_int_udiv_impl, - 'qq->q': npyfuncs.np_int_sdiv_impl, - 'QQ->Q': npyfuncs.np_int_udiv_impl, - 'ff->f': npyfuncs.np_real_floor_div_impl, - 'dd->d': npyfuncs.np_real_floor_div_impl, - 'FF->F': npyfuncs.np_complex_floor_div_impl, - 'DD->D': npyfuncs.np_complex_floor_div_impl, - } - - ufunc_db[np.remainder] = { - 'bb->b': npyfuncs.np_int_srem_impl, - 'BB->B': npyfuncs.np_int_urem_impl, - 'hh->h': npyfuncs.np_int_srem_impl, - 'HH->H': npyfuncs.np_int_urem_impl, - 'ii->i': npyfuncs.np_int_srem_impl, - 'II->I': npyfuncs.np_int_urem_impl, - 'll->l': npyfuncs.np_int_srem_impl, - 
'LL->L': npyfuncs.np_int_urem_impl, - 'qq->q': npyfuncs.np_int_srem_impl, - 'QQ->Q': npyfuncs.np_int_urem_impl, - 'ff->f': npyfuncs.np_real_mod_impl, - 'dd->d': npyfuncs.np_real_mod_impl, - } - - ufunc_db[np.fmod] = { - 'bb->b': npyfuncs.np_int_fmod_impl, - 'BB->B': npyfuncs.np_int_fmod_impl, - 'hh->h': npyfuncs.np_int_fmod_impl, - 'HH->H': npyfuncs.np_int_fmod_impl, - 'ii->i': npyfuncs.np_int_fmod_impl, - 'II->I': npyfuncs.np_int_fmod_impl, - 'll->l': npyfuncs.np_int_fmod_impl, - 'LL->L': npyfuncs.np_int_fmod_impl, - 'qq->q': npyfuncs.np_int_fmod_impl, - 'QQ->Q': npyfuncs.np_int_fmod_impl, - 'ff->f': npyfuncs.np_real_fmod_impl, - 'dd->d': npyfuncs.np_real_fmod_impl, - } - - ufunc_db[np.logaddexp] = { - 'ff->f': npyfuncs.np_real_logaddexp_impl, - 'dd->d': npyfuncs.np_real_logaddexp_impl, - } - - ufunc_db[np.logaddexp2] = { - 'ff->f': npyfuncs.np_real_logaddexp2_impl, - 'dd->d': npyfuncs.np_real_logaddexp2_impl, - } - - ufunc_db[np.power] = { - 'bb->b': numbers.int_power_impl, - 'BB->B': numbers.int_power_impl, - 'hh->h': numbers.int_power_impl, - 'HH->H': numbers.int_power_impl, - 'ii->i': numbers.int_power_impl, - 'II->I': numbers.int_power_impl, - 'll->l': numbers.int_power_impl, - 'LL->L': numbers.int_power_impl, - 'qq->q': numbers.int_power_impl, - 'QQ->Q': numbers.int_power_impl, - # XXX we would like to use `int_power_impl` for real ** integer - # as well (for better performance), but the current ufunc typing - # rules forbid that - 'ff->f': numbers.real_power_impl, - 'dd->d': numbers.real_power_impl, - 'FF->F': npyfuncs.np_complex_power_impl, - 'DD->D': npyfuncs.np_complex_power_impl, - } - - ufunc_db[np.rint] = { - 'f->f': npyfuncs.np_real_rint_impl, - 'd->d': npyfuncs.np_real_rint_impl, - 'F->F': npyfuncs.np_complex_rint_impl, - 'D->D': npyfuncs.np_complex_rint_impl, - } - - ufunc_db[np.conjugate] = { - 'b->b': numbers.real_conjugate_impl, - 'B->B': numbers.real_conjugate_impl, - 'h->h': numbers.real_conjugate_impl, - 'H->H': numbers.real_conjugate_impl, - 
'i->i': numbers.real_conjugate_impl, - 'I->I': numbers.real_conjugate_impl, - 'l->l': numbers.real_conjugate_impl, - 'L->L': numbers.real_conjugate_impl, - 'q->q': numbers.real_conjugate_impl, - 'Q->Q': numbers.real_conjugate_impl, - 'f->f': numbers.real_conjugate_impl, - 'd->d': numbers.real_conjugate_impl, - 'F->F': numbers.complex_conjugate_impl, - 'D->D': numbers.complex_conjugate_impl, - } - - ufunc_db[np.exp] = { - 'f->f': npyfuncs.np_real_exp_impl, - 'd->d': npyfuncs.np_real_exp_impl, - 'F->F': npyfuncs.np_complex_exp_impl, - 'D->D': npyfuncs.np_complex_exp_impl, - } - - ufunc_db[np.exp2] = { - 'f->f': npyfuncs.np_real_exp2_impl, - 'd->d': npyfuncs.np_real_exp2_impl, - 'F->F': npyfuncs.np_complex_exp2_impl, - 'D->D': npyfuncs.np_complex_exp2_impl, - } - - ufunc_db[np.log] = { - 'f->f': npyfuncs.np_real_log_impl, - 'd->d': npyfuncs.np_real_log_impl, - 'F->F': npyfuncs.np_complex_log_impl, - 'D->D': npyfuncs.np_complex_log_impl, - } - - ufunc_db[np.log2] = { - 'f->f': npyfuncs.np_real_log2_impl, - 'd->d': npyfuncs.np_real_log2_impl, - 'F->F': npyfuncs.np_complex_log2_impl, - 'D->D': npyfuncs.np_complex_log2_impl, - } - - ufunc_db[np.log10] = { - 'f->f': npyfuncs.np_real_log10_impl, - 'd->d': npyfuncs.np_real_log10_impl, - 'F->F': npyfuncs.np_complex_log10_impl, - 'D->D': npyfuncs.np_complex_log10_impl, - } - - ufunc_db[np.expm1] = { - 'f->f': npyfuncs.np_real_expm1_impl, - 'd->d': npyfuncs.np_real_expm1_impl, - 'F->F': npyfuncs.np_complex_expm1_impl, - 'D->D': npyfuncs.np_complex_expm1_impl, - } - - ufunc_db[np.log1p] = { - 'f->f': npyfuncs.np_real_log1p_impl, - 'd->d': npyfuncs.np_real_log1p_impl, - 'F->F': npyfuncs.np_complex_log1p_impl, - 'D->D': npyfuncs.np_complex_log1p_impl, - } - - ufunc_db[np.sqrt] = { - 'f->f': npyfuncs.np_real_sqrt_impl, - 'd->d': npyfuncs.np_real_sqrt_impl, - 'F->F': npyfuncs.np_complex_sqrt_impl, - 'D->D': npyfuncs.np_complex_sqrt_impl, - } - - ufunc_db[np.square] = { - 'b->b': npyfuncs.np_int_square_impl, - 'B->B': 
npyfuncs.np_int_square_impl, - 'h->h': npyfuncs.np_int_square_impl, - 'H->H': npyfuncs.np_int_square_impl, - 'i->i': npyfuncs.np_int_square_impl, - 'I->I': npyfuncs.np_int_square_impl, - 'l->l': npyfuncs.np_int_square_impl, - 'L->L': npyfuncs.np_int_square_impl, - 'q->q': npyfuncs.np_int_square_impl, - 'Q->Q': npyfuncs.np_int_square_impl, - 'f->f': npyfuncs.np_real_square_impl, - 'd->d': npyfuncs.np_real_square_impl, - 'F->F': npyfuncs.np_complex_square_impl, - 'D->D': npyfuncs.np_complex_square_impl, - } - - ufunc_db[np.reciprocal] = { - 'b->b': npyfuncs.np_int_reciprocal_impl, - 'B->B': npyfuncs.np_int_reciprocal_impl, - 'h->h': npyfuncs.np_int_reciprocal_impl, - 'H->H': npyfuncs.np_int_reciprocal_impl, - 'i->i': npyfuncs.np_int_reciprocal_impl, - 'I->I': npyfuncs.np_int_reciprocal_impl, - 'l->l': npyfuncs.np_int_reciprocal_impl, - 'L->L': npyfuncs.np_int_reciprocal_impl, - 'q->q': npyfuncs.np_int_reciprocal_impl, - 'Q->Q': npyfuncs.np_int_reciprocal_impl, - 'f->f': npyfuncs.np_real_reciprocal_impl, - 'd->d': npyfuncs.np_real_reciprocal_impl, - 'F->F': npyfuncs.np_complex_reciprocal_impl, - 'D->D': npyfuncs.np_complex_reciprocal_impl, - } - - ufunc_db[np.sin] = { - 'f->f': npyfuncs.np_real_sin_impl, - 'd->d': npyfuncs.np_real_sin_impl, - 'F->F': npyfuncs.np_complex_sin_impl, - 'D->D': npyfuncs.np_complex_sin_impl, - } - - ufunc_db[np.cos] = { - 'f->f': npyfuncs.np_real_cos_impl, - 'd->d': npyfuncs.np_real_cos_impl, - 'F->F': npyfuncs.np_complex_cos_impl, - 'D->D': npyfuncs.np_complex_cos_impl, - } - - tan_impl = cmathimpl.tan_impl if v >= (1, 10) else npyfuncs.np_complex_tan_impl - - ufunc_db[np.tan] = { - 'f->f': npyfuncs.np_real_tan_impl, - 'd->d': npyfuncs.np_real_tan_impl, - 'F->F': tan_impl, - 'D->D': tan_impl, - } - - arcsin_impl = cmathimpl.asin_impl if v >= (1, 10) else npyfuncs.np_complex_asin_impl - - ufunc_db[np.arcsin] = { - 'f->f': npyfuncs.np_real_asin_impl, - 'd->d': npyfuncs.np_real_asin_impl, - 'F->F': arcsin_impl, - 'D->D': arcsin_impl, - } - - 
ufunc_db[np.arccos] = { - 'f->f': npyfuncs.np_real_acos_impl, - 'd->d': npyfuncs.np_real_acos_impl, - 'F->F': cmathimpl.acos_impl, - 'D->D': cmathimpl.acos_impl, - } - - arctan_impl = cmathimpl.atan_impl if v >= (1, 10) else npyfuncs.np_complex_atan_impl - - ufunc_db[np.arctan] = { - 'f->f': npyfuncs.np_real_atan_impl, - 'd->d': npyfuncs.np_real_atan_impl, - 'F->F': arctan_impl, - 'D->D': arctan_impl, - } - - ufunc_db[np.arctan2] = { - 'ff->f': npyfuncs.np_real_atan2_impl, - 'dd->d': npyfuncs.np_real_atan2_impl, - } - - ufunc_db[np.hypot] = { - 'ff->f': npyfuncs.np_real_hypot_impl, - 'dd->d': npyfuncs.np_real_hypot_impl, - } - - ufunc_db[np.sinh] = { - 'f->f': npyfuncs.np_real_sinh_impl, - 'd->d': npyfuncs.np_real_sinh_impl, - 'F->F': npyfuncs.np_complex_sinh_impl, - 'D->D': npyfuncs.np_complex_sinh_impl, - } - - ufunc_db[np.cosh] = { - 'f->f': npyfuncs.np_real_cosh_impl, - 'd->d': npyfuncs.np_real_cosh_impl, - 'F->F': npyfuncs.np_complex_cosh_impl, - 'D->D': npyfuncs.np_complex_cosh_impl, - } - - ufunc_db[np.tanh] = { - 'f->f': npyfuncs.np_real_tanh_impl, - 'd->d': npyfuncs.np_real_tanh_impl, - 'F->F': npyfuncs.np_complex_tanh_impl, - 'D->D': npyfuncs.np_complex_tanh_impl, - } - - arcsinh_impl = cmathimpl.asinh_impl if v >= (1, 10) else npyfuncs.np_complex_asinh_impl - - ufunc_db[np.arcsinh] = { - 'f->f': npyfuncs.np_real_asinh_impl, - 'd->d': npyfuncs.np_real_asinh_impl, - 'F->F': arcsinh_impl, - 'D->D': arcsinh_impl, - } - - ufunc_db[np.arccosh] = { - 'f->f': npyfuncs.np_real_acosh_impl, - 'd->d': npyfuncs.np_real_acosh_impl, - 'F->F': npyfuncs.np_complex_acosh_impl, - 'D->D': npyfuncs.np_complex_acosh_impl, - } - - arctanh_impl = cmathimpl.atanh_impl if v >= (1, 10) else npyfuncs.np_complex_atanh_impl - - ufunc_db[np.arctanh] = { - 'f->f': npyfuncs.np_real_atanh_impl, - 'd->d': npyfuncs.np_real_atanh_impl, - 'F->F': arctanh_impl, - 'D->D': arctanh_impl, - } - - ufunc_db[np.deg2rad] = { - 'f->f': mathimpl.radians_float_impl, - 'd->d': 
mathimpl.radians_float_impl, - } - - ufunc_db[np.radians] = ufunc_db[np.deg2rad] - - ufunc_db[np.rad2deg] = { - 'f->f': mathimpl.degrees_float_impl, - 'd->d': mathimpl.degrees_float_impl, - } - - ufunc_db[np.degrees] = ufunc_db[np.rad2deg] - - ufunc_db[np.floor] = { - 'f->f': npyfuncs.np_real_floor_impl, - 'd->d': npyfuncs.np_real_floor_impl, - } - - ufunc_db[np.ceil] = { - 'f->f': npyfuncs.np_real_ceil_impl, - 'd->d': npyfuncs.np_real_ceil_impl, - } - - ufunc_db[np.trunc] = { - 'f->f': npyfuncs.np_real_trunc_impl, - 'd->d': npyfuncs.np_real_trunc_impl, - } - - ufunc_db[np.fabs] = { - 'f->f': npyfuncs.np_real_fabs_impl, - 'd->d': npyfuncs.np_real_fabs_impl, - } - - # logical ufuncs - ufunc_db[np.greater] = { - '??->?': numbers.int_ugt_impl, - 'bb->?': numbers.int_sgt_impl, - 'BB->?': numbers.int_ugt_impl, - 'hh->?': numbers.int_sgt_impl, - 'HH->?': numbers.int_ugt_impl, - 'ii->?': numbers.int_sgt_impl, - 'II->?': numbers.int_ugt_impl, - 'll->?': numbers.int_sgt_impl, - 'LL->?': numbers.int_ugt_impl, - 'qq->?': numbers.int_sgt_impl, - 'QQ->?': numbers.int_ugt_impl, - 'ff->?': numbers.real_gt_impl, - 'dd->?': numbers.real_gt_impl, - 'FF->?': npyfuncs.np_complex_gt_impl, - 'DD->?': npyfuncs.np_complex_gt_impl, - } - - ufunc_db[np.greater_equal] = { - '??->?': numbers.int_uge_impl, - 'bb->?': numbers.int_sge_impl, - 'BB->?': numbers.int_uge_impl, - 'hh->?': numbers.int_sge_impl, - 'HH->?': numbers.int_uge_impl, - 'ii->?': numbers.int_sge_impl, - 'II->?': numbers.int_uge_impl, - 'll->?': numbers.int_sge_impl, - 'LL->?': numbers.int_uge_impl, - 'qq->?': numbers.int_sge_impl, - 'QQ->?': numbers.int_uge_impl, - 'ff->?': numbers.real_ge_impl, - 'dd->?': numbers.real_ge_impl, - 'FF->?': npyfuncs.np_complex_ge_impl, - 'DD->?': npyfuncs.np_complex_ge_impl, - } - - ufunc_db[np.less] = { - '??->?': numbers.int_ult_impl, - 'bb->?': numbers.int_slt_impl, - 'BB->?': numbers.int_ult_impl, - 'hh->?': numbers.int_slt_impl, - 'HH->?': numbers.int_ult_impl, - 'ii->?': 
numbers.int_slt_impl, - 'II->?': numbers.int_ult_impl, - 'll->?': numbers.int_slt_impl, - 'LL->?': numbers.int_ult_impl, - 'qq->?': numbers.int_slt_impl, - 'QQ->?': numbers.int_ult_impl, - 'ff->?': numbers.real_lt_impl, - 'dd->?': numbers.real_lt_impl, - 'FF->?': npyfuncs.np_complex_lt_impl, - 'DD->?': npyfuncs.np_complex_lt_impl, - } - - ufunc_db[np.less_equal] = { - '??->?': numbers.int_ule_impl, - 'bb->?': numbers.int_sle_impl, - 'BB->?': numbers.int_ule_impl, - 'hh->?': numbers.int_sle_impl, - 'HH->?': numbers.int_ule_impl, - 'ii->?': numbers.int_sle_impl, - 'II->?': numbers.int_ule_impl, - 'll->?': numbers.int_sle_impl, - 'LL->?': numbers.int_ule_impl, - 'qq->?': numbers.int_sle_impl, - 'QQ->?': numbers.int_ule_impl, - 'ff->?': numbers.real_le_impl, - 'dd->?': numbers.real_le_impl, - 'FF->?': npyfuncs.np_complex_le_impl, - 'DD->?': npyfuncs.np_complex_le_impl, - } - - ufunc_db[np.not_equal] = { - '??->?': numbers.int_ne_impl, - 'bb->?': numbers.int_ne_impl, - 'BB->?': numbers.int_ne_impl, - 'hh->?': numbers.int_ne_impl, - 'HH->?': numbers.int_ne_impl, - 'ii->?': numbers.int_ne_impl, - 'II->?': numbers.int_ne_impl, - 'll->?': numbers.int_ne_impl, - 'LL->?': numbers.int_ne_impl, - 'qq->?': numbers.int_ne_impl, - 'QQ->?': numbers.int_ne_impl, - 'ff->?': numbers.real_ne_impl, - 'dd->?': numbers.real_ne_impl, - 'FF->?': npyfuncs.np_complex_ne_impl, - 'DD->?': npyfuncs.np_complex_ne_impl, - } - - ufunc_db[np.equal] = { - '??->?': numbers.int_eq_impl, - 'bb->?': numbers.int_eq_impl, - 'BB->?': numbers.int_eq_impl, - 'hh->?': numbers.int_eq_impl, - 'HH->?': numbers.int_eq_impl, - 'ii->?': numbers.int_eq_impl, - 'II->?': numbers.int_eq_impl, - 'll->?': numbers.int_eq_impl, - 'LL->?': numbers.int_eq_impl, - 'qq->?': numbers.int_eq_impl, - 'QQ->?': numbers.int_eq_impl, - 'ff->?': numbers.real_eq_impl, - 'dd->?': numbers.real_eq_impl, - 'FF->?': npyfuncs.np_complex_eq_impl, - 'DD->?': npyfuncs.np_complex_eq_impl, - } - - ufunc_db[np.logical_and] = { - '??->?': 
npyfuncs.np_logical_and_impl, - 'bb->?': npyfuncs.np_logical_and_impl, - 'BB->?': npyfuncs.np_logical_and_impl, - 'hh->?': npyfuncs.np_logical_and_impl, - 'HH->?': npyfuncs.np_logical_and_impl, - 'ii->?': npyfuncs.np_logical_and_impl, - 'II->?': npyfuncs.np_logical_and_impl, - 'll->?': npyfuncs.np_logical_and_impl, - 'LL->?': npyfuncs.np_logical_and_impl, - 'qq->?': npyfuncs.np_logical_and_impl, - 'QQ->?': npyfuncs.np_logical_and_impl, - 'ff->?': npyfuncs.np_logical_and_impl, - 'dd->?': npyfuncs.np_logical_and_impl, - 'FF->?': npyfuncs.np_complex_logical_and_impl, - 'DD->?': npyfuncs.np_complex_logical_and_impl, - } - - ufunc_db[np.logical_or] = { - '??->?': npyfuncs.np_logical_or_impl, - 'bb->?': npyfuncs.np_logical_or_impl, - 'BB->?': npyfuncs.np_logical_or_impl, - 'hh->?': npyfuncs.np_logical_or_impl, - 'HH->?': npyfuncs.np_logical_or_impl, - 'ii->?': npyfuncs.np_logical_or_impl, - 'II->?': npyfuncs.np_logical_or_impl, - 'll->?': npyfuncs.np_logical_or_impl, - 'LL->?': npyfuncs.np_logical_or_impl, - 'qq->?': npyfuncs.np_logical_or_impl, - 'QQ->?': npyfuncs.np_logical_or_impl, - 'ff->?': npyfuncs.np_logical_or_impl, - 'dd->?': npyfuncs.np_logical_or_impl, - 'FF->?': npyfuncs.np_complex_logical_or_impl, - 'DD->?': npyfuncs.np_complex_logical_or_impl, - } - - ufunc_db[np.logical_xor] = { - '??->?': npyfuncs.np_logical_xor_impl, - 'bb->?': npyfuncs.np_logical_xor_impl, - 'BB->?': npyfuncs.np_logical_xor_impl, - 'hh->?': npyfuncs.np_logical_xor_impl, - 'HH->?': npyfuncs.np_logical_xor_impl, - 'ii->?': npyfuncs.np_logical_xor_impl, - 'II->?': npyfuncs.np_logical_xor_impl, - 'll->?': npyfuncs.np_logical_xor_impl, - 'LL->?': npyfuncs.np_logical_xor_impl, - 'qq->?': npyfuncs.np_logical_xor_impl, - 'QQ->?': npyfuncs.np_logical_xor_impl, - 'ff->?': npyfuncs.np_logical_xor_impl, - 'dd->?': npyfuncs.np_logical_xor_impl, - 'FF->?': npyfuncs.np_complex_logical_xor_impl, - 'DD->?': npyfuncs.np_complex_logical_xor_impl, - } - - ufunc_db[np.logical_not] = { - '?->?': 
npyfuncs.np_logical_not_impl, - 'b->?': npyfuncs.np_logical_not_impl, - 'B->?': npyfuncs.np_logical_not_impl, - 'h->?': npyfuncs.np_logical_not_impl, - 'H->?': npyfuncs.np_logical_not_impl, - 'i->?': npyfuncs.np_logical_not_impl, - 'I->?': npyfuncs.np_logical_not_impl, - 'l->?': npyfuncs.np_logical_not_impl, - 'L->?': npyfuncs.np_logical_not_impl, - 'q->?': npyfuncs.np_logical_not_impl, - 'Q->?': npyfuncs.np_logical_not_impl, - 'f->?': npyfuncs.np_logical_not_impl, - 'd->?': npyfuncs.np_logical_not_impl, - 'F->?': npyfuncs.np_complex_logical_not_impl, - 'D->?': npyfuncs.np_complex_logical_not_impl, - } - - ufunc_db[np.maximum] = { - '??->?': npyfuncs.np_logical_or_impl, - 'bb->b': npyfuncs.np_int_smax_impl, - 'BB->B': npyfuncs.np_int_umax_impl, - 'hh->h': npyfuncs.np_int_smax_impl, - 'HH->H': npyfuncs.np_int_umax_impl, - 'ii->i': npyfuncs.np_int_smax_impl, - 'II->I': npyfuncs.np_int_umax_impl, - 'll->l': npyfuncs.np_int_smax_impl, - 'LL->L': npyfuncs.np_int_umax_impl, - 'qq->q': npyfuncs.np_int_smax_impl, - 'QQ->Q': npyfuncs.np_int_umax_impl, - 'ff->f': npyfuncs.np_real_maximum_impl, - 'dd->d': npyfuncs.np_real_maximum_impl, - 'FF->F': npyfuncs.np_complex_maximum_impl, - 'DD->D': npyfuncs.np_complex_maximum_impl, - } - - ufunc_db[np.minimum] = { - '??->?': npyfuncs.np_logical_and_impl, - 'bb->b': npyfuncs.np_int_smin_impl, - 'BB->B': npyfuncs.np_int_umin_impl, - 'hh->h': npyfuncs.np_int_smin_impl, - 'HH->H': npyfuncs.np_int_umin_impl, - 'ii->i': npyfuncs.np_int_smin_impl, - 'II->I': npyfuncs.np_int_umin_impl, - 'll->l': npyfuncs.np_int_smin_impl, - 'LL->L': npyfuncs.np_int_umin_impl, - 'qq->q': npyfuncs.np_int_smin_impl, - 'QQ->Q': npyfuncs.np_int_umin_impl, - 'ff->f': npyfuncs.np_real_minimum_impl, - 'dd->d': npyfuncs.np_real_minimum_impl, - 'FF->F': npyfuncs.np_complex_minimum_impl, - 'DD->D': npyfuncs.np_complex_minimum_impl, - } - - ufunc_db[np.fmax] = { - '??->?': npyfuncs.np_logical_or_impl, - 'bb->b': npyfuncs.np_int_smax_impl, - 'BB->B': 
npyfuncs.np_int_umax_impl, - 'hh->h': npyfuncs.np_int_smax_impl, - 'HH->H': npyfuncs.np_int_umax_impl, - 'ii->i': npyfuncs.np_int_smax_impl, - 'II->I': npyfuncs.np_int_umax_impl, - 'll->l': npyfuncs.np_int_smax_impl, - 'LL->L': npyfuncs.np_int_umax_impl, - 'qq->q': npyfuncs.np_int_smax_impl, - 'QQ->Q': npyfuncs.np_int_umax_impl, - 'ff->f': npyfuncs.np_real_fmax_impl, - 'dd->d': npyfuncs.np_real_fmax_impl, - 'FF->F': npyfuncs.np_complex_fmax_impl, - 'DD->D': npyfuncs.np_complex_fmax_impl, - } - - ufunc_db[np.fmin] = { - '??->?': npyfuncs.np_logical_and_impl, - 'bb->b': npyfuncs.np_int_smin_impl, - 'BB->B': npyfuncs.np_int_umin_impl, - 'hh->h': npyfuncs.np_int_smin_impl, - 'HH->H': npyfuncs.np_int_umin_impl, - 'ii->i': npyfuncs.np_int_smin_impl, - 'II->I': npyfuncs.np_int_umin_impl, - 'll->l': npyfuncs.np_int_smin_impl, - 'LL->L': npyfuncs.np_int_umin_impl, - 'qq->q': npyfuncs.np_int_smin_impl, - 'QQ->Q': npyfuncs.np_int_umin_impl, - 'ff->f': npyfuncs.np_real_fmin_impl, - 'dd->d': npyfuncs.np_real_fmin_impl, - 'FF->F': npyfuncs.np_complex_fmin_impl, - 'DD->D': npyfuncs.np_complex_fmin_impl, - } - - # misc floating functions - ufunc_db[np.isnan] = { - 'f->?': npyfuncs.np_real_isnan_impl, - 'd->?': npyfuncs.np_real_isnan_impl, - 'F->?': npyfuncs.np_complex_isnan_impl, - 'D->?': npyfuncs.np_complex_isnan_impl, - } - - ufunc_db[np.isinf] = { - 'f->?': npyfuncs.np_real_isinf_impl, - 'd->?': npyfuncs.np_real_isinf_impl, - 'F->?': npyfuncs.np_complex_isinf_impl, - 'D->?': npyfuncs.np_complex_isinf_impl, - } - - ufunc_db[np.isfinite] = { - 'f->?': npyfuncs.np_real_isfinite_impl, - 'd->?': npyfuncs.np_real_isfinite_impl, - 'F->?': npyfuncs.np_complex_isfinite_impl, - 'D->?': npyfuncs.np_complex_isfinite_impl, - } - - ufunc_db[np.signbit] = { - 'f->?': npyfuncs.np_real_signbit_impl, - 'd->?': npyfuncs.np_real_signbit_impl, - } - - ufunc_db[np.copysign] = { - 'ff->f': npyfuncs.np_real_copysign_impl, - 'dd->d': npyfuncs.np_real_copysign_impl, - } - - ufunc_db[np.nextafter] = { - 
'ff->f': npyfuncs.np_real_nextafter_impl, - 'dd->d': npyfuncs.np_real_nextafter_impl, - } - - ufunc_db[np.spacing] = { - 'f->f': npyfuncs.np_real_spacing_impl, - 'd->d': npyfuncs.np_real_spacing_impl, - } - - ufunc_db[np.ldexp] = { - 'fi->f': npyfuncs.np_real_ldexp_impl, - 'fl->f': npyfuncs.np_real_ldexp_impl, - 'di->d': npyfuncs.np_real_ldexp_impl, - 'dl->d': npyfuncs.np_real_ldexp_impl, - } - - # bit twiddling functions - ufunc_db[np.bitwise_and] = { - '??->?': numbers.int_and_impl, - 'bb->b': numbers.int_and_impl, - 'BB->B': numbers.int_and_impl, - 'hh->h': numbers.int_and_impl, - 'HH->H': numbers.int_and_impl, - 'ii->i': numbers.int_and_impl, - 'II->I': numbers.int_and_impl, - 'll->l': numbers.int_and_impl, - 'LL->L': numbers.int_and_impl, - 'qq->q': numbers.int_and_impl, - 'QQ->Q': numbers.int_and_impl, - } - - ufunc_db[np.bitwise_or] = { - '??->?': numbers.int_or_impl, - 'bb->b': numbers.int_or_impl, - 'BB->B': numbers.int_or_impl, - 'hh->h': numbers.int_or_impl, - 'HH->H': numbers.int_or_impl, - 'ii->i': numbers.int_or_impl, - 'II->I': numbers.int_or_impl, - 'll->l': numbers.int_or_impl, - 'LL->L': numbers.int_or_impl, - 'qq->q': numbers.int_or_impl, - 'QQ->Q': numbers.int_or_impl, - } - - ufunc_db[np.bitwise_xor] = { - '??->?': numbers.int_xor_impl, - 'bb->b': numbers.int_xor_impl, - 'BB->B': numbers.int_xor_impl, - 'hh->h': numbers.int_xor_impl, - 'HH->H': numbers.int_xor_impl, - 'ii->i': numbers.int_xor_impl, - 'II->I': numbers.int_xor_impl, - 'll->l': numbers.int_xor_impl, - 'LL->L': numbers.int_xor_impl, - 'qq->q': numbers.int_xor_impl, - 'QQ->Q': numbers.int_xor_impl, - } - - ufunc_db[np.invert] = { # aka np.bitwise_not - '?->?': numbers.int_invert_impl, - 'b->b': numbers.int_invert_impl, - 'B->B': numbers.int_invert_impl, - 'h->h': numbers.int_invert_impl, - 'H->H': numbers.int_invert_impl, - 'i->i': numbers.int_invert_impl, - 'I->I': numbers.int_invert_impl, - 'l->l': numbers.int_invert_impl, - 'L->L': numbers.int_invert_impl, - 'q->q': 
numbers.int_invert_impl, - 'Q->Q': numbers.int_invert_impl, - } - - ufunc_db[np.left_shift] = { - 'bb->b': numbers.int_shl_impl, - 'BB->B': numbers.int_shl_impl, - 'hh->h': numbers.int_shl_impl, - 'HH->H': numbers.int_shl_impl, - 'ii->i': numbers.int_shl_impl, - 'II->I': numbers.int_shl_impl, - 'll->l': numbers.int_shl_impl, - 'LL->L': numbers.int_shl_impl, - 'qq->q': numbers.int_shl_impl, - 'QQ->Q': numbers.int_shl_impl, - } - - ufunc_db[np.right_shift] = { - 'bb->b': numbers.int_shr_impl, - 'BB->B': numbers.int_shr_impl, - 'hh->h': numbers.int_shr_impl, - 'HH->H': numbers.int_shr_impl, - 'ii->i': numbers.int_shr_impl, - 'II->I': numbers.int_shr_impl, - 'll->l': numbers.int_shr_impl, - 'LL->L': numbers.int_shr_impl, - 'qq->q': numbers.int_shr_impl, - 'QQ->Q': numbers.int_shr_impl, - } - - # Inject datetime64 support - from . import npdatetime - ufunc_db[np.negative].update({ - 'm->m': npdatetime.timedelta_neg_impl, - }) - ufunc_db[np.absolute].update({ - 'm->m': npdatetime.timedelta_abs_impl, - }) - ufunc_db[np.sign].update({ - 'm->m': npdatetime.timedelta_sign_impl, - }) - ufunc_db[np.add].update({ - 'mm->m': npdatetime.timedelta_add_impl, - 'Mm->M': npdatetime.datetime_plus_timedelta, - 'mM->M': npdatetime.timedelta_plus_datetime, - }) - ufunc_db[np.subtract].update({ - 'mm->m': npdatetime.timedelta_sub_impl, - 'Mm->M': npdatetime.datetime_minus_timedelta, - 'MM->m': npdatetime.datetime_minus_datetime, - }) - ufunc_db[np.multiply].update({ - 'mq->m': npdatetime.timedelta_times_number, - 'md->m': npdatetime.timedelta_times_number, - 'qm->m': npdatetime.number_times_timedelta, - 'dm->m': npdatetime.number_times_timedelta, - }) - if np.divide != np.true_divide: - ufunc_db[np.divide].update({ - 'mq->m': npdatetime.timedelta_over_number, - 'md->m': npdatetime.timedelta_over_number, - 'mm->d': npdatetime.timedelta_over_timedelta, - }) - ufunc_db[np.true_divide].update({ - 'mq->m': npdatetime.timedelta_over_number, - 'md->m': npdatetime.timedelta_over_number, - 
'mm->d': npdatetime.timedelta_over_timedelta, - }) - ufunc_db[np.floor_divide].update({ - 'mq->m': npdatetime.timedelta_over_number, - 'md->m': npdatetime.timedelta_over_number, - }) - ufunc_db[np.equal].update({ - 'MM->?': npdatetime.datetime_eq_datetime_impl, - 'mm->?': npdatetime.timedelta_eq_timedelta_impl, - }) - ufunc_db[np.not_equal].update({ - 'MM->?': npdatetime.datetime_ne_datetime_impl, - 'mm->?': npdatetime.timedelta_ne_timedelta_impl, - }) - ufunc_db[np.less].update({ - 'MM->?': npdatetime.datetime_lt_datetime_impl, - 'mm->?': npdatetime.timedelta_lt_timedelta_impl, - }) - ufunc_db[np.less_equal].update({ - 'MM->?': npdatetime.datetime_le_datetime_impl, - 'mm->?': npdatetime.timedelta_le_timedelta_impl, - }) - ufunc_db[np.greater].update({ - 'MM->?': npdatetime.datetime_gt_datetime_impl, - 'mm->?': npdatetime.timedelta_gt_timedelta_impl, - }) - ufunc_db[np.greater_equal].update({ - 'MM->?': npdatetime.datetime_ge_datetime_impl, - 'mm->?': npdatetime.timedelta_ge_timedelta_impl, - }) - ufunc_db[np.maximum].update({ - 'MM->M': npdatetime.datetime_max_impl, - 'mm->m': npdatetime.timedelta_max_impl, - }) - ufunc_db[np.minimum].update({ - 'MM->M': npdatetime.datetime_min_impl, - 'mm->m': npdatetime.timedelta_min_impl, - }) - # there is no difference for datetime/timedelta in maximum/fmax - # and minimum/fmin - ufunc_db[np.fmax].update({ - 'MM->M': npdatetime.datetime_max_impl, - 'mm->m': npdatetime.timedelta_max_impl, - }) - ufunc_db[np.fmin].update({ - 'MM->M': npdatetime.datetime_min_impl, - 'mm->m': npdatetime.timedelta_min_impl, - }) diff --git a/numba/numba/testing/__init__.py b/numba/numba/testing/__init__.py deleted file mode 100644 index 5594cf444..000000000 --- a/numba/numba/testing/__init__.py +++ /dev/null @@ -1,71 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import numba.unittest_support as unittest - -import sys -import os -from os.path import join, isfile, relpath, normpath, splitext -from fnmatch import fnmatch 
-import functools - -from .main import NumbaTestProgram, SerialSuite, make_tag_decorator -from numba import config - - -def load_testsuite(loader, dir): - """Find tests in 'dir'.""" - suite = unittest.TestSuite() - files = [] - for f in os.listdir(dir): - path = join(dir, f) - if isfile(path) and fnmatch(f, 'test_*.py'): - files.append(f) - elif isfile(join(path, '__init__.py')): - suite.addTests(loader.discover(path)) - for f in files: - # turn 'f' into a filename relative to the toplevel dir... - f = relpath(join(dir, f), loader._top_level_dir) - # ...and translate it to a module name. - f = splitext(normpath(f.replace(os.path.sep, '.')))[0] - suite.addTests(loader.loadTestsFromName(f)) - return suite - -def allow_interpreter_mode(fn): - """Temporarily re-enable intepreter mode - """ - @functools.wraps(fn) - def _core(*args, **kws): - config.COMPATIBILITY_MODE = True - try: - fn(*args, **kws) - finally: - config.COMPATIBILITY_MODE = False - return _core - - -def run_tests(argv=None, defaultTest=None, topleveldir=None, - xmloutput=None, verbosity=1, nomultiproc=False): - """ - args - ---- - - xmloutput [str or None] - Path of XML output directory (optional) - - verbosity [int] - Verbosity level of tests output - - Returns the TestResult object after running the test *suite*. - """ - - if xmloutput is not None: - import xmlrunner - runner = xmlrunner.XMLTestRunner(output=xmloutput) - else: - runner = None - prog = NumbaTestProgram(argv=argv, - module=None, - defaultTest=defaultTest, - topleveldir=topleveldir, - testRunner=runner, exit=False, - verbosity=verbosity, - nomultiproc=nomultiproc) - return prog.result diff --git a/numba/numba/testing/__main__.py b/numba/numba/testing/__main__.py deleted file mode 100644 index 4049fa208..000000000 --- a/numba/numba/testing/__main__.py +++ /dev/null @@ -1,4 +0,0 @@ -import sys -from . 
import run_tests - -sys.exit(0 if run_tests(sys.argv).wasSuccessful() else 1) diff --git a/numba/numba/testing/ddt.py b/numba/numba/testing/ddt.py deleted file mode 100644 index 338421a0c..000000000 --- a/numba/numba/testing/ddt.py +++ /dev/null @@ -1,241 +0,0 @@ -# -*- coding: utf-8 -*- -# This file is a part of DDT (https://github.com/txels/ddt) -# Copyright 2012-2015 Carles Barrobés and DDT contributors -# For the exact contribution history, see the git revision log. -# DDT is licensed under the MIT License, included in -# https://github.com/txels/ddt/blob/master/LICENSE.md - -import inspect -import json -import os -import re -import sys -from functools import wraps - -__version__ = '1.0.0' - -# These attributes will not conflict with any real python attribute -# They are added to the decorated test method and processed later -# by the `ddt` class decorator. - -DATA_ATTR = '%values' # store the data the test must run with -FILE_ATTR = '%file_path' # store the path to JSON file -UNPACK_ATTR = '%unpack' # remember that we have to unpack values - - -def unpack(func): - """ - Method decorator to add unpack feature. - - """ - setattr(func, UNPACK_ATTR, True) - return func - - -def data(*values): - """ - Method decorator to add to your test methods. - - Should be added to methods of instances of ``unittest.TestCase``. - - """ - def wrapper(func): - setattr(func, DATA_ATTR, values) - return func - return wrapper - - -def file_data(value): - """ - Method decorator to add to your test methods. - - Should be added to methods of instances of ``unittest.TestCase``. - - ``value`` should be a path relative to the directory of the file - containing the decorated ``unittest.TestCase``. The file - should contain JSON encoded data, that can either be a list or a - dict. - - In case of a list, each value in the list will correspond to one - test case, and the value will be concatenated to the test method - name. 
- - In case of a dict, keys will be used as suffixes to the name of the - test case, and values will be fed as test data. - - """ - def wrapper(func): - setattr(func, FILE_ATTR, value) - return func - return wrapper - - -def is_hash_randomized(): - return (((sys.hexversion >= 0x02070300 and - sys.hexversion < 0x03000000) or - (sys.hexversion >= 0x03020300)) and - sys.flags.hash_randomization and - 'PYTHONHASHSEED' not in os.environ) - - -def mk_test_name(name, value, index=0): - """ - Generate a new name for a test case. - - It will take the original test name and append an ordinal index and a - string representation of the value, and convert the result into a valid - python identifier by replacing extraneous characters with ``_``. - - If hash randomization is enabled (a feature available since 2.7.3/3.2.3 - and enabled by default since 3.3) and a "non-trivial" value is passed - this will omit the name argument by default. Set `PYTHONHASHSEED` - to a fixed value before running tests in these cases to get the - names back consistently or use the `__name__` attribute on data values. - - A "trivial" value is a plain scalar, or a tuple or list consisting - only of trivial values. - - """ - - # We avoid doing str(value) if all of the following hold: - # - # * Python version is 2.7.3 or newer (for 2 series) or 3.2.3 or - # newer (for 3 series). Also sys.flags.hash_randomization didn't - # exist before these. - # * sys.flags.hash_randomization is set to True - # * PYTHONHASHSEED is **not** defined in the environment - # * Given `value` argument is not a trivial scalar (None, str, - # int, float). - # - # Trivial scalar values are passed as is in all cases. 
- - trivial_types = (type(None), bool, str, int, float) - try: - trivial_types += (unicode,) - except NameError: - pass - - def is_trivial(value): - if isinstance(value, trivial_types): - return True - - if isinstance(value, (list, tuple)): - return all(map(is_trivial, value)) - - return False - - if is_hash_randomized() and not is_trivial(value): - return "{0}_{1}".format(name, index + 1) - - try: - value = str(value) - except UnicodeEncodeError: - # fallback for python2 - value = value.encode('ascii', 'backslashreplace') - test_name = "{0}_{1}_{2}".format(name, index + 1, value) - return re.sub('\W|^(?=\d)', '_', test_name) - - -def feed_data(func, new_name, *args, **kwargs): - """ - This internal method decorator feeds the test data item to the test. - - """ - @wraps(func) - def wrapper(self): - return func(self, *args, **kwargs) - wrapper.__name__ = new_name - # Try to call format on the docstring - if func.__doc__: - try: - wrapper.__doc__ = func.__doc__.format(*args, **kwargs) - except (IndexError, KeyError): - # Maybe the user has added some of the formating strings - # unintentionally in the docstring. Do not raise an exception as it - # could be that he is not aware of the formating feature. - pass - return wrapper - - -def add_test(cls, test_name, func, *args, **kwargs): - """ - Add a test case to this class. - - The test will be based on an existing function but will give it a new - name. - - """ - setattr(cls, test_name, feed_data(func, test_name, *args, **kwargs)) - - -def process_file_data(cls, name, func, file_attr): - """ - Process the parameter in the `file_data` decorator. 
- - """ - cls_path = os.path.abspath(inspect.getsourcefile(cls)) - data_file_path = os.path.join(os.path.dirname(cls_path), file_attr) - - def _raise_ve(*args): # pylint: disable-msg=W0613 - raise ValueError("%s does not exist" % file_attr) - - if os.path.exists(data_file_path) is False: - test_name = mk_test_name(name, "error") - add_test(cls, test_name, _raise_ve, None) - else: - data = json.loads(open(data_file_path).read()) - for i, elem in enumerate(data): - if isinstance(data, dict): - key, value = elem, data[elem] - test_name = mk_test_name(name, key, i) - elif isinstance(data, list): - value = elem - test_name = mk_test_name(name, value, i) - if isinstance(value, dict): - add_test(cls, test_name, func, **value) - else: - add_test(cls, test_name, func, value) - - -def ddt(cls): - """ - Class decorator for subclasses of ``unittest.TestCase``. - - Apply this decorator to the test case class, and then - decorate test methods with ``@data``. - - For each method decorated with ``@data``, this will effectively create as - many methods as data items are passed as parameters to ``@data``. - - The names of the test methods follow the pattern - ``original_test_name_{ordinal}_{data}``. ``ordinal`` is the position of the - data argument, starting with 1. - - For data we use a string representation of the data value converted into a - valid python identifier. If ``data.__name__`` exists, we use that instead. - - For each method decorated with ``@file_data('test_data.json')``, the - decorator will try to load the test_data.json file located relative - to the python file containing the method that is decorated. It will, - for each ``test_name`` key create as many methods in the list of values - from the ``data`` key. 
- - """ - for name, func in list(cls.__dict__.items()): - if hasattr(func, DATA_ATTR): - for i, v in enumerate(getattr(func, DATA_ATTR)): - test_name = mk_test_name(name, getattr(v, "__name__", v), i) - if hasattr(func, UNPACK_ATTR): - if isinstance(v, tuple) or isinstance(v, list): - add_test(cls, test_name, func, *v) - else: - # unpack dictionary - add_test(cls, test_name, func, **v) - else: - add_test(cls, test_name, func, v) - delattr(cls, name) - elif hasattr(func, FILE_ATTR): - file_attr = getattr(func, FILE_ATTR) - process_file_data(cls, name, func, file_attr) - delattr(cls, name) - return cls diff --git a/numba/numba/testing/loader.py b/numba/numba/testing/loader.py deleted file mode 100644 index 694be166c..000000000 --- a/numba/numba/testing/loader.py +++ /dev/null @@ -1,27 +0,0 @@ -import numba.unittest_support as unittest -from unittest import loader, case -from os.path import isdir, isfile, join, dirname, basename - - -class TestLoader(loader.TestLoader): - - def __init__(self, topleveldir=None): - super(TestLoader, self).__init__() - self._top_level_dir = topleveldir or dirname(dirname(dirname(__file__))) - - def _find_tests(self, start_dir, pattern, namespace=False): - # Upstream doesn't look for 'load_tests' in start_dir. 
- - if isdir(start_dir) and not namespace and isfile(join(start_dir, '__init__.py')): - name = self._get_name_from_path(start_dir) - package = self._get_module_from_name(name) - load_tests = getattr(package, 'load_tests', None) - tests = self.loadTestsFromModule(package) - if load_tests is not None: - try: - yield load_tests(self, tests, pattern) - except Exception as e: - yield loader._make_failed_load_tests(package.__name__, e, self.suiteClass) - else: - for t in super(TestLoader, self)._find_tests(start_dir, pattern): - yield t diff --git a/numba/numba/testing/main.py b/numba/numba/testing/main.py deleted file mode 100644 index f2bd1236a..000000000 --- a/numba/numba/testing/main.py +++ /dev/null @@ -1,639 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import numba.unittest_support as unittest - -import collections -import contextlib -import cProfile -import gc -import multiprocessing -import os -import random -import sys -import time -import warnings - -from unittest import result, runner, signals, suite, loader, case - -from .loader import TestLoader -from numba.utils import PYVERSION, StringIO -from numba import config - -try: - from multiprocessing import TimeoutError -except ImportError: - from Queue import Empty as TimeoutError - - -def make_tag_decorator(known_tags): - """ - Create a decorator allowing tests to be tagged with the *known_tags*. - """ - - def tag(*tags): - """ - Tag a test method with the given tags. - Can be used in conjunction with the --tags command-line argument - for runtests.py. - """ - for t in tags: - if t not in known_tags: - raise ValueError("unknown tag: %r" % (t,)) - - def decorate(func): - if (not callable(func) or isinstance(func, type) - or not func.__name__.startswith('test_')): - raise TypeError("@tag(...) 
should be used on test methods") - try: - s = func.tags - except AttributeError: - s = func.tags = set() - s.update(tags) - return func - return decorate - - return tag - - -class TestLister(object): - """Simply list available tests rather than running them.""" - def __init__(self): - pass - - def run(self, test): - result = runner.TextTestResult(sys.stderr, descriptions=True, verbosity=1) - self._test_list = _flatten_suite(test) - for t in self._test_list: - print(t.id()) - print('%d tests found' % len(self._test_list)) - return result - - -class SerialSuite(unittest.TestSuite): - """A simple marker to make sure tests in this suite are run serially. - - Note: As the suite is going through internals of unittest, - it may get unpacked and stuffed into a plain TestSuite. - We need to set an attribute on the TestCase objects to - remember they should not be run in parallel. - """ - - def addTest(self, test): - if not isinstance(test, unittest.TestCase): - # It's a sub-suite, recurse - for t in test: - self.addTest(t) - else: - # It's a test case, mark it serial - test._numba_parallel_test_ = False - super(SerialSuite, self).addTest(test) - - -# "unittest.main" is really the TestProgram class! -# (defined in a module named itself "unittest.main"...) - -class NumbaTestProgram(unittest.main): - """ - A TestProgram subclass adding the following options: - * a -R option to enable reference leak detection - * a --profile option to enable profiling of the test run - * a -m option for parallel execution - * a -l option to (only) list tests - - Currently the options are only added in 3.4+. 
- """ - - refleak = False - profile = False - multiprocess = False - list = False - tags = None - random_select = None - random_seed = 42 - - def __init__(self, *args, **kwargs): - # Disable interpreter fallback if we are running the test suite - if config.COMPATIBILITY_MODE: - warnings.warn("Unset INTERPRETER_FALLBACK") - config.COMPATIBILITY_MODE = False - - topleveldir = kwargs.pop('topleveldir', None) - kwargs['testLoader'] = TestLoader(topleveldir) - - # HACK to force unittest not to change warning display options - # (so that NumbaWarnings don't appear all over the place) - sys.warnoptions.append(':x') - self.nomultiproc = kwargs.pop('nomultiproc', False) - super(NumbaTestProgram, self).__init__(*args, **kwargs) - - def _getParentArgParser(self): - # NOTE: this hook only exists on Python 3.4+. The options won't be - # added in earlier versions (which use optparse - 3.3 - or getopt() - # - 2.x). - parser = super(NumbaTestProgram, self)._getParentArgParser() - if self.testRunner is None: - parser.add_argument('-R', '--refleak', dest='refleak', - action='store_true', - help='Detect reference / memory leaks') - parser.add_argument('-m', '--multiprocess', dest='multiprocess', - nargs='?', - type=int, - const=multiprocessing.cpu_count(), - help='Parallelize tests') - parser.add_argument('-l', '--list', dest='list', - action='store_true', - help='List tests without running them') - parser.add_argument('--tags', dest='tags', type=str, - help='Comma-separated list of tags to select ' - 'a subset of the test suite') - parser.add_argument('--random', dest='random_select', type=float, - help='Random proportion of tests to select') - parser.add_argument('--profile', dest='profile', - action='store_true', - help='Profile the test run') - return parser - - def parseArgs(self, argv): - if '-l' in argv: - argv.remove('-l') - self.list = True - if PYVERSION < (3, 4) and '-m' in argv: - # We want '-m' to work on all versions, emulate this option. 
- dashm_posn = argv.index('-m') - # the default number of processes to use - nprocs = multiprocessing.cpu_count() - # see what else is in argv - # ensure next position is safe for access - try: - m_option = argv[dashm_posn + 1] - # see if next arg is "end options" - if m_option != '--': - #try and parse the next arg as an int - try: - nprocs = int(m_option) - except BaseException: - msg = ('Expected an integer argument to ' - 'option `-m`, found "%s"') - raise ValueError(msg % m_option) - # remove the value of the option - argv.remove(m_option) - # else end options, use defaults - except IndexError: - # at end of arg list, use defaults - pass - - self.multiprocess = nprocs - argv.remove('-m') - - super(NumbaTestProgram, self).parseArgs(argv) - - # If at this point self.test doesn't exist, it is because - # no test ID was given in argv. Use the default instead. - if not hasattr(self, 'test') or not self.test.countTestCases(): - self.testNames = (self.defaultTest,) - self.createTests() - - if self.tags: - tags = [s.strip() for s in self.tags.split(',')] - self.test = _choose_tagged_tests(self.test, tags) - - if self.random_select: - self.test = _choose_random_tests(self.test, self.random_select, - self.random_seed) - - if self.verbosity <= 0: - # We aren't interested in informational messages / warnings when - # running with '-q'. - self.buffer = True - - def _do_discovery(self, argv, Loader=None): - # Disable unittest's implicit test discovery when parsing - # CLI arguments, as it can select other tests than Numba's - # (e.g. 
some test_xxx module that may happen to be directly - # reachable from sys.path) - return - - def runTests(self): - if self.refleak: - self.testRunner = RefleakTestRunner - - if not hasattr(sys, "gettotalrefcount"): - warnings.warn("detecting reference leaks requires a debug build " - "of Python, only memory leaks will be detected") - - elif self.list: - self.testRunner = TestLister() - - elif self.testRunner is None: - self.testRunner = unittest.TextTestRunner - - if self.multiprocess and not self.nomultiproc: - if self.multiprocess < 1: - msg = ("Value specified for the number of processes to use in " - "running the suite must be > 0") - raise ValueError(msg) - self.testRunner = ParallelTestRunner(self.testRunner, - self.multiprocess, - verbosity=self.verbosity, - failfast=self.failfast, - buffer=self.buffer) - - def run_tests_real(): - super(NumbaTestProgram, self).runTests() - - if self.profile: - filename = os.path.splitext( - os.path.basename(sys.modules['__main__'].__file__) - )[0] + '.prof' - p = cProfile.Profile(timer=time.perf_counter) # 3.3+ - p.enable() - try: - p.runcall(run_tests_real) - finally: - p.disable() - print("Writing test profile data into %r" % (filename,)) - p.dump_stats(filename) - else: - run_tests_real() - - -def _flatten_suite(test): - """ - Expand nested suite into list of test cases. - """ - if isinstance(test, (unittest.TestSuite, list, tuple)): - tests = [] - for x in test: - tests.extend(_flatten_suite(x)) - return tests - else: - return [test] - - -def _choose_tagged_tests(tests, tags): - """ - Select tests that are tagged with at least one of the given tags. 
- """ - selected = [] - tags = set(tags) - for test in _flatten_suite(tests): - assert isinstance(test, unittest.TestCase) - func = getattr(test, test._testMethodName) - try: - # Look up the method's underlying function (Python 2) - func = func.im_func - except AttributeError: - pass - try: - if func.tags & tags: - selected.append(test) - except AttributeError: - # Test method doesn't have any tags - pass - return unittest.TestSuite(selected) - - -def _choose_random_tests(tests, ratio, seed): - """ - Choose a given proportion of tests at random. - """ - rnd = random.Random() - rnd.seed(seed) - if isinstance(tests, unittest.TestSuite): - tests = _flatten_suite(tests) - tests = rnd.sample(tests, int(len(tests) * ratio)) - tests = sorted(tests, key=lambda case: case.id()) - return unittest.TestSuite(tests) - - -# The reference leak detection code is liberally taken and adapted from -# Python's own Lib/test/regrtest.py. - -def _refleak_cleanup(): - # Collect cyclic trash and read memory statistics immediately after. - func1 = sys.getallocatedblocks - try: - func2 = sys.gettotalrefcount - except AttributeError: - func2 = lambda: 42 - - # Flush standard output, so that buffered data is sent to the OS and - # associated Python objects are reclaimed. - for stream in (sys.stdout, sys.stderr, sys.__stdout__, sys.__stderr__): - if stream is not None: - stream.flush() - - sys._clear_type_cache() - # This also clears the various internal CPython freelists. 
- gc.collect() - return func1(), func2() - - -class ReferenceLeakError(RuntimeError): - pass - - -class IntPool(collections.defaultdict): - - def __missing__(self, key): - return key - - -class RefleakTestResult(runner.TextTestResult): - - warmup = 3 - repetitions = 6 - - def _huntLeaks(self, test): - self.stream.flush() - - repcount = self.repetitions - nwarmup = self.warmup - rc_deltas = [0] * (repcount - nwarmup) - alloc_deltas = [0] * (repcount - nwarmup) - # Preallocate ints likely to be stored in rc_deltas and alloc_deltas, - # to make sys.getallocatedblocks() less flaky. - _int_pool = IntPool() - for i in range(-200, 200): - _int_pool[i] - - for i in range(repcount): - # Use a pristine, silent result object to avoid recursion - res = result.TestResult() - test.run(res) - # Poorly-written tests may fail when run several times. - # In this case, abort the refleak run and report the failure. - if not res.wasSuccessful(): - self.failures.extend(res.failures) - self.errors.extend(res.errors) - raise AssertionError - del res - alloc_after, rc_after = _refleak_cleanup() - if i >= nwarmup: - rc_deltas[i - nwarmup] = _int_pool[rc_after - rc_before] - alloc_deltas[i - nwarmup] = _int_pool[alloc_after - alloc_before] - alloc_before, rc_before = alloc_after, rc_after - return rc_deltas, alloc_deltas - - def addSuccess(self, test): - try: - rc_deltas, alloc_deltas = self._huntLeaks(test) - except AssertionError: - # Test failed when repeated - assert not self.wasSuccessful() - return - - # These checkers return False on success, True on failure - def check_rc_deltas(deltas): - return any(deltas) - - def check_alloc_deltas(deltas): - # At least 1/3rd of 0s - if 3 * deltas.count(0) < len(deltas): - return True - # Nothing else than 1s, 0s and -1s - if not set(deltas) <= set((1, 0, -1)): - return True - return False - - failed = False - - for deltas, item_name, checker in [ - (rc_deltas, 'references', check_rc_deltas), - (alloc_deltas, 'memory blocks', check_alloc_deltas)]: 
- if checker(deltas): - msg = '%s leaked %s %s, sum=%s' % ( - test, deltas, item_name, sum(deltas)) - failed = True - try: - raise ReferenceLeakError(msg) - except Exception: - exc_info = sys.exc_info() - if self.showAll: - self.stream.write("%s = %r " % (item_name, deltas)) - self.addFailure(test, exc_info) - - if not failed: - super(RefleakTestResult, self).addSuccess(test) - - -class RefleakTestRunner(runner.TextTestRunner): - resultclass = RefleakTestResult - - -class ParallelTestResult(runner.TextTestResult): - """ - A TestResult able to inject results from other results. - """ - - def add_results(self, result): - """ - Add the results from the other *result* to this result. - """ - self.stream.write(result.stream.getvalue()) - self.stream.flush() - self.testsRun += result.testsRun - self.failures.extend(result.failures) - self.errors.extend(result.errors) - self.skipped.extend(result.skipped) - self.expectedFailures.extend(result.expectedFailures) - self.unexpectedSuccesses.extend(result.unexpectedSuccesses) - - -class _MinimalResult(object): - """ - A minimal, picklable TestResult-alike object. - """ - - __slots__ = ( - 'failures', 'errors', 'skipped', 'expectedFailures', - 'unexpectedSuccesses', 'stream', 'shouldStop', 'testsRun', - 'test_id') - - def fixup_case(self, case): - """ - Remove any unpicklable attributes from TestCase instance *case*. - """ - # Python 3.3 doesn't reset this one. - case._outcomeForDoCleanups = None - - def __init__(self, original_result, test_id=None): - for attr in self.__slots__: - setattr(self, attr, getattr(original_result, attr, None)) - for case, _ in self.expectedFailures: - self.fixup_case(case) - for case, _ in self.errors: - self.fixup_case(case) - for case, _ in self.failures: - self.fixup_case(case) - self.test_id = test_id - - -class _FakeStringIO(object): - """ - A trivial picklable StringIO-alike for Python 2. 
- """ - - def __init__(self, value): - self._value = value - - def getvalue(self): - return self._value - - -class _MinimalRunner(object): - """ - A minimal picklable object able to instantiate a runner in a - child process and run a test case with it. - """ - - def __init__(self, runner_cls, runner_args): - self.runner_cls = runner_cls - self.runner_args = runner_args - - # Python 2 doesn't know how to pickle instance methods, so we use __call__ - # instead. - - def __call__(self, test): - # Executed in child process - kwargs = self.runner_args - # Force recording of output in a buffer (it will be printed out - # by the parent). - kwargs['stream'] = StringIO() - runner = self.runner_cls(**kwargs) - result = runner._makeResult() - # Avoid child tracebacks when Ctrl-C is pressed. - signals.installHandler() - signals.registerResult(result) - result.failfast = runner.failfast - result.buffer = runner.buffer - with self.cleanup_object(test): - test(result) - # HACK as cStringIO.StringIO isn't picklable in 2.x - result.stream = _FakeStringIO(result.stream.getvalue()) - return _MinimalResult(result, test.id()) - - @contextlib.contextmanager - def cleanup_object(self, test): - """ - A context manager which cleans up unwanted attributes on a test case - (or any other object). - """ - vanilla_attrs = set(test.__dict__) - try: - yield test - finally: - spurious_attrs = set(test.__dict__) - vanilla_attrs - for name in spurious_attrs: - del test.__dict__[name] - - -def _split_nonparallel_tests(test): - """ - Split test suite into parallel and serial tests. - """ - ptests = [] - stests = [] - - def is_parallelizable_test_case(test): - # Guard for the fake test case created by unittest when test - # discovery fails, as it isn't picklable (e.g. "LoadTestsFailure") - method_name = test._testMethodName - method = getattr(test, method_name) - if method.__name__ != method_name and method.__name__ == "testFailure": - return False - # Was parallel execution explicitly disabled? 
- return getattr(test, "_numba_parallel_test_", True) - - if isinstance(test, unittest.TestSuite): - # It's a sub-suite, recurse - for t in test: - p, s = _split_nonparallel_tests(t) - ptests.extend(p) - stests.extend(s) - elif is_parallelizable_test_case(test): - # Test case is suitable for parallel execution (default) - ptests = [test] - else: - # Test case explicitly disallows parallel execution - stests = _flatten_suite(test) - return ptests, stests - - -class ParallelTestRunner(runner.TextTestRunner): - """ - A test runner which delegates the actual running to a pool of child - processes. - """ - - resultclass = ParallelTestResult - # A test can't run longer than 2 minutes - timeout = 120 - - def __init__(self, runner_cls, nprocs, **kwargs): - runner.TextTestRunner.__init__(self, **kwargs) - self.runner_cls = runner_cls - self.nprocs = nprocs - self.runner_args = kwargs - - def _run_inner(self, result): - # We hijack TextTestRunner.run()'s inner logic by passing this - # method as if it were a test case. - child_runner = _MinimalRunner(self.runner_cls, self.runner_args) - - # Split the tests and recycle the worker process to tame memory usage. 
- chunk_size = 500 - splitted_tests = [self._ptests[i:i + chunk_size] - for i in range(0, len(self._ptests), chunk_size)] - - for tests in splitted_tests: - pool = multiprocessing.Pool(self.nprocs) - try: - self._run_parallel_tests(result, pool, child_runner, tests) - except: - # On exception, kill still active workers immediately - pool.terminate() - # Make sure exception is reported and not ignored - raise - else: - # Close the pool cleanly unless asked to early out - if result.shouldStop: - pool.terminate() - break - else: - pool.close() - finally: - # Always join the pool (this is necessary for coverage.py) - pool.join() - if not result.shouldStop: - stests = SerialSuite(self._stests) - stests.run(result) - return result - - def _run_parallel_tests(self, result, pool, child_runner, tests): - remaining_ids = set(t.id() for t in tests) - it = pool.imap_unordered(child_runner, tests) - while True: - try: - child_result = it.__next__(self.timeout) - except StopIteration: - return - except TimeoutError as e: - # Diagnose the names of unfinished tests - msg = ("Tests didn't finish before timeout (or crashed):\n%s" - % "".join("- %r\n" % tid for tid in sorted(remaining_ids)) - ) - e.args = (msg,) + e.args[1:] - raise e - else: - result.add_results(child_result) - remaining_ids.discard(child_result.test_id) - if child_result.shouldStop: - result.shouldStop = True - return - - def run(self, test): - self._ptests, self._stests = _split_nonparallel_tests(test) - # This will call self._run_inner() on the created result object, - # and print out the detailed test results at the end. 
- return super(ParallelTestRunner, self).run(self._run_inner) diff --git a/numba/numba/testing/notebook.py b/numba/numba/testing/notebook.py deleted file mode 100644 index 94a301ff3..000000000 --- a/numba/numba/testing/notebook.py +++ /dev/null @@ -1,172 +0,0 @@ -from numba import unittest_support as unittest -from unittest import TestCase - -from ipykernel.tests import utils -from nbformat.converter import convert -from nbformat.reader import reads - -import re -import json -from copy import copy - -try: - # py3 - from queue import Empty - - def isstr(s): - return isinstance(s, str) -except ImportError: - # py2 - from Queue import Empty - - def isstr(s): - return isinstance(s, basestring) # noqa - -class NotebookTest(TestCase): - """Validate a notebook. All code cells are executed in order. The output is either checked - for errors (if no reference output is present), or is compared against expected output. - - - Useful references: - http://nbformat.readthedocs.org/en/latest/format_description.html - http://jupyter-client.readthedocs.org/en/latest/messaging.html -""" - - - - IGNORE_TYPES = ["execute_request", "execute_input", "status", "pyin"] - STRIP_KEYS = ["execution_count", "traceback", "prompt_number", "source"] - NBFORMAT_VERSION = 4 - - def _test_notebook(self, notebook, test): - - with open(notebook) as f: - nb = convert(reads(f.read()), self.NBFORMAT_VERSION) - _, kernel = utils.start_new_kernel() - for i, c in enumerate([c for c in nb.cells if c.cell_type == 'code']): - self._test_notebook_cell(self.sanitize_cell(c), i, kernel, test) - - def _test_notebook_cell(self, cell, i, kernel, test): - - if hasattr(cell, 'source'): # nbformat 4.0 and later - code = cell.source - else: - code = cell.input - iopub = kernel.iopub_channel - kernel.execute(code) - outputs = [] - msg = None - no_error = True - first_error = -1 - error_msg = '' - while self.should_continue(msg): - try: - msg = iopub.get_msg(block=True, timeout=1) - except Empty: - continue - if 
msg['msg_type'] not in self.IGNORE_TYPES: - if msg['msg_type'] == 'error': - error_msg = ' ' + msg['content']['ename'] + '\n ' + msg['content']['evalue'] - no_error = False - if first_error == -1: - first_error = i - i = len(outputs) - expected = i < len(cell.outputs) and cell.outputs[i] or [] - o = self.transform_message(msg, expected) - outputs.append(o) - - if (test == 'check_error'): - self.assertTrue(no_error, 'Executing cell %d resulted in an error:\n%s'%(first_error, error_msg)) - else: - # Compare computed output against stored output. - # TODO: This doesn't work right now as the generated output is too diverse to - # be verifiable. - scrub = lambda x: self.dump_canonical(list(self.scrub_outputs(x))) - scrubbed = scrub(outputs) - expected = scrub(cell.outputs) - #print('output=%s'%outputs) - #print('expected=%s'%expected) - #self.assertEqual(scrubbed, expected, "\n{}\n\n{}".format(scrubbed, expected)) - - def dump_canonical(self, obj): - return json.dumps(obj, indent=2, sort_keys=True) - - def scrub_outputs(self, outputs): - """ - remove all scrubs from output data and text - """ - for output in outputs: - out = copy(output) - - for scrub, sub in []:#self.scrubs.items(): - def _scrubLines(lines): - if isstr(lines): - return re.sub(scrub, sub, lines) - else: - return [re.sub(scrub, sub, line) for line in lines] - - if "text" in out: - out["text"] = _scrubLines(out["text"]) - - if "data" in out: - if isinstance(out["data"], dict): - for mime, data in out["data"].items(): - out["data"][mime] = _scrubLines(data) - else: - out["data"] = _scrubLines(out["data"]) - yield out - - def strip_keys(self, d): - """ - remove keys from STRIP_KEYS to ensure comparability - """ - for key in self.STRIP_KEYS: - d.pop(key, None) - return d - - def sanitize_cell(self, cell): - """ - remove non-reproducible things - """ - for output in cell.outputs: - self.strip_keys(output) - return cell - - def transform_message(self, msg, expected): - """ - transform a message into something 
like the notebook - """ - SWAP_KEYS = { - "output_type": { - "pyout": "execute_result", - "pyerr": "error" - } - } - - output = { - u"output_type": msg["msg_type"] - } - output.update(msg["content"]) - - output = self.strip_keys(output) - for key, swaps in SWAP_KEYS.items(): - if key in output and output[key] in swaps: - output[key] = swaps[output[key]] - - if "data" in output and "data" not in expected: - output["text"] = output["data"] - del output["data"] - - return output - - def should_continue(self, msg): - """ - determine whether the current message is the last for this cell - """ - if msg is None: - return True - - return not (msg["msg_type"] == "status" and - msg["content"]["execution_state"] == "idle") - - diff --git a/numba/numba/tests/__init__.py b/numba/numba/tests/__init__.py deleted file mode 100644 index c89b7d226..000000000 --- a/numba/numba/tests/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -from numba import unittest_support as unittest - -import gc -from os.path import dirname, join -import multiprocessing -import sys -import time -import warnings - -from unittest.suite import TestSuite -from numba.testing import load_testsuite -from numba.testing import ddt # for backward compatibility - - -try: - import faulthandler -except ImportError: - faulthandler = None -else: - try: - # May fail in IPython Notebook with UnsupportedOperation - faulthandler.enable() - except BaseException as e: - msg = "Failed to enable faulthandler due to:\n{err}" - warnings.warn(msg.format(err=e)) - -def load_tests(loader, tests, pattern): - suite = TestSuite() - suite.addTests(load_testsuite(loader, dirname(__file__))) - # Numba CUDA tests are located in a separate directory: - cuda_dir = join(dirname(dirname(__file__)), 'cuda/tests') - suite.addTests(loader.discover(cuda_dir)) - - # Numba ROC tests are located in a separate directory - roc_dir = join(dirname(dirname(__file__)), 'roc/tests') - suite.addTests(loader.discover(roc_dir)) - - return suite - diff --git 
a/numba/numba/tests/annotation_usecases.py b/numba/numba/tests/annotation_usecases.py deleted file mode 100644 index a249ccbd5..000000000 --- a/numba/numba/tests/annotation_usecases.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -Usecases with Python 3 function annotations. This is a separate module -in order to avoid syntax errors with Python 2. -""" - - -class AnnotatedClass: - """ - A class with annotated methods. - """ - - def __init__(self, v: int): - self.x = v - - def add(self, v: int) -> int: - return self.x + v diff --git a/numba/numba/tests/cache_usecases.py b/numba/numba/tests/cache_usecases.py deleted file mode 100644 index 3c2d84455..000000000 --- a/numba/numba/tests/cache_usecases.py +++ /dev/null @@ -1,158 +0,0 @@ -""" -This file will be copied to a temporary directory in order to -exercise caching compiled Numba functions. - -See test_dispatcher.py. -""" -from __future__ import division, print_function, absolute_import - -import sys - -import numpy as np - -from numba import jit, generated_jit, types - -from numba.tests.ctypes_usecases import c_sin -from numba.tests.support import TestCase, captured_stderr - - -@jit(cache=True, nopython=True) -def simple_usecase(x): - return x - -def simple_usecase_caller(x): - return simple_usecase(x) - - -@jit(cache=True, nopython=True) -def add_usecase(x, y): - return x + y + Z - - -@jit(cache=True, forceobj=True) -def add_objmode_usecase(x, y): - object() - return x + y + Z - - -@jit(nopython=True) -def add_nocache_usecase(x, y): - return x + y + Z - - -@generated_jit(cache=True, nopython=True) -def generated_usecase(x, y): - if isinstance(x, types.Complex): - def impl(x, y): - return x + y - else: - def impl(x, y): - return x - y - return impl - - -@jit(cache=True, nopython=True) -def inner(x, y): - return x + y + Z - -@jit(cache=True, nopython=True) -def outer(x, y): - return inner(-y, x) - -@jit(cache=False, nopython=True) -def outer_uncached(x, y): - return inner(-y, x) - - -@jit(cache=True, forceobj=True) -def 
looplifted(n): - object() - res = 0 - for i in range(n): - res = res + i - return res - - -@jit(cache=True, nopython=True) -def use_c_sin(x): - return c_sin(x) - - -@jit(cache=True, nopython=True) -def ambiguous_function(x): - return x + 2 - -renamed_function1 = ambiguous_function - -@jit(cache=True, nopython=True) -def ambiguous_function(x): - return x + 6 - -renamed_function2 = ambiguous_function - - -def make_closure(x): - @jit(cache=True, nopython=True) - def closure(y): - return x + y - - return closure - -closure1 = make_closure(3) -closure2 = make_closure(5) - - -biggie = np.arange(10**6) - -@jit(cache=True, nopython=True) -def use_big_array(): - return biggie - - -Z = 1 - -# Exercise returning a record instance. This used to hardcode the dtype -# pointer's value in the bitcode. - -packed_record_type = np.dtype([('a', np.int8), ('b', np.float64)]) -aligned_record_type = np.dtype([('a', np.int8), ('b', np.float64)], align=True) - -packed_arr = np.empty(2, dtype=packed_record_type) -for i in range(packed_arr.size): - packed_arr[i]['a'] = i + 1 - packed_arr[i]['b'] = i + 42.5 - -aligned_arr = np.array(packed_arr, dtype=aligned_record_type) - -@jit(cache=True, nopython=True) -def record_return(ary, i): - return ary[i] - - -class _TestModule(TestCase): - """ - Tests for functionality of this module's functions. - Note this does not define any "test_*" method, instead check_module() - should be called by hand. 
- """ - - def check_module(self, mod): - self.assertPreciseEqual(mod.add_usecase(2, 3), 6) - self.assertPreciseEqual(mod.add_objmode_usecase(2, 3), 6) - self.assertPreciseEqual(mod.outer_uncached(3, 2), 2) - self.assertPreciseEqual(mod.outer(3, 2), 2) - self.assertPreciseEqual(mod.generated_usecase(3, 2), 1) - - packed_rec = mod.record_return(mod.packed_arr, 1) - self.assertPreciseEqual(tuple(packed_rec), (2, 43.5)) - aligned_rec = mod.record_return(mod.aligned_arr, 1) - self.assertPreciseEqual(tuple(aligned_rec), (2, 43.5)) - - # For 2.x - def runTest(self): - raise NotImplementedError - - -def self_test(): - mod = sys.modules[__name__] - _TestModule().check_module(mod) diff --git a/numba/numba/tests/cffi_usecases.py b/numba/numba/tests/cffi_usecases.py deleted file mode 100644 index bc3293975..000000000 --- a/numba/numba/tests/cffi_usecases.py +++ /dev/null @@ -1,199 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import sys - -import numpy as np - -from numba import cffi_support -from numba.tests.support import import_dynamic, temp_directory -from numba.types import complex128 - - -def load_inline_module(): - """ - Create an inline module, return the corresponding ffi and dll objects. - """ - from cffi import FFI - - # We can't rely on libc availability on Windows anymore, so we use our - # own compiled wrappers (see https://bugs.python.org/issue23606). - - defs = """ - double _numba_test_sin(double x); - double _numba_test_cos(double x); - double _numba_test_funcptr(double (*func)(double)); - bool _numba_test_boolean(); - """ - - ffi = FFI() - ffi.cdef(defs) - # Load the _helperlib namespace - from numba import _helperlib - return ffi, ffi.dlopen(_helperlib.__file__) - - -def load_ool_module(): - """ - Compile an out-of-line module, return the corresponding ffi and - module objects. 
- """ - from cffi import FFI - - numba_complex = """ - typedef struct _numba_complex { - double real; - double imag; - } numba_complex; - """ - - bool_define = """ - #ifdef _MSC_VER - #define false 0 - #define true 1 - #define bool int - #else - #include - #endif - """ - - defs = numba_complex + """ - bool boolean(); - double sin(double x); - double cos(double x); - int foo(int a, int b, int c); - void vsSin(int n, float* x, float* y); - void vdSin(int n, double* x, double* y); - void vector_real(numba_complex *c, double *real, int n); - void vector_imag(numba_complex *c, double *imag, int n); - """ - - source = numba_complex + bool_define + """ - static bool boolean() - { - return true; - } - - static int foo(int a, int b, int c) - { - return a + b * c; - } - - void vsSin(int n, float* x, float* y) { - int i; - for (i=0; i 0: - return fa(x) - else: - return fb(x) - -def use_user_defined_symbols(): - return cffi_foo(1, 2, 3) - -# The from_buffer method is member of cffi.FFI, and also of CompiledFFI objects -# (cffi_usecases_ool.ffi is a CompiledFFI object) so we use both in these -# functions. - -def vector_sin_float32(x, y): - vsSin(len(x), ffi.from_buffer(x), ffi_ool.from_buffer(y)) - -def vector_sin_float64(x, y): - vdSin(len(x), ffi.from_buffer(x), ffi_ool.from_buffer(y)) - - -# For testing pointer to structs from buffers - -def vector_extract_real(x, y): - vector_real(ffi.from_buffer(x), ffi.from_buffer(y), len(x)) - -def vector_extract_imag(x, y): - vector_imag(ffi.from_buffer(x), ffi.from_buffer(y), len(x)) diff --git a/numba/numba/tests/cfunc_cache_usecases.py b/numba/numba/tests/cfunc_cache_usecases.py deleted file mode 100644 index b7621fccf..000000000 --- a/numba/numba/tests/cfunc_cache_usecases.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -This file will be copied to a temporary directory in order to -exercise caching compiled C callbacks. - -See test_cfunc.py. 
-""" -from __future__ import division, print_function, absolute_import - -import sys - -from numba import cfunc, jit -from numba.tests.support import TestCase, captured_stderr - - -Z = 1 - -add_sig = "float64(float64, float64)" - -div_sig = "float64(int64, int64)" - - -@cfunc(add_sig, cache=True, nopython=True) -def add_usecase(x, y): - return x + y + Z - -@cfunc(add_sig, nopython=True) -def add_nocache_usecase(x, y): - return x + y + Z - -@cfunc(div_sig, cache=True, nopython=True) -def div_usecase(a, b): - return a / b - - -@jit(nopython=True) -def inner(x, y): - return x + y + Z - -@cfunc(add_sig, cache=True, nopython=True) -def outer(x, y): - return inner(-y, x) - - -class _TestModule(TestCase): - """ - Tests for functionality of this module's cfuncs. - Note this does not define any "test_*" method, instead check_module() - should be called by hand. - """ - - def check_module(self, mod): - f = mod.add_usecase - self.assertPreciseEqual(f.ctypes(2.0, 3.0), 6.0) - f = mod.add_nocache_usecase - self.assertPreciseEqual(f.ctypes(2.0, 3.0), 6.0) - f = mod.outer - self.assertPreciseEqual(f.ctypes(5.0, 2.0), 4.0) - - f = mod.div_usecase - with captured_stderr() as err: - self.assertPreciseEqual(f.ctypes(7, 2), 3.5) - self.assertEqual(err.getvalue(), "") - with captured_stderr() as err: - f.ctypes(7, 0) - err = err.getvalue() - self.assertIn("ZeroDivisionError", err) - - # For 2.x - def runTest(self): - raise NotImplementedError - - -def self_test(): - mod = sys.modules[__name__] - _TestModule().check_module(mod) diff --git a/numba/numba/tests/compile_with_pycc.py b/numba/numba/tests/compile_with_pycc.py deleted file mode 100644 index 6bf5c68ac..000000000 --- a/numba/numba/tests/compile_with_pycc.py +++ /dev/null @@ -1,101 +0,0 @@ -import cmath - -import numpy as np - -from numba import float32 -from numba.pycc import CC, exportmany, export -from numba.tests.matmul_usecase import has_blas - - -# -# New API -# - -cc = CC('pycc_test_simple') -cc.use_nrt = False - -# Note 
the first signature omits the return type -@cc.export('multf', (float32, float32)) -@cc.export('multi', 'i4(i4, i4)') -def mult(a, b): - return a * b - -# Test imported C globals such as Py_None, PyExc_ZeroDivisionError -@cc.export('get_none', 'none()') -def get_none(): - return None - -@cc.export('div', 'f8(f8, f8)') -def div(x, y): - return x / y - -_two = 2 - -# This one can't be compiled by the legacy API as it doesn't execute -# the script in a proper module. -@cc.export('square', 'i8(i8)') -def square(u): - return u ** _two - -# These ones need helperlib -cc_helperlib = CC('pycc_test_helperlib') -cc_helperlib.use_nrt = False - -@cc_helperlib.export('power', 'i8(i8, i8)') -def power(u, v): - return u ** v - -@cc_helperlib.export('sqrt', 'c16(c16)') -def sqrt(u): - return cmath.sqrt(u) - -@cc_helperlib.export('size', 'i8(f8[:])') -def size(arr): - return arr.size - -# Exercise linking to Numpy math functions -@cc_helperlib.export('np_sqrt', 'f8(f8)') -def np_sqrt(u): - return np.sqrt(u) - -@cc_helperlib.export('spacing', 'f8(f8)') -def np_spacing(u): - return np.spacing(u) - - -# This one clashes with libc random() unless pycc is careful with naming. 
-@cc_helperlib.export('random', 'f8(i4)') -def random_impl(seed): - if seed != -1: - np.random.seed(seed) - return np.random.random() - -# These ones need NRT -cc_nrt = CC('pycc_test_nrt') - -@cc_nrt.export('zero_scalar', 'f8(i4)') -def zero_scalar(n): - arr = np.zeros(n) - return arr[-1] - -if has_blas: - # This one also needs BLAS - @cc_nrt.export('vector_dot', 'f8(i4)') - def vector_dot(n): - a = np.linspace(1, n, n) - return np.dot(a, a) - -# This one needs an environment -@cc_nrt.export('zeros', 'f8[:](i4)') -def zeros(n): - return np.zeros(n) - - -# -# Legacy API -# - -exportmany(['multf f4(f4,f4)', 'multi i4(i4,i4)'])(mult) -# Needs to link to helperlib to due with complex arguments -# export('multc c16(c16,c16)')(mult) -export('mult f8(f8, f8)')(mult) diff --git a/numba/numba/tests/complex_usecases.py b/numba/numba/tests/complex_usecases.py deleted file mode 100644 index 7983f56ee..000000000 --- a/numba/numba/tests/complex_usecases.py +++ /dev/null @@ -1,95 +0,0 @@ -from __future__ import division - -import cmath - - -def div_usecase(x, y): - return x / y - - -def real_usecase(x): - return x.real - -def imag_usecase(x): - return x.imag - -def conjugate_usecase(x): - return x.conjugate() - - -def acos_usecase(x): - return cmath.acos(x) - -def cos_usecase(x): - return cmath.cos(x) - -def asin_usecase(x): - return cmath.asin(x) - -def sin_usecase(x): - return cmath.sin(x) - -def atan_usecase(x): - return cmath.atan(x) - -def tan_usecase(x): - return cmath.tan(x) - -def acosh_usecase(x): - return cmath.acosh(x) - -def cosh_usecase(x): - return cmath.cosh(x) - -def asinh_usecase(x): - return cmath.asinh(x) - -def sinh_usecase(x): - return cmath.sinh(x) - -def atanh_usecase(x): - return cmath.atanh(x) - -def tanh_usecase(x): - return cmath.tanh(x) - -def exp_usecase(x): - return cmath.exp(x) - -def isfinite_usecase(x): - return cmath.isfinite(x) - -def isinf_usecase(x): - return cmath.isinf(x) - -def isnan_usecase(x): - return cmath.isnan(x) - -def 
log_usecase(x): - return cmath.log(x) - -def log_base_usecase(x, base): - return cmath.log(x, base) - -def log10_usecase(x): - return cmath.log10(x) - -def phase_usecase(x): - return cmath.phase(x) - -def polar_usecase(x): - return cmath.polar(x) - -_two = 2.0 - -def polar_as_complex_usecase(x): - # HACK: clear errno by invoking float.__pow__ - # (workaround for http://bugs.python.org/issue24489) - _two ** _two - return complex(*cmath.polar(x)) - -def rect_usecase(r, phi): - return cmath.rect(r, phi) - -def sqrt_usecase(x): - return cmath.sqrt(x) diff --git a/numba/numba/tests/ctypes_usecases.py b/numba/numba/tests/ctypes_usecases.py deleted file mode 100644 index f29226601..000000000 --- a/numba/numba/tests/ctypes_usecases.py +++ /dev/null @@ -1,116 +0,0 @@ -from __future__ import print_function, absolute_import, division - -from ctypes import * -import sys - -import numpy as np - - -is_windows = sys.platform.startswith('win32') - -# We can't rely on libc availability on Windows anymore, so we use our -# own compiled wrappers (see https://bugs.python.org/issue23606). 
- -from numba import _helperlib -libnumba = CDLL(_helperlib.__file__) -del _helperlib - -# A typed C function (cdecl under Windows) - -c_sin = libnumba._numba_test_sin -c_sin.argtypes = [c_double] -c_sin.restype = c_double - -def use_c_sin(x): - return c_sin(x) - -c_cos = libnumba._numba_test_cos -c_cos.argtypes = [c_double] -c_cos.restype = c_double - -def use_two_funcs(x): - return c_sin(x) - c_cos(x) - -# Typed C functions accepting an array pointer -# (either as a "void *" or as a typed pointer) - -c_vsquare = libnumba._numba_test_vsquare -c_vsquare.argtypes = [c_int, c_void_p, c_void_p] - -c_vcube = libnumba._numba_test_vsquare -c_vcube.argtypes = [c_int, POINTER(c_double), POINTER(c_double)] - -def use_c_vsquare(x): - out = np.empty_like(x) - c_vsquare(x.size, x.ctypes, out.ctypes) - return out - -def use_c_vcube(x): - out = np.empty_like(x) - c_vcube(x.size, x.ctypes, out.ctypes) - return out - -# An untyped C function - -c_untyped = libnumba._numba_test_exp - -def use_c_untyped(x): - return c_untyped(x) - -# A C function wrapped in a CFUNCTYPE - -ctype_wrapping = CFUNCTYPE(c_double, c_double)(use_c_sin) - -def use_ctype_wrapping(x): - return ctype_wrapping(x) - -# A Python API function - -savethread = pythonapi.PyEval_SaveThread -savethread.argtypes = [] -savethread.restype = c_void_p - -restorethread = pythonapi.PyEval_RestoreThread -restorethread.argtypes = [c_void_p] -restorethread.restype = None - -if is_windows: - # A function with the stdcall calling convention - c_sleep = windll.kernel32.Sleep - c_sleep.argtypes = [c_uint] - c_sleep.restype = None - - def use_c_sleep(x): - c_sleep(x) - - -def use_c_pointer(x): - """ - Running in Python will cause a segfault. 
- """ - threadstate = savethread() - x += 1 - restorethread(threadstate) - return x - - -def use_func_pointer(fa, fb, x): - if x > 0: - return fa(x) - else: - return fb(x) - - -mydct = {'what': 1232121} - -def call_me_maybe(arr): - return mydct[arr[0].decode('ascii')] - -# Create a callback into the python interpreter -py_call_back = CFUNCTYPE(c_int, py_object)(call_me_maybe) - - -def take_array_ptr(ptr): - return ptr - -c_take_array_ptr = CFUNCTYPE(c_void_p, c_void_p)(take_array_ptr) diff --git a/numba/numba/tests/dummy_module.py b/numba/numba/tests/dummy_module.py deleted file mode 100644 index 4152f3ff5..000000000 --- a/numba/numba/tests/dummy_module.py +++ /dev/null @@ -1,4 +0,0 @@ -'''Dummy module''' - -def function(): - '''Do nothing''' diff --git a/numba/numba/tests/enum_usecases.py b/numba/numba/tests/enum_usecases.py deleted file mode 100644 index 1de3762db..000000000 --- a/numba/numba/tests/enum_usecases.py +++ /dev/null @@ -1,48 +0,0 @@ -from __future__ import print_function - -from enum import Enum, IntEnum - - -class Color(Enum): - red = 1 - green = 2 - blue = 3 - - -class Shake(Enum): - vanilla = 7 - chocolate = 4 - cookies = 9 - # Same as Color.blue - mint = 3 - - -class Planet(Enum): - MERCURY = (3.303e+23, 2.4397e6) - VENUS = (4.869e+24, 6.0518e6) - EARTH = (5.976e+24, 6.37814e6) - MARS = (6.421e+23, 3.3972e6) - JUPITER = (1.9e+27, 7.1492e7) - SATURN = (5.688e+26, 6.0268e7) - URANUS = (8.686e+25, 2.5559e7) - NEPTUNE = (1.024e+26, 2.4746e7) - - -class HeterogeneousEnum(Enum): - red = 1.0 - green = 2.0 - blue = 3j - - -class Shape(IntEnum): - # Same as Color.green - circle = 2 - # Same as RequestError.internal_error - square = 500 - - -class RequestError(IntEnum): - dummy = 2 - not_found = 404 - internal_error = 500 - diff --git a/numba/numba/tests/matmul_usecase.py b/numba/numba/tests/matmul_usecase.py deleted file mode 100644 index 1a10f0301..000000000 --- a/numba/numba/tests/matmul_usecase.py +++ /dev/null @@ -1,57 +0,0 @@ -import sys - -try: - 
import scipy.linalg.cython_blas - has_blas = True -except ImportError: - has_blas = False - -import numba.unittest_support as unittest -from numba.numpy_support import version as numpy_version - - -# The "@" operator only compiles on Python 3.5+. -# It is only supported by Numpy 1.10+. -has_matmul = sys.version_info >= (3, 5) and numpy_version >= (1, 10) - -if has_matmul: - code = """if 1: - def matmul_usecase(x, y): - return x @ y - - def imatmul_usecase(x, y): - x @= y - return x - """ - co = compile(code, "", "exec") - ns = {} - eval(co, globals(), ns) - globals().update(ns) - del code, co, ns - -else: - matmul_usecase = None - imatmul_usecase = None - -needs_matmul = unittest.skipUnless( - has_matmul, - "the matrix multiplication operator needs Python 3.5+ and Numpy 1.10+") - -needs_blas = unittest.skipUnless(has_blas, "BLAS needs Scipy 0.16+") - - -class DumbMatrix(object): - - def __init__(self, value): - self.value = value - - def __matmul__(self, other): - if isinstance(other, DumbMatrix): - return DumbMatrix(self.value * other.value) - return NotImplemented - - def __imatmul__(self, other): - if isinstance(other, DumbMatrix): - self.value *= other.value - return self - return NotImplemented diff --git a/numba/numba/tests/npyufunc/__init__.py b/numba/numba/tests/npyufunc/__init__.py deleted file mode 100644 index 83bc55d68..000000000 --- a/numba/numba/tests/npyufunc/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from numba import unittest_support as unittest - -from os.path import dirname -from unittest.suite import TestSuite - -from numba.testing import load_testsuite - -def load_tests(loader, tests, pattern): - suite = TestSuite() - suite.addTests(load_testsuite(loader, dirname(__file__))) - return suite diff --git a/numba/numba/tests/npyufunc/cache_usecases.py b/numba/numba/tests/npyufunc/cache_usecases.py deleted file mode 100644 index 4d250756d..000000000 --- a/numba/numba/tests/npyufunc/cache_usecases.py +++ /dev/null @@ -1,76 +0,0 @@ -import numba as nb 
- - -# -# UFunc -# - -def direct_ufunc_cache_usecase(**kwargs): - @nb.vectorize(["intp(intp)", "float64(float64)"], cache=True, **kwargs) - def ufunc(inp): - return inp * 2 - - return ufunc - - -def indirect_ufunc_cache_usecase(**kwargs): - @nb.njit(cache=True) - def indirect_ufunc_core(inp): - return inp * 3 - - @nb.vectorize(["intp(intp)", "float64(float64)", "complex64(complex64)"], - **kwargs) - def ufunc(inp): - return indirect_ufunc_core(inp) - - return ufunc - - -# -# DUFunc -# - -def direct_dufunc_cache_usecase(**kwargs): - @nb.vectorize(cache=True, **kwargs) - def ufunc(inp): - return inp * 2 - - return ufunc - - -def indirect_dufunc_cache_usecase(**kwargs): - @nb.njit(cache=True) - def indirect_ufunc_core(inp): - return inp * 3 - - @nb.vectorize(**kwargs) - def ufunc(inp): - return indirect_ufunc_core(inp) - - return ufunc - - -# -# GUFunc -# - -def direct_gufunc_cache_usecase(**kwargs): - @nb.guvectorize(["(intp, intp[:])", "(float64, float64[:])"], - "()->()", cache=True, **kwargs) - def gufunc(inp, out): - out[0] = inp * 2 - - return gufunc - - -def indirect_gufunc_cache_usecase(**kwargs): - @nb.njit(cache=True) - def core(x): - return x * 3 - - @nb.guvectorize(["(intp, intp[:])", "(float64, float64[:])", - "(complex64, complex64[:])"], "()->()", **kwargs) - def gufunc(inp, out): - out[0] = core(inp) - - return gufunc diff --git a/numba/numba/tests/npyufunc/test_caching.py b/numba/numba/tests/npyufunc/test_caching.py deleted file mode 100644 index 438a26d5b..000000000 --- a/numba/numba/tests/npyufunc/test_caching.py +++ /dev/null @@ -1,229 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import sys -import os.path -import re -import subprocess - -import numpy as np - -from numba import unittest_support as unittest -from numba import config - -from ..support import capture_cache_log -from ..test_dispatcher import BaseCacheTest - - -class UfuncCacheTest(BaseCacheTest): - """ - Since the cache stats is not exposed by ufunc, we 
test by looking at the - cache debug log. - """ - here = os.path.dirname(__file__) - usecases_file = os.path.join(here, "cache_usecases.py") - modname = "ufunc_caching_test_fodder" - - regex_data_saved = re.compile(r'\[cache\] data saved to') - regex_index_saved = re.compile(r'\[cache\] index saved to') - - regex_data_loaded = re.compile(r'\[cache\] data loaded from') - regex_index_loaded = re.compile(r'\[cache\] index loaded from') - - def check_cache_saved(self, cachelog, count): - """ - Check number of cache-save were issued - """ - data_saved = self.regex_data_saved.findall(cachelog) - index_saved = self.regex_index_saved.findall(cachelog) - self.assertEqual(len(data_saved), count) - self.assertEqual(len(index_saved), count) - - def check_cache_loaded(self, cachelog, count): - """ - Check number of cache-load were issued - """ - data_loaded = self.regex_data_loaded.findall(cachelog) - index_loaded = self.regex_index_loaded.findall(cachelog) - self.assertEqual(len(data_loaded), count) - self.assertEqual(len(index_loaded), count) - - def check_ufunc_cache(self, usecase_name, n_overloads, **kwargs): - """ - Check number of cache load/save. - There should be one per overloaded version. 
- """ - mod = self.import_module() - usecase = getattr(mod, usecase_name) - # New cache entry saved - with capture_cache_log() as out: - new_ufunc = usecase(**kwargs) - cachelog = out.getvalue() - self.check_cache_saved(cachelog, count=n_overloads) - - # Use cached version - with capture_cache_log() as out: - cached_ufunc = usecase(**kwargs) - cachelog = out.getvalue() - self.check_cache_loaded(cachelog, count=n_overloads) - - return new_ufunc, cached_ufunc - - -class TestUfuncCacheTest(UfuncCacheTest): - - def test_direct_ufunc_cache(self, **kwargs): - new_ufunc, cached_ufunc = self.check_ufunc_cache( - "direct_ufunc_cache_usecase", n_overloads=2, **kwargs) - # Test the cached and original versions - inp = np.random.random(10).astype(np.float64) - np.testing.assert_equal(new_ufunc(inp), cached_ufunc(inp)) - inp = np.arange(10, dtype=np.intp) - np.testing.assert_equal(new_ufunc(inp), cached_ufunc(inp)) - - def test_direct_ufunc_cache_objmode(self): - self.test_direct_ufunc_cache(forceobj=True) - - def test_direct_ufunc_cache_parallel(self): - self.test_direct_ufunc_cache(target='parallel') - - def test_indirect_ufunc_cache(self, **kwargs): - new_ufunc, cached_ufunc = self.check_ufunc_cache( - "indirect_ufunc_cache_usecase", n_overloads=3, **kwargs) - # Test the cached and original versions - inp = np.random.random(10).astype(np.float64) - np.testing.assert_equal(new_ufunc(inp), cached_ufunc(inp)) - inp = np.arange(10, dtype=np.intp) - np.testing.assert_equal(new_ufunc(inp), cached_ufunc(inp)) - - def test_indirect_ufunc_cache_parallel(self): - self.test_indirect_ufunc_cache(target='parallel') - - -class TestDUfuncCacheTest(UfuncCacheTest): - # Note: DUFunc doesn't support parallel target yet - - def check_dufunc_usecase(self, usecase_name): - mod = self.import_module() - usecase = getattr(mod, usecase_name) - # Create dufunc - with capture_cache_log() as out: - ufunc = usecase() - self.check_cache_saved(out.getvalue(), count=0) - # Compile & cache - with 
capture_cache_log() as out: - ufunc(np.arange(10)) - self.check_cache_saved(out.getvalue(), count=1) - self.check_cache_loaded(out.getvalue(), count=0) - # Use cached - with capture_cache_log() as out: - ufunc = usecase() - ufunc(np.arange(10)) - self.check_cache_loaded(out.getvalue(), count=1) - - def test_direct_dufunc_cache(self): - # We don't test for objmode because DUfunc don't support it. - self.check_dufunc_usecase('direct_dufunc_cache_usecase') - - def test_indirect_dufunc_cache(self): - self.check_dufunc_usecase('indirect_dufunc_cache_usecase') - - -def _fix_raw_path(rstr): - if config.IS_WIN32: - rstr = rstr.replace(r'/', r'\\\\') - return rstr - - -class TestGUfuncCacheTest(UfuncCacheTest): - - def test_filename_prefix(self): - mod = self.import_module() - usecase = getattr(mod, "direct_gufunc_cache_usecase") - with capture_cache_log() as out: - usecase() - cachelog = out.getvalue() - # find number filename with "guf-" prefix - fmt1 = _fix_raw_path(r'/__pycache__/guf-{}') - prefixed = re.findall(fmt1.format(self.modname), cachelog) - fmt2 = _fix_raw_path(r'/__pycache__/{}') - normal = re.findall(fmt2.format(self.modname), cachelog) - # expecting 2 overloads - self.assertGreater(len(normal), 2) - # expecting equal number of wrappers and overloads cache entries - self.assertEqual(len(normal), len(prefixed)) - - def test_direct_gufunc_cache(self, **kwargs): - # 2 cache entry for the 2 overloads - # and 2 cache entry for the gufunc wrapper - new_ufunc, cached_ufunc = self.check_ufunc_cache( - "direct_gufunc_cache_usecase", n_overloads=2 + 2, **kwargs) - # Test the cached and original versions - inp = np.random.random(10).astype(np.float64) - np.testing.assert_equal(new_ufunc(inp), cached_ufunc(inp)) - inp = np.arange(10, dtype=np.intp) - np.testing.assert_equal(new_ufunc(inp), cached_ufunc(inp)) - - def test_direct_gufunc_cache_objmode(self): - self.test_direct_gufunc_cache(forceobj=True) - - def test_direct_gufunc_cache_parallel(self): - 
self.test_direct_gufunc_cache(target='parallel') - - def test_indirect_gufunc_cache(self, **kwargs): - # 3 cache entry for the 3 overloads - # and no cache entry for the gufunc wrapper - new_ufunc, cached_ufunc = self.check_ufunc_cache( - "indirect_gufunc_cache_usecase", n_overloads=3, **kwargs) - # Test the cached and original versions - inp = np.random.random(10).astype(np.float64) - np.testing.assert_equal(new_ufunc(inp), cached_ufunc(inp)) - inp = np.arange(10, dtype=np.intp) - np.testing.assert_equal(new_ufunc(inp), cached_ufunc(inp)) - - def test_indirect_gufunc_cache_parallel(self, **kwargs): - self.test_indirect_gufunc_cache(target='parallel') - - -class TestCacheSpecificIssue(UfuncCacheTest): - - def run_in_separate_process(self, runcode): - # Based on the same name util function in test_dispatcher but modified - # to allow user to define what to run. - code = """if 1: - import sys - - sys.path.insert(0, %(tempdir)r) - mod = __import__(%(modname)r) - mod.%(runcode)s - """ % dict(tempdir=self.tempdir, modname=self.modname, - runcode=runcode) - - popen = subprocess.Popen([sys.executable, "-c", code], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = popen.communicate() - if popen.returncode != 0: - raise AssertionError("process failed with code %s: stderr follows\n%s\n" - % (popen.returncode, err.decode())) - - # - # The following test issue #2198 that loading cached (g)ufunc first - # bypasses some target context initialization. 
- # - - def test_first_load_cached_ufunc(self): - # ensure function is cached - self.run_in_separate_process('direct_ufunc_cache_usecase()') - # use the cached function - # this will fail if the target context is not init'ed - self.run_in_separate_process('direct_ufunc_cache_usecase()') - - def test_first_load_cached_gufunc(self): - # ensure function is cached - self.run_in_separate_process('direct_gufunc_cache_usecase()') - # use the cached function - # this will fail out if the target context is not init'ed - self.run_in_separate_process('direct_gufunc_cache_usecase()') - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/npyufunc/test_dufunc.py b/numba/numba/tests/npyufunc/test_dufunc.py deleted file mode 100644 index 4d341037c..000000000 --- a/numba/numba/tests/npyufunc/test_dufunc.py +++ /dev/null @@ -1,88 +0,0 @@ -from __future__ import print_function, absolute_import, division -from numba import unittest_support as unittest - -import numpy as np - -from numba import njit -from numba.npyufunc import dufunc -from ..support import MemoryLeakMixin - - -def pyuadd(a0, a1): - return a0 + a1 - - -class TestDUFunc(MemoryLeakMixin, unittest.TestCase): - - def nopython_dufunc(self, pyfunc): - return dufunc.DUFunc(pyfunc, targetoptions=dict(nopython=True)) - - def test_frozen(self): - duadd = self.nopython_dufunc(pyuadd) - self.assertFalse(duadd._frozen) - duadd._frozen = True - self.assertTrue(duadd._frozen) - with self.assertRaises(ValueError): - duadd._frozen = False - with self.assertRaises(TypeError): - duadd(np.linspace(0,1,10), np.linspace(1,2,10)) - - def test_scalar(self): - duadd = self.nopython_dufunc(pyuadd) - self.assertEqual(pyuadd(1,2), duadd(1,2)) - - def test_npm_call(self): - duadd = self.nopython_dufunc(pyuadd) - @njit - def npmadd(a0, a1, o0): - duadd(a0, a1, o0) - X = np.linspace(0,1.9,20) - X0 = X[:10] - X1 = X[10:] - out0 = np.zeros(10) - npmadd(X0, X1, out0) - np.testing.assert_array_equal(X0 + X1, out0) - Y0 = 
X0.reshape((2,5)) - Y1 = X1.reshape((2,5)) - out1 = np.zeros((2,5)) - npmadd(Y0, Y1, out1) - np.testing.assert_array_equal(Y0 + Y1, out1) - Y2 = X1[:5] - out2 = np.zeros((2,5)) - npmadd(Y0, Y2, out2) - np.testing.assert_array_equal(Y0 + Y2, out2) - - def test_npm_call_implicit_output(self): - duadd = self.nopython_dufunc(pyuadd) - @njit - def npmadd(a0, a1): - return duadd(a0, a1) - X = np.linspace(0,1.9,20) - X0 = X[:10] - X1 = X[10:] - out0 = npmadd(X0, X1) - np.testing.assert_array_equal(X0 + X1, out0) - Y0 = X0.reshape((2,5)) - Y1 = X1.reshape((2,5)) - out1 = npmadd(Y0, Y1) - np.testing.assert_array_equal(Y0 + Y1, out1) - Y2 = X1[:5] - out2 = npmadd(Y0, Y2) - np.testing.assert_array_equal(Y0 + Y2, out2) - out3 = npmadd(1.,2.) - self.assertEqual(out3, 3.) - - def test_ufunc_props(self): - duadd = self.nopython_dufunc(pyuadd) - self.assertEqual(duadd.nin, 2) - self.assertEqual(duadd.nout, 1) - self.assertEqual(duadd.nargs, duadd.nin + duadd.nout) - self.assertEqual(duadd.ntypes, 0) - self.assertEqual(duadd.types, []) - self.assertEqual(duadd.identity, None) - duadd(1, 2) - self.assertEqual(duadd.ntypes, 1) - self.assertEqual(duadd.ntypes, len(duadd.types)) - -if __name__ == "__main__": - unittest.main() diff --git a/numba/numba/tests/npyufunc/test_errors.py b/numba/numba/tests/npyufunc/test_errors.py deleted file mode 100644 index 237308843..000000000 --- a/numba/numba/tests/npyufunc/test_errors.py +++ /dev/null @@ -1,199 +0,0 @@ -from __future__ import absolute_import, print_function, division - -import contextlib -import sys -import warnings - -import numpy as np - -from numba import unittest_support as unittest -from numba import vectorize, guvectorize -from numba.numpy_support import version as np_version - -from ..support import TestCase - - -def sqrt(val): - if val < 0.0: - raise ValueError('Value must be positive') - return val ** 0.5 - - -def gufunc_foo(inp, n, out): - for i in range(inp.shape[0]): - if inp[i] < 0: - raise ValueError('Value must be 
positive') - out[i] = inp[i] * n[0] - -def truediv(a, b): - return a / b - -def floordiv(a, b): - return a // b - -def remainder(a, b): - return a % b - -def power(a, b): - return a ** b - -# See https://github.com/numpy/numpy/pull/3691 -skipIfFPStatusBug = unittest.skipIf( - sys.platform == 'win32' and np_version < (1, 8) and sys.maxsize < 2 ** 32, - "test disabled because of FPU state handling issue on Numpy < 1.8") - - -class TestExceptions(TestCase): - """ - Test raising exceptions inside ufuncs. - """ - - def check_ufunc_raise(self, **vectorize_args): - f = vectorize(['float64(float64)'], **vectorize_args)(sqrt) - arr = np.array([1, 4, -2, 9, -1, 16], dtype=np.float64) - out = np.zeros_like(arr) - with self.assertRaises(ValueError) as cm: - f(arr, out) - self.assertIn('Value must be positive', str(cm.exception)) - # All values were computed except for the ones giving an error - self.assertEqual(list(out), [1, 2, 0, 3, 0, 4]) - - def test_ufunc_raise(self): - self.check_ufunc_raise(nopython=True) - - def test_ufunc_raise_objmode(self): - self.check_ufunc_raise(forceobj=True) - - def check_gufunc_raise(self, **vectorize_args): - f = guvectorize(['int32[:], int32[:], int32[:]'], '(n),()->(n)', - **vectorize_args)(gufunc_foo) - arr = np.array([1, 2, -3, 4], dtype=np.int32) - out = np.zeros_like(arr) - with self.assertRaises(ValueError) as cm: - f(arr, 2, out) - # The gufunc bailed out after the error - self.assertEqual(list(out), [2, 4, 0, 0]) - - def test_gufunc_raise(self): - self.check_gufunc_raise(nopython=True) - - def test_gufunc_raise_objmode(self): - self.check_gufunc_raise(forceobj=True) - - -class TestFloatingPointExceptions(TestCase): - """ - Test floating-point exceptions inside ufuncs. - - Note the warnings emitted by Numpy reflect IEEE-754 semantics. 
- """ - - @contextlib.contextmanager - def check_warnings(self, messages, category=RuntimeWarning): - with warnings.catch_warnings(record=True) as catch: - warnings.simplefilter("always") - yield - # Check warnings for 1/0 and 0/0 - found = 0 - for w in catch: - for m in messages: - if m in str(w.message): - self.assertEqual(w.category, category) - found += 1 - self.assertEqual(found, len(messages)) - - @skipIfFPStatusBug - def check_truediv_real(self, dtype): - """ - Test 1 / 0 and 0 / 0. - """ - f = vectorize(nopython=True)(truediv) - a = np.array([5., 6., 0., 8.], dtype=dtype) - b = np.array([1., 0., 0., 4.], dtype=dtype) - expected = np.array([5., float('inf'), float('nan'), 2.]) - with self.check_warnings(["divide by zero encountered", - "invalid value encountered"]): - res = f(a, b) - self.assertPreciseEqual(res, expected) - - def test_truediv_float(self): - self.check_truediv_real(np.float64) - - def test_truediv_integer(self): - self.check_truediv_real(np.int32) - - @skipIfFPStatusBug - def check_divmod_float(self, pyfunc, values, messages): - """ - Test 1 // 0 and 0 // 0. - """ - f = vectorize(nopython=True)(pyfunc) - a = np.array([5., 6., 0., 9.]) - b = np.array([1., 0., 0., 4.]) - expected = np.array(values) - with self.check_warnings(messages): - res = f(a, b) - self.assertPreciseEqual(res, expected) - - def test_floordiv_float(self): - self.check_divmod_float(floordiv, - [5.0, float('inf'), float('nan'), 2.0], - ["divide by zero encountered", - "invalid value encountered"]) - - def test_remainder_float(self): - self.check_divmod_float(remainder, - [0.0, float('nan'), float('nan'), 1.0], - ["invalid value encountered"]) - - def check_divmod_int(self, pyfunc, values): - """ - Test 1 % 0 and 0 % 0. 
- """ - f = vectorize(nopython=True)(pyfunc) - a = np.array([5, 6, 0, 9]) - b = np.array([1, 0, 0, 4]) - expected = np.array(values) - # No warnings raised because LLVM makes it difficult - with self.check_warnings([]): - res = f(a, b) - self.assertPreciseEqual(res, expected) - - def test_floordiv_int(self): - self.check_divmod_int(floordiv, [5, 0, 0, 2]) - - def test_remainder_int(self): - self.check_divmod_int(remainder, [0, 0, 0, 1]) - - @skipIfFPStatusBug - def test_power_float(self): - """ - Test 0 ** -1 and 2 ** . - """ - f = vectorize(nopython=True)(power) - a = np.array([5., 0., 2., 8.]) - b = np.array([1., -1., 1e20, 4.]) - expected = np.array([5., float('inf'), float('inf'), 4096.]) - with self.check_warnings(["divide by zero encountered", - "overflow encountered"]): - res = f(a, b) - self.assertPreciseEqual(res, expected) - - def test_power_integer(self): - """ - Test 0 ** -1. - Note 2 ** returns an undefined value (depending - on the algorithm). - """ - dtype = np.int64 - f = vectorize(["int64(int64, int64)"], nopython=True)(power) - a = np.array([5, 0, 6], dtype=dtype) - b = np.array([1, -1, 2], dtype=dtype) - expected = np.array([5, -2**63, 36], dtype=dtype) - with self.check_warnings([]): - res = f(a, b) - self.assertPreciseEqual(res, expected) - - -if __name__ == "__main__": - unittest.main() diff --git a/numba/numba/tests/npyufunc/test_gufunc.py b/numba/numba/tests/npyufunc/test_gufunc.py deleted file mode 100644 index 274ec7ef2..000000000 --- a/numba/numba/tests/npyufunc/test_gufunc.py +++ /dev/null @@ -1,174 +0,0 @@ -from __future__ import absolute_import, print_function, division - -import numpy as np -import numpy.core.umath_tests as ut - -from numba import unittest_support as unittest -from numba import void, float32, jit, guvectorize -from numba.npyufunc import GUVectorize -from ..support import tag, TestCase - - -def matmulcore(A, B, C): - m, n = A.shape - n, p = B.shape - for i in range(m): - for j in range(p): - C[i, j] = 0 - for k in 
range(n): - C[i, j] += A[i, k] * B[k, j] - - -def axpy(a, x, y, out): - out[0] = a * x + y - - -class TestGUFunc(TestCase): - target = 'cpu' - - def check_matmul_gufunc(self, gufunc): - matrix_ct = 1001 - A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, 4) - B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, 5) - - C = gufunc(A, B) - Gold = ut.matrix_multiply(A, B) - - np.testing.assert_allclose(C, Gold, rtol=1e-5, atol=1e-8) - - @tag('important') - def test_gufunc(self): - gufunc = GUVectorize(matmulcore, '(m,n),(n,p)->(m,p)', - target=self.target) - gufunc.add((float32[:, :], float32[:, :], float32[:, :])) - gufunc = gufunc.build_ufunc() - - self.check_matmul_gufunc(gufunc) - - @tag('important') - def test_guvectorize_decor(self): - gufunc = guvectorize([void(float32[:,:], float32[:,:], float32[:,:])], - '(m,n),(n,p)->(m,p)', - target=self.target)(matmulcore) - - self.check_matmul_gufunc(gufunc) - - def test_ufunc_like(self): - # Test problem that the stride of "scalar" gufunc argument not properly - # handled when the actual argument is an array, - # causing the same value (first value) being repeated. - gufunc = GUVectorize(axpy, '(), (), () -> ()', target=self.target) - gufunc.add('(intp, intp, intp, intp[:])') - gufunc = gufunc.build_ufunc() - - x = np.arange(10, dtype=np.intp) - out = gufunc(x, x, x) - - np.testing.assert_equal(out, x * x + x) - - -class TestGUFuncParallel(TestGUFunc): - target = 'parallel' - - -class TestGUVectorizeScalar(TestCase): - """ - Nothing keeps user from out-of-bound memory access - """ - target = 'cpu' - - @tag('important') - def test_scalar_output(self): - """ - Note that scalar output is a 0-dimension array that acts as - a pointer to the output location. - """ - - @guvectorize(['void(int32[:], int32[:])'], '(n)->()', - target=self.target, nopython=True) - def sum_row(inp, out): - tmp = 0. 
- for i in range(inp.shape[0]): - tmp += inp[i] - out[()] = tmp - - # inp is (10000, 3) - # out is (10000) - # The outter (leftmost) dimension must match or numpy broadcasting is performed. - - inp = np.arange(30000, dtype=np.int32).reshape(10000, 3) - out = sum_row(inp) - - # verify result - for i in range(inp.shape[0]): - assert out[i] == inp[i].sum() - - @tag('important') - def test_scalar_input(self): - - @guvectorize(['int32[:], int32[:], int32[:]'], '(n),()->(n)', - target=self.target, nopython=True) - def foo(inp, n, out): - for i in range(inp.shape[0]): - out[i] = inp[i] * n[0] - - inp = np.arange(3 * 10, dtype=np.int32).reshape(10, 3) - # out = np.empty_like(inp) - out = foo(inp, 2) - - # verify result - self.assertPreciseEqual(inp * 2, out) - - def test_scalar_input_core_type(self): - def pyfunc(inp, n, out): - for i in range(inp.size): - out[i] = n * (inp[i] + 1) - - my_gufunc = guvectorize(['int32[:], int32, int32[:]'], - '(n),()->(n)', - target=self.target)(pyfunc) - - # test single core loop execution - arr = np.arange(10).astype(np.int32) - got = my_gufunc(arr, 2) - - expected = np.zeros_like(got) - pyfunc(arr, 2, expected) - - np.testing.assert_equal(got, expected) - - # test multiple core loop execution - arr = np.arange(20).astype(np.int32).reshape(10, 2) - got = my_gufunc(arr, 2) - - expected = np.zeros_like(got) - for ax in range(expected.shape[0]): - pyfunc(arr[ax], 2, expected[ax]) - - np.testing.assert_equal(got, expected) - - def test_scalar_input_core_type_error(self): - with self.assertRaises(TypeError) as raises: - @guvectorize(['int32[:], int32, int32[:]'], '(n),(n)->(n)', - target=self.target) - def pyfunc(a, b, c): - pass - self.assertEqual("scalar type int32 given for non scalar argument #2", - str(raises.exception)) - - def test_ndim_mismatch(self): - with self.assertRaises(TypeError) as raises: - @guvectorize(['int32[:], int32[:]'], '(m,n)->(n)', - target=self.target) - def pyfunc(a, b): - pass - self.assertEqual("type and shape 
signature mismatch for arg #1", - str(raises.exception)) - - -class TestGUVectorizeScalarParallel(TestGUVectorizeScalar): - target = 'parallel' - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/npyufunc/test_parallel_env_variable.py b/numba/numba/tests/npyufunc/test_parallel_env_variable.py deleted file mode 100644 index ff783b77c..000000000 --- a/numba/numba/tests/npyufunc/test_parallel_env_variable.py +++ /dev/null @@ -1,32 +0,0 @@ -from __future__ import absolute_import, print_function, division -from numba import unittest_support as unittest -from numba.npyufunc.parallel import get_thread_count -from os import environ as env -from numba import config - - -class TestParallelEnvVariable(unittest.TestCase): - """ - Tests environment variables related to the underlying "parallel" - functions for npyufuncs. - """ - - def test_num_threads_variable(self): - """ - Tests the NUMBA_NUM_THREADS env variable behaves as expected. - """ - key = 'NUMBA_NUM_THREADS' - current = str(getattr(env, key, config.NUMBA_DEFAULT_NUM_THREADS)) - threads = "3154" - env[key] = threads - config.reload_config() - try: - self.assertEqual(threads, str(get_thread_count())) - self.assertEqual(threads, str(config.NUMBA_NUM_THREADS)) - finally: - # reset the env variable/set to default - env[key] = current - config.reload_config() - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/npyufunc/test_parallel_low_work.py b/numba/numba/tests/npyufunc/test_parallel_low_work.py deleted file mode 100644 index 17d2df868..000000000 --- a/numba/numba/tests/npyufunc/test_parallel_low_work.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -There was a deadlock problem when work count is smaller than number of threads. 
-""" -from __future__ import absolute_import, print_function, division - -from numba import unittest_support as unittest - -import numpy as np - -from numba import float32, float64, int32, uint32 -from numba.npyufunc import Vectorize - - -def vector_add(a, b): - return a + b - - -class TestParallelLowWorkCount(unittest.TestCase): - def test_low_workcount(self): - # build parallel native code ufunc - pv = Vectorize(vector_add, target='parallel') - for ty in (int32, uint32, float32, float64): - pv.add(ty(ty, ty)) - para_ufunc = pv.build_ufunc() - - # build python ufunc - np_ufunc = np.vectorize(vector_add) - - # test it out - def test(ty): - data = np.arange(1).astype(ty) # just one item - result = para_ufunc(data, data) - gold = np_ufunc(data, data) - np.testing.assert_allclose(gold, result) - - test(np.double) - test(np.float32) - test(np.int32) - test(np.uint32) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/npyufunc/test_parallel_ufunc_issues.py b/numba/numba/tests/npyufunc/test_parallel_ufunc_issues.py deleted file mode 100644 index 7f02a8164..000000000 --- a/numba/numba/tests/npyufunc/test_parallel_ufunc_issues.py +++ /dev/null @@ -1,130 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import time -import ctypes - -import numpy as np - -from numba import unittest_support as unittest -from numba.tests.support import captured_stdout -from numba import vectorize, guvectorize - - -class TestParUfuncIssues(unittest.TestCase): - - _numba_parallel_test_ = False - - def test_thread_response(self): - """ - Related to #89. - This does not test #89 but tests the fix for it. - We want to make sure the worker threads can be used multiple times - and with different time gap between each execution. 
- """ - - @vectorize('float64(float64, float64)', target='parallel') - def fnv(a, b): - return a + b - - sleep_time = 1 # 1 second - while sleep_time > 0.00001: # 10us - time.sleep(sleep_time) - a = b = np.arange(10**5) - np.testing.assert_equal(a + b, fnv(a, b)) - # Reduce sleep time - sleep_time /= 2 - - def test_gil_reacquire_deadlock(self): - """ - Testing issue #1998 due to GIL reacquiring - """ - # make a ctypes callback that requires the GIL - proto = ctypes.CFUNCTYPE(None, ctypes.c_int32) - characters = 'abcdefghij' - - def bar(x): - print(characters[x]) - - cbar = proto(bar) - - # our unit under test - @vectorize(['int32(int32)'], target='parallel', nopython=True) - def foo(x): - print(x % 10) # this reacquires the GIL - cbar(x % 10) # this reacquires the GIL - return x * 2 - - # Numpy ufunc has a heuristic to determine whether to release the GIL - # during execution. Small input size (10) seems to not release the GIL. - # Large input size (1000) seems to release the GIL. - for nelem in [1, 10, 100, 1000]: - # inputs - a = np.arange(nelem, dtype=np.int32) - acopy = a.copy() - # run and capture stdout - with captured_stdout() as buf: - got = foo(a) - stdout = buf.getvalue() - buf.close() - # process outputs from print - got_output = sorted(map(lambda x: x.strip(), stdout.splitlines())) - # build expected output - expected_output = [str(x % 10) for x in range(nelem)] - expected_output += [characters[x % 10] for x in range(nelem)] - expected_output = sorted(expected_output) - # verify - self.assertEqual(got_output, expected_output) - np.testing.assert_equal(got, 2 * acopy) - - - -class TestParGUfuncIssues(unittest.TestCase): - - _numba_parallel_test_ = False - - def test_gil_reacquire_deadlock(self): - """ - Testing similar issue to #1998 due to GIL reacquiring for Gufunc - """ - # make a ctypes callback that requires the GIL - proto = ctypes.CFUNCTYPE(None, ctypes.c_int32) - characters = 'abcdefghij' - - def bar(x): - print(characters[x]) - - cbar = 
proto(bar) - - # our unit under test - @guvectorize(['(int32, int32[:])'], "()->()", - target='parallel', nopython=True) - def foo(x, out): - print(x % 10) # this reacquires the GIL - cbar(x % 10) # this reacquires the GIL - out[0] = x * 2 - - # Numpy ufunc has a heuristic to determine whether to release the GIL - # during execution. Small input size (10) seems to not release the GIL. - # Large input size (1000) seems to release the GIL. - for nelem in [1, 10, 100, 1000]: - # inputs - a = np.arange(nelem, dtype=np.int32) - acopy = a.copy() - # run and capture stdout - with captured_stdout() as buf: - got = foo(a) - stdout = buf.getvalue() - buf.close() - # process outputs from print - got_output = sorted(map(lambda x: x.strip(), stdout.splitlines())) - # build expected output - expected_output = [str(x % 10) for x in range(nelem)] - expected_output += [characters[x % 10] for x in range(nelem)] - expected_output = sorted(expected_output) - # verify - self.assertEqual(got_output, expected_output) - np.testing.assert_equal(got, 2 * acopy) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/npyufunc/test_ufunc.py b/numba/numba/tests/npyufunc/test_ufunc.py deleted file mode 100644 index 6b6541503..000000000 --- a/numba/numba/tests/npyufunc/test_ufunc.py +++ /dev/null @@ -1,105 +0,0 @@ -from __future__ import absolute_import, print_function, division - -import numpy as np - -from numba import unittest_support as unittest -from numba import float32 -from numba.npyufunc import Vectorize -from ..support import tag, TestCase - - -dtype = np.float32 -a = np.arange(80, dtype=dtype).reshape(8, 10) -b = a.copy() -c = a.copy(order='F') -d = np.arange(16 * 20, dtype=dtype).reshape(16, 20)[::2, ::2] - - -def add(a, b): - return a + b - - -def add_multiple_args(a, b, c, d): - return a + b + c + d - - -def gufunc_add(a, b): - result = 0.0 - for i in range(a.shape[0]): - result += a[i] * b[i] - - return result - - -def ufunc_reduce(ufunc, arg): - for i in 
range(arg.ndim): - arg = ufunc.reduce(arg) - return arg - - -vectorizers = [ - Vectorize, - # ParallelVectorize, - # StreamVectorize, - # CudaVectorize, - # GUFuncVectorize, -] - - -class TestUFuncs(TestCase): - - def _test_ufunc_attributes(self, cls, a, b, *args): - "Test ufunc attributes" - vectorizer = cls(add, *args) - vectorizer.add(float32(float32, float32)) - ufunc = vectorizer.build_ufunc() - - info = (cls, a.ndim) - self.assertPreciseEqual(ufunc(a, b), a + b, msg=info) - self.assertPreciseEqual(ufunc_reduce(ufunc, a), np.sum(a), msg=info) - self.assertPreciseEqual(ufunc.accumulate(a), np.add.accumulate(a), msg=info) - self.assertPreciseEqual(ufunc.outer(a, b), np.add.outer(a, b), msg=info) - - def _test_broadcasting(self, cls, a, b, c, d): - "Test multiple args" - vectorizer = cls(add_multiple_args) - vectorizer.add(float32(float32, float32, float32, float32)) - ufunc = vectorizer.build_ufunc() - - info = (cls, a.shape) - self.assertPreciseEqual(ufunc(a, b, c, d), a + b + c + d, msg=info) - - @tag('important') - def test_ufunc_attributes(self): - for v in vectorizers: # 1D - self._test_ufunc_attributes(v, a[0], b[0]) - for v in vectorizers: # 2D - self._test_ufunc_attributes(v, a, b) - for v in vectorizers: # 3D - self._test_ufunc_attributes(v, a[:, np.newaxis, :], - b[np.newaxis, :, :]) - - @tag('important') - def test_broadcasting(self): - for v in vectorizers: # 1D - self._test_broadcasting(v, a[0], b[0], c[0], d[0]) - for v in vectorizers: # 2D - self._test_broadcasting(v, a, b, c, d) - for v in vectorizers: # 3D - self._test_broadcasting(v, a[:, np.newaxis, :], b[np.newaxis, :, :], - c[:, np.newaxis, :], d[np.newaxis, :, :]) - - @tag('important') - def test_implicit_broadcasting(self): - for v in vectorizers: - vectorizer = v(add) - vectorizer.add(float32(float32, float32)) - ufunc = vectorizer.build_ufunc() - - broadcasting_b = b[np.newaxis, :, np.newaxis, np.newaxis, :] - self.assertPreciseEqual(ufunc(a, broadcasting_b), - a + broadcasting_b) - - 
-if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/npyufunc/test_ufuncbuilding.py b/numba/numba/tests/npyufunc/test_ufuncbuilding.py deleted file mode 100644 index e4763b470..000000000 --- a/numba/numba/tests/npyufunc/test_ufuncbuilding.py +++ /dev/null @@ -1,377 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import sys - -import numpy as np - -from numba import config, unittest_support as unittest -from numba.npyufunc.ufuncbuilder import GUFuncBuilder -from numba import vectorize, guvectorize -from numba.npyufunc import PyUFunc_One -from numba.npyufunc.dufunc import DUFunc as UFuncBuilder -from ..support import tag, TestCase - - -def add(a, b): - """An addition""" - return a + b - -def equals(a, b): - return a == b - -def mul(a, b): - """A multiplication""" - return a * b - -def guadd(a, b, c): - """A generalized addition""" - x, y = c.shape - for i in range(x): - for j in range(y): - c[i, j] = a[i, j] + b[i, j] - -@vectorize(nopython=True) -def inner(a, b): - return a + b - -@vectorize(["int64(int64, int64)"], nopython=True) -def inner_explicit(a, b): - return a + b - -def outer(a, b): - return inner(a, b) - -def outer_explicit(a, b): - return inner_explicit(a, b) - - -class Dummy: pass - - -def guadd_obj(a, b, c): - Dummy() # to force object mode - x, y = c.shape - for i in range(x): - for j in range(y): - c[i, j] = a[i, j] + b[i, j] - -def guadd_scalar_obj(a, b, c): - Dummy() # to force object mode - x, y = c.shape - for i in range(x): - for j in range(y): - c[i, j] = a[i, j] + b - - -class MyException(Exception): - pass - - -def guerror(a, b, c): - raise MyException - - -class TestUfuncBuilding(TestCase): - - @tag('important') - def test_basic_ufunc(self): - ufb = UFuncBuilder(add) - cres = ufb.add("int32(int32, int32)") - self.assertFalse(cres.objectmode) - cres = ufb.add("int64(int64, int64)") - self.assertFalse(cres.objectmode) - ufunc = ufb.build_ufunc() - - def check(a): - b = ufunc(a, a) - 
self.assertPreciseEqual(a + a, b) - self.assertEqual(b.dtype, a.dtype) - - a = np.arange(12, dtype='int32') - check(a) - # Non-contiguous dimension - a = a[::2] - check(a) - a = a.reshape((2, 3)) - check(a) - - # Metadata - self.assertEqual(ufunc.__name__, "add") - self.assertIn("An addition", ufunc.__doc__) - - def test_ufunc_struct(self): - ufb = UFuncBuilder(add) - cres = ufb.add("complex64(complex64, complex64)") - self.assertFalse(cres.objectmode) - ufunc = ufb.build_ufunc() - - def check(a): - b = ufunc(a, a) - self.assertPreciseEqual(a + a, b) - self.assertEqual(b.dtype, a.dtype) - - a = np.arange(12, dtype='complex64') + 1j - check(a) - # Non-contiguous dimension - a = a[::2] - check(a) - a = a.reshape((2, 3)) - check(a) - - def test_ufunc_forceobj(self): - ufb = UFuncBuilder(add, targetoptions={'forceobj': True}) - cres = ufb.add("int32(int32, int32)") - self.assertTrue(cres.objectmode) - ufunc = ufb.build_ufunc() - - a = np.arange(10, dtype='int32') - b = ufunc(a, a) - self.assertPreciseEqual(a + a, b) - - def test_nested_call(self): - """ - Check nested call to an implicitly-typed ufunc. - """ - builder = UFuncBuilder(outer, - targetoptions={'nopython': True}) - builder.add("(int64, int64)") - ufunc = builder.build_ufunc() - self.assertEqual(ufunc(-1, 3), 2) - - def test_nested_call_explicit(self): - """ - Check nested call to an explicitly-typed ufunc. 
- """ - builder = UFuncBuilder(outer_explicit, - targetoptions={'nopython': True}) - builder.add("(int64, int64)") - ufunc = builder.build_ufunc() - self.assertEqual(ufunc(-1, 3), 2) - - -class TestUfuncBuildingJitDisabled(TestUfuncBuilding): - - def setUp(self): - self.old_disable_jit = config.DISABLE_JIT - config.DISABLE_JIT = False - - def tearDown(self): - config.DISABLE_JIT = self.old_disable_jit - - -class TestGUfuncBuilding(TestCase): - - def test_basic_gufunc(self): - gufb = GUFuncBuilder(guadd, "(x, y),(x, y)->(x, y)") - cres = gufb.add("void(int32[:,:], int32[:,:], int32[:,:])") - self.assertFalse(cres.objectmode) - ufunc = gufb.build_ufunc() - - a = np.arange(10, dtype="int32").reshape(2, 5) - b = ufunc(a, a) - - self.assertPreciseEqual(a + a, b) - self.assertEqual(b.dtype, np.dtype('int32')) - - # Metadata - self.assertEqual(ufunc.__name__, "guadd") - self.assertIn("A generalized addition", ufunc.__doc__) - - @tag('important') - def test_gufunc_struct(self): - gufb = GUFuncBuilder(guadd, "(x, y),(x, y)->(x, y)") - cres = gufb.add("void(complex64[:,:], complex64[:,:], complex64[:,:])") - self.assertFalse(cres.objectmode) - ufunc = gufb.build_ufunc() - - a = np.arange(10, dtype="complex64").reshape(2, 5) + 1j - b = ufunc(a, a) - - self.assertPreciseEqual(a + a, b) - - def test_gufunc_struct_forceobj(self): - gufb = GUFuncBuilder(guadd, "(x, y),(x, y)->(x, y)", - targetoptions=dict(forceobj=True)) - cres = gufb.add("void(complex64[:,:], complex64[:,:], complex64[:," - ":])") - self.assertTrue(cres.objectmode) - ufunc = gufb.build_ufunc() - - a = np.arange(10, dtype="complex64").reshape(2, 5) + 1j - b = ufunc(a, a) - - self.assertPreciseEqual(a + a, b) - - -class TestGUfuncBuildingJitDisabled(TestGUfuncBuilding): - - def setUp(self): - self.old_disable_jit = config.DISABLE_JIT - config.DISABLE_JIT = False - - def tearDown(self): - config.DISABLE_JIT = self.old_disable_jit - - -class TestVectorizeDecor(TestCase): - - _supported_identities = [0, 1, None, 
"reorderable"] - - def test_vectorize(self): - ufunc = vectorize(['int32(int32, int32)'])(add) - a = np.arange(10, dtype='int32') - b = ufunc(a, a) - self.assertPreciseEqual(a + a, b) - - def test_vectorize_objmode(self): - ufunc = vectorize(['int32(int32, int32)'], forceobj=True)(add) - a = np.arange(10, dtype='int32') - b = ufunc(a, a) - self.assertPreciseEqual(a + a, b) - - @tag('important') - def test_vectorize_bool_return(self): - ufunc = vectorize(['bool_(int32, int32)'])(equals) - a = np.arange(10, dtype='int32') - r = ufunc(a,a) - self.assertPreciseEqual(r, np.ones(r.shape, dtype=np.bool_)) - - @tag('important') - def test_vectorize_identity(self): - sig = 'int32(int32, int32)' - for identity in self._supported_identities: - ufunc = vectorize([sig], identity=identity)(add) - expected = None if identity == 'reorderable' else identity - self.assertEqual(ufunc.identity, expected) - # Default value is None - ufunc = vectorize([sig])(add) - self.assertIs(ufunc.identity, None) - # Invalid values - with self.assertRaises(ValueError): - vectorize([sig], identity='none')(add) - with self.assertRaises(ValueError): - vectorize([sig], identity=2)(add) - - def test_vectorize_no_args(self): - a = np.linspace(0,1,10) - b = np.linspace(1,2,10) - ufunc = vectorize(add) - self.assertPreciseEqual(ufunc(a,b), a + b) - ufunc2 = vectorize(add) - c = np.empty(10) - ufunc2(a, b, c) - self.assertPreciseEqual(c, a + b) - - def test_vectorize_only_kws(self): - a = np.linspace(0,1,10) - b = np.linspace(1,2,10) - ufunc = vectorize(identity=PyUFunc_One, nopython=True)(mul) - self.assertPreciseEqual(ufunc(a,b), a * b) - - def test_vectorize_output_kwarg(self): - """ - Passing the output array as a keyword argument (issue #1867). 
- """ - def check(ufunc): - a = np.arange(10, 16, dtype='int32') - out = np.zeros_like(a) - got = ufunc(a, a, out=out) - self.assertIs(got, out) - self.assertPreciseEqual(out, a + a) - with self.assertRaises(TypeError): - ufunc(a, a, zzz=out) - - # With explicit sigs - ufunc = vectorize(['int32(int32, int32)'], nopython=True)(add) - check(ufunc) - # With implicit sig - ufunc = vectorize(nopython=True)(add) - check(ufunc) # compiling - check(ufunc) # after compiling - - @tag('important') - def test_guvectorize(self): - ufunc = guvectorize(['(int32[:,:], int32[:,:], int32[:,:])'], - "(x,y),(x,y)->(x,y)")(guadd) - a = np.arange(10, dtype='int32').reshape(2, 5) - b = ufunc(a, a) - self.assertPreciseEqual(a + a, b) - - @tag('important') - def test_guvectorize_no_output(self): - ufunc = guvectorize(['(int32[:,:], int32[:,:], int32[:,:])'], - "(x,y),(x,y),(x,y)")(guadd) - a = np.arange(10, dtype='int32').reshape(2, 5) - out = np.zeros_like(a) - ufunc(a, a, out) - self.assertPreciseEqual(a + a, out) - - def test_guvectorize_objectmode(self): - ufunc = guvectorize(['(int32[:,:], int32[:,:], int32[:,:])'], - "(x,y),(x,y)->(x,y)")(guadd_obj) - a = np.arange(10, dtype='int32').reshape(2, 5) - b = ufunc(a, a) - self.assertPreciseEqual(a + a, b) - - def test_guvectorize_scalar_objectmode(self): - """ - Test passing of scalars to object mode gufuncs. 
- """ - ufunc = guvectorize(['(int32[:,:], int32, int32[:,:])'], - "(x,y),()->(x,y)")(guadd_scalar_obj) - a = np.arange(10, dtype='int32').reshape(2, 5) - b = ufunc(a, 3) - self.assertPreciseEqual(a + 3, b) - - def test_guvectorize_error_in_objectmode(self): - ufunc = guvectorize(['(int32[:,:], int32[:,:], int32[:,:])'], - "(x,y),(x,y)->(x,y)", forceobj=True)(guerror) - a = np.arange(10, dtype='int32').reshape(2, 5) - with self.assertRaises(MyException): - ufunc(a, a) - - @tag('important') - def test_guvectorize_identity(self): - args = (['(int32[:,:], int32[:,:], int32[:,:])'], "(x,y),(x,y)->(x,y)") - for identity in self._supported_identities: - ufunc = guvectorize(*args, identity=identity)(guadd) - expected = None if identity == 'reorderable' else identity - self.assertEqual(ufunc.identity, expected) - # Default value is None - ufunc = guvectorize(*args)(guadd) - self.assertIs(ufunc.identity, None) - # Invalid values - with self.assertRaises(ValueError): - guvectorize(*args, identity='none')(add) - with self.assertRaises(ValueError): - guvectorize(*args, identity=2)(add) - - def test_guvectorize_invalid_layout(self): - sigs = ['(int32[:,:], int32[:,:], int32[:,:])'] - # Syntax error - with self.assertRaises(ValueError) as raises: - guvectorize(sigs, ")-:")(guadd) - self.assertIn("bad token in signature", str(raises.exception)) - # Output shape can't be inferred from inputs - with self.assertRaises(NameError) as raises: - guvectorize(sigs, "(x,y),(x,y)->(x,z,v)")(guadd) - self.assertEqual(str(raises.exception), - "undefined output symbols: v,z") - # Arrow but no outputs - with self.assertRaises(ValueError) as raises: - guvectorize(sigs, "(x,y),(x,y),(x,y)->")(guadd) - # (error message depends on Numpy version) - - -class TestVectorizeDecorJitDisabled(TestVectorizeDecor): - - def setUp(self): - self.old_disable_jit = config.DISABLE_JIT - config.DISABLE_JIT = False - - def tearDown(self): - config.DISABLE_JIT = self.old_disable_jit - - -if __name__ == '__main__': - 
unittest.main() diff --git a/numba/numba/tests/npyufunc/test_vectorize_decor.py b/numba/numba/tests/npyufunc/test_vectorize_decor.py deleted file mode 100644 index 872a6180c..000000000 --- a/numba/numba/tests/npyufunc/test_vectorize_decor.py +++ /dev/null @@ -1,105 +0,0 @@ -from __future__ import absolute_import, print_function, division - -import math - -import numpy as np - -from numba import unittest_support as unittest -from numba import int32, uint32, float32, float64, jit, vectorize -from ..support import tag - - -pi = math.pi - - -def sinc(x): - if x == 0.0: - return 1.0 - else: - return math.sin(x * pi) / (pi * x) - -def scaled_sinc(x, scale): - if x == 0.0: - return scale - else: - return scale * (math.sin(x * pi) / (pi * x)) - -def vector_add(a, b): - return a + b - - -class BaseVectorizeDecor(object): - target = None - wrapper = None - funcs = { - 'func1': sinc, - 'func2': scaled_sinc, - 'func3': vector_add, - } - - @classmethod - def _run_and_compare(cls, func, sig, A, *args, **kwargs): - if cls.wrapper is not None: - func = cls.wrapper(func) - numba_func = vectorize(sig, target=cls.target)(func) - numpy_func = np.vectorize(func) - result = numba_func(A, *args) - gold = numpy_func(A, *args) - np.testing.assert_allclose(result, gold, **kwargs) - - @tag('important') - def test_1(self): - sig = ['float64(float64)', 'float32(float32)'] - func = self.funcs['func1'] - A = np.arange(100, dtype=np.float64) - self._run_and_compare(func, sig, A) - - @tag('important') - def test_2(self): - sig = [float64(float64), float32(float32)] - func = self.funcs['func1'] - A = np.arange(100, dtype=np.float64) - self._run_and_compare(func, sig, A) - - @tag('important') - def test_3(self): - sig = ['float64(float64, uint32)'] - func = self.funcs['func2'] - A = np.arange(100, dtype=np.float64) - scale = np.uint32(3) - self._run_and_compare(func, sig, A, scale, atol=1e-8) - - @tag('important') - def test_4(self): - sig = [ - int32(int32, int32), - uint32(uint32, uint32), - 
float32(float32, float32), - float64(float64, float64), - ] - func = self.funcs['func3'] - A = np.arange(100, dtype=np.float64) - self._run_and_compare(func, sig, A, A) - A = A.astype(np.float32) - self._run_and_compare(func, sig, A, A) - A = A.astype(np.int32) - self._run_and_compare(func, sig, A, A) - A = A.astype(np.uint32) - self._run_and_compare(func, sig, A, A) - - -class TestCPUVectorizeDecor(unittest.TestCase, BaseVectorizeDecor): - target = 'cpu' - - -class TestParallelVectorizeDecor(unittest.TestCase, BaseVectorizeDecor): - target = 'parallel' - - -class TestCPUVectorizeJitted(unittest.TestCase, BaseVectorizeDecor): - target = 'cpu' - wrapper = staticmethod(jit) # staticmethod required for py27 - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/pdlike_usecase.py b/numba/numba/tests/pdlike_usecase.py deleted file mode 100644 index f155c2232..000000000 --- a/numba/numba/tests/pdlike_usecase.py +++ /dev/null @@ -1,306 +0,0 @@ -""" -Implementation of a minimal Pandas-like API. -""" - -import numpy as np - -from numba import types, cgutils -from numba.datamodel import models -from numba.extending import ( - typeof_impl, type_callable, register_model, - lower_builtin, box, unbox, NativeValue, - overload, overload_attribute, overload_method, make_attribute_wrapper) -from numba.targets.imputils import impl_ret_borrowed - - -class Index(object): - """ - A minimal pandas.Index-like object. - """ - - def __init__(self, data): - assert isinstance(data, np.ndarray) - assert data.ndim == 1 - self._data = data - - def __iter__(self): - return iter(self._data) - - @property - def dtype(self): - return self._data.dtype - - @property - def flags(self): - return self._data.flags - - -class IndexType(types.Buffer): - """ - The type class for Index objects. 
- """ - array_priority = 1000 - - def __init__(self, dtype, layout, pyclass): - self.pyclass = pyclass - super(IndexType, self).__init__(dtype, 1, layout) - - @property - def key(self): - return self.pyclass, self.dtype, self.layout - - @property - def as_array(self): - return types.Array(self.dtype, 1, self.layout) - - def copy(self, dtype=None, ndim=1, layout=None): - assert ndim == 1 - if dtype is None: - dtype = self.dtype - layout = layout or self.layout - return type(self)(dtype, layout, self.pyclass) - - -class Series(object): - """ - A minimal pandas.Series-like object. - """ - - def __init__(self, data, index): - assert isinstance(data, np.ndarray) - assert isinstance(index, Index) - assert data.ndim == 1 - self._values = data - self._index = index - - def __iter__(self): - return iter(self._values) - - @property - def dtype(self): - return self._values.dtype - - @property - def flags(self): - return self._values.flags - - -class SeriesType(types.ArrayCompatible): - """ - The type class for Series objects. 
- """ - array_priority = 1000 - - def __init__(self, dtype, index): - assert isinstance(index, IndexType) - self.dtype = dtype - self.index = index - self.values = types.Array(self.dtype, 1, 'C') - name = "series(%s, %s)" % (dtype, index) - super(SeriesType, self).__init__(name) - - @property - def key(self): - return self.dtype, self.index - - @property - def as_array(self): - return self.values - - def copy(self, dtype=None, ndim=1, layout='C'): - assert ndim == 1 - assert layout == 'C' - if dtype is None: - dtype = self.dtype - return type(self)(dtype, self.index) - - -@typeof_impl.register(Index) -def typeof_index(val, c): - arrty = typeof_impl(val._data, c) - assert arrty.ndim == 1 - return IndexType(arrty.dtype, arrty.layout, type(val)) - -@typeof_impl.register(Series) -def typeof_series(val, c): - index = typeof_impl(val._index, c) - arrty = typeof_impl(val._values, c) - assert arrty.ndim == 1 - assert arrty.layout == 'C' - return SeriesType(arrty.dtype, index) - -@type_callable('__array_wrap__') -def type_array_wrap(context): - def typer(input_type, result): - if isinstance(input_type, (IndexType, SeriesType)): - return input_type.copy(dtype=result.dtype, - ndim=result.ndim, - layout=result.layout) - - return typer - -@type_callable(Series) -def type_series_constructor(context): - def typer(data, index): - if isinstance(index, IndexType) and isinstance(data, types.Array): - assert data.layout == 'C' - assert data.ndim == 1 - return SeriesType(data.dtype, index) - - return typer - - -# Backend extensions for Index and Series - -@register_model(IndexType) -class IndexModel(models.StructModel): - def __init__(self, dmm, fe_type): - members = [('data', fe_type.as_array)] - models.StructModel.__init__(self, dmm, fe_type, members) - -@register_model(SeriesType) -class SeriesModel(models.StructModel): - def __init__(self, dmm, fe_type): - members = [ - ('index', fe_type.index), - ('values', fe_type.as_array), - ] - models.StructModel.__init__(self, dmm, fe_type, 
members) - -make_attribute_wrapper(IndexType, 'data', '_data') -make_attribute_wrapper(SeriesType, 'index', '_index') -make_attribute_wrapper(SeriesType, 'values', '_values') - -def make_index(context, builder, typ, **kwargs): - return cgutils.create_struct_proxy(typ)(context, builder, **kwargs) - -def make_series(context, builder, typ, **kwargs): - return cgutils.create_struct_proxy(typ)(context, builder, **kwargs) - -@lower_builtin('__array__', IndexType) -def index_as_array(context, builder, sig, args): - val = make_index(context, builder, sig.args[0], ref=args[0]) - return val._get_ptr_by_name('data') - -@lower_builtin('__array__', SeriesType) -def series_as_array(context, builder, sig, args): - val = make_series(context, builder, sig.args[0], ref=args[0]) - return val._get_ptr_by_name('values') - -@lower_builtin('__array_wrap__', IndexType, types.Array) -def index_wrap_array(context, builder, sig, args): - dest = make_index(context, builder, sig.return_type) - dest.data = args[1] - return impl_ret_borrowed(context, builder, sig.return_type, dest._getvalue()) - -@lower_builtin('__array_wrap__', SeriesType, types.Array) -def series_wrap_array(context, builder, sig, args): - src = make_series(context, builder, sig.args[0], value=args[0]) - dest = make_series(context, builder, sig.return_type) - dest.values = args[1] - dest.index = src.index - return impl_ret_borrowed(context, builder, sig.return_type, dest._getvalue()) - -@lower_builtin(Series, types.Array, IndexType) -def pdseries_constructor(context, builder, sig, args): - data, index = args - series = make_series(context, builder, sig.return_type) - series.index = index - series.values = data - return impl_ret_borrowed(context, builder, sig.return_type, series._getvalue()) - - -@unbox(IndexType) -def unbox_index(typ, obj, c): - """ - Convert a Index object to a native structure. 
- """ - data = c.pyapi.object_getattr_string(obj, "_data") - index = make_index(c.context, c.builder, typ) - index.data = c.unbox(typ.as_array, data).value - - return NativeValue(index._getvalue()) - -@unbox(SeriesType) -def unbox_series(typ, obj, c): - """ - Convert a Series object to a native structure. - """ - index = c.pyapi.object_getattr_string(obj, "_index") - values = c.pyapi.object_getattr_string(obj, "_values") - series = make_series(c.context, c.builder, typ) - series.index = c.unbox(typ.index, index).value - series.values = c.unbox(typ.values, values).value - - return NativeValue(series._getvalue()) - - -@box(IndexType) -def box_index(typ, val, c): - """ - Convert a native index structure to a Index object. - """ - # First build a Numpy array object, then wrap it in a Index - index = make_index(c.context, c.builder, typ, value=val) - classobj = c.pyapi.unserialize(c.pyapi.serialize_object(typ.pyclass)) - arrayobj = c.box(typ.as_array, index.data) - indexobj = c.pyapi.call_function_objargs(classobj, (arrayobj,)) - return indexobj - -@box(SeriesType) -def box_series(typ, val, c): - """ - Convert a native series structure to a Series object. 
- """ - series = make_series(c.context, c.builder, typ, value=val) - classobj = c.pyapi.unserialize(c.pyapi.serialize_object(Series)) - indexobj = c.box(typ.index, series.index) - arrayobj = c.box(typ.as_array, series.values) - seriesobj = c.pyapi.call_function_objargs(classobj, (arrayobj, indexobj)) - return seriesobj - - -@overload_attribute(IndexType, 'is_monotonic_increasing') -def index_is_monotonic_increasing(typ): - """ - Index.is_monotonic_increasing - """ - def getter(index): - data = index._data - if len(data) == 0: - return True - u = data[0] - for v in data: - if v < u: - return False - v = u - return True - - return getter - -@overload(len) -def series_len(series): - """ - len(Series) - """ - if isinstance(series, SeriesType): - def len_impl(series): - return len(series._values) - return len_impl - -@overload_method(SeriesType, 'clip') -def series_clip(series, lower, upper): - """ - Series.clip(...) - """ - def clip_impl(series, lower, upper): - data = series._values.copy() - for i in range(len(data)): - v = data[i] - if v < lower: - data[i] = lower - elif v > upper: - data[i] = upper - return Series(data, series._index) - - return clip_impl diff --git a/numba/numba/tests/pycc_distutils_usecase/setup_distutils.py b/numba/numba/tests/pycc_distutils_usecase/setup_distutils.py deleted file mode 100644 index 0a210359e..000000000 --- a/numba/numba/tests/pycc_distutils_usecase/setup_distutils.py +++ /dev/null @@ -1,15 +0,0 @@ -from distutils.core import setup - -from source_module import cc - -from numba.pycc.platform import _patch_exec_command - - -def run_setup(): - # Avoid sporadic crashes on Windows due to MSVCRT spawnve() - _patch_exec_command() - setup(ext_modules=[cc.distutils_extension()]) - - -if __name__ == '__main__': - run_setup() diff --git a/numba/numba/tests/pycc_distutils_usecase/setup_setuptools.py b/numba/numba/tests/pycc_distutils_usecase/setup_setuptools.py deleted file mode 100644 index 8e09b4d28..000000000 --- 
a/numba/numba/tests/pycc_distutils_usecase/setup_setuptools.py +++ /dev/null @@ -1,15 +0,0 @@ -from setuptools import setup - -from source_module import cc - -from numba.pycc.platform import _patch_exec_command - - -def run_setup(): - # Avoid sporadic crashes on Windows due to MSVCRT spawnve() - _patch_exec_command() - setup(ext_modules=[cc.distutils_extension()]) - - -if __name__ == '__main__': - run_setup() diff --git a/numba/numba/tests/pycc_distutils_usecase/source_module.py b/numba/numba/tests/pycc_distutils_usecase/source_module.py deleted file mode 100644 index e5e6ea82f..000000000 --- a/numba/numba/tests/pycc_distutils_usecase/source_module.py +++ /dev/null @@ -1,18 +0,0 @@ -import numpy as np - -from numba.pycc import CC - - -cc = CC('pycc_compiled_module') - -_const = 42 - -# This ones references a global variable at compile time -@cc.export('get_const', 'i8()') -def get_const(): - return _const - -# This one needs NRT and an environment -@cc.export('ones', 'f8[:](i4)') -def ones(n): - return np.ones(n) diff --git a/numba/numba/tests/recursion_usecases.py b/numba/numba/tests/recursion_usecases.py deleted file mode 100644 index a8242bdbe..000000000 --- a/numba/numba/tests/recursion_usecases.py +++ /dev/null @@ -1,217 +0,0 @@ -""" -Usecases of recursive functions. - -Some functions are compiled at import time, hence a separate module. 
-""" - -from numba import jit - - -@jit("i8(i8)", nopython=True) -def fib1(n): - if n < 2: - return n - # Note the second call uses a named argument - return fib1(n - 1) + fib1(n=n - 2) - - -def make_fib2(): - @jit("i8(i8)", nopython=True) - def fib2(n): - if n < 2: - return n - return fib2(n - 1) + fib2(n=n - 2) - - return fib2 - -fib2 = make_fib2() - - -def make_type_change_self(jit=lambda x: x): - @jit - def type_change_self(x, y): - if x > 1 and y > 0: - return x + type_change_self(x - y, y) - else: - return y - return type_change_self - - -# Implicit signature -@jit(nopython=True) -def fib3(n): - if n < 2: - return n - return fib3(n - 1) + fib3(n - 2) - - -# Run-away self recursion -@jit(nopython=True) -def runaway_self(x): - return runaway_self(x) - - -@jit(nopython=True) -def raise_self(x): - if x == 1: - raise ValueError("raise_self") - elif x > 0: - return raise_self(x - 1) - else: - return 1 - - -# Mutual recursion -@jit(nopython=True) -def outer_fac(n): - if n < 1: - return 1 - return n * inner_fac(n - 1) - - -@jit(nopython=True) -def inner_fac(n): - if n < 1: - return 1 - return n * outer_fac(n - 1) - - -# Mutual recursion with different arg names -def make_mutual2(jit=lambda x: x): - @jit - def foo(x): - if x > 0: - return 2 * bar(z=1, y=x) - return 1 + x - - @jit - def bar(y, z): - return foo(x=y - z) - - return foo, bar - - -# Mutual runaway recursion - -@jit(nopython=True) -def runaway_mutual(x): - return runaway_mutual_inner(x) - - -@jit(nopython=True) -def runaway_mutual_inner(x): - return runaway_mutual(x) - - -# Mutual type changing recursion - -def make_type_change_mutual(jit=lambda x: x): - @jit - def foo(x, y): - if x > 1 and y > 0: - # call bar first to exercise partial type inference. 
- # typeinferer suspended at the call to bar() and haven't determined - # the potential return type from the else-branch - return x + bar(x - y, y) - else: - return y - - @jit - def bar(x, y): - if x > 1 and y > 0: - return x + foo(x - y, y) - else: - return y - - return foo - - -# Indirect mutual recursion -def make_four_level(jit=lambda x: x): - @jit - def first(x): - # The recursing call must have a path that is non-recursing. - if x > 0: - return second(x) * 2 - else: - return 1 - - @jit - def second(x): - return third(x) * 3 - - @jit - def third(x): - return fourth(x) * 4 - - @jit - def fourth(x): - return first(x / 2 - 1) - - return first - - -def make_inner_error(jit=lambda x: x): - @jit - def outer(x): - if x > 0: - return inner(x) - - else: - return 1 - - @jit - def inner(x): - if x > 0: - return outer(x - 1) - else: - # this branch is actually never executed - return error_fun(x) - - @jit - def error_fun(x): - # to trigger an untyped attribute error - return x.ndim - - return outer - - -def make_raise_mutual(jit=lambda x: x): - @jit - def outer(x): - if x > 0: - return inner(x) - else: - return 1 - - @jit - def inner(x): - if x == 1: - raise ValueError('raise_mutual') - elif x > 0: - return outer(x - 1) - else: - return 1 - - return outer - - -def make_optional_return_case(jit=lambda x: x): - @jit - def foo(x): - if x > 5: - return x - 1 - else: - return - - @jit - def bar(x): - out = foo(x) - if out is None: - return out - elif out < 8: - return out - else: - return x * bar(out) - - return bar diff --git a/numba/numba/tests/serialize_usecases.py b/numba/numba/tests/serialize_usecases.py deleted file mode 100644 index bfc99e4a0..000000000 --- a/numba/numba/tests/serialize_usecases.py +++ /dev/null @@ -1,114 +0,0 @@ -""" -Separate module with function samples for serialization tests, -to avoid issues with __main__. 
-""" - -import math - -from numba import jit, generated_jit, types -from numba.six import exec_ - - -@jit((types.int32, types.int32)) -def add_with_sig(a, b): - return a + b - -@jit -def add_without_sig(a, b): - return a + b - -@jit(nopython=True) -def add_nopython(a, b): - return a + b - -@jit(nopython=True) -def add_nopython_fail(a, b): - object() - return a + b - -def closure(a): - @jit(nopython=True) - def inner(b, c): - return a + b + c - return inner - -K = 3.0 - -from math import sqrt - -def closure_with_globals(x, **jit_args): - @jit(**jit_args) - def inner(y): - # Exercise a builtin function and a module-level constant - k = max(K, K + 1) - # Exercise two functions from another module, one accessed with - # dotted notation, one imported explicitly. - return math.hypot(x, y) + sqrt(k) - return inner - -@jit(nopython=True) -def other_function(x, y): - return math.hypot(x, y) - -@jit(forceobj=True) -def get_global_objmode(x): - return K * x - -import numpy as np -import numpy.random as nprand - -@jit(nopython=True) -def get_renamed_module(x): - nprand.seed(42) - return np.cos(x), nprand.random() - - -def closure_calling_other_function(x): - @jit(nopython=True) - def inner(y, z): - return other_function(x, y) + z - return inner - -def closure_calling_other_closure(x): - @jit(nopython=True) - def other_inner(y): - return math.hypot(x, y) - - @jit(nopython=True) - def inner(y): - return other_inner(y) + x - return inner - - -# A generated function using some globals and closure vars - -k1 = 5 -k2 = 42 - -@generated_jit(nopython=True) -def generated_add(x, y): - k3 = 1 - if isinstance(x, types.Complex): - def impl(x, y): - return x + y + k1 - else: - def impl(x, y): - return x + y + k2 + k3 - return impl - - -# A dynamic function calling a builtin function -def _get_dyn_func(**jit_args): - code = """ - def dyn_func(x): - res = 0 - for i in range(x): - res += x - return res - """ - ns = {} - exec_(code.strip(), ns) - return jit(**jit_args)(ns['dyn_func']) - 
-dyn_func = _get_dyn_func(nopython=True) -dyn_func_objmode = _get_dyn_func(forceobj=True) diff --git a/numba/numba/tests/support.py b/numba/numba/tests/support.py deleted file mode 100644 index caedb5ce3..000000000 --- a/numba/numba/tests/support.py +++ /dev/null @@ -1,670 +0,0 @@ -""" -Assorted utilities for use in tests. -""" - -import cmath -import contextlib -import enum -import errno -import gc -import math -import os -import shutil -import subprocess -import sys -import tempfile -import time - -import numpy as np - -from numba import config, errors, typing, utils, numpy_support, testing -from numba.compiler import compile_extra, compile_isolated, Flags, DEFAULT_FLAGS -from numba.targets import cpu -import numba.unittest_support as unittest -from numba.runtime import rtsys - - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -force_pyobj_flags = Flags() -force_pyobj_flags.set("force_pyobject") - -no_pyobj_flags = Flags() - -nrt_flags = Flags() -nrt_flags.set("nrt") - - -tag = testing.make_tag_decorator(['important']) - - -class CompilationCache(object): - """ - A cache of compilation results for various signatures and flags. - This can make tests significantly faster (or less slow). - """ - - def __init__(self): - self.typingctx = typing.Context() - self.targetctx = cpu.CPUContext(self.typingctx) - self.cr_cache = {} - - def compile(self, func, args, return_type=None, flags=DEFAULT_FLAGS): - """ - Compile the function or retrieve an already compiled result - from the cache. 
- """ - from numba.targets.registry import cpu_target - - cache_key = (func, args, return_type, flags) - try: - cr = self.cr_cache[cache_key] - except KeyError: - # Register the contexts in case for nested @jit or @overload calls - # (same as compile_isolated()) - with cpu_target.nested_context(self.typingctx, self.targetctx): - cr = compile_extra(self.typingctx, self.targetctx, func, - args, return_type, flags, locals={}) - self.cr_cache[cache_key] = cr - return cr - - -class TestCase(unittest.TestCase): - - longMessage = True - - # A random state yielding the same random numbers for any test case. - # Use as `self.random.` - @utils.cached_property - def random(self): - return np.random.RandomState(42) - - def reset_module_warnings(self, module): - """ - Reset the warnings registry of a module. This can be necessary - as the warnings module is buggy in that regard. - See http://bugs.python.org/issue4180 - """ - if isinstance(module, str): - module = sys.modules[module] - try: - del module.__warningregistry__ - except AttributeError: - pass - - @contextlib.contextmanager - def assertTypingError(self): - """ - A context manager that asserts the enclosed code block fails - compiling in nopython mode. - """ - _accepted_errors = (errors.LoweringError, errors.TypingError, - TypeError, NotImplementedError) - with self.assertRaises(_accepted_errors) as cm: - yield cm - - @contextlib.contextmanager - def assertRefCount(self, *objects): - """ - A context manager that asserts the given objects have the - same reference counts before and after executing the - enclosed block. 
- """ - old_refcounts = [sys.getrefcount(x) for x in objects] - yield - new_refcounts = [sys.getrefcount(x) for x in objects] - for old, new, obj in zip(old_refcounts, new_refcounts, objects): - if old != new: - self.fail("Refcount changed from %d to %d for object: %r" - % (old, new, obj)) - - @contextlib.contextmanager - def assertNoNRTLeak(self): - """ - A context manager that asserts no NRT leak was created during - the execution of the enclosed block. - """ - old = rtsys.get_allocation_stats() - yield - new = rtsys.get_allocation_stats() - total_alloc = new.alloc - old.alloc - total_free = new.free - old.free - total_mi_alloc = new.mi_alloc - old.mi_alloc - total_mi_free = new.mi_free - old.mi_free - self.assertEqual(total_alloc, total_free, - "number of data allocs != number of data frees") - self.assertEqual(total_mi_alloc, total_mi_free, - "number of meminfo allocs != number of meminfo frees") - - - _bool_types = (bool, np.bool_) - _exact_typesets = [_bool_types, utils.INT_TYPES, (str,), (np.integer,), (utils.text_type), ] - _approx_typesets = [(float,), (complex,), (np.inexact)] - _sequence_typesets = [(tuple, list)] - _float_types = (float, np.floating) - _complex_types = (complex, np.complexfloating) - - def _detect_family(self, numeric_object): - """ - This function returns a string description of the type family - that the object in question belongs to. 
Possible return values - are: "exact", "complex", "approximate", "sequence", and "unknown" - """ - if isinstance(numeric_object, np.ndarray): - return "ndarray" - - if isinstance(numeric_object, enum.Enum): - return "enum" - - for tp in self._sequence_typesets: - if isinstance(numeric_object, tp): - return "sequence" - - for tp in self._exact_typesets: - if isinstance(numeric_object, tp): - return "exact" - - for tp in self._complex_types: - if isinstance(numeric_object, tp): - return "complex" - - for tp in self._approx_typesets: - if isinstance(numeric_object, tp): - return "approximate" - - return "unknown" - - def _fix_dtype(self, dtype): - """ - Fix the given *dtype* for comparison. - """ - # Under 64-bit Windows, Numpy may return either int32 or int64 - # arrays depending on the function. - if (sys.platform == 'win32' and sys.maxsize > 2**32 and - dtype == np.dtype('int32')): - return np.dtype('int64') - else: - return dtype - - def _fix_strides(self, arr): - """ - Return the strides of the given array, fixed for comparison. - Strides for 0- or 1-sized dimensions are ignored. - """ - if arr.size == 0: - return [0] * arr.ndim - else: - return [stride / arr.itemsize - for (stride, shape) in zip(arr.strides, arr.shape) - if shape > 1] - - def assertStridesEqual(self, first, second): - """ - Test that two arrays have the same shape and strides. - """ - self.assertEqual(first.shape, second.shape, "shapes differ") - self.assertEqual(first.itemsize, second.itemsize, "itemsizes differ") - self.assertEqual(self._fix_strides(first), self._fix_strides(second), - "strides differ") - - def assertPreciseEqual(self, first, second, prec='exact', ulps=1, - msg=None, ignore_sign_on_zero=False, - abs_tol=None - ): - """ - Versatile equality testing function with more built-in checks than - standard assertEqual(). - - For arrays, test that layout, dtype, shape are identical, and - recursively call assertPreciseEqual() on the contents. 
- - For other sequences, recursively call assertPreciseEqual() on - the contents. - - For scalars, test that two scalars or have similar types and are - equal up to a computed precision. - If the scalars are instances of exact types or if *prec* is - 'exact', they are compared exactly. - If the scalars are instances of inexact types (float, complex) - and *prec* is not 'exact', then the number of significant bits - is computed according to the value of *prec*: 53 bits if *prec* - is 'double', 24 bits if *prec* is single. This number of bits - can be lowered by raising the *ulps* value. - ignore_sign_on_zero can be set to True if zeros are to be considered - equal regardless of their sign bit. - abs_tol if this is set to a float value its value is used in the - following. If, however, this is set to the string "eps" then machine - precision of the type(first) is used in the following instead. This - kwarg is used to check if the absolute difference in value between first - and second is less than the value set, if so the numbers being compared - are considered equal. (This is to handle small numbers typically of - magnitude less than machine precision). - - Any value of *prec* other than 'exact', 'single' or 'double' - will raise an error. - """ - try: - self._assertPreciseEqual(first, second, prec, ulps, msg, - ignore_sign_on_zero, abs_tol) - except AssertionError as exc: - failure_msg = str(exc) - # Fall off of the 'except' scope to avoid Python 3 exception - # chaining. 
- else: - return - # Decorate the failure message with more information - self.fail("when comparing %s and %s: %s" % (first, second, failure_msg)) - - def _assertPreciseEqual(self, first, second, prec='exact', ulps=1, - msg=None, ignore_sign_on_zero=False, - abs_tol=None): - """Recursive workhorse for assertPreciseEqual().""" - - def _assertNumberEqual(first, second, delta=None): - if (delta is None or first == second == 0.0 - or math.isinf(first) or math.isinf(second)): - self.assertEqual(first, second, msg=msg) - # For signed zeros - if not ignore_sign_on_zero: - try: - if math.copysign(1, first) != math.copysign(1, second): - self.fail( - self._formatMessage(msg, - "%s != %s" % - (first, second))) - except TypeError: - pass - else: - self.assertAlmostEqual(first, second, delta=delta, msg=msg) - - first_family = self._detect_family(first) - second_family = self._detect_family(second) - - assertion_message = "Type Family mismatch. (%s != %s)" % (first_family, - second_family) - if msg: - assertion_message += ': %s' % (msg,) - self.assertEqual(first_family, second_family, msg=assertion_message) - - # We now know they are in the same comparison family - compare_family = first_family - - # For recognized sequences, recurse - if compare_family == "ndarray": - dtype = self._fix_dtype(first.dtype) - self.assertEqual(dtype, self._fix_dtype(second.dtype)) - self.assertEqual(first.ndim, second.ndim, - "different number of dimensions") - self.assertEqual(first.shape, second.shape, - "different shapes") - self.assertEqual(first.flags.writeable, second.flags.writeable, - "different mutability") - # itemsize is already checked by the dtype test above - self.assertEqual(self._fix_strides(first), - self._fix_strides(second), "different strides") - if first.dtype != dtype: - first = first.astype(dtype) - if second.dtype != dtype: - second = second.astype(dtype) - for a, b in zip(first.flat, second.flat): - self._assertPreciseEqual(a, b, prec, ulps, msg, - ignore_sign_on_zero, 
abs_tol) - return - - elif compare_family == "sequence": - self.assertEqual(len(first), len(second), msg=msg) - for a, b in zip(first, second): - self._assertPreciseEqual(a, b, prec, ulps, msg, - ignore_sign_on_zero, abs_tol) - return - - elif compare_family == "exact": - exact_comparison = True - - elif compare_family in ["complex", "approximate"]: - exact_comparison = False - - elif compare_family == "enum": - self.assertIs(first.__class__, second.__class__) - self._assertPreciseEqual(first.value, second.value, - prec, ulps, msg, - ignore_sign_on_zero, abs_tol) - return - - elif compare_family == "unknown": - # Assume these are non-numeric types: we will fall back - # on regular unittest comparison. - self.assertIs(first.__class__, second.__class__) - exact_comparison = True - - else: - assert 0, "unexpected family" - - # If a Numpy scalar, check the dtype is exactly the same too - # (required for datetime64 and timedelta64). - if hasattr(first, 'dtype') and hasattr(second, 'dtype'): - self.assertEqual(first.dtype, second.dtype) - - # Mixing bools and non-bools should always fail - if (isinstance(first, self._bool_types) != - isinstance(second, self._bool_types)): - assertion_message = ("Mismatching return types (%s vs. %s)" - % (first.__class__, second.__class__)) - if msg: - assertion_message += ': %s' % (msg,) - self.fail(assertion_message) - - try: - if cmath.isnan(first) and cmath.isnan(second): - # The NaNs will compare unequal, skip regular comparison - return - except TypeError: - # Not floats. 
- pass - - # if absolute comparison is set, use it - if abs_tol is not None: - if abs_tol == "eps": - rtol = np.finfo(type(first)).eps - elif isinstance(abs_tol, float): - rtol = abs_tol - else: - raise ValueError("abs_tol is not \"eps\" or a float, found %s" - % abs_tol) - if abs(first - second) < rtol: - return - - exact_comparison = exact_comparison or prec == 'exact' - - if not exact_comparison and prec != 'exact': - if prec == 'single': - bits = 24 - elif prec == 'double': - bits = 53 - else: - raise ValueError("unsupported precision %r" % (prec,)) - k = 2 ** (ulps - bits - 1) - delta = k * (abs(first) + abs(second)) - else: - delta = None - if isinstance(first, self._complex_types): - _assertNumberEqual(first.real, second.real, delta) - _assertNumberEqual(first.imag, second.imag, delta) - else: - _assertNumberEqual(first, second, delta) - - def run_nullary_func(self, pyfunc, flags): - """ - Compile the 0-argument *pyfunc* with the given *flags*, and check - it returns the same result as the pure Python function. - The got and expected results are returned. - """ - cr = compile_isolated(pyfunc, (), flags=flags) - cfunc = cr.entry_point - expected = pyfunc() - got = cfunc() - self.assertPreciseEqual(got, expected) - return got, expected - - -class SerialMixin(object): - """Mixin to mark test for serial execution. - """ - _numba_parallel_test_ = False - - -# Various helpers - -@contextlib.contextmanager -def override_config(name, value): - """ - Return a context manager that temporarily sets Numba config variable - *name* to *value*. *name* must be the name of an existing variable - in numba.config. - """ - old_value = getattr(config, name) - setattr(config, name, value) - try: - yield - finally: - setattr(config, name, old_value) - - -@contextlib.contextmanager -def override_env_config(name, value): - """ - Return a context manager that temporarily sets an Numba config environment - *name* to *value*. 
- """ - old = os.environ.get(name) - os.environ[name] = value - config.reload_config() - - try: - yield - finally: - if old is None: - # If it wasn't set originally, delete the environ var - del os.environ[name] - else: - # Otherwise, restore to the old value - os.environ[name] = old - # Always reload config - config.reload_config() - - -def compile_function(name, code, globs): - """ - Given a *code* string, compile it with globals *globs* and return - the function named *name*. - """ - co = compile(code.rstrip(), "", "single") - ns = {} - eval(co, globs, ns) - return ns[name] - -def tweak_code(func, codestring=None, consts=None): - """ - Tweak the code object of the given function by replacing its - *codestring* (a bytes object) and *consts* tuple, optionally. - """ - co = func.__code__ - tp = type(co) - if codestring is None: - codestring = co.co_code - if consts is None: - consts = co.co_consts - if sys.version_info >= (3,): - new_code = tp(co.co_argcount, co.co_kwonlyargcount, co.co_nlocals, - co.co_stacksize, co.co_flags, codestring, - consts, co.co_names, co.co_varnames, - co.co_filename, co.co_name, co.co_firstlineno, - co.co_lnotab) - else: - new_code = tp(co.co_argcount, co.co_nlocals, - co.co_stacksize, co.co_flags, codestring, - consts, co.co_names, co.co_varnames, - co.co_filename, co.co_name, co.co_firstlineno, - co.co_lnotab) - func.__code__ = new_code - - -_trashcan_dir = 'numba-tests' - -if os.name == 'nt': - # Under Windows, gettempdir() points to the user-local temp dir - _trashcan_dir = os.path.join(tempfile.gettempdir(), _trashcan_dir) -else: - # Mix the UID into the directory name to allow different users to - # run the test suite without permission errors (issue #1586) - _trashcan_dir = os.path.join(tempfile.gettempdir(), - "%s.%s" % (_trashcan_dir, os.getuid())) - -# Stale temporary directories are deleted after they are older than this value. -# The test suite probably won't ever take longer than this... 
-_trashcan_timeout = 24 * 3600 # 1 day - -def _create_trashcan_dir(): - try: - os.mkdir(_trashcan_dir) - except OSError as e: - if e.errno != errno.EEXIST: - raise - -def _purge_trashcan_dir(): - freshness_threshold = time.time() - _trashcan_timeout - for fn in sorted(os.listdir(_trashcan_dir)): - fn = os.path.join(_trashcan_dir, fn) - try: - st = os.stat(fn) - if st.st_mtime < freshness_threshold: - shutil.rmtree(fn, ignore_errors=True) - except OSError as e: - # In parallel testing, several processes can attempt to - # remove the same entry at once, ignore. - pass - -def _create_trashcan_subdir(prefix): - _purge_trashcan_dir() - path = tempfile.mkdtemp(prefix=prefix + '-', dir=_trashcan_dir) - return path - -def temp_directory(prefix): - """ - Create a temporary directory with the given *prefix* that will survive - at least as long as this process invocation. The temporary directory - will be eventually deleted when it becomes stale enough. - - This is necessary because a DLL file can't be deleted while in use - under Windows. - - An interesting side-effect is to be able to inspect the test files - shortly after a test suite run. - """ - _create_trashcan_dir() - return _create_trashcan_subdir(prefix) - - -def import_dynamic(modname): - """ - Import and return a module of the given name. Care is taken to - avoid issues due to Python's internal directory caching. 
- """ - if sys.version_info >= (3, 3): - import importlib - importlib.invalidate_caches() - __import__(modname) - return sys.modules[modname] - - -# From CPython - -@contextlib.contextmanager -def captured_output(stream_name): - """Return a context manager used by captured_stdout/stdin/stderr - that temporarily replaces the sys stream *stream_name* with a StringIO.""" - orig_stdout = getattr(sys, stream_name) - setattr(sys, stream_name, utils.StringIO()) - try: - yield getattr(sys, stream_name) - finally: - setattr(sys, stream_name, orig_stdout) - -def captured_stdout(): - """Capture the output of sys.stdout: - - with captured_stdout() as stdout: - print("hello") - self.assertEqual(stdout.getvalue(), "hello\n") - """ - return captured_output("stdout") - -def captured_stderr(): - """Capture the output of sys.stderr: - - with captured_stderr() as stderr: - print("hello", file=sys.stderr) - self.assertEqual(stderr.getvalue(), "hello\n") - """ - return captured_output("stderr") - - -@contextlib.contextmanager -def capture_cache_log(): - with captured_stdout() as out: - with override_config('DEBUG_CACHE', True): - yield out - - -class MemoryLeak(object): - - __enable_leak_check = True - - def memory_leak_setup(self): - # Clean up any NRT-backed objects hanging in a dead reference cycle - gc.collect() - self.__init_stats = rtsys.get_allocation_stats() - - def memory_leak_teardown(self): - if self.__enable_leak_check: - self.assert_no_memory_leak() - - def assert_no_memory_leak(self): - old = self.__init_stats - new = rtsys.get_allocation_stats() - total_alloc = new.alloc - old.alloc - total_free = new.free - old.free - total_mi_alloc = new.mi_alloc - old.mi_alloc - total_mi_free = new.mi_free - old.mi_free - self.assertEqual(total_alloc, total_free) - self.assertEqual(total_mi_alloc, total_mi_free) - - def disable_leak_check(self): - # For per-test use when MemoryLeakMixin is injected into a TestCase - self.__enable_leak_check = False - - -class 
MemoryLeakMixin(MemoryLeak): - - def setUp(self): - super(MemoryLeakMixin, self).setUp() - self.memory_leak_setup() - - def tearDown(self): - super(MemoryLeakMixin, self).tearDown() - gc.collect() - self.memory_leak_teardown() - - -@contextlib.contextmanager -def forbid_codegen(): - """ - Forbid LLVM code generation during the execution of the context - manager's enclosed block. - - If code generation is invoked, a RuntimeError is raised. - """ - from numba.targets import codegen - patchpoints = ['CodeLibrary._finalize_final_module'] - - old = {} - def fail(*args, **kwargs): - raise RuntimeError("codegen forbidden by test case") - try: - # XXX use the mock library instead? - for name in patchpoints: - parts = name.split('.') - obj = codegen - for attrname in parts[:-1]: - obj = getattr(obj, attrname) - attrname = parts[-1] - value = getattr(obj, attrname) - assert callable(value), ("%r should be callable" % name) - old[obj, attrname] = value - setattr(obj, attrname, fail) - yield - finally: - for (obj, attrname), value in old.items(): - setattr(obj, attrname, value) - diff --git a/numba/numba/tests/test_alignment.py b/numba/numba/tests/test_alignment.py deleted file mode 100644 index 1b3a7395d..000000000 --- a/numba/numba/tests/test_alignment.py +++ /dev/null @@ -1,40 +0,0 @@ -# See also numba.cuda.tests.test_alignment - -import numpy as np -from numba import from_dtype, njit, void -from .support import TestCase - - -class TestAlignment(TestCase): - - def test_record_alignment(self): - rec_dtype = np.dtype([('a', 'int32'), ('b', 'float64')], align=True) - rec = from_dtype(rec_dtype) - - @njit((rec[:],)) - def foo(a): - for i in range(a.size): - a[i].a = a[i].b - - a_recarray = np.recarray(3, dtype=rec_dtype) - for i in range(a_recarray.size): - a_rec = a_recarray[i] - a_rec.a = 0 - a_rec.b = (i + 1) * 123 - - foo(a_recarray) - np.testing.assert_equal(a_recarray.a, a_recarray.b) - - def test_record_misaligned(self): - rec_dtype = np.dtype([('a', 'int32'), ('b', 
'float64')]) - rec = from_dtype(rec_dtype) - - # Unlike the CUDA target, this will not generate an error - @njit((rec[:],)) - def foo(a): - for i in range(a.size): - a[i].a = a[i].b - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_annotations.py b/numba/numba/tests/test_annotations.py deleted file mode 100644 index 3eb611468..000000000 --- a/numba/numba/tests/test_annotations.py +++ /dev/null @@ -1,183 +0,0 @@ -from __future__ import absolute_import, division - -import re - -import numba -from numba import unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import types, utils -from numba.io_support import StringIO - -try: - import jinja2 -except ImportError: - jinja2 = None - -try: - import pygments -except ImportError: - pygments = None - -@unittest.skipIf(jinja2 is None, "please install the 'jinja2' package") -class TestAnnotation(unittest.TestCase): - - def test_exercise_code_path(self): - """ - Ensures template.html is available - """ - - def foo(n, a): - s = a - for i in range(n): - s += i - return s - - cres = compile_isolated(foo, [types.int32, types.int32]) - ta = cres.type_annotation - - buf = StringIO() - ta.html_annotate(buf) - output = buf.getvalue() - buf.close() - self.assertIn("foo", output) - - def test_exercise_code_path_with_lifted_loop(self): - """ - Ensures that lifted loops are handled correctly in obj mode - """ - # the functions to jit - def bar(x): - return x - - def foo(x): - h = 0. 
- for k in range(x): - h = h + k - if x: - h = h - bar(x) - return h - - # compile into an isolated context - flags = Flags() - flags.set('enable_pyobject') - flags.set('enable_looplift') - cres = compile_isolated(foo, [types.intp], flags=flags) - - ta = cres.type_annotation - - buf = StringIO() - ta.html_annotate(buf) - output = buf.getvalue() - buf.close() - self.assertIn("bar", output) - self.assertIn("foo", output) - self.assertIn("LiftedLoop", output) - - def test_html_output_with_lifted_loop(self): - """ - Test some format and behavior of the html annotation with lifted loop - """ - @numba.jit - def udt(x): - object() # to force object mode - z = 0 - for i in range(x): # this line is tagged - z += i - return z - - # Regex pattern to check for the "lifted_tag" in the line of the loop - re_lifted_tag = re.compile( - r'\s*' - r'\s*
' - r'\s*' - r'\s*' - r'\s*[0-9]+:' - r'\s*[ ]+for i in range\(x\): # this line is tagged\s*', - re.MULTILINE) - - # Compile int64 version - sig_i64 = (types.int64,) - udt.compile(sig_i64) # compile with lifted loop - cres = udt.overloads[sig_i64] - - # Make html output - buf = StringIO() - cres.type_annotation.html_annotate(buf) - output = buf.getvalue() - buf.close() - - # There should be only one function output. - self.assertEqual(output.count("Function name: udt"), 1) - - sigfmt = "with signature: {} -> pyobject" - self.assertEqual(output.count(sigfmt.format(sig_i64)), 1) - # Ensure the loop is tagged - self.assertEqual(len(re.findall(re_lifted_tag, output)), 1, - msg='%s not found in %s' % (re_lifted_tag, output)) - - # Compile float64 version - sig_f64 = (types.float64,) - udt.compile(sig_f64) - cres = udt.overloads[sig_f64] - - # Make html output - buf = StringIO() - cres.type_annotation.html_annotate(buf) - output = buf.getvalue() - buf.close() - - # There should be two function output - self.assertEqual(output.count("Function name: udt"), 2) - self.assertEqual(output.count(sigfmt.format(sig_i64)), 1) - self.assertEqual(output.count(sigfmt.format(sig_f64)), 1) - # Ensure the loop is tagged in both output - self.assertEqual(len(re.findall(re_lifted_tag, output)), 2) - - @unittest.skipIf(pygments is None, "please install the 'pygments' package") - def test_pretty_print(self): - - @numba.njit - def foo(x, y): - return x, y - - foo(1, 2) - # Exercise the method - obj = foo.inspect_types(pretty=True) - - # Exercise but supply a not None file kwarg, this is invalid - with self.assertRaises(ValueError) as raises: - obj = foo.inspect_types(pretty=True, file='should be None') - self.assertIn('`file` must be None if `pretty=True`', str(raises.exception)) - - -class TestTypeAnnotation(unittest.TestCase): - def test_delete(self): - @numba.njit - def foo(appleorange, berrycherry): - return appleorange + berrycherry - - foo(1, 2) - # Exercise the method - strbuf = 
utils.StringIO() - foo.inspect_types(strbuf) - # Ensure deletion show up after their use - lines = strbuf.getvalue().splitlines() - - def findpatloc(pat): - for i, ln in enumerate(lines): - if pat in ln: - return i - raise ValueError("can't find {!r}".format(pat)) - - sa = findpatloc('appleorange = arg(0, name=appleorange)') - sb = findpatloc('berrycherry = arg(1, name=berrycherry)') - - ea = findpatloc('del appleorange') - eb = findpatloc('del berrycherry') - - self.assertLess(sa, ea) - self.assertLess(sb, eb) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_api.py b/numba/numba/tests/test_api.py deleted file mode 100644 index 004cae87d..000000000 --- a/numba/numba/tests/test_api.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import division - -import numba - -from numba import unittest_support as unittest -from .support import TestCase, tag - - -class TestNumbaModule(TestCase): - """ - Test the APIs exposed by the top-level `numba` module. - """ - - def check_member(self, name): - self.assertTrue(hasattr(numba, name), name) - self.assertIn(name, numba.__all__) - - @tag('important') - def test_numba_module(self): - # jit - self.check_member("jit") - self.check_member("vectorize") - self.check_member("guvectorize") - self.check_member("njit") - self.check_member("autojit") - # errors - self.check_member("NumbaError") - self.check_member("TypingError") - # types - self.check_member("int32") - # misc - numba.__version__ # not in __all__ - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_array_analysis.py b/numba/numba/tests/test_array_analysis.py deleted file mode 100644 index 8a0ce9b72..000000000 --- a/numba/numba/tests/test_array_analysis.py +++ /dev/null @@ -1,854 +0,0 @@ -from __future__ import division - -import itertools - -import numpy as np -import sys -from collections import namedtuple - -from numba import unittest_support as unittest -from numba import njit, typeof, types, typing, 
typeof, ir, utils, bytecode -from .support import TestCase, tag -from numba.array_analysis import EquivSet, ArrayAnalysis -from numba.compiler import Pipeline, Flags, _PipelineManager -from numba.targets import cpu, registry -from numba.numpy_support import version as numpy_version -from numba.ir_utils import remove_dead - -# for parallel tests, marking that Windows with Python 2.7 is not supported -_windows_py27 = (sys.platform.startswith('win32') and - sys.version_info[:2] == (2, 7)) -_32bit = sys.maxsize <= 2 ** 32 -_reason = 'parfors not supported' -skip_unsupported = unittest.skipIf(_32bit or _windows_py27, _reason) - -class TestEquivSet(TestCase): - - """ - Test array_analysis.EquivSet. - """ - @tag('important') - def test_insert_equiv(self): - s1 = EquivSet() - s1.insert_equiv('a', 'b') - self.assertTrue(s1.is_equiv('a', 'b')) - self.assertTrue(s1.is_equiv('b', 'a')) - s1.insert_equiv('c', 'd') - self.assertTrue(s1.is_equiv('c', 'd')) - self.assertFalse(s1.is_equiv('c', 'a')) - s1.insert_equiv('a', 'c') - self.assertTrue(s1.is_equiv('a', 'b', 'c', 'd')) - self.assertFalse(s1.is_equiv('a', 'e')) - - @tag('important') - def test_intersect(self): - s1 = EquivSet() - s2 = EquivSet() - r = s1.intersect(s2) - self.assertTrue(r.is_empty()) - s1.insert_equiv('a', 'b') - r = s1.intersect(s2) - self.assertTrue(r.is_empty()) - s2.insert_equiv('b', 'c') - r = s1.intersect(s2) - self.assertTrue(r.is_empty()) - s2.insert_equiv('d', 'a') - r = s1.intersect(s2) - self.assertTrue(r.is_empty()) - s1.insert_equiv('a', 'e') - s2.insert_equiv('c', 'd') - r = s1.intersect(s2) - self.assertTrue(r.is_equiv('a', 'b')) - self.assertFalse(r.is_equiv('a', 'e')) - self.assertFalse(r.is_equiv('c', 'd')) - - -class ArrayAnalysisTester(Pipeline): - - @classmethod - def mk_pipeline(cls, args, return_type=None, flags=None, locals={}, - library=None, typing_context=None, target_context=None): - if not flags: - flags = Flags() - flags.nrt = True - if typing_context is None: - typing_context = 
registry.cpu_target.typing_context - if target_context is None: - target_context = registry.cpu_target.target_context - return cls(typing_context, target_context, library, args, return_type, - flags, locals) - - def compile_to_ir(self, func, test_idempotence=None): - """ - Populate and run compiler pipeline - """ - self.func_id = bytecode.FunctionIdentity.from_function(func) - - try: - bc = self.extract_bytecode(self.func_id) - except BaseException as e: - raise e - - self.bc = bc - self.lifted = () - self.lifted_from = None - - pm = _PipelineManager() - - pm.create_pipeline("nopython") - if self.func_ir is None: - pm.add_stage(self.stage_analyze_bytecode, "analyzing bytecode") - pm.add_stage(self.stage_process_ir, "processing IR") - if not self.flags.no_rewrites: - if self.status.can_fallback: - pm.add_stage( - self.stage_preserve_ir, "preserve IR for fallback") - pm.add_stage(self.stage_generic_rewrites, "nopython rewrites") - pm.add_stage( - self.stage_inline_pass, "inline calls to locally defined closures") - pm.add_stage(self.stage_nopython_frontend, "nopython frontend") - pm.add_stage(self.stage_annotate_type, "annotate type") - if not self.flags.no_rewrites: - pm.add_stage(self.stage_nopython_rewrites, "nopython rewrites") - func_ir_copies = [] - - def stage_array_analysis(): - self.array_analysis = ArrayAnalysis(self.typingctx, self.func_ir, - self.type_annotation.typemap, - self.type_annotation.calltypes) - self.array_analysis.run(self.func_ir.blocks) - func_ir_copies.append(self.func_ir.copy()) - if test_idempotence and len(func_ir_copies) > 1: - test_idempotence(func_ir_copies) - - pm.add_stage(stage_array_analysis, "analyze array equivalences") - if test_idempotence: - # Do another pass of array analysis to test idempontence - pm.add_stage(stage_array_analysis, "analyze array equivalences") - - pm.finalize() - res = pm.run(self.status) - return self.array_analysis - - -class TestArrayAnalysis(TestCase): - - def compare_ir(self, ir_list): - outputs = [] 
- for func_ir in ir_list: - remove_dead(func_ir.blocks, func_ir.arg_names, func_ir) - output = utils.StringIO() - func_ir.dump(file=output) - outputs.append(output.getvalue()) - self.assertTrue(len(set(outputs)) == 1) # assert all outputs are equal - - def _compile_and_test(self, fn, arg_tys, asserts=[], equivs=[], idempotent=True): - """ - Compile the given function and get its IR. - """ - test_pipeline = ArrayAnalysisTester.mk_pipeline(arg_tys) - test_idempotence = self.compare_ir if idempotent else lambda x:() - analysis = test_pipeline.compile_to_ir(fn, test_idempotence) - if equivs: - for func in equivs: - # only test the equiv_set of the first block - func(analysis.equiv_sets[0]) - if asserts == None: - self.assertTrue(self._has_no_assertcall(analysis.func_ir)) - else: - for func in asserts: - func(analysis.func_ir, analysis.typemap) - - def _has_assertcall(self, func_ir, typemap, args): - msg = "Sizes of {} do not match".format(', '.join(args)) - for label, block in func_ir.blocks.items(): - for expr in block.find_exprs(op='call'): - fn = func_ir.get_definition(expr.func.name) - if isinstance(fn, ir.Global) and fn.name == 'assert_equiv': - typ = typemap[expr.args[0].name] - if typ.value.startswith(msg): - return True - return False - - def _has_shapecall(self, func_ir, x): - for label, block in func_ir.blocks.items(): - for expr in block.find_exprs(op='getattr'): - if expr.attr == 'shape': - y = func_ir.get_definition(expr.value, lhs_only=True) - z = func_ir.get_definition(x, lhs_only=True) - y = y.name if isinstance(y, ir.Var) else y - z = z.name if isinstance(z, ir.Var) else z - if y == z: - return True - return False - - def _has_no_assertcall(self, func_ir): - for label, block in func_ir.blocks.items(): - for expr in block.find_exprs(op='call'): - fn = func_ir.get_definition(expr.func.name) - if isinstance(fn, ir.Global) and fn.name == 'assert_equiv': - return False - return True - - def with_assert(self, *args): - return lambda func_ir, typemap: 
self.assertTrue( - self._has_assertcall(func_ir, typemap, args)) - - def without_assert(self, *args): - return lambda func_ir, typemap: self.assertFalse( - self._has_assertcall(func_ir, typemap, args)) - - def with_equiv(self, *args): - def check(equiv_set): - n = len(args) - for i in range(n - 1): - if not equiv_set.is_equiv(args[i], args[n - 1]): - return False - return True - return lambda equiv_set: self.assertTrue(check(equiv_set)) - - def without_equiv(self, *args): - def check(equiv_set): - n = len(args) - for i in range(n - 1): - if equiv_set.is_equiv(args[i], args[n - 1]): - return False - return True - return lambda equiv_set: self.assertTrue(check(equiv_set)) - - def with_shapecall(self, x): - return lambda func_ir, s: self.assertTrue(self._has_shapecall(func_ir, x)) - - def without_shapecall(self, x): - return lambda func_ir, s: self.assertFalse(self._has_shapecall(func_ir, x)) - - def test_base_cases(self): - def test_0(): - a = np.zeros(0) - b = np.zeros(1) - m = 0 - n = 1 - c = np.zeros((m, n)) - return - self._compile_and_test(test_0, (), - equivs=[self.with_equiv('a', (0,)), - self.with_equiv('b', (1,)), - self.with_equiv('c', (0, 1))]) - - def test_1(n): - a = np.zeros(n) - b = np.zeros(n) - return a + b - self._compile_and_test(test_1, (types.intp,), asserts=None) - - def test_2(m, n): - a = np.zeros(n) - b = np.zeros(m) - return a + b - self._compile_and_test(test_2, (types.intp, types.intp), - asserts=[self.with_assert('a', 'b')]) - - def test_3(n): - a = np.zeros(n) - return a + n - self._compile_and_test(test_3, (types.intp,), asserts=None) - - def test_4(n): - a = np.zeros(n) - b = a + 1 - c = a + 2 - return a + c - self._compile_and_test(test_4, (types.intp,), asserts=None) - - def test_5(n): - a = np.zeros((n, n)) - m = n - b = np.zeros((m, n)) - return a + b - self._compile_and_test(test_5, (types.intp,), asserts=None) - - def test_6(m, n): - a = np.zeros(n) - b = np.zeros(m) - d = a + b - e = a - b - return d + e - 
self._compile_and_test(test_6, (types.intp, types.intp), - asserts=[self.with_assert('a', 'b'), - self.without_assert('d', 'e')]) - - def test_7(m, n): - a = np.zeros(n) - b = np.zeros(m) - if m == 10: - d = a + b - else: - d = a - b - return d + a - self._compile_and_test(test_7, (types.intp, types.intp), - asserts=[self.with_assert('a', 'b'), - self.without_assert('d', 'a')]) - - def test_8(m, n): - a = np.zeros(n) - b = np.zeros(m) - if m == 10: - d = b + a - else: - d = a + a - return b + d - self._compile_and_test(test_8, (types.intp, types.intp), - asserts=[self.with_assert('b', 'a'), - self.with_assert('b', 'd')]) - - def test_9(m): - A = np.ones(m) - s = 0 - while m < 2: - m += 1 - B = np.ones(m) - s += np.sum(A + B) - return s - self._compile_and_test(test_9, (types.intp,), - asserts=[self.with_assert('A', 'B')]) - - def test_10(m, n): - p = m - 1 - q = n + 1 - r = q + 1 - A = np.zeros(p) - B = np.zeros(q) - C = np.zeros(r) - D = np.zeros(m) - s = np.sum(A + B) - t = np.sum(C + D) - return s + t - self._compile_and_test(test_10, (types.intp,types.intp,), - asserts=[self.with_assert('A', 'B'), - self.without_assert('C', 'D')]) - - T = namedtuple("T", ['a','b']) - def test_namedtuple(n): - r = T(n, n) - return r[0] - self._compile_and_test(test_namedtuple, (types.intp,), - equivs=[self.with_equiv('r', ('n', 'n'))],) - - def test_shape(A): - (m, n) = A.shape - B = np.ones((m, n)) - return A + B - self._compile_and_test(test_shape, (types.Array(types.intp, 2, 'C'),), - asserts=None) - - def test_cond(l, m, n): - A = np.ones(l) - B = np.ones(m) - C = np.ones(n) - if l == m: - r = np.sum(A + B) - else: - r = 0 - if m != n: - s = 0 - else: - s = np.sum(B + C) - t = 0 - if l == m: - if m == n: - t = np.sum(A + B + C) - return r + s + t - self._compile_and_test(test_cond, (types.intp, types.intp, types.intp), - asserts=None) - - def test_assert_1(m, n): - assert(m == n) - A = np.ones(m) - B = np.ones(n) - return np.sum(A + B) - self._compile_and_test(test_assert_1, 
(types.intp, types.intp), - asserts=None) - - def test_assert_2(A, B): - assert(A.shape == B.shape) - return np.sum(A + B) - - self._compile_and_test(test_assert_2, (types.Array(types.intp, 1, 'C'), - types.Array(types.intp, 1, 'C'),), - asserts=None) - self._compile_and_test(test_assert_2, (types.Array(types.intp, 2, 'C'), - types.Array(types.intp, 2, 'C'),), - asserts=None) - # expected failure - with self.assertRaises(AssertionError) as raises: - self._compile_and_test(test_assert_2, (types.Array(types.intp, 1, 'C'), - types.Array(types.intp, 2, 'C'),), - asserts=None) - msg = "Dimension mismatch" - self.assertIn(msg, str(raises.exception)) - - - def test_stencilcall(self): - from numba import stencil - @stencil - def kernel_1(a): - return 0.25 * (a[0,1] + a[1,0] + a[0,-1] + a[-1,0]) - - def test_1(n): - a = np.ones((n,n)) - b = kernel_1(a) - return a + b - - self._compile_and_test(test_1, (types.intp,), - equivs=[self.with_equiv('a', 'b')], - asserts=[self.without_assert('a', 'b')]) - - def test_2(n): - a = np.ones((n,n)) - b = np.ones((n+1,n+1)) - kernel_1(a, out=b) - return a - - self._compile_and_test(test_2, (types.intp,), - equivs=[self.without_equiv('a', 'b')]) - - @stencil(standard_indexing=('c',)) - def kernel_2(a, b, c): - return a[0,1,0] + b[0,-1,0] + c[0] - - def test_3(n): - a = np.arange(64).reshape(4,8,2) - b = np.arange(64).reshape(n,8,2) - u = np.zeros(1) - v = kernel_2(a, b, u) - return v - - # standard indexed arrays are not considered in size equivalence - self._compile_and_test(test_3, (types.intp,), - equivs=[self.with_equiv('a', 'b', 'v'), - self.without_equiv('a', 'u')], - asserts=[self.with_assert('a', 'b')]) - - def test_slice(self): - def test_1(m, n): - A = np.zeros(m) - B = np.zeros(n) - s = np.sum(A + B) - C = A[1:m-1] - D = B[1:n-1] - t = np.sum(C + D) - return s + t - self._compile_and_test(test_1, (types.intp,types.intp,), - asserts=[self.with_assert('A', 'B'), - self.without_assert('C', 'D')], - idempotent=False) - - def 
test_2(m): - A = np.zeros(m) - B = A[0:m-3] - C = A[1:m-2] - D = A[2:m-1] - E = B + C - return D + E - self._compile_and_test(test_2, (types.intp,), - asserts=[self.without_assert('B', 'C'), - self.without_assert('D', 'E')], - idempotent=False) - - def test_3(m): - A = np.zeros((m,m)) - B = A[0:m-2,0:m-2] - C = A[1:m-1,1:m-1] - E = B + C - return E - self._compile_and_test(test_3, (types.intp,), - asserts=[self.without_assert('B', 'C')], - idempotent=False) - - def test_4(m): - A = np.zeros((m,m)) - B = A[0:m-2,:] - C = A[1:m-1,:] - E = B + C - return E - self._compile_and_test(test_4, (types.intp,), - asserts=[self.without_assert('B', 'C')], - idempotent=False) - - def test_5(m,n): - A = np.zeros(m) - B = np.zeros(m) - B[0:m-2] = A[1:m-1] - C = np.zeros(n) - D = A[1:m-1] - C[0:n-2] = D - # B and C are not necessarily of the same size because we can't - # derive m == n from (m-2) % m == (n-2) % n - return B + C - self._compile_and_test(test_5, (types.intp,types.intp), - asserts=[self.without_assert('B', 'A'), - self.with_assert('C', 'D'), - self.with_assert('B', 'C')], - idempotent=False) - - def test_6(m): - A = np.zeros((m,m)) - B = A[0:m-2,:-1] - C = A[1:m-1,:-1] - E = B + C - return E - self._compile_and_test(test_6, (types.intp,), - asserts=[self.without_assert('B', 'C')], - idempotent=False) - - def test_7(m): - A = np.zeros((m,m)) - B = A[0:m-2,-3:-1] - C = A[1:m-1,-4:-2] - E = B + C - return E - self._compile_and_test(test_7, (types.intp,), - asserts=[self.without_assert('B', 'C')], - idempotent=False) - - def test_8(m): - A = np.zeros((m,m)) - B = A[:m-2,0:] - C = A[1:-1,:] - E = B + C - return E - self._compile_and_test(test_8, (types.intp,), - asserts=[self.without_assert('B', 'C')], - idempotent=False) - - def test_numpy_calls(self): - def test_zeros(n): - a = np.zeros(n) - b = np.zeros((n, n)) - c = np.zeros(shape=(n, n)) - self._compile_and_test(test_zeros, (types.intp,), - equivs=[self.with_equiv('a', 'n'), - self.with_equiv('b', ('n', 'n')), - 
self.with_equiv('b', 'c')]) - - def test_0d_array(n): - a = np.array(1) - b = np.ones(2) - return a + b - self._compile_and_test(test_0d_array, (types.intp,), - equivs=[self.without_equiv('a', 'b')], - asserts=[self.without_shapecall('a')]) - - def test_ones(n): - a = np.ones(n) - b = np.ones((n, n)) - c = np.ones(shape=(n, n)) - self._compile_and_test(test_ones, (types.intp,), - equivs=[self.with_equiv('a', 'n'), - self.with_equiv('b', ('n', 'n')), - self.with_equiv('b', 'c')]) - - def test_empty(n): - a = np.empty(n) - b = np.empty((n, n)) - c = np.empty(shape=(n, n)) - self._compile_and_test(test_empty, (types.intp,), - equivs=[self.with_equiv('a', 'n'), - self.with_equiv('b', ('n', 'n')), - self.with_equiv('b', 'c')]) - - def test_eye(n): - a = np.eye(n) - b = np.eye(N=n) - c = np.eye(N=n, M=n) - d = np.eye(N=n, M=n + 1) - self._compile_and_test(test_eye, (types.intp,), - equivs=[self.with_equiv('a', ('n', 'n')), - self.with_equiv('b', ('n', 'n')), - self.with_equiv('b', 'c'), - self.without_equiv('b', 'd')]) - - def test_identity(n): - a = np.identity(n) - self._compile_and_test(test_identity, (types.intp,), - equivs=[self.with_equiv('a', ('n', 'n'))]) - - def test_diag(n): - a = np.identity(n) - b = np.diag(a) - c = np.diag(b) - d = np.diag(a, k=1) - self._compile_and_test(test_diag, (types.intp,), - equivs=[self.with_equiv('b', ('n',)), - self.with_equiv('c', ('n', 'n'))], - asserts=[self.with_shapecall('d'), - self.without_shapecall('c')]) - - def test_array_like(a): - b = np.empty_like(a) - c = np.zeros_like(a) - d = np.ones_like(a) - e = np.full_like(a, 1) - f = np.asfortranarray(a) - - self._compile_and_test(test_array_like, (types.Array(types.intp, 2, 'C'),), - equivs=[ - self.with_equiv('a', 'b', 'd', 'e', 'f')], - asserts=[self.with_shapecall('a'), - self.without_shapecall('b')]) - - def test_reshape(n): - a = np.ones(n * n) - b = a.reshape((n, n)) - return a.sum() + b.sum() - self._compile_and_test(test_reshape, (types.intp,), - 
equivs=[self.with_equiv('b', ('n', 'n'))], - asserts=[self.without_shapecall('b')]) - - - def test_transpose(m, n): - a = np.ones((m, n)) - b = a.T - c = a.transpose() - # Numba njit cannot compile explicit transpose call! - # c = np.transpose(b) - self._compile_and_test(test_transpose, (types.intp, types.intp), - equivs=[self.with_equiv('a', ('m', 'n')), - self.with_equiv('b', ('n', 'm')), - self.with_equiv('c', ('n', 'm'))]) - - - def test_transpose_3d(m, n, k): - a = np.ones((m, n, k)) - b = a.T - c = a.transpose() - d = a.transpose(2,0,1) - dt = a.transpose((2,0,1)) - e = a.transpose(0,2,1) - et = a.transpose((0,2,1)) - # Numba njit cannot compile explicit transpose call! - # c = np.transpose(b) - self._compile_and_test(test_transpose_3d, (types.intp, types.intp, types.intp), - equivs=[self.with_equiv('a', ('m', 'n', 'k')), - self.with_equiv('b', ('k', 'n', 'm')), - self.with_equiv('c', ('k', 'n', 'm')), - self.with_equiv('d', ('k', 'm', 'n')), - self.with_equiv('dt', ('k', 'm', 'n')), - self.with_equiv('e', ('m', 'k', 'n')), - self.with_equiv('et', ('m', 'k', 'n'))]) - - def test_random(n): - a0 = np.random.rand(n) - a1 = np.random.rand(n, n) - b0 = np.random.randn(n) - b1 = np.random.randn(n, n) - c0 = np.random.ranf(n) - c1 = np.random.ranf((n, n)) - c2 = np.random.ranf(size=(n, n)) - d0 = np.random.random_sample(n) - d1 = np.random.random_sample((n, n)) - d2 = np.random.random_sample(size=(n, n)) - e0 = np.random.sample(n) - e1 = np.random.sample((n, n)) - e2 = np.random.sample(size=(n, n)) - f0 = np.random.random(n) - f1 = np.random.random((n, n)) - f2 = np.random.random(size=(n, n)) - g0 = np.random.standard_normal(n) - g1 = np.random.standard_normal((n, n)) - g2 = np.random.standard_normal(size=(n, n)) - h0 = np.random.chisquare(10, n) - h1 = np.random.chisquare(10, (n, n)) - h2 = np.random.chisquare(10, size=(n, n)) - i0 = np.random.weibull(10, n) - i1 = np.random.weibull(10, (n, n)) - i2 = np.random.weibull(10, size=(n, n)) - j0 = np.random.power(10, 
n) - j1 = np.random.power(10, (n, n)) - j2 = np.random.power(10, size=(n, n)) - k0 = np.random.geometric(0.1, n) - k1 = np.random.geometric(0.1, (n, n)) - k2 = np.random.geometric(0.1, size=(n, n)) - l0 = np.random.exponential(10, n) - l1 = np.random.exponential(10, (n, n)) - l2 = np.random.exponential(10, size=(n, n)) - m0 = np.random.poisson(10, n) - m1 = np.random.poisson(10, (n, n)) - m2 = np.random.poisson(10, size=(n, n)) - n0 = np.random.rayleigh(10, n) - n1 = np.random.rayleigh(10, (n, n)) - n2 = np.random.rayleigh(10, size=(n, n)) - o0 = np.random.normal(0, 1, n) - o1 = np.random.normal(0, 1, (n, n)) - o2 = np.random.normal(0, 1, size=(n, n)) - p0 = np.random.uniform(0, 1, n) - p1 = np.random.uniform(0, 1, (n, n)) - p2 = np.random.uniform(0, 1, size=(n, n)) - q0 = np.random.beta(0.1, 1, n) - q1 = np.random.beta(0.1, 1, (n, n)) - q2 = np.random.beta(0.1, 1, size=(n, n)) - r0 = np.random.binomial(0, 1, n) - r1 = np.random.binomial(0, 1, (n, n)) - r2 = np.random.binomial(0, 1, size=(n, n)) - s0 = np.random.f(0.1, 1, n) - s1 = np.random.f(0.1, 1, (n, n)) - s2 = np.random.f(0.1, 1, size=(n, n)) - t0 = np.random.gamma(0.1, 1, n) - t1 = np.random.gamma(0.1, 1, (n, n)) - t2 = np.random.gamma(0.1, 1, size=(n, n)) - u0 = np.random.lognormal(0, 1, n) - u1 = np.random.lognormal(0, 1, (n, n)) - u2 = np.random.lognormal(0, 1, size=(n, n)) - v0 = np.random.laplace(0, 1, n) - v1 = np.random.laplace(0, 1, (n, n)) - v2 = np.random.laplace(0, 1, size=(n, n)) - w0 = np.random.randint(0, 10, n) - w1 = np.random.randint(0, 10, (n, n)) - w2 = np.random.randint(0, 10, size=(n, n)) - x0 = np.random.triangular(-3, 0, 10, n) - x1 = np.random.triangular(-3, 0, 10, (n, n)) - x2 = np.random.triangular(-3, 0, 10, size=(n, n)) - - last = ord('x') + 1 - vars1d = [('n',)] + [chr(x) + '0' for x in range(ord('a'), last)] - vars2d = [('n', 'n')] + [chr(x) + '1' for x in range(ord('a'), last)] - vars2d += [chr(x) + '1' for x in range(ord('c'), last)] - self._compile_and_test(test_random, 
(types.intp,), - equivs=[self.with_equiv(*vars1d), - self.with_equiv(*vars2d)]) - - def test_concatenate(m, n): - a = np.ones(m) - b = np.ones(n) - c = np.concatenate((a, b)) - d = np.ones((2, n)) - e = np.ones((3, n)) - f = np.concatenate((d, e)) - # Numba njit cannot compile concatenate with single array! - # g = np.ones((3,4,5)) - # h = np.concatenate(g) - i = np.ones((m, 2)) - j = np.ones((m, 3)) - k = np.concatenate((i, j), axis=1) - l = np.ones((m, n)) - o = np.ones((m, n)) - p = np.concatenate((l, o)) - # Numba njit cannot support list argument! - # q = np.concatenate([d, e]) - self._compile_and_test(test_concatenate, (types.intp, types.intp), - equivs=[self.with_equiv('f', (5, 'n')), - #self.with_equiv('h', (3 + 4 + 5, )), - self.with_equiv('k', ('m', 5))], - asserts=[self.with_shapecall('c'), - self.without_shapecall('f'), - self.without_shapecall('k'), - self.with_shapecall('p')]) - - def test_vsd_stack(): - k = np.ones((2,)) - l = np.ones((2, 3)) - o = np.ones((2, 3, 4)) - p = np.vstack((k, k)) - q = np.vstack((l, l)) - r = np.hstack((k, k)) - s = np.hstack((l, l)) - t = np.dstack((k, k)) - u = np.dstack((l, l)) - v = np.dstack((o, o)) - - self._compile_and_test(test_vsd_stack, (), - equivs=[self.with_equiv('p', (2, 2)), - self.with_equiv('q', (4, 3)), - self.with_equiv('r', (4,)), - self.with_equiv('s', (2, 6)), - self.with_equiv('t', (1, 2, 2)), - self.with_equiv('u', (2, 3, 2)), - self.with_equiv('v', (2, 3, 8)), - ]) - - if numpy_version >= (1, 10): - def test_stack(m, n): - a = np.ones(m) - b = np.ones(n) - c = np.stack((a, b)) - d = np.ones((m, n)) - e = np.ones((m, n)) - f = np.stack((d, e)) - g = np.stack((d, e), axis=0) - h = np.stack((d, e), axis=1) - i = np.stack((d, e), axis=2) - j = np.stack((d, e), axis=-1) - - self._compile_and_test(test_stack, (types.intp, types.intp), - equivs=[self.with_equiv('m', 'n'), - self.with_equiv('c', (2, 'm')), - self.with_equiv( - 'f', 'g', (2, 'm', 'n')), - self.with_equiv( - 'h', ('m', 2, 'n')), - 
self.with_equiv( - 'i', 'j', ('m', 'n', 2)), - ]) - - def test_linspace(m, n): - a = np.linspace(m, n) - b = np.linspace(m, n, 10) - # Numba njit does not support num keyword to linspace call! - # c = np.linspace(m,n,num=10) - self._compile_and_test(test_linspace, (types.float64, types.float64), - equivs=[self.with_equiv('a', (50,)), - self.with_equiv('b', (10,))]) - - def test_dot(l, m, n): - a = np.dot(np.ones(1), np.ones(1)) - b = np.dot(np.ones(2), np.ones((2, 3))) - # Numba njit does not support higher dimensional inputs - #c = np.dot(np.ones(2),np.ones((3,2,4))) - #d = np.dot(np.ones(2),np.ones((3,5,2,4))) - e = np.dot(np.ones((1, 2)), np.ones(2,)) - #f = np.dot(np.ones((1,2,3)),np.ones(3,)) - #g = np.dot(np.ones((1,2,3,4)),np.ones(4,)) - h = np.dot(np.ones((2, 3)), np.ones((3, 4))) - i = np.dot(np.ones((m, n)), np.ones((n, m))) - j = np.dot(np.ones((m, m)), np.ones((l, l))) - - self._compile_and_test(test_dot, (types.intp, types.intp, types.intp), - equivs=[self.without_equiv('a', (1,)), # not array - self.with_equiv('b', (3,)), - self.with_equiv('e', (1,)), - self.with_equiv('h', (2, 4)), - self.with_equiv('i', ('m', 'm')), - self.with_equiv('j', ('m', 'm')), - ], - asserts=[self.with_assert('m', 'l')]) - - def test_broadcast(m, n): - a = np.ones((m, n)) - b = np.ones(n) - c = a + b - d = np.ones((1, n)) - e = a + c - d - self._compile_and_test(test_broadcast, (types.intp, types.intp), - equivs=[self.with_equiv('a', 'c', 'e')], - asserts=None) - - @skip_unsupported - def test_misc(self): - - @njit - def swap(x, y): - return(y, x) - - def test_bug2537(m): - a = np.ones(m) - b = np.ones(m) - for i in range(m): - a[i], b[i] = swap(a[i], b[i]) - - try: - njit(test_bug2537, parallel=True)(10) - except IndexError: - self.fail("test_bug2537 raised IndexError!") - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_array_attr.py b/numba/numba/tests/test_array_attr.py deleted file mode 100644 index a18e468d7..000000000 --- 
a/numba/numba/tests/test_array_attr.py +++ /dev/null @@ -1,376 +0,0 @@ -from __future__ import print_function - -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated -from numba.numpy_support import from_dtype -from numba import types, njit, typeof, numpy_support -from .support import TestCase, CompilationCache, MemoryLeakMixin, tag -from numba.errors import TypingError -from .test_parfors import skip_unsupported - - -def array_dtype(a): - return a.dtype - - -def use_dtype(a, b): - return a.view(b.dtype) - - -def array_itemsize(a): - return a.itemsize - - -def array_shape(a, i): - return a.shape[i] - - -def array_strides(a, i): - return a.strides[i] - - -def array_ndim(a): - return a.ndim - - -def array_size(a): - return a.size - - -def array_flags_contiguous(a): - return a.flags.contiguous - -def array_flags_c_contiguous(a): - return a.flags.c_contiguous - -def array_flags_f_contiguous(a): - return a.flags.f_contiguous - - -def nested_array_itemsize(a): - return a.f.itemsize - - -def nested_array_shape(a): - return a.f.shape - - -def nested_array_strides(a): - return a.f.strides - - -def nested_array_ndim(a): - return a.f.ndim - - -def nested_array_size(a): - return a.f.size - - -def size_after_slicing_usecase(buf, i): - sliced = buf[i] - # Make sure size attribute is not lost - return sliced.size - - -def array_ctypes_data(arr): - return arr.ctypes.data - - -def array_real(arr): - return arr.real - - -def array_imag(arr): - return arr.imag - - -class TestArrayAttr(MemoryLeakMixin, TestCase): - - def setUp(self): - super(TestArrayAttr, self).setUp() - self.ccache = CompilationCache() - self.a = np.arange(20, dtype=np.int32).reshape(4, 5) - - def check_unary(self, pyfunc, arr): - aryty = typeof(arr) - cfunc = self.get_cfunc(pyfunc, (aryty,)) - expected = pyfunc(arr) - self.assertPreciseEqual(cfunc(arr), expected) - # Retry with forced any layout - cfunc = self.get_cfunc(pyfunc, (aryty.copy(layout='A'),)) - 
self.assertPreciseEqual(cfunc(arr), expected) - - def check_unary_with_arrays(self, pyfunc, - use_reshaped_empty_array=True): - self.check_unary(pyfunc, self.a) - self.check_unary(pyfunc, self.a.T) - self.check_unary(pyfunc, self.a[::2]) - # 0-d array - arr = np.array([42]).reshape(()) - self.check_unary(pyfunc, arr) - # array with an empty dimension - arr = np.zeros(0) - self.check_unary(pyfunc, arr) - if use_reshaped_empty_array: - self.check_unary(pyfunc, arr.reshape((1, 0, 2))) - - def get_cfunc(self, pyfunc, argspec): - cres = self.ccache.compile(pyfunc, argspec) - return cres.entry_point - - @tag('important') - def test_shape(self): - pyfunc = array_shape - cfunc = self.get_cfunc(pyfunc, (types.int32[:,:], types.int32)) - - for i in range(self.a.ndim): - self.assertEqual(pyfunc(self.a, i), cfunc(self.a, i)) - - def test_strides(self): - pyfunc = array_strides - cfunc = self.get_cfunc(pyfunc, (types.int32[:,:], types.int32)) - - for i in range(self.a.ndim): - self.assertEqual(pyfunc(self.a, i), cfunc(self.a, i)) - - def test_ndim(self): - self.check_unary_with_arrays(array_ndim) - - def test_size(self): - self.check_unary_with_arrays(array_size) - - def test_itemsize(self): - self.check_unary_with_arrays(array_itemsize) - - def test_dtype(self): - pyfunc = array_dtype - self.check_unary(pyfunc, self.a) - dtype = np.dtype([('x', np.int8), ('y', np.int8)]) - arr = np.zeros(4, dtype=dtype) - self.check_unary(pyfunc, arr) - - def test_use_dtype(self): - # Test using the dtype attribute inside the Numba function itself - b = np.empty(1, dtype=np.int16) - pyfunc = use_dtype - cfunc = self.get_cfunc(pyfunc, (typeof(self.a), typeof(b))) - expected = pyfunc(self.a, b) - self.assertPreciseEqual(cfunc(self.a, b), expected) - - def test_flags_contiguous(self): - self.check_unary_with_arrays(array_flags_contiguous) - - def test_flags_c_contiguous(self): - self.check_unary_with_arrays(array_flags_c_contiguous) - - def test_flags_f_contiguous(self): - # Numpy 1.12+ is more 
opportunistic when computing contiguousness - # of empty arrays. - use_reshaped_empty_array = numpy_support.version > (1, 11) - self.check_unary_with_arrays(array_flags_f_contiguous, - use_reshaped_empty_array=use_reshaped_empty_array) - - -class TestNestedArrayAttr(MemoryLeakMixin, unittest.TestCase): - def setUp(self): - super(TestNestedArrayAttr, self).setUp() - dtype = np.dtype([('a', np.int32), ('f', np.int32, (2, 5))]) - self.a = np.recarray(1, dtype)[0] - self.nbrecord = from_dtype(self.a.dtype) - - def get_cfunc(self, pyfunc): - cres = compile_isolated(pyfunc, (self.nbrecord,)) - return cres.entry_point - - @tag('important') - def test_shape(self): - pyfunc = nested_array_shape - cfunc = self.get_cfunc(pyfunc) - - self.assertEqual(pyfunc(self.a), cfunc(self.a)) - - def test_strides(self): - pyfunc = nested_array_strides - cfunc = self.get_cfunc(pyfunc) - - self.assertEqual(pyfunc(self.a), cfunc(self.a)) - - def test_ndim(self): - pyfunc = nested_array_ndim - cfunc = self.get_cfunc(pyfunc) - - self.assertEqual(pyfunc(self.a), cfunc(self.a)) - - def test_size(self): - pyfunc = nested_array_size - cfunc = self.get_cfunc(pyfunc) - - self.assertEqual(pyfunc(self.a), cfunc(self.a)) - - def test_itemsize(self): - pyfunc = nested_array_itemsize - cfunc = self.get_cfunc(pyfunc) - - self.assertEqual(pyfunc(self.a), cfunc(self.a)) - - -class TestSlicedArrayAttr(MemoryLeakMixin, unittest.TestCase): - def test_size_after_slicing(self): - pyfunc = size_after_slicing_usecase - cfunc = njit(pyfunc) - arr = np.arange(2 * 5).reshape(2, 5) - for i in range(arr.shape[0]): - self.assertEqual(pyfunc(arr, i), cfunc(arr, i)) - arr = np.arange(2 * 5 * 3).reshape(2, 5, 3) - for i in range(arr.shape[0]): - self.assertEqual(pyfunc(arr, i), cfunc(arr, i)) - - -class TestArrayCTypes(MemoryLeakMixin, TestCase): - def test_array_ctypes_data(self): - pyfunc = array_ctypes_data - cfunc = njit(pyfunc) - arr = np.arange(3) - self.assertEqual(pyfunc(arr), cfunc(arr)) - - @skip_unsupported - 
def test_array_ctypes_ref_error_in_parallel(self): - # Issue #2887 - from ctypes import CFUNCTYPE, c_void_p, c_int32, c_double, c_bool - - @CFUNCTYPE(c_bool, c_void_p, c_int32, c_void_p) - def callback(inptr, size, outptr): - # A ctypes callback that manipulate the incoming pointers. - try: - inbuf = (c_double * size).from_address(inptr) - outbuf = (c_double * 1).from_address(outptr) - a = np.ndarray(size, buffer=inbuf, dtype=np.float64) - b = np.ndarray(1, buffer=outbuf, dtype=np.float64) - b[0] = (a + a.size)[0] - return True - except: - import traceback - traceback.print_exception() - return False - - - # parallel=True is required to reproduce the error. - @njit(parallel=True) - def foo(size): - arr = np.ones(size) - out = np.empty(1) - # Exercise array.ctypes - inct = arr.ctypes - outct = out.ctypes - # The reference to `arr` is dead by now - status = callback(inct.data, size, outct.data) - return status, out[0] - - size = 3 - status, got = foo(size) - self.assertTrue(status) - self.assertPreciseEqual(got, (np.ones(size) + size)[0]) - - -class TestRealImagAttr(MemoryLeakMixin, TestCase): - def check_complex(self, pyfunc): - cfunc = njit(pyfunc) - # test 1D - size = 10 - arr = np.arange(size) + np.arange(size) * 10j - self.assertPreciseEqual(pyfunc(arr), cfunc(arr)) - # test 2D - arr = arr.reshape(2, 5) - self.assertPreciseEqual(pyfunc(arr), cfunc(arr)) - - def test_complex_real(self): - self.check_complex(array_real) - - def test_complex_imag(self): - self.check_complex(array_imag) - - def check_number_real(self, dtype): - pyfunc = array_real - cfunc = njit(pyfunc) - # test 1D - size = 10 - arr = np.arange(size, dtype=dtype) - self.assertPreciseEqual(pyfunc(arr), cfunc(arr)) - # test 2D - arr = arr.reshape(2, 5) - self.assertPreciseEqual(pyfunc(arr), cfunc(arr)) - # test identity - self.assertEqual(arr.data, pyfunc(arr).data) - self.assertEqual(arr.data, cfunc(arr).data) - # test writable - real = cfunc(arr) - self.assertNotEqual(arr[0, 0], 5) - real[0, 0] = 5 
- self.assertEqual(arr[0, 0], 5) - - def test_number_real(self): - """ - Testing .real of non-complex dtypes - """ - for dtype in [np.uint8, np.int32, np.float32, np.float64]: - self.check_number_real(dtype) - - def check_number_imag(self, dtype): - pyfunc = array_imag - cfunc = njit(pyfunc) - # test 1D - size = 10 - arr = np.arange(size, dtype=dtype) - self.assertPreciseEqual(pyfunc(arr), cfunc(arr)) - # test 2D - arr = arr.reshape(2, 5) - self.assertPreciseEqual(pyfunc(arr), cfunc(arr)) - # test are zeros - self.assertEqual(cfunc(arr).tolist(), np.zeros_like(arr).tolist()) - # test readonly - imag = cfunc(arr) - with self.assertRaises(ValueError) as raises: - imag[0] = 1 - self.assertEqual('assignment destination is read-only', - str(raises.exception)) - - def test_number_imag(self): - """ - Testing .imag of non-complex dtypes - """ - for dtype in [np.uint8, np.int32, np.float32, np.float64]: - self.check_number_imag(dtype) - - def test_record_real(self): - rectyp = np.dtype([('real', np.float32), ('imag', np.complex64)]) - arr = np.zeros(3, dtype=rectyp) - arr['real'] = np.random.random(arr.size) - arr['imag'] = np.random.random(arr.size) * 1.3j - - # check numpy behavior - # .real is identity - self.assertIs(array_real(arr), arr) - # .imag is zero_like - self.assertEqual(array_imag(arr).tolist(), np.zeros_like(arr).tolist()) - - # check numba behavior - # it's most likely a user error, anyway - jit_array_real = njit(array_real) - jit_array_imag = njit(array_imag) - - with self.assertRaises(TypingError) as raises: - jit_array_real(arr) - self.assertIn("cannot access .real of array of Record", - str(raises.exception)) - - with self.assertRaises(TypingError) as raises: - jit_array_imag(arr) - self.assertIn("cannot access .imag of array of Record", - str(raises.exception)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_array_constants.py b/numba/numba/tests/test_array_constants.py deleted file mode 100644 index 
de31d434c..000000000 --- a/numba/numba/tests/test_array_constants.py +++ /dev/null @@ -1,178 +0,0 @@ -from __future__ import print_function - -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated -from numba.errors import TypingError -from numba import jit, types, typeof - - -a0 = np.array(42) - -s1 = np.int32(64) - -a1 = np.arange(12) -a2 = a1[::2] -a3 = a1.reshape((3, 4)).T - -dt = np.dtype([('x', np.int8), ('y', 'S3')]) - -a4 = np.arange(32, dtype=np.int8).view(dt) -a5 = a4[::-2] - -# A recognizable data string -a6 = np.frombuffer(b"XXXX_array_contents_XXXX", dtype=np.float32) - - -def getitem0(i): - return a0[()] - - -def getitem1(i): - return a1[i] - - -def getitem2(i): - return a2[i] - - -def getitem3(i): - return a3[i] - - -def getitem4(i): - return a4[i] - - -def getitem5(i): - return a5[i] - - -def getitem6(i): - return a6[i] - - -def use_arrayscalar_const(): - return s1 - - -def write_to_global_array(): - myarray[0] = 1 - - -class TestConstantArray(unittest.TestCase): - """ - Test array constants. - """ - - def check_array_const(self, pyfunc): - cres = compile_isolated(pyfunc, (types.int32,)) - cfunc = cres.entry_point - for i in [0, 1, 2]: - np.testing.assert_array_equal(pyfunc(i), cfunc(i)) - - def test_array_const_0d(self): - self.check_array_const(getitem0) - - def test_array_const_1d_contig(self): - self.check_array_const(getitem1) - - def test_array_const_1d_noncontig(self): - self.check_array_const(getitem2) - - def test_array_const_2d(self): - self.check_array_const(getitem3) - - def test_record_array_const_contig(self): - self.check_array_const(getitem4) - - def test_record_array_const_noncontig(self): - self.check_array_const(getitem5) - - def test_array_const_alignment(self): - """ - Issue #1933: the array declaration in the LLVM IR must have - the right alignment specified. 
- """ - sig = (types.intp,) - cfunc = jit(sig, nopython=True)(getitem6) - ir = cfunc.inspect_llvm(sig) - for line in ir.splitlines(): - if 'XXXX_array_contents_XXXX' in line: - self.assertIn("constant [24 x i8]", line) # sanity check - # Should be the ABI-required alignment for float32 - # on most platforms... - self.assertIn(", align 4", line) - break - else: - self.fail("could not find array declaration in LLVM IR") - - def test_arrayscalar_const(self): - pyfunc = use_arrayscalar_const - cres = compile_isolated(pyfunc, ()) - cfunc = cres.entry_point - - self.assertEqual(pyfunc(), cfunc()) - - def test_write_to_global_array(self): - pyfunc = write_to_global_array - with self.assertRaises(TypingError): - compile_isolated(pyfunc, ()) - - def test_issue_1850(self): - """ - This issue is caused by an unresolved bug in numpy since version 1.6. - See numpy GH issue #3147. - """ - constarr = np.array([86]) - - def pyfunc(): - return constarr[0] - - cres = compile_isolated(pyfunc, ()) - out = cres.entry_point() - self.assertEqual(out, 86) - - def test_too_big_to_freeze(self): - """ - Test issue https://github.com/numba/numba/issues/2188 where freezing - a constant array into the code thats prohibitively long and consume - too much RAM. - """ - def test(biggie): - expect = np.copy(biggie) - self.assertEqual(typeof(biggie), typeof(expect)) - - def pyfunc(): - return biggie - - cres = compile_isolated(pyfunc, ()) - # Check that the array is not frozen into the LLVM IR. - # LLVM size must be less than the array size. 
- self.assertLess(len(cres.library.get_llvm_str()), biggie.nbytes) - # Run and test result - out = cres.entry_point() - self.assertIs(biggie, out) - # Remove all local references to biggie - del out - biggie = None # del biggie is syntax error in py2 - # Run again and verify result - out = cres.entry_point() - np.testing.assert_equal(expect, out) - self.assertEqual(typeof(expect), typeof(out)) - - nelem = 10**7 # 10 million items - - c_array = np.arange(nelem).reshape(nelem) - f_array = np.asfortranarray(np.random.random((2, nelem // 2))) - self.assertEqual(typeof(c_array).layout, 'C') - self.assertEqual(typeof(f_array).layout, 'F') - # Test C contig - test(c_array) - # Test F contig - test(f_array) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_array_exprs.py b/numba/numba/tests/test_array_exprs.py deleted file mode 100644 index 0f5fd125d..000000000 --- a/numba/numba/tests/test_array_exprs.py +++ /dev/null @@ -1,490 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import gc - -import numpy as np - -from numba import njit, vectorize -from numba import unittest_support as unittest -from numba import compiler, typing, typeof, ir, utils -from numba.compiler import Pipeline, _PipelineManager, Flags -from numba.targets import cpu -from .support import MemoryLeakMixin, TestCase - - -class Namespace(dict): - def __getattr__(s, k): - return s[k] if k in s else super(Namespace, s).__getattr__(k) - -def axy(a, x, y): - return a * x + y - -def ax2(a, x, y): - return a * x + y - -def pos_root(As, Bs, Cs): - return (-Bs + (((Bs ** 2.) - (4. * As * Cs)) ** 0.5)) / (2. * As) - -def neg_root_common_subexpr(As, Bs, Cs): - _2As = 2. * As - _4AsCs = 2. * _2As * Cs - _Bs2_4AsCs = (Bs ** 2. - _4AsCs) - return (-Bs - (_Bs2_4AsCs ** 0.5)) / _2As - -def neg_root_complex_subexpr(As, Bs, Cs): - _2As = 2. * As - _4AsCs = 2. * _2As * Cs - _Bs2_4AsCs = (Bs ** 2. - _4AsCs) + 0j # Force into the complex domain. 
- return (-Bs - (_Bs2_4AsCs ** 0.5)) / _2As - -vaxy = vectorize(axy) - -def call_stuff(a0, a1): - return np.cos(vaxy(a0, np.sin(a1) - 1., 1.)) - -def are_roots_imaginary(As, Bs, Cs): - return (Bs ** 2 - 4 * As * Cs) < 0 - -def div_add(As, Bs, Cs): - return As / Bs + Cs - -def cube(As): - return As ** 3 - -def explicit_output(a, b, out): - np.cos(a, out) - return np.add(out, b, out) - -def variable_name_reuse(a, b, c, d): - u = a + b - u = u - a * b - u = u * c + d - return u - - -# From issue #1264 -def distance_matrix(vectors): - n_vectors = vectors.shape[0] - result = np.empty((n_vectors, n_vectors), dtype=np.float64) - - for i in range(n_vectors): - for j in range(i, n_vectors): - result[i,j] = result[j,i] = np.sum( - (vectors[i] - vectors[j]) ** 2) ** 0.5 - - return result - - -class RewritesTester(Pipeline): - @classmethod - def mk_pipeline(cls, args, return_type=None, flags=None, locals={}, - library=None, typing_context=None, target_context=None): - if not flags: - flags = Flags() - flags.nrt = True - if typing_context is None: - typing_context = typing.Context() - if target_context is None: - target_context = cpu.CPUContext(typing_context) - return cls(typing_context, target_context, library, args, return_type, - flags, locals) - - @classmethod - def mk_no_rw_pipeline(cls, args, return_type=None, flags=None, locals={}, - library=None, **kws): - if not flags: - flags = Flags() - flags.no_rewrites = True - return cls.mk_pipeline(args, return_type, flags, locals, library, **kws) - - -class TestArrayExpressions(MemoryLeakMixin, TestCase): - - def _compile_function(self, fn, arg_tys): - """ - Compile the given function both without and with rewrites enabled. 
- """ - control_pipeline = RewritesTester.mk_no_rw_pipeline(arg_tys) - cres_0 = control_pipeline.compile_extra(fn) - control_cfunc = cres_0.entry_point - - test_pipeline = RewritesTester.mk_pipeline(arg_tys) - cres_1 = test_pipeline.compile_extra(fn) - test_cfunc = cres_1.entry_point - - return control_pipeline, control_cfunc, test_pipeline, test_cfunc - - def test_simple_expr(self): - ''' - Using a simple array expression, verify that rewriting is taking - place, and is fusing loops. - ''' - A = np.linspace(0,1,10) - X = np.linspace(2,1,10) - Y = np.linspace(1,2,10) - arg_tys = [typeof(arg) for arg in (A, X, Y)] - - control_pipeline, nb_axy_0, test_pipeline, nb_axy_1 = \ - self._compile_function(axy, arg_tys) - - control_pipeline2 = RewritesTester.mk_no_rw_pipeline(arg_tys) - cres_2 = control_pipeline2.compile_extra(ax2) - nb_ctl = cres_2.entry_point - - expected = nb_axy_0(A, X, Y) - actual = nb_axy_1(A, X, Y) - control = nb_ctl(A, X, Y) - np.testing.assert_array_equal(expected, actual) - np.testing.assert_array_equal(control, actual) - - ir0 = control_pipeline.func_ir.blocks - ir1 = test_pipeline.func_ir.blocks - ir2 = control_pipeline2.func_ir.blocks - self.assertEqual(len(ir0), len(ir1)) - self.assertEqual(len(ir0), len(ir2)) - # The rewritten IR should be smaller than the original. - self.assertGreater(len(ir0[0].body), len(ir1[0].body)) - self.assertEqual(len(ir0[0].body), len(ir2[0].body)) - - def _get_array_exprs(self, block): - for instr in block: - if isinstance(instr, ir.Assign): - if isinstance(instr.value, ir.Expr): - if instr.value.op == 'arrayexpr': - yield instr - - def _array_expr_to_set(self, expr, out=None): - ''' - Convert an array expression tree into a set of operators. 
- ''' - if out is None: - out = set() - if not isinstance(expr, tuple): - raise ValueError("{0} not a tuple".format(expr)) - operation, operands = expr - processed_operands = [] - for operand in operands: - if isinstance(operand, tuple): - operand, _ = self._array_expr_to_set(operand, out) - processed_operands.append(operand) - processed_expr = operation, tuple(processed_operands) - out.add(processed_expr) - return processed_expr, out - - def _test_root_function(self, fn=pos_root): - A = np.random.random(10) - B = np.random.random(10) + 1. # Increase likelihood of real - # root (could add 2 to force all - # roots to be real). - C = np.random.random(10) - arg_tys = [typeof(arg) for arg in (A, B, C)] - - control_pipeline = RewritesTester.mk_no_rw_pipeline(arg_tys) - control_cres = control_pipeline.compile_extra(fn) - nb_fn_0 = control_cres.entry_point - - test_pipeline = RewritesTester.mk_pipeline(arg_tys) - test_cres = test_pipeline.compile_extra(fn) - nb_fn_1 = test_cres.entry_point - - np_result = fn(A, B, C) - nb_result_0 = nb_fn_0(A, B, C) - nb_result_1 = nb_fn_1(A, B, C) - np.testing.assert_array_almost_equal(np_result, nb_result_0) - np.testing.assert_array_almost_equal(nb_result_0, nb_result_1) - - return Namespace(locals()) - - def _test_cube_function(self, fn=cube): - A = np.arange(10, dtype=np.float64) - arg_tys = (typeof(A),) - - control_pipeline = RewritesTester.mk_no_rw_pipeline(arg_tys) - control_cres = control_pipeline.compile_extra(fn) - nb_fn_0 = control_cres.entry_point - - test_pipeline = RewritesTester.mk_pipeline(arg_tys) - test_cres = test_pipeline.compile_extra(fn) - nb_fn_1 = test_cres.entry_point - - expected = A ** 3 - self.assertPreciseEqual(expected, nb_fn_0(A)) - self.assertPreciseEqual(expected, nb_fn_1(A)) - - return Namespace(locals()) - - def _test_explicit_output_function(self, fn): - """ - Test function having a (a, b, out) signature where *out* is - an output array the function writes into. 
- """ - A = np.arange(10, dtype=np.float64) - B = A + 1 - arg_tys = (typeof(A),) * 3 - - control_pipeline, control_cfunc, test_pipeline, test_cfunc = \ - self._compile_function(fn, arg_tys) - - def run_func(fn): - out = np.zeros_like(A) - fn(A, B, out) - return out - - expected = run_func(fn) - self.assertPreciseEqual(expected, run_func(control_cfunc)) - self.assertPreciseEqual(expected, run_func(test_cfunc)) - - return Namespace(locals()) - - def _assert_array_exprs(self, block, expected_count): - """ - Assert the *block* has the expected number of array expressions - in it. - """ - rewrite_count = len(list(self._get_array_exprs(block))) - self.assertEqual(rewrite_count, expected_count) - - def _assert_total_rewrite(self, control_ir, test_ir, trivial=False): - """ - Given two dictionaries of Numba IR blocks, check to make sure the - control IR has no array expressions, while the test IR - contains one and only one. - """ - # Both IRs have the same number of blocks (presumably 1) - self.assertEqual(len(control_ir), len(test_ir)) - control_block = control_ir[0].body - test_block = test_ir[0].body - self._assert_array_exprs(control_block, 0) - self._assert_array_exprs(test_block, 1) - if not trivial: - # If the expression wasn't trivial, the block length should - # have decreased (since a sequence of exprs was replaced - # with a single nested array expr). - self.assertGreater(len(control_block), len(test_block)) - - def _assert_no_rewrite(self, control_ir, test_ir): - """ - Given two dictionaries of Numba IR blocks, check to make sure - the control IR and the test IR both have no array expressions. 
- """ - self.assertEqual(len(control_ir), len(test_ir)) - # All blocks should be identical, and not rewritten - for k, v in control_ir.items(): - control_block = v.body - test_block = test_ir[k].body - self.assertEqual(len(control_block), len(test_block)) - self._assert_array_exprs(control_block, 0) - self._assert_array_exprs(test_block, 0) - - def test_trivial_expr(self): - """ - Ensure even a non-nested expression is rewritten, as it can enable - scalar optimizations such as rewriting `x ** 2`. - """ - ns = self._test_cube_function() - self._assert_total_rewrite(ns.control_pipeline.func_ir.blocks, - ns.test_pipeline.func_ir.blocks, - trivial=True) - - def test_complicated_expr(self): - ''' - Using the polynomial root function, ensure the full expression is - being put in the same kernel with no remnants of intermediate - array expressions. - ''' - ns = self._test_root_function() - self._assert_total_rewrite(ns.control_pipeline.func_ir.blocks, - ns.test_pipeline.func_ir.blocks) - - def test_common_subexpressions(self, fn=neg_root_common_subexpr): - ''' - Attempt to verify that rewriting will incorporate user common - subexpressions properly. - ''' - ns = self._test_root_function(fn) - ir0 = ns.control_pipeline.func_ir.blocks - ir1 = ns.test_pipeline.func_ir.blocks - self.assertEqual(len(ir0), len(ir1)) - self.assertGreater(len(ir0[0].body), len(ir1[0].body)) - self.assertEqual(len(list(self._get_array_exprs(ir0[0].body))), 0) - # Verify that we didn't rewrite everything into a monolithic - # array expression since we stored temporary values in - # variables that might be used later (from the optimization's - # point of view). - array_expr_instrs = list(self._get_array_exprs(ir1[0].body)) - self.assertGreater(len(array_expr_instrs), 1) - # Now check that we haven't duplicated any subexpressions in - # the rewritten code. 
- array_sets = list(self._array_expr_to_set(instr.value.expr)[1] - for instr in array_expr_instrs) - for expr_set_0, expr_set_1 in zip(array_sets[:-1], array_sets[1:]): - intersections = expr_set_0.intersection(expr_set_1) - if intersections: - self.fail("Common subexpressions detected in array " - "expressions ({0})".format(intersections)) - - def test_complex_subexpression(self): - return self.test_common_subexpressions(neg_root_complex_subexpr) - - def test_ufunc_and_dufunc_calls(self): - ''' - Verify that ufunc and DUFunc calls are being properly included in - array expressions. - ''' - A = np.random.random(10) - B = np.random.random(10) - arg_tys = [typeof(arg) for arg in (A, B)] - - vaxy_descr = vaxy._dispatcher.targetdescr - control_pipeline = RewritesTester.mk_no_rw_pipeline( - arg_tys, - typing_context=vaxy_descr.typing_context, - target_context=vaxy_descr.target_context) - cres_0 = control_pipeline.compile_extra(call_stuff) - nb_call_stuff_0 = cres_0.entry_point - - test_pipeline = RewritesTester.mk_pipeline( - arg_tys, - typing_context=vaxy_descr.typing_context, - target_context=vaxy_descr.target_context) - cres_1 = test_pipeline.compile_extra(call_stuff) - nb_call_stuff_1 = cres_1.entry_point - - expected = call_stuff(A, B) - control = nb_call_stuff_0(A, B) - actual = nb_call_stuff_1(A, B) - np.testing.assert_array_almost_equal(expected, control) - np.testing.assert_array_almost_equal(expected, actual) - - self._assert_total_rewrite(control_pipeline.func_ir.blocks, - test_pipeline.func_ir.blocks) - - def test_cmp_op(self): - ''' - Verify that comparison operators are supported by the rewriter. - ''' - ns = self._test_root_function(are_roots_imaginary) - self._assert_total_rewrite(ns.control_pipeline.func_ir.blocks, - ns.test_pipeline.func_ir.blocks) - - def test_explicit_output(self): - """ - Check that ufunc calls with explicit outputs are not rewritten. 
- """ - ns = self._test_explicit_output_function(explicit_output) - self._assert_no_rewrite(ns.control_pipeline.func_ir.blocks, - ns.test_pipeline.func_ir.blocks) - - -class TestRewriteIssues(MemoryLeakMixin, TestCase): - - def test_issue_1184(self): - from numba import jit - import numpy as np - - @jit(nopython=True) - def foo(arr): - return arr - - @jit(nopython=True) - def bar(arr): - c = foo(arr) - d = foo(arr) # two calls to trigger rewrite - return c, d - - arr = np.arange(10) - out_c, out_d = bar(arr) - self.assertIs(out_c, out_d) - self.assertIs(out_c, arr) - - def test_issue_1264(self): - n = 100 - x = np.random.uniform(size=n*3).reshape((n,3)) - expected = distance_matrix(x) - actual = njit(distance_matrix)(x) - np.testing.assert_array_almost_equal(expected, actual) - # Avoid sporadic failures in MemoryLeakMixin.tearDown() - gc.collect() - - def test_issue_1372(self): - """Test array expression with duplicated term""" - from numba import njit - - @njit - def foo(a, b): - b = np.sin(b) - return b + b + a - - a = np.random.uniform(10) - b = np.random.uniform(10) - expect = foo.py_func(a, b) - got = foo(a, b) - np.testing.assert_allclose(got, expect) - - def test_unary_arrayexpr(self): - """ - Typing of unary array expression (np.negate) can be incorrect. - """ - @njit - def foo(a, b): - return b - a + -a - - b = 1.5 - a = np.arange(10, dtype=np.int32) - - expect = foo.py_func(a, b) - got = foo(a, b) - self.assertPreciseEqual(got, expect) - - def test_bitwise_arrayexpr(self): - """ - Typing of bitwise boolean array expression can be incorrect - (issue #1813). - """ - @njit - def foo(a, b): - return ~(a & (~b)) - - a = np.array([True, True, False, False]) - b = np.array([False, True, False, True]) - - expect = foo.py_func(a, b) - got = foo(a, b) - self.assertPreciseEqual(got, expect) - - def test_annotations(self): - """ - Type annotation of array expressions with disambiguated - variable names (issue #1466). 
- """ - cfunc = njit(variable_name_reuse) - - a = np.linspace(0, 1, 10) - cfunc(a, a, a, a) - - buf = utils.StringIO() - cfunc.inspect_types(buf) - res = buf.getvalue() - self.assertIn("# u.1 = ", res) - self.assertIn("# u.2 = ", res) - - -class TestSemantics(MemoryLeakMixin, unittest.TestCase): - - def test_division_by_zero(self): - # Array expressions should follow the Numpy error model - # i.e. 1./0. returns +inf instead of raising ZeroDivisionError - pyfunc = div_add - cfunc = njit(pyfunc) - - a = np.float64([0.0, 1.0, float('inf')]) - b = np.float64([0.0, 0.0, 1.0]) - c = np.ones_like(a) - - expect = pyfunc(a, b, c) - got = cfunc(a, b, c) - np.testing.assert_array_equal(expect, got) - - -if __name__ == "__main__": - unittest.main() diff --git a/numba/numba/tests/test_array_iterators.py b/numba/numba/tests/test_array_iterators.py deleted file mode 100644 index 117348b9e..000000000 --- a/numba/numba/tests/test_array_iterators.py +++ /dev/null @@ -1,487 +0,0 @@ -from __future__ import division - -import itertools - -import numpy as np - -from numba import unittest_support as unittest -from numba import jit, typeof, types -from numba.compiler import compile_isolated -from .support import TestCase, CompilationCache, MemoryLeakMixin, tag - - -def array_iter(arr): - total = 0 - for i, v in enumerate(arr): - total += i * v - return total - -def array_view_iter(arr, idx): - total = 0 - for i, v in enumerate(arr[idx]): - total += i * v - return total - -def array_flat(arr, out): - for i, v in enumerate(arr.flat): - out[i] = v - -def array_flat_getitem(arr, ind): - return arr.flat[ind] - -def array_flat_setitem(arr, ind, val): - arr.flat[ind] = val - -def array_flat_sum(arr): - s = 0 - for i, v in enumerate(arr.flat): - s = s + (i + 1) * v - return s - -def array_flat_len(arr): - return len(arr.flat) - -def array_ndenumerate_sum(arr): - s = 0 - for (i, j), v in np.ndenumerate(arr): - s = s + (i + 1) * (j + 1) * v - return s - -def np_ndindex_empty(): - s = 0 - for ind in 
np.ndindex(()): - s += s + len(ind) + 1 - return s - -def np_ndindex(x, y): - s = 0 - n = 0 - for i, j in np.ndindex(x, y): - s = s + (i + 1) * (j + 1) - return s - -def np_ndindex_array(arr): - s = 0 - n = 0 - for indices in np.ndindex(arr.shape): - for i, j in enumerate(indices): - s = s + (i + 1) * (j + 1) - return s - -def np_nditer1(a): - res = [] - for u in np.nditer(a): - res.append(u.item()) - return res - -def np_nditer2(a, b): - res = [] - for u, v in np.nditer((a, b)): - res.append((u.item(), v.item())) - return res - -def np_nditer3(a, b, c): - res = [] - for u, v, w in np.nditer((a, b, c)): - res.append((u.item(), v.item(), w.item())) - return res - -def iter_next(arr): - it = iter(arr) - it2 = iter(arr) - return next(it), next(it), next(it2) - - -# -# Test premature free (see issue #2112). -# The following test allocates an array ``x`` inside the body. -# The compiler will put a ``del x`` right after the last use of ``x``, -# which is right after the creation of the array iterator and -# before the loop is entered. If the iterator does not incref the array, -# the iterator will be reading garbage data of free'ed memory. -# - -def array_flat_premature_free(size): - x = np.arange(size) - res = np.zeros_like(x, dtype=np.intp) - for i, v in enumerate(x.flat): - res[i] = v - return res - -def array_ndenumerate_premature_free(size): - x = np.arange(size) - res = np.zeros_like(x, dtype=np.intp) - for i, v in np.ndenumerate(x): - res[i] = v - return res - - -class TestArrayIterators(MemoryLeakMixin, TestCase): - """ - Test array.flat, np.ndenumerate(), etc. 
- """ - - def setUp(self): - super(TestArrayIterators, self).setUp() - self.ccache = CompilationCache() - - def check_array_iter(self, arr): - pyfunc = array_iter - cres = compile_isolated(pyfunc, [typeof(arr)]) - cfunc = cres.entry_point - expected = pyfunc(arr) - self.assertPreciseEqual(cfunc(arr), expected) - - def check_array_view_iter(self, arr, index): - pyfunc = array_view_iter - cres = compile_isolated(pyfunc, [typeof(arr), typeof(index)]) - cfunc = cres.entry_point - expected = pyfunc(arr, index) - self.assertPreciseEqual(cfunc(arr, index), expected) - - def check_array_flat(self, arr, arrty=None): - out = np.zeros(arr.size, dtype=arr.dtype) - nb_out = out.copy() - if arrty is None: - arrty = typeof(arr) - - cres = compile_isolated(array_flat, [arrty, typeof(out)]) - cfunc = cres.entry_point - - array_flat(arr, out) - cfunc(arr, nb_out) - - self.assertPreciseEqual(out, nb_out) - - def check_array_unary(self, arr, arrty, func): - cres = compile_isolated(func, [arrty]) - cfunc = cres.entry_point - self.assertPreciseEqual(cfunc(arr), func(arr)) - - def check_array_flat_sum(self, arr, arrty): - self.check_array_unary(arr, arrty, array_flat_sum) - - def check_array_ndenumerate_sum(self, arr, arrty): - self.check_array_unary(arr, arrty, array_ndenumerate_sum) - - @tag('important') - def test_array_iter(self): - # Test iterating over a 1d array - arr = np.arange(6) - self.check_array_iter(arr) - arr = arr[::2] - self.assertFalse(arr.flags.c_contiguous) - self.assertFalse(arr.flags.f_contiguous) - self.check_array_iter(arr) - arr = np.bool_([1, 0, 0, 1]) - self.check_array_iter(arr) - - def test_array_view_iter(self): - # Test iterating over a 1d view over a 2d array - arr = np.arange(12).reshape((3, 4)) - self.check_array_view_iter(arr, 1) - self.check_array_view_iter(arr.T, 1) - arr = arr[::2] - self.check_array_view_iter(arr, 1) - arr = np.bool_([1, 0, 0, 1]).reshape((2, 2)) - self.check_array_view_iter(arr, 1) - - @tag('important') - def 
test_array_flat_3d(self): - arr = np.arange(24).reshape(4, 2, 3) - - arrty = typeof(arr) - self.assertEqual(arrty.ndim, 3) - self.assertEqual(arrty.layout, 'C') - self.assertTrue(arr.flags.c_contiguous) - # Test with C-contiguous array - self.check_array_flat(arr) - # Test with Fortran-contiguous array - arr = arr.transpose() - self.assertFalse(arr.flags.c_contiguous) - self.assertTrue(arr.flags.f_contiguous) - self.assertEqual(typeof(arr).layout, 'F') - self.check_array_flat(arr) - # Test with non-contiguous array - arr = arr[::2] - self.assertFalse(arr.flags.c_contiguous) - self.assertFalse(arr.flags.f_contiguous) - self.assertEqual(typeof(arr).layout, 'A') - self.check_array_flat(arr) - # Boolean array - arr = np.bool_([1, 0, 0, 1] * 2).reshape((2, 2, 2)) - self.check_array_flat(arr) - - def test_array_flat_empty(self): - # Test .flat with various shapes of empty arrays, contiguous - # and non-contiguous (see issue #846). - arr = np.zeros(0, dtype=np.int32) - arr = arr.reshape(0, 2) - arrty = types.Array(types.int32, 2, layout='C') - self.check_array_flat_sum(arr, arrty) - arrty = types.Array(types.int32, 2, layout='F') - self.check_array_flat_sum(arr, arrty) - arrty = types.Array(types.int32, 2, layout='A') - self.check_array_flat_sum(arr, arrty) - arr = arr.reshape(2, 0) - arrty = types.Array(types.int32, 2, layout='C') - self.check_array_flat_sum(arr, arrty) - arrty = types.Array(types.int32, 2, layout='F') - self.check_array_flat_sum(arr, arrty) - arrty = types.Array(types.int32, 2, layout='A') - self.check_array_flat_sum(arr, arrty) - - def test_array_flat_getitem(self): - # Test indexing of array.flat object - pyfunc = array_flat_getitem - def check(arr, ind): - cr = self.ccache.compile(pyfunc, (typeof(arr), typeof(ind))) - expected = pyfunc(arr, ind) - self.assertEqual(cr.entry_point(arr, ind), expected) - - arr = np.arange(24).reshape(4, 2, 3) - for i in range(arr.size): - check(arr, i) - arr = arr.T - for i in range(arr.size): - check(arr, i) - arr = 
arr[::2] - for i in range(arr.size): - check(arr, i) - arr = np.array([42]).reshape(()) - for i in range(arr.size): - check(arr, i) - # Boolean array - arr = np.bool_([1, 0, 0, 1]) - for i in range(arr.size): - check(arr, i) - arr = arr[::2] - for i in range(arr.size): - check(arr, i) - - def test_array_flat_setitem(self): - # Test indexing of array.flat object - pyfunc = array_flat_setitem - def check(arr, ind): - arrty = typeof(arr) - cr = self.ccache.compile(pyfunc, (arrty, typeof(ind), arrty.dtype)) - # Use np.copy() to keep the layout - expected = np.copy(arr) - got = np.copy(arr) - pyfunc(expected, ind, 123) - cr.entry_point(got, ind, 123) - self.assertPreciseEqual(got, expected) - - arr = np.arange(24).reshape(4, 2, 3) - for i in range(arr.size): - check(arr, i) - arr = arr.T - for i in range(arr.size): - check(arr, i) - arr = arr[::2] - for i in range(arr.size): - check(arr, i) - arr = np.array([42]).reshape(()) - for i in range(arr.size): - check(arr, i) - # Boolean array - arr = np.bool_([1, 0, 0, 1]) - for i in range(arr.size): - check(arr, i) - arr = arr[::2] - for i in range(arr.size): - check(arr, i) - - def test_array_flat_len(self): - # Test len(array.flat) - pyfunc = array_flat_len - def check(arr): - cr = self.ccache.compile(pyfunc, (typeof(arr),)) - expected = pyfunc(arr) - self.assertPreciseEqual(cr.entry_point(arr), expected) - - arr = np.arange(24).reshape(4, 2, 3) - check(arr) - arr = arr.T - check(arr) - arr = arr[::2] - check(arr) - arr = np.array([42]).reshape(()) - check(arr) - - def test_array_flat_premature_free(self): - cres = compile_isolated(array_flat_premature_free, [types.intp]) - cfunc = cres.entry_point - expect = array_flat_premature_free(6) - got = cfunc(6) - self.assertTrue(got.sum()) - self.assertPreciseEqual(expect, got) - - @tag('important') - def test_array_ndenumerate_2d(self): - arr = np.arange(12).reshape(4, 3) - arrty = typeof(arr) - self.assertEqual(arrty.ndim, 2) - self.assertEqual(arrty.layout, 'C') - 
self.assertTrue(arr.flags.c_contiguous) - # Test with C-contiguous array - self.check_array_ndenumerate_sum(arr, arrty) - # Test with Fortran-contiguous array - arr = arr.transpose() - self.assertFalse(arr.flags.c_contiguous) - self.assertTrue(arr.flags.f_contiguous) - arrty = typeof(arr) - self.assertEqual(arrty.layout, 'F') - self.check_array_ndenumerate_sum(arr, arrty) - # Test with non-contiguous array - arr = arr[::2] - self.assertFalse(arr.flags.c_contiguous) - self.assertFalse(arr.flags.f_contiguous) - arrty = typeof(arr) - self.assertEqual(arrty.layout, 'A') - self.check_array_ndenumerate_sum(arr, arrty) - # Boolean array - arr = np.bool_([1, 0, 0, 1]).reshape((2, 2)) - self.check_array_ndenumerate_sum(arr, typeof(arr)) - - def test_array_ndenumerate_empty(self): - arr = np.zeros(0, dtype=np.int32) - arr = arr.reshape(0, 2) - arrty = types.Array(types.int32, 2, layout='C') - self.check_array_ndenumerate_sum(arr, arrty) - arrty = types.Array(types.int32, 2, layout='F') - self.check_array_ndenumerate_sum(arr, arrty) - arrty = types.Array(types.int32, 2, layout='A') - self.check_array_ndenumerate_sum(arr, arrty) - arr = arr.reshape(2, 0) - arrty = types.Array(types.int32, 2, layout='C') - self.check_array_flat_sum(arr, arrty) - arrty = types.Array(types.int32, 2, layout='F') - self.check_array_flat_sum(arr, arrty) - arrty = types.Array(types.int32, 2, layout='A') - self.check_array_flat_sum(arr, arrty) - - def test_array_ndenumerate_premature_free(self): - cres = compile_isolated(array_ndenumerate_premature_free, [types.intp]) - cfunc = cres.entry_point - expect = array_ndenumerate_premature_free(6) - got = cfunc(6) - self.assertTrue(got.sum()) - self.assertPreciseEqual(expect, got) - - def test_np_ndindex(self): - func = np_ndindex - cres = compile_isolated(func, [types.int32, types.int32]) - cfunc = cres.entry_point - self.assertPreciseEqual(cfunc(3, 4), func(3, 4)) - self.assertPreciseEqual(cfunc(3, 0), func(3, 0)) - self.assertPreciseEqual(cfunc(0, 3), 
func(0, 3)) - self.assertPreciseEqual(cfunc(0, 0), func(0, 0)) - - @tag('important') - def test_np_ndindex_array(self): - func = np_ndindex_array - arr = np.arange(12, dtype=np.int32) + 10 - self.check_array_unary(arr, typeof(arr), func) - arr = arr.reshape((4, 3)) - self.check_array_unary(arr, typeof(arr), func) - arr = arr.reshape((2, 2, 3)) - self.check_array_unary(arr, typeof(arr), func) - - def test_np_ndindex_empty(self): - func = np_ndindex_empty - cres = compile_isolated(func, []) - cfunc = cres.entry_point - self.assertPreciseEqual(cfunc(), func()) - - @tag('important') - def test_iter_next(self): - # This also checks memory management with iter() and next() - func = iter_next - arr = np.arange(12, dtype=np.int32) + 10 - self.check_array_unary(arr, typeof(arr), func) - - -class TestNdIter(MemoryLeakMixin, TestCase): - """ - Test np.nditer() - """ - - def inputs(self): - # All those inputs are compatible with a (3, 4) main shape - - # scalars - yield np.float32(100) - - # 0-d arrays - yield np.array(102, dtype=np.int16) - - # 1-d arrays - yield np.arange(4).astype(np.complex64) - yield np.arange(8)[::2] - - # 2-d arrays - a = np.arange(12).reshape((3, 4)) - yield a - yield a.copy(order='F') - a = np.arange(24).reshape((6, 4))[::2] - yield a - - def basic_inputs(self): - yield np.arange(4).astype(np.complex64) - yield np.arange(8)[::2] - a = np.arange(12).reshape((3, 4)) - yield a - yield a.copy(order='F') - - def check_result(self, got, expected): - self.assertEqual(set(got), set(expected), (got, expected)) - - def test_nditer1(self): - pyfunc = np_nditer1 - cfunc = jit(nopython=True)(pyfunc) - for a in self.inputs(): - expected = pyfunc(a) - got = cfunc(a) - self.check_result(got, expected) - - @tag('important') - def test_nditer2(self): - pyfunc = np_nditer2 - cfunc = jit(nopython=True)(pyfunc) - for a, b in itertools.product(self.inputs(), self.inputs()): - expected = pyfunc(a, b) - got = cfunc(a, b) - self.check_result(got, expected) - - def 
test_nditer3(self): - pyfunc = np_nditer3 - cfunc = jit(nopython=True)(pyfunc) - # Use a restricted set of inputs, to shorten test time - inputs = self.basic_inputs - for a, b, c in itertools.product(inputs(), inputs(), inputs()): - expected = pyfunc(a, b, c) - got = cfunc(a, b, c) - self.check_result(got, expected) - - def test_errors(self): - # Incompatible shapes - pyfunc = np_nditer2 - cfunc = jit(nopython=True)(pyfunc) - - self.disable_leak_check() - - def check_incompatible(a, b): - with self.assertRaises(ValueError) as raises: - cfunc(a, b) - self.assertIn("operands could not be broadcast together", - str(raises.exception)) - - check_incompatible(np.arange(2), np.arange(3)) - a = np.arange(12).reshape((3, 4)) - b = np.arange(3) - check_incompatible(a, b) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_array_manipulation.py b/numba/numba/tests/test_array_manipulation.py deleted file mode 100644 index 61ca9e9b7..000000000 --- a/numba/numba/tests/test_array_manipulation.py +++ /dev/null @@ -1,590 +0,0 @@ -from __future__ import print_function - -from functools import partial -from itertools import permutations -import numba.unittest_support as unittest - -import numpy as np - -from numba.compiler import compile_isolated, Flags -from numba import jit, types, from_dtype, errors, typeof -from numba.errors import TypingError -from .support import TestCase, MemoryLeakMixin, CompilationCache, tag - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -no_pyobj_flags = Flags() -no_pyobj_flags.set('nrt') - - -def from_generic(pyfuncs_to_use): - """Decorator for generic check functions. - Iterates over 'pyfuncs_to_use', calling 'func' with the iterated - item as first argument. 
Example: - - @from_generic(numpy_array_reshape, array_reshape) - def check_only_shape(pyfunc, arr, shape, expected_shape): - # Only check Numba result to avoid Numpy bugs - self.memory_leak_setup() - got = generic_run(pyfunc, arr, shape) - self.assertEqual(got.shape, expected_shape) - self.assertEqual(got.size, arr.size) - del got - self.memory_leak_teardown() - """ - def decorator(func): - def result(*args, **kwargs): - return (func(pyfunc, *args, **kwargs) for pyfunc in pyfuncs_to_use) - return result - return decorator - - -def array_reshape(arr, newshape): - return arr.reshape(newshape) - - -def numpy_array_reshape(arr, newshape): - return np.reshape(arr, newshape) - - -def flatten_array(a): - return a.flatten() - - -def ravel_array(a): - return a.ravel() - - -def ravel_array_size(a): - return a.ravel().size - - -def numpy_ravel_array(a): - return np.ravel(a) - - -def transpose_array(a): - return a.transpose() - - -def numpy_transpose_array(a): - return np.transpose(a) - -def numpy_transpose_array_axes_kwarg(arr, axes): - return np.transpose(arr, axes=axes) - -def array_transpose_axes(arr, axes): - return arr.transpose(axes) - -def squeeze_array(a): - return a.squeeze() - - -def expand_dims(a, axis): - return np.expand_dims(a, axis) - - -def atleast_1d(*args): - return np.atleast_1d(*args) - - -def atleast_2d(*args): - return np.atleast_2d(*args) - - -def atleast_3d(*args): - return np.atleast_3d(*args) - - -def as_strided1(a): - # as_strided() with implicit shape - strides = (a.strides[0] // 2,) + a.strides[1:] - return np.lib.stride_tricks.as_strided(a, strides=strides) - - -def as_strided2(a): - # Rolling window example as in https://github.com/numba/numba/issues/1884 - window = 3 - shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) - strides = a.strides + (a.strides[-1],) - return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) - - -def add_axis2(a): - return a[np.newaxis, :] - - -def bad_index(arr, arr2d): - x = arr.x, - y = arr.y - 
# note that `x` is a tuple, which causes a new axis to be created. - arr2d[x, y] = 1.0 - - -def bad_float_index(arr): - # 2D index required for this function because 1D index - # fails typing - return arr[1, 2.0] - - -class TestArrayManipulation(MemoryLeakMixin, TestCase): - """ - Check shape-changing operations on arrays. - """ - - def setUp(self): - super(TestArrayManipulation, self).setUp() - self.ccache = CompilationCache() - - @tag('important') - def test_array_reshape(self): - pyfuncs_to_use = [array_reshape, numpy_array_reshape] - - def generic_run(pyfunc, arr, shape): - cres = compile_isolated(pyfunc, (typeof(arr), typeof(shape))) - return cres.entry_point(arr, shape) - - @from_generic(pyfuncs_to_use) - def check(pyfunc, arr, shape): - expected = pyfunc(arr, shape) - self.memory_leak_setup() - got = generic_run(pyfunc, arr, shape) - self.assertPreciseEqual(got, expected) - del got - self.memory_leak_teardown() - - @from_generic(pyfuncs_to_use) - def check_only_shape(pyfunc, arr, shape, expected_shape): - # Only check Numba result to avoid Numpy bugs - self.memory_leak_setup() - got = generic_run(pyfunc, arr, shape) - self.assertEqual(got.shape, expected_shape) - self.assertEqual(got.size, arr.size) - del got - self.memory_leak_teardown() - - @from_generic(pyfuncs_to_use) - def check_err_shape(pyfunc, arr, shape): - with self.assertRaises(NotImplementedError) as raises: - generic_run(pyfunc, arr, shape) - self.assertEqual(str(raises.exception), - "incompatible shape for array") - - @from_generic(pyfuncs_to_use) - def check_err_size(pyfunc, arr, shape): - with self.assertRaises(ValueError) as raises: - generic_run(pyfunc, arr, shape) - self.assertEqual(str(raises.exception), - "total size of new array must be unchanged") - - @from_generic(pyfuncs_to_use) - def check_err_multiple_negative(pyfunc, arr, shape): - with self.assertRaises(ValueError) as raises: - generic_run(pyfunc, arr, shape) - self.assertEqual(str(raises.exception), - "multiple negative shape 
values") - - - # C-contiguous - arr = np.arange(24) - check(arr, (24,)) - check(arr, (4, 6)) - check(arr, (8, 3)) - check(arr, (8, 1, 3)) - check(arr, (1, 8, 1, 1, 3, 1)) - arr = np.arange(24).reshape((2, 3, 4)) - check(arr, (24,)) - check(arr, (4, 6)) - check(arr, (8, 3)) - check(arr, (8, 1, 3)) - check(arr, (1, 8, 1, 1, 3, 1)) - check_err_size(arr, ()) - check_err_size(arr, (25,)) - check_err_size(arr, (8, 4)) - arr = np.arange(24).reshape((1, 8, 1, 1, 3, 1)) - check(arr, (24,)) - check(arr, (4, 6)) - check(arr, (8, 3)) - check(arr, (8, 1, 3)) - - # F-contiguous - arr = np.arange(24).reshape((2, 3, 4)).T - check(arr, (4, 3, 2)) - check(arr, (1, 4, 1, 3, 1, 2, 1)) - check_err_shape(arr, (2, 3, 4)) - check_err_shape(arr, (6, 4)) - check_err_shape(arr, (2, 12)) - - # Test negative shape value - arr = np.arange(25).reshape(5,5) - check(arr, -1) - check(arr, (-1,)) - check(arr, (-1, 5)) - check(arr, (5, -1, 5)) - check(arr, (5, 5, -1)) - check_err_size(arr, (-1, 4)) - check_err_multiple_negative(arr, (-1, -2, 5, 5)) - check_err_multiple_negative(arr, (5, 5, -1, -1)) - - # 0-sized arrays - def check_empty(arr): - check(arr, 0) - check(arr, (0,)) - check(arr, (1, 0, 2)) - check(arr, (0, 55, 1, 0, 2)) - # -1 is buggy in Numpy with 0-sized arrays - check_only_shape(arr, -1, (0,)) - check_only_shape(arr, (-1,), (0,)) - check_only_shape(arr, (0, -1), (0, 0)) - check_only_shape(arr, (4, -1), (4, 0)) - check_only_shape(arr, (-1, 0, 4), (0, 0, 4)) - check_err_size(arr, ()) - check_err_size(arr, 1) - check_err_size(arr, (1, 2)) - - arr = np.array([]) - check_empty(arr) - check_empty(arr.reshape((3, 2, 0))) - - # Exceptions leak references - self.disable_leak_check() - - def test_array_transpose_axes(self): - pyfuncs_to_use = [numpy_transpose_array_axes_kwarg, - array_transpose_axes] - - def run(pyfunc, arr, axes): - cres = self.ccache.compile(pyfunc, (typeof(arr), typeof(axes))) - return cres.entry_point(arr, axes) - - @from_generic(pyfuncs_to_use) - def check(pyfunc, arr, 
axes): - expected = pyfunc(arr, axes) - got = run(pyfunc, arr, axes) - self.assertPreciseEqual(got, expected) - self.assertEqual(got.flags.f_contiguous, - expected.flags.f_contiguous) - self.assertEqual(got.flags.c_contiguous, - expected.flags.c_contiguous) - - @from_generic(pyfuncs_to_use) - def check_err_axis_repeated(pyfunc, arr, axes): - with self.assertRaises(ValueError) as raises: - run(pyfunc, arr, axes) - self.assertEqual(str(raises.exception), - "repeated axis in transpose") - - @from_generic(pyfuncs_to_use) - def check_err_axis_oob(pyfunc, arr, axes): - with self.assertRaises(ValueError) as raises: - run(pyfunc, arr, axes) - self.assertEqual(str(raises.exception), - "axis is out of bounds for array of given dimension") - - @from_generic(pyfuncs_to_use) - def check_err_invalid_args(pyfunc, arr, axes): - with self.assertRaises((TypeError, TypingError)): - run(pyfunc, arr, axes) - - arrs = [np.arange(24), - np.arange(24).reshape(4, 6), - np.arange(24).reshape(2, 3, 4), - np.arange(24).reshape(1, 2, 3, 4), - np.arange(64).reshape(8, 4, 2)[::3,::2,:]] - - for i in range(len(arrs)): - # First check `None`, the default, which is to reverse dims - check(arrs[i], None) - # Check supplied axis permutations - for axes in permutations(tuple(range(arrs[i].ndim))): - ndim = len(axes) - neg_axes = tuple([x - ndim for x in axes]) - check(arrs[i], axes) - check(arrs[i], neg_axes) - - # Exceptions leak references - self.disable_leak_check() - - check_err_invalid_args(arrs[1], "foo") - check_err_invalid_args(arrs[1], ("foo",)) - check_err_invalid_args(arrs[1], 5.3) - check_err_invalid_args(arrs[2], (1.2, 5)) - - check_err_axis_repeated(arrs[1], (0, 0)) - check_err_axis_repeated(arrs[2], (2, 0, 0)) - check_err_axis_repeated(arrs[3], (3, 2, 1, 1)) - - check_err_axis_oob(arrs[0], (1,)) - check_err_axis_oob(arrs[0], (-2,)) - check_err_axis_oob(arrs[1], (0, 2)) - check_err_axis_oob(arrs[1], (-3, 2)) - check_err_axis_oob(arrs[1], (0, -3)) - check_err_axis_oob(arrs[2], (3, 1, 2)) 
- check_err_axis_oob(arrs[2], (-4, 1, 2)) - check_err_axis_oob(arrs[3], (3, 1, 2, 5)) - check_err_axis_oob(arrs[3], (3, 1, 2, -5)) - - - @tag('important') - def test_expand_dims(self): - pyfunc = expand_dims - - def run(arr, axis): - cres = self.ccache.compile(pyfunc, (typeof(arr), typeof(axis))) - return cres.entry_point(arr, axis) - - def check(arr, axis): - expected = pyfunc(arr, axis) - self.memory_leak_setup() - got = run(arr, axis) - self.assertPreciseEqual(got, expected) - del got - self.memory_leak_teardown() - - def check_all_axes(arr): - for axis in range(-arr.ndim - 1, arr.ndim + 1): - check(arr, axis) - - # 1d - arr = np.arange(5) - check_all_axes(arr) - # 3d (C, F, A) - arr = np.arange(24).reshape((2, 3, 4)) - check_all_axes(arr) - check_all_axes(arr.T) - check_all_axes(arr[::-1]) - # 0d - arr = np.array(42) - check_all_axes(arr) - - def check_atleast_nd(self, pyfunc, cfunc): - def check_result(got, expected): - # We would like to check the result has the same contiguity, - # but we can't rely on the "flags" attribute when there are - # 1-sized dimensions. 
- self.assertStridesEqual(got, expected) - self.assertPreciseEqual(got.flatten(), expected.flatten()) - - def check_single(arg): - check_result(cfunc(arg), pyfunc(arg)) - - def check_tuple(*args): - expected_tuple = pyfunc(*args) - got_tuple = cfunc(*args) - self.assertEqual(len(got_tuple), len(expected_tuple)) - for got, expected in zip(got_tuple, expected_tuple): - check_result(got, expected) - - # 0d - a1 = np.array(42) - a2 = np.array(5j) - check_single(a1) - check_tuple(a1, a2) - # 1d - b1 = np.arange(5) - b2 = np.arange(6) + 1j - b3 = b1[::-1] - check_single(b1) - check_tuple(b1, b2, b3) - # 2d - c1 = np.arange(6).reshape((2, 3)) - c2 = c1.T - c3 = c1[::-1] - check_single(c1) - check_tuple(c1, c2, c3) - # 3d - d1 = np.arange(24).reshape((2, 3, 4)) - d2 = d1.T - d3 = d1[::-1] - check_single(d1) - check_tuple(d1, d2, d3) - # 4d - e = np.arange(16).reshape((2, 2, 2, 2)) - check_single(e) - # mixed dimensions - check_tuple(a1, b2, c3, d2) - - def test_atleast_1d(self): - pyfunc = atleast_1d - cfunc = jit(nopython=True)(pyfunc) - self.check_atleast_nd(pyfunc, cfunc) - - def test_atleast_2d(self): - pyfunc = atleast_2d - cfunc = jit(nopython=True)(pyfunc) - self.check_atleast_nd(pyfunc, cfunc) - - def test_atleast_3d(self): - pyfunc = atleast_3d - cfunc = jit(nopython=True)(pyfunc) - self.check_atleast_nd(pyfunc, cfunc) - - def check_as_strided(self, pyfunc): - def run(arr): - cres = self.ccache.compile(pyfunc, (typeof(arr),)) - return cres.entry_point(arr) - def check(arr): - expected = pyfunc(arr) - got = run(arr) - self.assertPreciseEqual(got, expected) - - arr = np.arange(24) - check(arr) - check(arr.reshape((6, 4))) - check(arr.reshape((4, 1, 6))) - - def test_as_strided(self): - self.check_as_strided(as_strided1) - self.check_as_strided(as_strided2) - - def test_flatten_array(self, flags=enable_pyobj_flags, layout='C'): - a = np.arange(9).reshape(3, 3) - if layout == 'F': - a = a.T - - pyfunc = flatten_array - arraytype1 = typeof(a) - if layout == 'A': - # 
Force A layout - arraytype1 = arraytype1.copy(layout='A') - - self.assertEqual(arraytype1.layout, layout) - cr = compile_isolated(pyfunc, (arraytype1,), flags=flags) - cfunc = cr.entry_point - - expected = pyfunc(a) - got = cfunc(a) - np.testing.assert_equal(expected, got) - - def test_flatten_array_npm(self): - self.test_flatten_array(flags=no_pyobj_flags) - self.test_flatten_array(flags=no_pyobj_flags, layout='F') - self.test_flatten_array(flags=no_pyobj_flags, layout='A') - - def test_ravel_array(self, flags=enable_pyobj_flags): - def generic_check(pyfunc, a, assume_layout): - # compile - arraytype1 = typeof(a) - self.assertEqual(arraytype1.layout, assume_layout) - cr = compile_isolated(pyfunc, (arraytype1,), flags=flags) - cfunc = cr.entry_point - - expected = pyfunc(a) - got = cfunc(a) - # Check result matches - np.testing.assert_equal(expected, got) - # Check copying behavior - py_copied = (a.ctypes.data != expected.ctypes.data) - nb_copied = (a.ctypes.data != got.ctypes.data) - self.assertEqual(py_copied, assume_layout != 'C') - self.assertEqual(py_copied, nb_copied) - - check_method = partial(generic_check, ravel_array) - check_function = partial(generic_check, numpy_ravel_array) - - def check(*args, **kwargs): - check_method(*args, **kwargs) - check_function(*args, **kwargs) - - # Check 2D - check(np.arange(9).reshape(3, 3), assume_layout='C') - check(np.arange(9).reshape(3, 3, order='F'), assume_layout='F') - check(np.arange(18).reshape(3, 3, 2)[:, :, 0], assume_layout='A') - - # Check 3D - check(np.arange(18).reshape(2, 3, 3), assume_layout='C') - check(np.arange(18).reshape(2, 3, 3, order='F'), assume_layout='F') - check(np.arange(36).reshape(2, 3, 3, 2)[:, :, :, 0], assume_layout='A') - - def test_ravel_array_size(self, flags=enable_pyobj_flags): - a = np.arange(9).reshape(3, 3) - - pyfunc = ravel_array_size - arraytype1 = typeof(a) - cr = compile_isolated(pyfunc, (arraytype1,), flags=flags) - cfunc = cr.entry_point - - expected = pyfunc(a) - got = 
cfunc(a) - np.testing.assert_equal(expected, got) - - def test_ravel_array_npm(self): - self.test_ravel_array(flags=no_pyobj_flags) - - def test_ravel_array_size_npm(self): - self.test_ravel_array_size(flags=no_pyobj_flags) - - def test_transpose_array(self, flags=enable_pyobj_flags): - @from_generic([transpose_array, numpy_transpose_array]) - def check(pyfunc): - a = np.arange(9).reshape(3, 3) - - arraytype1 = typeof(a) - cr = compile_isolated(pyfunc, (arraytype1,), flags=flags) - cfunc = cr.entry_point - - expected = pyfunc(a) - got = cfunc(a) - np.testing.assert_equal(expected, got) - - check() - - def test_transpose_array_npm(self): - self.test_transpose_array(flags=no_pyobj_flags) - - def test_squeeze_array(self, flags=enable_pyobj_flags): - a = np.arange(2 * 1 * 3 * 1 * 4).reshape(2, 1, 3, 1, 4) - - pyfunc = squeeze_array - arraytype1 = typeof(a) - cr = compile_isolated(pyfunc, (arraytype1,), flags=flags) - cfunc = cr.entry_point - - expected = pyfunc(a) - got = cfunc(a) - np.testing.assert_equal(expected, got) - - def test_squeeze_array_npm(self): - with self.assertRaises(errors.TypingError) as raises: - self.test_squeeze_array(flags=no_pyobj_flags) - - self.assertIn("squeeze", str(raises.exception)) - - def test_add_axis2(self, flags=enable_pyobj_flags): - a = np.arange(9).reshape(3, 3) - - pyfunc = add_axis2 - arraytype1 = typeof(a) - cr = compile_isolated(pyfunc, (arraytype1,), flags=flags) - cfunc = cr.entry_point - - expected = pyfunc(a) - got = cfunc(a) - np.testing.assert_equal(expected, got) - - def test_add_axis2_npm(self): - with self.assertTypingError() as raises: - self.test_add_axis2(flags=no_pyobj_flags) - self.assertIn("unsupported array index type none in", - str(raises.exception)) - - def test_bad_index_npm(self): - with self.assertTypingError() as raises: - arraytype1 = from_dtype(np.dtype([('x', np.int32), - ('y', np.int32)])) - arraytype2 = types.Array(types.int32, 2, 'C') - compile_isolated(bad_index, (arraytype1, arraytype2), - 
flags=no_pyobj_flags) - self.assertIn('unsupported array index type', str(raises.exception)) - - def test_bad_float_index_npm(self): - with self.assertTypingError() as raises: - compile_isolated(bad_float_index, - (types.Array(types.float64, 2, 'C'),)) - self.assertIn('unsupported array index type float64', - str(raises.exception)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_array_methods.py b/numba/numba/tests/test_array_methods.py deleted file mode 100644 index 4c8f457fb..000000000 --- a/numba/numba/tests/test_array_methods.py +++ /dev/null @@ -1,923 +0,0 @@ -from __future__ import division - -from itertools import product, cycle, permutations -import sys - -import numpy as np - -from numba import unittest_support as unittest -from numba import jit, typeof, types -from numba.compiler import compile_isolated -from numba.errors import TypingError, LoweringError -from numba.numpy_support import (as_dtype, strict_ufunc_typing, - version as numpy_version) -from .support import TestCase, CompilationCache, MemoryLeak, MemoryLeakMixin, tag -from .matmul_usecase import needs_blas - - -def np_around_array(arr, decimals, out): - np.around(arr, decimals, out) - -def np_around_binary(val, decimals): - return np.around(val, decimals) - -def np_around_unary(val): - return np.around(val) - -def np_round_array(arr, decimals, out): - np.round(arr, decimals, out) - -def np_round_binary(val, decimals): - return np.round(val, decimals) - -def np_round_unary(val): - return np.round(val) - -def _fixed_np_round(arr, decimals=0, out=None): - """ - A slightly bugfixed version of np.round(). 
- """ - if out is not None and arr.dtype.kind == 'c': - # workaround for https://github.com/numpy/numpy/issues/5779 - _fixed_np_round(arr.real, decimals, out.real) - _fixed_np_round(arr.imag, decimals, out.imag) - return out - else: - res = np.round(arr, decimals, out) - if out is None: - # workaround for https://github.com/numpy/numpy/issues/5780 - def fixup_signed_zero(arg, res): - if res == 0.0 and arg < 0: - return -np.abs(res) - else: - return res - if isinstance(arr, (complex, np.complexfloating)): - res = complex(fixup_signed_zero(arr.real, res.real), - fixup_signed_zero(arr.imag, res.imag)) - else: - res = fixup_signed_zero(arr, res) - return res - - -def array_T(arr): - return arr.T - -def array_transpose(arr): - return arr.transpose() - -def array_copy(arr): - return arr.copy() - -def np_copy(arr): - return np.copy(arr) - -def np_asfortranarray(arr): - return np.asfortranarray(arr) - -def np_ascontiguousarray(arr): - return np.ascontiguousarray(arr) - -def array_view(arr, newtype): - return arr.view(newtype) - -def array_take(arr, indices): - return arr.take(indices) - -def array_take_kws(arr, indices, axis): - return arr.take(indices, axis=axis) - -def array_fill(arr, val): - return arr.fill(val) - -# XXX Can't pass a dtype as a Dispatcher argument for now -def make_array_view(newtype): - def array_view(arr): - return arr.view(newtype) - return array_view - -def array_sliced_view(arr, ): - return arr[0:4].view(np.float32)[0] - -def make_array_astype(newtype): - def array_astype(arr): - return arr.astype(newtype) - return array_astype - - -def np_frombuffer(b): - """ - np.frombuffer() on a Python-allocated buffer. - """ - return np.frombuffer(b) - -def np_frombuffer_dtype(b): - return np.frombuffer(b, dtype=np.complex64) - -def np_frombuffer_allocated(shape): - """ - np.frombuffer() on a Numba-allocated buffer. 
- """ - arr = np.ones(shape, dtype=np.int32) - return np.frombuffer(arr) - -def np_frombuffer_allocated_dtype(shape): - arr = np.ones(shape, dtype=np.int32) - return np.frombuffer(arr, dtype=np.complex64) - -def identity_usecase(a, b): - return (a is b), (a is not b) - -def array_nonzero(a): - return a.nonzero() - -def np_nonzero(a): - return np.nonzero(a) - -def np_where_1(c): - return np.where(c) - -def np_where_3(c, x, y): - return np.where(c, x, y) - -def array_item(a): - return a.item() - -def array_itemset(a, v): - a.itemset(v) - -def array_sum(a, *args): - return a.sum(*args) - -def array_sum_kws(a, axis): - return a.sum(axis=axis) - -def array_sum_const_multi(arr, axis): - # use np.sum with different constant args multiple times to check - # for internal compile cache to see if constant-specialization is - # applied properly. - a = np.sum(arr, axis=4) - b = np.sum(arr, 3) - # the last invocation uses runtime-variable - c = np.sum(arr, axis) - # as method - d = arr.sum(axis=5) - # negative const axis - e = np.sum(arr, axis=-1) - return a, b, c, d, e - -def array_cumsum(a, *args): - return a.cumsum(*args) - -def array_cumsum_kws(a, axis): - return a.cumsum(axis=axis) - - -def array_real(a): - return np.real(a) - - -def array_imag(a): - return np.imag(a) - - -def np_unique(a): - return np.unique(a) - - -def array_dot(a, b): - return a.dot(b) - - -def array_dot_chain(a, b): - return a.dot(b).dot(b) - - -class TestArrayMethods(MemoryLeakMixin, TestCase): - """ - Test various array methods and array-related functions. 
- """ - - def setUp(self): - super(TestArrayMethods, self).setUp() - self.ccache = CompilationCache() - - def check_round_scalar(self, unary_pyfunc, binary_pyfunc): - base_values = [-3.0, -2.5, -2.25, -1.5, 1.5, 2.25, 2.5, 2.75] - complex_values = [x * (1 - 1j) for x in base_values] - int_values = [int(x) for x in base_values] - argtypes = (types.float64, types.float32, types.int32, - types.complex64, types.complex128) - argvalues = [base_values, base_values, int_values, - complex_values, complex_values] - - pyfunc = binary_pyfunc - for ty, values in zip(argtypes, argvalues): - cres = compile_isolated(pyfunc, (ty, types.int32)) - cfunc = cres.entry_point - for decimals in (1, 0, -1): - for v in values: - if decimals > 0: - v *= 10 - expected = _fixed_np_round(v, decimals) - got = cfunc(v, decimals) - self.assertPreciseEqual(got, expected) - - pyfunc = unary_pyfunc - for ty, values in zip(argtypes, argvalues): - cres = compile_isolated(pyfunc, (ty,)) - cfunc = cres.entry_point - for v in values: - expected = _fixed_np_round(v) - got = cfunc(v) - self.assertPreciseEqual(got, expected) - - def test_round_scalar(self): - self.check_round_scalar(np_round_unary, np_round_binary) - - def test_around_scalar(self): - self.check_round_scalar(np_around_unary, np_around_binary) - - def check_round_array(self, pyfunc): - def check_round(cfunc, values, inty, outty, decimals): - # Create input and output arrays of the right type - arr = values.astype(as_dtype(inty)) - out = np.zeros_like(arr).astype(as_dtype(outty)) - pyout = out.copy() - _fixed_np_round(arr, decimals, pyout) - self.memory_leak_setup() - cfunc(arr, decimals, out) - self.memory_leak_teardown() - np.testing.assert_allclose(out, pyout) - # Output shape mismatch - with self.assertRaises(ValueError) as raises: - cfunc(arr, decimals, out[1:]) - self.assertEqual(str(raises.exception), - "invalid output shape") - - def check_types(argtypes, outtypes, values): - for inty, outty in product(argtypes, outtypes): - cres = 
compile_isolated(pyfunc, - (types.Array(inty, 1, 'A'), - types.int32, - types.Array(outty, 1, 'A'))) - cfunc = cres.entry_point - check_round(cres.entry_point, values, inty, outty, 0) - check_round(cres.entry_point, values, inty, outty, 1) - if not isinstance(outty, types.Integer): - check_round(cres.entry_point, values * 10, inty, outty, -1) - else: - # Avoid Numpy bug when output is an int: - # https://github.com/numpy/numpy/issues/5777 - pass - - values = np.array([-3.0, -2.5, -2.25, -1.5, 1.5, 2.25, 2.5, 2.75]) - - if strict_ufunc_typing: - argtypes = (types.float64, types.float32) - else: - argtypes = (types.float64, types.float32, types.int32) - check_types(argtypes, argtypes, values) - - argtypes = (types.complex64, types.complex128) - check_types(argtypes, argtypes, values * (1 - 1j)) - - # Exceptions leak references - self.disable_leak_check() - - def test_round_array(self): - self.check_round_array(np_round_array) - - def test_around_array(self): - self.check_round_array(np_around_array) - - def test_array_view(self): - - def run(arr, dtype): - pyfunc = make_array_view(dtype) - cres = self.ccache.compile(pyfunc, (typeof(arr),)) - return cres.entry_point(arr) - def check(arr, dtype): - expected = arr.view(dtype) - self.memory_leak_setup() - got = run(arr, dtype) - self.assertPreciseEqual(got, expected) - del got - self.memory_leak_teardown() - def check_err(arr, dtype): - with self.assertRaises(ValueError) as raises: - run(arr, dtype) - self.assertEqual(str(raises.exception), - "new type not compatible with array") - - dt1 = np.dtype([('a', np.int8), ('b', np.int8)]) - dt2 = np.dtype([('u', np.int16), ('v', np.int8)]) - dt3 = np.dtype([('x', np.int16), ('y', np.int16)]) - - # C-contiguous - arr = np.arange(24, dtype=np.int8) - check(arr, np.dtype('int16')) - check(arr, np.int16) - check(arr, np.int8) - check(arr, np.float32) - check(arr, np.complex64) - check(arr, dt1) - check(arr, dt2) - check_err(arr, np.complex128) - - # Last dimension must have a 
compatible size - arr = arr.reshape((3, 8)) - check(arr, np.int8) - check(arr, np.float32) - check(arr, np.complex64) - check(arr, dt1) - check_err(arr, dt2) - check_err(arr, np.complex128) - - # F-contiguous - arr = np.arange(24, dtype=np.int8).reshape((3, 8)).T - check(arr, np.int8) - check(arr, np.float32) - check(arr, np.complex64) - check(arr, dt1) - check_err(arr, dt2) - check_err(arr, np.complex128) - - # Non-contiguous: only a type with the same itemsize can be used - arr = np.arange(16, dtype=np.int32)[::2] - check(arr, np.uint32) - check(arr, np.float32) - check(arr, dt3) - check_err(arr, np.int8) - check_err(arr, np.int16) - check_err(arr, np.int64) - check_err(arr, dt1) - check_err(arr, dt2) - - # Zero-dim array: only a type with the same itemsize can be used - arr = np.array([42], dtype=np.int32).reshape(()) - check(arr, np.uint32) - check(arr, np.float32) - check(arr, dt3) - check_err(arr, np.int8) - check_err(arr, np.int16) - check_err(arr, np.int64) - check_err(arr, dt1) - check_err(arr, dt2) - - # Exceptions leak references - self.disable_leak_check() - - def test_array_sliced_view(self): - """ - Test .view() on A layout array but has contiguous innermost dimension. 
- """ - pyfunc = array_sliced_view - cres = self.ccache.compile(pyfunc, (types.uint8[:],)) - cfunc = cres.entry_point - - orig = np.array([1.5, 2], dtype=np.float32) - byteary = orig.view(np.uint8) - - expect = pyfunc(byteary) - got = cfunc(byteary) - - self.assertEqual(expect, got) - - def test_array_astype(self): - - def run(arr, dtype): - pyfunc = make_array_astype(dtype) - cres = self.ccache.compile(pyfunc, (typeof(arr),)) - return cres.entry_point(arr) - def check(arr, dtype): - expected = arr.astype(dtype).copy(order='A') - got = run(arr, dtype) - self.assertPreciseEqual(got, expected) - - # C-contiguous - arr = np.arange(24, dtype=np.int8) - check(arr, np.dtype('int16')) - check(arr, np.int32) - check(arr, np.float32) - check(arr, np.complex128) - - # F-contiguous - arr = np.arange(24, dtype=np.int8).reshape((3, 8)).T - check(arr, np.float32) - - # Non-contiguous - arr = np.arange(16, dtype=np.int32)[::2] - check(arr, np.uint64) - - # Invalid conversion - dt = np.dtype([('x', np.int8)]) - with self.assertTypingError() as raises: - check(arr, dt) - self.assertIn('cannot convert from int32 to Record', - str(raises.exception)) - - def check_np_frombuffer(self, pyfunc): - def run(buf): - cres = self.ccache.compile(pyfunc, (typeof(buf),)) - return cres.entry_point(buf) - def check(buf): - old_refcnt = sys.getrefcount(buf) - expected = pyfunc(buf) - self.memory_leak_setup() - got = run(buf) - self.assertPreciseEqual(got, expected) - del expected - self.assertEqual(sys.getrefcount(buf), old_refcnt + 1) - del got - self.assertEqual(sys.getrefcount(buf), old_refcnt) - self.memory_leak_teardown() - - b = bytearray(range(16)) - check(b) - if sys.version_info >= (3,): - check(bytes(b)) - check(memoryview(b)) - check(np.arange(12)) - b = np.arange(12).reshape((3, 4)) - check(b) - - # Exceptions leak references - self.disable_leak_check() - - with self.assertRaises(ValueError) as raises: - run(bytearray(b"xxx")) - self.assertEqual("buffer size must be a multiple of 
element size", - str(raises.exception)) - - def test_np_frombuffer(self): - self.check_np_frombuffer(np_frombuffer) - - def test_np_frombuffer_dtype(self): - self.check_np_frombuffer(np_frombuffer_dtype) - - def check_layout_dependent_func(self, pyfunc, fac=np.arange, - check_sameness=True): - def is_same(a, b): - return a.ctypes.data == b.ctypes.data - def check_arr(arr): - cres = compile_isolated(pyfunc, (typeof(arr),)) - expected = pyfunc(arr) - got = cres.entry_point(arr) - self.assertPreciseEqual(expected, got) - if check_sameness: - self.assertEqual(is_same(expected, arr), is_same(got, arr)) - arr = fac(24) - check_arr(arr) - check_arr(arr.reshape((3, 8))) - check_arr(arr.reshape((3, 8)).T) - check_arr(arr.reshape((3, 8))[::2]) - check_arr(arr.reshape((2, 3, 4))) - check_arr(arr.reshape((2, 3, 4)).T) - check_arr(arr.reshape((2, 3, 4))[::2]) - arr = np.array([0]).reshape(()) - check_arr(arr) - - - def test_array_transpose(self): - self.check_layout_dependent_func(array_transpose) - - @tag('important') - def test_array_T(self): - self.check_layout_dependent_func(array_T) - - @tag('important') - def test_array_copy(self): - self.check_layout_dependent_func(array_copy) - - def test_np_copy(self): - self.check_layout_dependent_func(np_copy) - - def test_np_asfortranarray(self): - self.check_layout_dependent_func(np_asfortranarray, - check_sameness=numpy_version >= (1, 8)) - - def test_np_ascontiguousarray(self): - self.check_layout_dependent_func(np_ascontiguousarray, - check_sameness=numpy_version > (1, 11)) - - def check_np_frombuffer_allocated(self, pyfunc): - def run(shape): - cres = self.ccache.compile(pyfunc, (typeof(shape),)) - return cres.entry_point(shape) - def check(shape): - expected = pyfunc(shape) - got = run(shape) - self.assertPreciseEqual(got, expected) - - check((16,)) - check((4, 4)) - check((1, 0, 1)) - - def test_np_frombuffer_allocated(self): - self.check_np_frombuffer_allocated(np_frombuffer_allocated) - - def 
test_np_frombuffer_allocated(self): - self.check_np_frombuffer_allocated(np_frombuffer_allocated_dtype) - - def check_nonzero(self, pyfunc): - def fac(N): - np.random.seed(42) - arr = np.random.random(N) - arr[arr < 0.3] = 0.0 - arr[arr > 0.7] = float('nan') - return arr - - def check_arr(arr): - cres = compile_isolated(pyfunc, (typeof(arr),)) - expected = pyfunc(arr) - # NOTE: Numpy 1.9 returns readonly arrays for multidimensional - # arrays. Workaround this by copying the results. - expected = [a.copy() for a in expected] - self.assertPreciseEqual(cres.entry_point(arr), expected) - - arr = np.int16([1, 0, -1, 0]) - check_arr(arr) - arr = np.bool_([1, 0, 1]) - check_arr(arr) - - arr = fac(24) - check_arr(arr) - check_arr(arr.reshape((3, 8))) - check_arr(arr.reshape((3, 8)).T) - check_arr(arr.reshape((3, 8))[::2]) - check_arr(arr.reshape((2, 3, 4))) - check_arr(arr.reshape((2, 3, 4)).T) - check_arr(arr.reshape((2, 3, 4))[::2]) - for v in (0.0, 1.5, float('nan')): - arr = np.array([v]).reshape(()) - check_arr(arr) - - def test_array_nonzero(self): - self.check_nonzero(array_nonzero) - - def test_np_nonzero(self): - self.check_nonzero(np_nonzero) - - def test_np_where_1(self): - self.check_nonzero(np_where_1) - - def test_np_where_3(self): - pyfunc = np_where_3 - def fac(N): - np.random.seed(42) - arr = np.random.random(N) - arr[arr < 0.3] = 0.0 - arr[arr > 0.7] = float('nan') - return arr - - def check_arr(arr): - x = np.zeros_like(arr, dtype=np.float64) - y = np.copy(x) - x.fill(4) - y.fill(9) - cres = compile_isolated(pyfunc, (typeof(arr), typeof(x), typeof(y))) - expected = pyfunc(arr, x, y) - got = cres.entry_point(arr, x, y) - # Contiguity of result varies accross Numpy versions, only - # check contents. 
- self.assertEqual(got.dtype, expected.dtype) - np.testing.assert_array_equal(got, expected) - - def check_scal(scal): - x = 4 - y = 5 - cres = compile_isolated(pyfunc, (typeof(scal), typeof(x), typeof(y))) - expected = pyfunc(scal, x, y) - got = cres.entry_point(scal, x, y) - self.assertPreciseEqual(got, expected) - - arr = np.int16([1, 0, -1, 0]) - check_arr(arr) - arr = np.bool_([1, 0, 1]) - check_arr(arr) - - arr = fac(24) - check_arr(arr) - check_arr(arr.reshape((3, 8))) - check_arr(arr.reshape((3, 8)).T) - check_arr(arr.reshape((3, 8))[::2]) - check_arr(arr.reshape((2, 3, 4))) - check_arr(arr.reshape((2, 3, 4)).T) - check_arr(arr.reshape((2, 3, 4))[::2]) - for v in (0.0, 1.5, float('nan')): - arr = np.array([v]).reshape(()) - check_arr(arr) - - for x in (0, 1, True, False, 2.5, 0j): - check_scal(x) - - def test_item(self): - pyfunc = array_item - cfunc = jit(nopython=True)(pyfunc) - - def check_ok(arg): - expected = pyfunc(arg) - got = cfunc(arg) - self.assertPreciseEqual(got, expected) - - def check_err(arg): - with self.assertRaises(ValueError) as raises: - cfunc(arg) - self.assertIn("item(): can only convert an array of size 1 to a Python scalar", - str(raises.exception)) - - # Exceptions leak references - self.disable_leak_check() - - # Test on different kinds of scalars and 1-item arrays - check_ok(np.float32([1.5])) - check_ok(np.complex128([[1.5j]])) - check_ok(np.array(1.5)) - check_ok(np.bool_(True)) - check_ok(np.float32(1.5)) - - check_err(np.array([1, 2])) - check_err(np.array([])) - - def test_itemset(self): - pyfunc = array_itemset - cfunc = jit(nopython=True)(pyfunc) - - def check_ok(a, v): - expected = a.copy() - got = a.copy() - pyfunc(expected, v) - cfunc(got, v) - self.assertPreciseEqual(got, expected) - - def check_err(a): - with self.assertRaises(ValueError) as raises: - cfunc(a, 42) - self.assertIn("itemset(): can only write to an array of size 1", - str(raises.exception)) - - # Exceptions leak references - self.disable_leak_check() - - 
# Test on different kinds of 1-item arrays - check_ok(np.float32([1.5]), 42) - check_ok(np.complex128([[1.5j]]), 42) - check_ok(np.array(1.5), 42) - - check_err(np.array([1, 2])) - check_err(np.array([])) - - def test_sum(self): - pyfunc = array_sum - cfunc = jit(nopython=True)(pyfunc) - # OK - a = np.ones((7, 6, 5, 4, 3)) - self.assertPreciseEqual(pyfunc(a), cfunc(a)) - # OK - self.assertPreciseEqual(pyfunc(a, 0), cfunc(a, 0)) - - def test_sum_kws(self): - pyfunc = array_sum_kws - cfunc = jit(nopython=True)(pyfunc) - # OK - a = np.ones((7, 6, 5, 4, 3)) - self.assertPreciseEqual(pyfunc(a, axis=1), cfunc(a, axis=1)) - # OK - self.assertPreciseEqual(pyfunc(a, axis=2), cfunc(a, axis=2)) - - def test_sum_const(self): - pyfunc = array_sum_const_multi - cfunc = jit(nopython=True)(pyfunc) - - arr = np.ones((3, 4, 5, 6, 7, 8)) - axis = 1 - self.assertPreciseEqual(pyfunc(arr, axis), cfunc(arr, axis)) - axis = 2 - self.assertPreciseEqual(pyfunc(arr, axis), cfunc(arr, axis)) - - def test_sum_exceptions(self): - # Exceptions leak references - self.disable_leak_check() - pyfunc = array_sum - cfunc = jit(nopython=True)(pyfunc) - - a = np.ones((7, 6, 5, 4, 3)) - b = np.ones((4, 3)) - # BAD: axis > dimensions - with self.assertRaises(ValueError): - cfunc(b, 2) - # BAD: negative axis - with self.assertRaises(ValueError): - cfunc(a, -1) - # BAD: axis greater than 3 - with self.assertRaises(ValueError): - cfunc(a, 4) - - def test_sum_const_negative(self): - # Exceptions leak references - self.disable_leak_check() - - @jit(nopython=True) - def foo(arr): - return arr.sum(axis=-3) - - # ndim == 4, axis == -3, OK - a = np.ones((1, 2, 3, 4)) - self.assertPreciseEqual(foo(a), foo.py_func(a)) - # ndim == 3, axis == -3, OK - a = np.ones((1, 2, 3)) - self.assertPreciseEqual(foo(a), foo.py_func(a)) - # ndim == 2, axis == -3, BAD - a = np.ones((1, 2)) - with self.assertRaises(LoweringError) as raises: - foo(a) - errmsg = "'axis' entry is out of bounds" - self.assertIn(errmsg, 
str(raises.exception)) - with self.assertRaises(ValueError) as raises: - foo.py_func(a) - # Numpy 1.13 has a different error message than prior numpy - # Just check for the "out of bounds" phrase in it. - self.assertIn("out of bounds", str(raises.exception)) - - def test_cumsum(self): - pyfunc = array_cumsum - cfunc = jit(nopython=True)(pyfunc) - # OK - a = np.ones((2, 3)) - self.assertPreciseEqual(pyfunc(a), cfunc(a)) - # BAD: with axis - with self.assertRaises(TypingError): - cfunc(a, 1) - # BAD: with kw axis - pyfunc = array_cumsum_kws - cfunc = jit(nopython=True)(pyfunc) - with self.assertRaises(TypingError): - cfunc(a, axis=1) - - def test_take(self): - pyfunc = array_take - cfunc = jit(nopython=True)(pyfunc) - - def check(arr, ind): - expected = pyfunc(arr, ind) - got = cfunc(arr, ind) - self.assertPreciseEqual(expected, got) - if hasattr(expected, 'order'): - self.assertEqual(expected.order == got.order) - - # need to check: - # 1. scalar index - # 2. 1d array index - # 3. nd array index, >2d and F order - # 4. reflected list - # 5. 
tuples - - test_indices = [] - test_indices.append(1) - test_indices.append(5) - test_indices.append(11) - test_indices.append(-2) - test_indices.append(np.array([1, 5, 1, 11, 3])) - test_indices.append(np.array([[1, 5, 1], [11, 3, 0]], order='F')) - test_indices.append(np.array([[[1, 5, 1], [11, 3, 0]]])) - test_indices.append(np.array([[[[1, 5]], [[11, 0]],[[1, 2]]]])) - test_indices.append([1, 5, 1, 11, 3]) - test_indices.append((1, 5, 1)) - test_indices.append(((1, 5, 1), (11, 3, 2))) - test_indices.append((((1,), (5,), (1,)), ((11,), (3,), (2,)))) - - layouts = cycle(['C', 'F', 'A']) - - for dt in [np.float64, np.int64, np.complex128]: - A = np.arange(12, dtype=dt).reshape((4, 3), order=next(layouts)) - for ind in test_indices: - check(A, ind) - - #check illegal access raises - A = np.arange(12, dtype=dt).reshape((4, 3), order=next(layouts)) - szA = A.size - illegal_indices = [szA, -szA - 1, np.array(szA), np.array(-szA - 1), - [szA], [-szA - 1]] - for x in illegal_indices: - with self.assertRaises(IndexError): - cfunc(A, x) # oob raises - - # check float indexing raises - with self.assertRaises(TypingError): - cfunc(A, [1.7]) - - # check unsupported arg raises - with self.assertRaises(TypingError): - take_kws = jit(nopython=True)(array_take_kws) - take_kws(A, 1, 1) - - # check kwarg unsupported raises - with self.assertRaises(TypingError): - take_kws = jit(nopython=True)(array_take_kws) - take_kws(A, 1, axis=1) - - #exceptions leak refs - self.disable_leak_check() - - def test_fill(self): - pyfunc = array_fill - cfunc = jit(nopython=True)(pyfunc) - def check(arr, val): - expected = np.copy(arr) - erv = pyfunc(expected, val) - self.assertTrue(erv is None) - got = np.copy(arr) - grv = cfunc(got, val) - self.assertTrue(grv is None) - # check mutation is the same - self.assertPreciseEqual(expected, got) - - # scalar - A = np.arange(1) - for x in [np.float64, np.bool_]: - check(A, x(10)) - - # 2d - A = np.arange(12).reshape(3, 4) - for x in [np.float64, np.bool_]: 
- check(A, x(10)) - - # 4d - A = np.arange(48, dtype=np.complex64).reshape(2, 3, 4, 2) - for x in [np.float64, np.complex128, np.bool_]: - check(A, x(10)) - - def test_real(self): - pyfunc = array_real - cfunc = jit(nopython=True)(pyfunc) - - x = np.linspace(-10, 10) - np.testing.assert_equal(pyfunc(x), cfunc(x)) - - x, y = np.meshgrid(x, x) - z = x + 1j*y - np.testing.assert_equal(pyfunc(z), cfunc(z)) - - def test_imag(self): - pyfunc = array_imag - cfunc = jit(nopython=True)(pyfunc) - - x = np.linspace(-10, 10) - np.testing.assert_equal(pyfunc(x), cfunc(x)) - - x, y = np.meshgrid(x, x) - z = x + 1j*y - np.testing.assert_equal(pyfunc(z), cfunc(z)) - - def test_unique(self): - pyfunc = np_unique - cfunc = jit(nopython=True)(pyfunc) - - def check(a): - np.testing.assert_equal(pyfunc(a), cfunc(a)) - - check(np.array([[1, 1, 3], [3, 4, 5]])) - check(np.array(np.zeros(5))) - check(np.array([[3.1, 3.1], [1.7, 2.29], [3.3, 1.7]])) - check(np.array([])) - - @needs_blas - def test_array_dot(self): - # just ensure that the dot impl dispatches correctly, do - # not test dot itself, this is done in test_linalg. - pyfunc = array_dot - cfunc = jit(nopython=True)(pyfunc) - a = np.arange(20.).reshape(4, 5) - b = np.arange(5.) 
- np.testing.assert_equal(pyfunc(a, b), cfunc(a, b)) - - # check that chaining works - pyfunc = array_dot_chain - cfunc = jit(nopython=True)(pyfunc) - a = np.arange(16.).reshape(4, 4) - np.testing.assert_equal(pyfunc(a, a), cfunc(a, a)) - - -class TestArrayComparisons(TestCase): - - def test_identity(self): - def check(a, b, expected): - cres = compile_isolated(pyfunc, (typeof(a), typeof(b))) - self.assertPreciseEqual(cres.entry_point(a, b), - (expected, not expected)) - - pyfunc = identity_usecase - - arr = np.zeros(10, dtype=np.int32).reshape((2, 5)) - check(arr, arr, True) - check(arr, arr[:], True) - check(arr, arr.copy(), False) - check(arr, arr.view('uint32'), False) - check(arr, arr.T, False) - check(arr, arr[:-1], False) - - # Other comparison operators ('==', etc.) are tested in test_ufuncs - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_array_reductions.py b/numba/numba/tests/test_array_reductions.py deleted file mode 100644 index f13e5ad19..000000000 --- a/numba/numba/tests/test_array_reductions.py +++ /dev/null @@ -1,677 +0,0 @@ -from __future__ import division - -from itertools import product, combinations_with_replacement - -import numpy as np - -from numba import unittest_support as unittest -from numba import jit, typeof -from numba.compiler import compile_isolated -from numba.numpy_support import version as np_version -from .support import TestCase, MemoryLeakMixin, tag - - -def array_all(arr): - return arr.all() - -def array_all_global(arr): - return np.all(arr) - -def array_any(arr): - return arr.any() - -def array_any_global(arr): - return np.any(arr) - -def array_cumprod(arr): - return arr.cumprod() - -def array_cumprod_global(arr): - return np.cumprod(arr) - -def array_cumsum(arr): - return arr.cumsum() - -def array_cumsum_global(arr): - return np.cumsum(arr) - -def array_sum(arr): - return arr.sum() - -def array_sum_global(arr): - return np.sum(arr) - -def array_prod(arr): - return arr.prod() - -def 
array_prod_global(arr): - return np.prod(arr) - -def array_mean(arr): - return arr.mean() - -def array_mean_global(arr): - return np.mean(arr) - -def array_var(arr): - return arr.var() - -def array_var_global(arr): - return np.var(arr) - -def array_std(arr): - return arr.std() - -def array_std_global(arr): - return np.std(arr) - -def array_min(arr): - return arr.min() - -def array_min_global(arr): - return np.min(arr) - -def array_max(arr): - return arr.max() - -def array_max_global(arr): - return np.max(arr) - -def array_argmin(arr): - return arr.argmin() - -def array_argmin_global(arr): - return np.argmin(arr) - -def array_argmax(arr): - return arr.argmax() - -def array_argmax_global(arr): - return np.argmax(arr) - -def array_median_global(arr): - return np.median(arr) - -def array_nanmin(arr): - return np.nanmin(arr) - -def array_nanmax(arr): - return np.nanmax(arr) - -def array_nanmean(arr): - return np.nanmean(arr) - -def array_nansum(arr): - return np.nansum(arr) - -def array_nanprod(arr): - return np.nanprod(arr) - -def array_nanstd(arr): - return np.nanstd(arr) - -def array_nanvar(arr): - return np.nanvar(arr) - -def array_nanmedian_global(arr): - return np.nanmedian(arr) - -def array_percentile_global(arr, q): - return np.percentile(arr, q) - -def array_nanpercentile_global(arr, q): - return np.nanpercentile(arr, q) - -def base_test_arrays(dtype): - if dtype == np.bool_: - def factory(n): - assert n % 2 == 0 - return np.bool_([0, 1] * (n // 2)) - else: - def factory(n): - return np.arange(n, dtype=dtype) + 1 - - a1 = factory(10) - a2 = factory(10).reshape(2, 5) - # The prod() of this array fits in a 32-bit int - a3 = (factory(12))[::-1].reshape((2, 3, 2), order='A') - assert not (a3.flags.c_contiguous or a3.flags.f_contiguous) - - return [a1, a2, a3] - -def full_test_arrays(dtype): - array_list = base_test_arrays(dtype) - - # Add floats with some mantissa - if dtype == np.float32: - array_list += [a / 10 for a in array_list] - - # add imaginary part - if 
dtype == np.complex64: - acc = [] - for a in array_list: - tmp = a / 10 + 1j * a / 11 - tmp[::2] = np.conj(tmp[::2]) - acc.append(tmp) - array_list.extend(acc) - - for a in array_list: - assert a.dtype == np.dtype(dtype) - return array_list - -def run_comparative(compare_func, test_array): - arrty = typeof(test_array) - cres = compile_isolated(compare_func, [arrty]) - numpy_result = compare_func(test_array) - numba_result = cres.entry_point(test_array) - - return numpy_result, numba_result - - -class TestArrayReductions(MemoryLeakMixin, TestCase): - """ - Test array reduction methods and functions such as .sum(), .max(), etc. - """ - - def setUp(self): - super(TestArrayReductions, self).setUp() - np.random.seed(42) - - def check_reduction_basic(self, pyfunc, all_nans=True, **kwargs): - # Basic reduction checks on 1-d float64 arrays - cfunc = jit(nopython=True)(pyfunc) - def check(arr): - self.assertPreciseEqual(pyfunc(arr), cfunc(arr), **kwargs) - - arr = np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]) - check(arr) - arr = np.float64([-0.0, -1.5]) - check(arr) - arr = np.float64([-1.5, 2.5, 'inf']) - check(arr) - arr = np.float64([-1.5, 2.5, '-inf']) - check(arr) - arr = np.float64([-1.5, 2.5, 'inf', '-inf']) - check(arr) - arr = np.float64(['nan', -1.5, 2.5, 'nan', 3.0]) - check(arr) - arr = np.float64(['nan', -1.5, 2.5, 'nan', 'inf', '-inf', 3.0]) - check(arr) - if all_nans: - # Only NaNs - arr = np.float64(['nan', 'nan']) - check(arr) - - @tag('important') - def test_all_basic(self, pyfunc=array_all): - cfunc = jit(nopython=True)(pyfunc) - def check(arr): - self.assertPreciseEqual(pyfunc(arr), cfunc(arr)) - - arr = np.float64([1.0, 0.0, float('inf'), float('nan')]) - check(arr) - arr[1] = -0.0 - check(arr) - arr[1] = 1.5 - check(arr) - arr = arr.reshape((2, 2)) - check(arr) - check(arr[::-1]) - - @tag('important') - def test_any_basic(self, pyfunc=array_any): - cfunc = jit(nopython=True)(pyfunc) - def check(arr): - self.assertPreciseEqual(pyfunc(arr), cfunc(arr)) - 
- arr = np.float64([0.0, -0.0, 0.0, 0.0]) - check(arr) - arr[2] = float('nan') - check(arr) - arr[2] = float('inf') - check(arr) - arr[2] = 1.5 - check(arr) - arr = arr.reshape((2, 2)) - check(arr) - check(arr[::-1]) - - @tag('important') - def test_sum_basic(self): - self.check_reduction_basic(array_sum) - - @tag('important') - def test_mean_basic(self): - self.check_reduction_basic(array_mean) - - @tag('important') - def test_var_basic(self): - self.check_reduction_basic(array_var, prec='double') - - @tag('important') - def test_std_basic(self): - self.check_reduction_basic(array_std) - - @tag('important') - def test_min_basic(self): - self.check_reduction_basic(array_min) - - @tag('important') - def test_max_basic(self): - self.check_reduction_basic(array_max) - - @tag('important') - def test_argmin_basic(self): - self.check_reduction_basic(array_argmin) - - @tag('important') - def test_argmax_basic(self): - self.check_reduction_basic(array_argmax) - - @tag('important') - def test_nanmin_basic(self): - self.check_reduction_basic(array_nanmin) - - @tag('important') - def test_nanmax_basic(self): - self.check_reduction_basic(array_nanmax) - - @tag('important') - @unittest.skipUnless(np_version >= (1, 8), "nanmean needs Numpy 1.8+") - def test_nanmean_basic(self): - self.check_reduction_basic(array_nanmean) - - @tag('important') - def test_nansum_basic(self): - # Note Numpy < 1.9 has different behaviour for all NaNs: - # it returns Nan while later Numpy returns 0. 
- self.check_reduction_basic(array_nansum, - all_nans=np_version >= (1, 9)) - - @tag('important') - @unittest.skipUnless(np_version >= (1, 10), "nanprod needs Numpy 1.10+") - def test_nanprod_basic(self): - self.check_reduction_basic(array_nanprod) - - @tag('important') - @unittest.skipUnless(np_version >= (1, 8), "nanstd needs Numpy 1.8+") - def test_nanstd_basic(self): - self.check_reduction_basic(array_nanstd) - - @tag('important') - @unittest.skipUnless(np_version >= (1, 8), "nanvar needs Numpy 1.8+") - def test_nanvar_basic(self): - self.check_reduction_basic(array_nanvar, prec='double') - - def check_median_basic(self, pyfunc, array_variations): - cfunc = jit(nopython=True)(pyfunc) - def check(arr): - expected = pyfunc(arr) - got = cfunc(arr) - self.assertPreciseEqual(got, expected) - - # Odd sizes - def check_odd(a): - check(a) - a = a.reshape((9, 7)) - check(a) - check(a.T) - for a in array_variations(np.arange(63) + 10.5): - check_odd(a) - - # Even sizes - def check_even(a): - check(a) - a = a.reshape((4, 16)) - check(a) - check(a.T) - for a in array_variations(np.arange(64) + 10.5): - check_even(a) - - @tag('important') - def test_median_basic(self): - pyfunc = array_median_global - - def variations(a): - # Sorted, reversed, random, many duplicates - yield a - a = a[::-1].copy() - yield a - np.random.shuffle(a) - yield a - a[a % 4 >= 1] = 3.5 - yield a - - self.check_median_basic(pyfunc, variations) - - def check_percentile_basic(self, pyfunc, array_variations, percentile_variations): - cfunc = jit(nopython=True)(pyfunc) - - def check(a, q): - expected = pyfunc(a, q) - got = cfunc(a, q) - self.assertPreciseEqual(got, expected, abs_tol='eps') - - def check_err(a, q): - with self.assertRaises(ValueError) as raises: - cfunc(a, q) - self.assertEqual("Percentiles must be in the range [0,100]", str(raises.exception)) - - def perform_checks(a, q): - check(a, q) - a = a.reshape((3, 3, 7)) - check(a, q) - check(a.astype(np.int32), q) - - for a in 
array_variations(np.arange(63) - 10.5): - for q in percentile_variations(np.array([0, 50, 100, 66.6])): - perform_checks(a, q) - - # Exceptions leak references - self.disable_leak_check() - - a = np.arange(5) - check_err(a, -5) # q less than 0 - check_err(a, (1, 10, 105)) # q contains value greater than 100 - check_err(a, (1, 10, np.nan)) # q contains nan - - @staticmethod - def _array_variations(a): - # Sorted, reversed, random, many duplicates, many NaNs, all NaNs - yield a - a = a[::-1].copy() - yield a - np.random.shuffle(a) - yield a - a[a % 4 >= 1] = 3.5 - yield a - a[a % 4 >= 2] = np.nan - yield a - a[:] = np.nan - yield a - - @staticmethod - def _percentile_variations(q): - yield q - yield q[::-1].astype(np.int32).tolist() - yield q[-1] - yield int(q[-1]) - yield tuple(q) - yield False - - def check_percentile_edge_cases(self, pyfunc): - cfunc = jit(nopython=True)(pyfunc) - - def check(a, q, abs_tol): - expected = pyfunc(a, q) - got = cfunc(a, q) - self.assertPreciseEqual(got, expected, abs_tol=abs_tol) - - def _array_combinations(elements): - for i in range(1, 10): - for comb in combinations_with_replacement(elements, i): - yield np.array(comb) - - # high number of combinations, many including non-finite values - q = (0, 10, 20, 100) - element_pool = (1, -1, np.nan, np.inf, -np.inf) - for a in _array_combinations(element_pool): - check(a, q, abs_tol=1e-14) # 'eps' fails, tbd... 
- - @unittest.skipUnless(np_version >= (1, 10), "percentile needs Numpy 1.10+") - def test_percentile_basic(self): - pyfunc = array_percentile_global - self.check_percentile_basic(pyfunc, self._array_variations, self._percentile_variations) - self.check_percentile_edge_cases(pyfunc) - - @unittest.skipUnless(np_version >= (1, 11), "nanpercentile needs Numpy 1.11+") - def test_nanpercentile_basic(self): - pyfunc = array_nanpercentile_global - self.check_percentile_basic(pyfunc, self._array_variations, self._percentile_variations) - self.check_percentile_edge_cases(pyfunc) - - @unittest.skipUnless(np_version >= (1, 9), "nanmedian needs Numpy 1.9+") - def test_nanmedian_basic(self): - pyfunc = array_nanmedian_global - self.check_median_basic(pyfunc, self._array_variations) - - def test_array_sum_global(self): - arr = np.arange(10, dtype=np.int32) - arrty = typeof(arr) - self.assertEqual(arrty.ndim, 1) - self.assertEqual(arrty.layout, 'C') - - cres = compile_isolated(array_sum_global, [arrty]) - cfunc = cres.entry_point - - self.assertEqual(np.sum(arr), cfunc(arr)) - - def test_array_prod_int_1d(self): - arr = np.arange(10, dtype=np.int32) + 1 - arrty = typeof(arr) - self.assertEqual(arrty.ndim, 1) - self.assertEqual(arrty.layout, 'C') - - cres = compile_isolated(array_prod, [arrty]) - cfunc = cres.entry_point - - self.assertEqual(arr.prod(), cfunc(arr)) - - def test_array_prod_float_1d(self): - arr = np.arange(10, dtype=np.float32) + 1 / 10 - arrty = typeof(arr) - self.assertEqual(arrty.ndim, 1) - self.assertEqual(arrty.layout, 'C') - - cres = compile_isolated(array_prod, [arrty]) - cfunc = cres.entry_point - - np.testing.assert_allclose(arr.prod(), cfunc(arr)) - - def test_array_prod_global(self): - arr = np.arange(10, dtype=np.int32) - arrty = typeof(arr) - self.assertEqual(arrty.ndim, 1) - self.assertEqual(arrty.layout, 'C') - - cres = compile_isolated(array_prod_global, [arrty]) - cfunc = cres.entry_point - - np.testing.assert_allclose(np.prod(arr), cfunc(arr)) - - 
def check_cumulative(self, pyfunc): - arr = np.arange(2, 10, dtype=np.int16) - expected, got = run_comparative(pyfunc, arr) - self.assertPreciseEqual(got, expected) - arr = np.linspace(2, 8, 6) - expected, got = run_comparative(pyfunc, arr) - self.assertPreciseEqual(got, expected) - arr = arr.reshape((3, 2)) - expected, got = run_comparative(pyfunc, arr) - self.assertPreciseEqual(got, expected) - - @tag('important') - def test_array_cumsum(self): - self.check_cumulative(array_cumsum) - - def test_array_cumsum_global(self): - self.check_cumulative(array_cumsum_global) - - @tag('important') - def test_array_cumprod(self): - self.check_cumulative(array_cumprod) - - def test_array_cumprod_global(self): - self.check_cumulative(array_cumprod_global) - - def check_aggregation_magnitude(self, pyfunc, is_prod=False): - """ - Check that integer overflows are avoided (issue #931). - """ - # Overflows are avoided here (ints are cast either to intp - # or float64). - n_items = 2 if is_prod else 10 # avoid overflow on prod() - arr = (np.arange(n_items) + 40000).astype('int16') - npr, nbr = run_comparative(pyfunc, arr) - self.assertPreciseEqual(npr, nbr) - # Overflows are avoided for functions returning floats here. - # Other functions may wrap around. 
- arr = (np.arange(10) + 2**60).astype('int64') - npr, nbr = run_comparative(pyfunc, arr) - self.assertPreciseEqual(npr, nbr) - arr = arr.astype('uint64') - npr, nbr = run_comparative(pyfunc, arr) - self.assertPreciseEqual(npr, nbr) - - def test_sum_magnitude(self): - self.check_aggregation_magnitude(array_sum) - self.check_aggregation_magnitude(array_sum_global) - - def test_cumsum_magnitude(self): - self.check_aggregation_magnitude(array_cumsum) - self.check_aggregation_magnitude(array_cumsum_global) - - def test_prod_magnitude(self): - self.check_aggregation_magnitude(array_prod, is_prod=True) - self.check_aggregation_magnitude(array_prod_global, is_prod=True) - - def test_cumprod_magnitude(self): - self.check_aggregation_magnitude(array_cumprod, is_prod=True) - self.check_aggregation_magnitude(array_cumprod_global, is_prod=True) - - def test_mean_magnitude(self): - self.check_aggregation_magnitude(array_mean) - self.check_aggregation_magnitude(array_mean_global) - - def test_var_magnitude(self): - self.check_aggregation_magnitude(array_var) - self.check_aggregation_magnitude(array_var_global) - - def test_std_magnitude(self): - self.check_aggregation_magnitude(array_std) - self.check_aggregation_magnitude(array_std_global) - - def _do_check_nptimedelta(self, pyfunc, arr): - arrty = typeof(arr) - cfunc = jit(nopython=True)(pyfunc) - - self.assertPreciseEqual(cfunc(arr), pyfunc(arr)) - # Even vs. 
odd size, for np.median - self.assertPreciseEqual(cfunc(arr[:-1]), pyfunc(arr[:-1])) - # Test with different orders, for np.median - arr = arr[::-1].copy() # Keep 'C' layout - self.assertPreciseEqual(cfunc(arr), pyfunc(arr)) - np.random.shuffle(arr) - self.assertPreciseEqual(cfunc(arr), pyfunc(arr)) - # Test with a NaT - arr[arr.size // 2] = 'NaT' - self.assertPreciseEqual(cfunc(arr), pyfunc(arr)) - # Test with all NaTs - arr.fill(arrty.dtype('NaT')) - self.assertPreciseEqual(cfunc(arr), pyfunc(arr)) - - def check_npdatetime(self, pyfunc): - arr = np.arange(10).astype(dtype='M8[Y]') - self._do_check_nptimedelta(pyfunc, arr) - - def check_nptimedelta(self, pyfunc): - arr = np.arange(10).astype(dtype='m8[s]') - self._do_check_nptimedelta(pyfunc, arr) - - def test_min_npdatetime(self): - self.check_npdatetime(array_min) - self.check_nptimedelta(array_min) - - def test_max_npdatetime(self): - self.check_npdatetime(array_max) - self.check_nptimedelta(array_max) - - def test_argmin_npdatetime(self): - self.check_npdatetime(array_argmin) - self.check_nptimedelta(array_argmin) - - def test_argmax_npdatetime(self): - self.check_npdatetime(array_argmax) - self.check_nptimedelta(array_argmax) - - def test_median_npdatetime(self): - self.check_nptimedelta(array_median_global) - - def test_sum_npdatetime(self): - self.check_nptimedelta(array_sum) - - def test_cumsum_npdatetime(self): - self.check_nptimedelta(array_cumsum) - - def test_mean_npdatetime(self): - self.check_nptimedelta(array_mean) - - @classmethod - def install_generated_tests(cls): - # These form a testing product where each of the combinations are tested - - # these function are tested in real and complex space - reduction_funcs = [array_sum, array_sum_global, - array_prod, array_prod_global, - array_mean, array_mean_global, - array_var, array_var_global, - array_std, array_std_global, - array_all, array_all_global, - array_any, array_any_global, - array_nansum, - ] - - # these functions only work in real space 
as no complex comparison - # operator is implemented - reduction_funcs_rspace = [array_min, array_min_global, - array_max, array_max_global, - array_argmin, array_argmin_global, - array_argmax, array_argmax_global, - array_nanmax, array_nanmin] - - if np_version >= (1, 8): - reduction_funcs += [array_nanmean, array_nanstd, array_nanvar] - if np_version >= (1, 10): - reduction_funcs += [array_nanprod] - - dtypes_to_test = [np.int32, np.float32, np.bool_, np.complex64] - - def install_tests(dtypes, funcs): - # Install tests on class - for dt in dtypes: - test_arrays = full_test_arrays(dt) - for red_func, test_array in product(funcs, test_arrays): - # Create the name for the test function - test_name = "test_{0}_{1}_{2}d" - test_name = test_name.format(red_func.__name__, - test_array.dtype.name, - test_array.ndim) - - def new_test_function(self, redFunc=red_func, - testArray=test_array, - testName=test_name): - ulps = 1 - if 'prod' in red_func.__name__ and \ - np.iscomplexobj(testArray): - # prod family accumulate slightly more error on - # some architectures (power, 32bit) for complex input - ulps = 3 - npr, nbr = run_comparative(redFunc, testArray) - self.assertPreciseEqual(npr, nbr, msg=test_name, - prec="single", ulps=ulps) - - # Install it into the class - setattr(cls, test_name, new_test_function) - - # install tests for reduction functions that only work in real space - install_tests(dtypes_to_test[:-1], reduction_funcs_rspace) - - # install tests for reduction functions - install_tests(dtypes_to_test, reduction_funcs) - - -TestArrayReductions.install_generated_tests() - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_array_return.py b/numba/numba/tests/test_array_return.py deleted file mode 100644 index e774f75a9..000000000 --- a/numba/numba/tests/test_array_return.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import numpy as np - -from numba.compiler import 
compile_isolated -from numba import typeof -from numba import unittest_support as unittest -from .support import MemoryLeakMixin - - -def array_return(a, i): - a[i] = 123 - return a - - -def array_return_start_with_loop(a): - for i in range(a.size): - a[i] += 1 - return a - - -class TestArrayReturn(MemoryLeakMixin, unittest.TestCase): - def test_array_return(self): - a = np.arange(10) - i = 2 - at, it = typeof(a), typeof(i) - cres = compile_isolated(array_return, (at, it)) - cfunc = cres.entry_point - self.assertIs(a, cfunc(a, i)) - - def test_array_return_start_with_loop(self): - """ - A bug breaks array return if the function starts with a loop - """ - a = np.arange(10) - at = typeof(a) - cres = compile_isolated(array_return_start_with_loop, [at]) - cfunc = cres.entry_point - self.assertIs(a, cfunc(a)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_auto_constants.py b/numba/numba/tests/test_auto_constants.py deleted file mode 100644 index f38fbc985..000000000 --- a/numba/numba/tests/test_auto_constants.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import math -import sys - -import numpy as np - -from numba import unittest_support as unittest -from numba.compiler import compile_isolated - - -class TestAutoConstants(unittest.TestCase): - def test_numpy_nan(self): - def pyfunc(): - return np.nan - - cres = compile_isolated(pyfunc, ()) - cfunc = cres.entry_point - - self.assertTrue(math.isnan(pyfunc())) - self.assertTrue(math.isnan(cfunc())) - - def test_sys_constant(self): - def pyfunc(): - return sys.hexversion - - cres = compile_isolated(pyfunc, ()) - cfunc = cres.entry_point - - self.assertEqual(pyfunc(), cfunc()) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/tests/test_blackscholes.py b/numba/numba/tests/test_blackscholes.py deleted file mode 100644 index 99717a808..000000000 --- a/numba/numba/tests/test_blackscholes.py +++ /dev/null @@ 
-1,205 +0,0 @@ -from __future__ import print_function - -import math - -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, compile_extra, Flags -from numba import types, typing -from .support import TestCase - - -RISKFREE = 0.02 -VOLATILITY = 0.30 - - -A1 = 0.31938153 -A2 = -0.356563782 -A3 = 1.781477937 -A4 = -1.821255978 -A5 = 1.330274429 -RSQRT2PI = 0.39894228040143267793994605993438 - - -def cnd_array(d): - K = 1.0 / (1.0 + 0.2316419 * np.abs(d)) - ret_val = (RSQRT2PI * np.exp(-0.5 * d * d) * - (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))))) - return np.where(d > 0, 1.0 - ret_val, ret_val) - - -def cnd(d): - K = 1.0 / (1.0 + 0.2316419 * math.fabs(d)) - ret_val = (RSQRT2PI * math.exp(-0.5 * d * d) * - (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))))) - if d > 0: - ret_val = 1.0 - ret_val - return ret_val - - -def blackscholes_arrayexpr(stockPrice, optionStrike, optionYears, Riskfree, - Volatility): - S = stockPrice - X = optionStrike - T = optionYears - R = Riskfree - V = Volatility - sqrtT = np.sqrt(T) - d1 = (np.log(S / X) + (R + 0.5 * V * V) * T) / (V * sqrtT) - d2 = d1 - V * sqrtT - cndd1 = cnd_array(d1) - cndd2 = cnd_array(d2) - - expRT = np.exp(- R * T) - - callResult = (S * cndd1 - X * expRT * cndd2) - putResult = (X * expRT * (1.0 - cndd2) - S * (1.0 - cndd1)) - return callResult, putResult - - -def blackscholes_arrayexpr_jitted(stockPrice, optionStrike, optionYears, - Riskfree, Volatility): - S = stockPrice - X = optionStrike - T = optionYears - R = Riskfree - V = Volatility - sqrtT = np.sqrt(T) - d1 = (np.log(S / X) + (R + 0.5 * V * V) * T) / (V * sqrtT) - d2 = d1 - V * sqrtT - cndd1 = cnd_array_jitted(d1) - cndd2 = cnd_array_jitted(d2) - - expRT = np.exp(- R * T) - - callResult = (S * cndd1 - X * expRT * cndd2) - putResult = (X * expRT * (1.0 - cndd2) - S * (1.0 - cndd1)) - return callResult, putResult - - -def blackscholes_scalar(callResult, putResult, stockPrice, optionStrike, - 
optionYears, Riskfree, Volatility): - S = stockPrice - X = optionStrike - T = optionYears - R = Riskfree - V = Volatility - for i in range(len(S)): - sqrtT = math.sqrt(T[i]) - d1 = (math.log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i]) / (V * sqrtT) - d2 = d1 - V * sqrtT - cndd1 = cnd(d1) - cndd2 = cnd(d2) - - expRT = math.exp((-1. * R) * T[i]) - callResult[i] = (S[i] * cndd1 - X[i] * expRT * cndd2) - putResult[i] = (X[i] * expRT * (1.0 - cndd2) - S[i] * (1.0 - cndd1)) - - -def blackscholes_scalar_jitted(callResult, putResult, stockPrice, optionStrike, - optionYears, Riskfree, Volatility): - S = stockPrice - X = optionStrike - T = optionYears - R = Riskfree - V = Volatility - for i in range(len(S)): - sqrtT = math.sqrt(T[i]) - d1 = (math.log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i]) / (V * sqrtT) - d2 = d1 - V * sqrtT - cndd1 = cnd_jitted(d1) - cndd2 = cnd_jitted(d2) - - expRT = math.exp((-1. * R) * T[i]) - callResult[i] = (S[i] * cndd1 - X[i] * expRT * cndd2) - putResult[i] = (X[i] * expRT * (1.0 - cndd2) - S[i] * (1.0 - cndd1)) - - -def randfloat(rand_var, low, high): - return (1.0 - rand_var) * low + rand_var * high - - -class TestBlackScholes(TestCase): - def test_array_expr(self): - flags = Flags() - flags.set("enable_pyobject") - - global cnd_array_jitted - scalty = types.float64 - arrty = types.Array(scalty, 1, 'C') - cr1 = compile_isolated(cnd_array, args=(arrty,), flags=flags) - cnd_array_jitted = cr1.entry_point - cr2 = compile_isolated(blackscholes_arrayexpr_jitted, - args=(arrty, arrty, arrty, scalty, scalty), - flags=flags) - jitted_bs = cr2.entry_point - - OPT_N = 400 - iterations = 10 - - - stockPrice = randfloat(self.random.random_sample(OPT_N), 5.0, 30.0) - optionStrike = randfloat(self.random.random_sample(OPT_N), 1.0, 100.0) - optionYears = randfloat(self.random.random_sample(OPT_N), 0.25, 10.0) - - args = stockPrice, optionStrike, optionYears, RISKFREE, VOLATILITY - - callResultGold, putResultGold = blackscholes_arrayexpr(*args) - callResultNumba, 
putResultNumba = jitted_bs(*args) - - delta = np.abs(callResultGold - callResultNumba) - L1norm = delta.sum() / np.abs(callResultGold).sum() - print("L1 norm: %E" % L1norm) - print("Max absolute error: %E" % delta.max()) - self.assertEqual(delta.max(), 0) - - def test_scalar(self): - flags = Flags() - - # Compile the inner function - global cnd_jitted - cr1 = compile_isolated(cnd, (types.float64,)) - cnd_jitted = cr1.entry_point - # Manually type the compiled function for calling into - tyctx = cr1.typing_context - ctx = cr1.target_context - signature = typing.make_concrete_template("cnd_jitted", cnd_jitted, - [cr1.signature]) - tyctx.insert_user_function(cnd_jitted, signature) - - # Compile the outer function - array = types.Array(types.float64, 1, 'C') - argtys = (array,) * 5 + (types.float64, types.float64) - cr2 = compile_extra(tyctx, ctx, blackscholes_scalar_jitted, - args=argtys, return_type=None, flags=flags, - locals={}) - jitted_bs = cr2.entry_point - - OPT_N = 400 - iterations = 10 - - callResultGold = np.zeros(OPT_N) - putResultGold = np.zeros(OPT_N) - - callResultNumba = np.zeros(OPT_N) - putResultNumba = np.zeros(OPT_N) - - stockPrice = randfloat(self.random.random_sample(OPT_N), 5.0, 30.0) - optionStrike = randfloat(self.random.random_sample(OPT_N), 1.0, 100.0) - optionYears = randfloat(self.random.random_sample(OPT_N), 0.25, 10.0) - - args = stockPrice, optionStrike, optionYears, RISKFREE, VOLATILITY - - blackscholes_scalar(callResultGold, putResultGold, *args) - jitted_bs(callResultNumba, putResultNumba, *args) - - delta = np.abs(callResultGold - callResultNumba) - L1norm = delta.sum() / np.abs(callResultGold).sum() - print("L1 norm: %E" % L1norm) - print("Max absolute error: %E" % delta.max()) - self.assertAlmostEqual(delta.max(), 0) - - -if __name__ == "__main__": - unittest.main() diff --git a/numba/numba/tests/test_buffer_protocol.py b/numba/numba/tests/test_buffer_protocol.py deleted file mode 100644 index eb500c731..000000000 --- 
a/numba/numba/tests/test_buffer_protocol.py +++ /dev/null @@ -1,316 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import array -import sys - -import numpy as np - -from numba import unittest_support as unittest -from numba import jit -from .support import TestCase, compile_function, MemoryLeakMixin - - -@jit(nopython=True) -def len_usecase(buf): - return len(buf) - - -@jit(nopython=True) -def getitem_usecase(buf, i): - return buf[i] - - -@jit(nopython=True) -def getslice_usecase(buf, i, j): - s = buf[i:j] - return s[0] + 2 * s[-1] - - -@jit(nopython=True) -def setitem_usecase(buf, i, v): - buf[i] = v - - -@jit(nopython=True) -def iter_usecase(buf): - res = 0.0 - for i, x in enumerate(buf): - res += x - res *= i + 1 - return res - - -def attrgetter(attr): - code = """def func(x): - return x.%(attr)s -""" % locals() - pyfunc = compile_function("func", code, globals()) - return jit(nopython=True)(pyfunc) - - -contiguous_usecase = attrgetter("contiguous") -c_contiguous_usecase = attrgetter("c_contiguous") -f_contiguous_usecase = attrgetter("f_contiguous") -itemsize_usecase = attrgetter("itemsize") -nbytes_usecase = attrgetter("nbytes") -ndim_usecase = attrgetter("ndim") -readonly_usecase = attrgetter("readonly") -shape_usecase = attrgetter("shape") -strides_usecase = attrgetter("strides") - -# On Python 2, array.array doesn't support the PEP 3118 buffer API -array_supported = sys.version_info >= (3,) -# On Python 2, bytes is really the str object -bytes_supported = sys.version_info >= (3,) -# On Python 2, indexing a memoryview returns bytes -memoryview_structured_indexing = sys.version_info >= (3,) - - -class TestBufferProtocol(MemoryLeakMixin, TestCase): - """ - Test operations on buffer-providing objects. 
- """ - - def _arrays(self): - n = 10 - for letter, offset in [ - ('b', -3), - ('B', 0), - ('h', -5000), - ('H', 40000), - ('i', -100000), - ('I', 1000000), - ('l', -100000), - ('L', 1000000), - ('q', -2**60), - ('Q', 2**63 + 1), - ('f', 1.5), - ('d', -1.5), - ]: - yield array.array(letter, [i + offset for i in range(n)]) - - def _memoryviews(self): - n = 10 - yield memoryview(bytearray(b"abcdefghi")) - yield memoryview(b"abcdefghi") - # Different item types - for dtype, start, stop in [ - ('int8', -10, 10), - ('uint8', 0, 10), - ('int16', -5000, 1000), - ('uint16', 40000, 50000), - ('int32', -100000, 100000), - ('uint32', 0, 1000000), - ('int64', -2**60, 10), - ('uint64', 0, 2**64 - 10), - ('float32', 1.5, 3.5), - ('float64', 1.5, 3.5), - ('complex64', -8j, 12 + 5j), - ('complex128', -8j, 12 + 5j), - ]: - yield memoryview(np.linspace(start, stop, n).astype(dtype)) - # Different layouts - arr = np.arange(12).reshape((3, 4)) - assert arr.flags.c_contiguous and not arr.flags.f_contiguous - yield memoryview(arr) - arr = arr.T - assert arr.flags.f_contiguous and not arr.flags.c_contiguous - yield memoryview(arr) - arr = arr[::2] - assert not arr.flags.f_contiguous and not arr.flags.c_contiguous - yield memoryview(arr) - - def _readonlies(self): - if bytes_supported: - yield b"xyz" - if memoryview_structured_indexing: - yield memoryview(b"abcdefghi") - arr = np.arange(5) - arr.setflags(write=False) - yield memoryview(arr) - - def _check_unary(self, jitfunc, *args): - pyfunc = jitfunc.py_func - self.assertPreciseEqual(jitfunc(*args), pyfunc(*args)) - - def check_len(self, obj): - self._check_unary(len_usecase, obj) - - def check_iter(self, obj): - self._check_unary(iter_usecase, obj) - - def check_getitem(self, obj): - # Be careful to index all dimensions, since we don't support - # partial indexing yet. 
- def yield_indices(obj): - try: - shape = obj.shape - except AttributeError: - shape = len(obj), - for tup in np.ndindex(shape): - # Simple 1d buffer-providing objects usually don't support - # tuple indexing. - if len(tup) == 1: - yield tup[0] - else: - yield tup - - for i in yield_indices(obj): - try: - expected = obj[i] - except (NotImplementedError, TypeError): - if isinstance(obj, memoryview): - # The memoryview object doesn't support all codes yet, - # fall back on the underlying object. - expected = obj.obj[i] - else: - raise - self.assertPreciseEqual(getitem_usecase(obj, i), expected) - - def check_setitem(self, obj): - for i in range(len(obj)): - orig = list(obj) - val = obj[i] // 2 + 1 - setitem_usecase(obj, i, val) - self.assertEqual(obj[i], val) - for j, val in enumerate(orig): - if j != i: - self.assertEqual(obj[j], val) - - def check_getslice(self, obj): - self._check_unary(getslice_usecase, obj, 1, len(obj) - 1) - - def test_len(self): - self.check_len(bytearray(5)) - if bytes_supported: - self.check_len(b"xyz") - for mem in self._memoryviews(): - self.check_len(mem) - if array_supported: - for arr in self._arrays(): - self.check_len(arr) - for buf in self._readonlies(): - self.check_getitem(buf) - - def test_getitem(self): - self.check_getitem(bytearray(b"abc")) - if bytes_supported: - self.check_getitem(b"xyz") - if memoryview_structured_indexing: - for mem in self._memoryviews(): - self.check_getitem(mem) - if array_supported: - for arr in self._arrays(): - self.check_getitem(arr) - for buf in self._readonlies(): - self.check_getitem(buf) - - def test_getslice(self): - with self.assertTypingError(): - self.check_getslice(bytearray(b"abcde")) - if bytes_supported: - self.check_getslice(b"xyzuvw") - if memoryview_structured_indexing: - self.check_getslice(memoryview(b"xyzuvw")) - if array_supported: - with self.assertTypingError(): - self.check_getslice(array.array('i', range(10))) - for buf in self._readonlies(): - self.check_getitem(buf) - - def 
test_setitem(self): - self.check_setitem(bytearray(b"abcdefghi")) - if array_supported: - for arr in self._arrays(): - self.check_setitem(arr) - if memoryview_structured_indexing: - for mem in self._memoryviews(): - self.check_getitem(mem) - # Read-only buffers - for buf in self._readonlies(): - with self.assertTypingError(): - self.check_setitem(buf) - - def test_iter(self): - self.check_iter(bytearray(b"abc")) - if bytes_supported: - self.check_iter(b"xyz") - if memoryview_structured_indexing: - self.check_iter(memoryview(b"xyz")) - if array_supported: - for arr in self._arrays(): - self.check_iter(arr) - for buf in self._readonlies(): - self.check_getitem(buf) - - -class TestMemoryView(MemoryLeakMixin, TestCase): - """ - Test memoryview-specific attributes and operations. - """ - - def _arrays(self): - arr = np.arange(12) - yield arr - arr = arr.reshape((3, 4)) - yield arr - yield arr.T - yield arr[::2] - arr.setflags(write=False) - yield arr - arr = np.zeros(()) - assert arr.ndim == 0 - yield arr - - def test_ndim(self): - for arr in self._arrays(): - m = memoryview(arr) - self.assertPreciseEqual(ndim_usecase(m), arr.ndim) - - def test_shape(self): - for arr in self._arrays(): - m = memoryview(arr) - self.assertPreciseEqual(shape_usecase(m), arr.shape) - - def test_strides(self): - for arr in self._arrays(): - m = memoryview(arr) - self.assertPreciseEqual(strides_usecase(m), arr.strides) - - def test_itemsize(self): - for arr in self._arrays(): - m = memoryview(arr) - self.assertPreciseEqual(itemsize_usecase(m), arr.itemsize) - - def test_nbytes(self): - for arr in self._arrays(): - m = memoryview(arr) - self.assertPreciseEqual(nbytes_usecase(m), arr.size * arr.itemsize) - - def test_readonly(self): - for arr in self._arrays(): - m = memoryview(arr) - self.assertIs(readonly_usecase(m), not arr.flags.writeable) - m = memoryview(b"xyz") - self.assertIs(readonly_usecase(m), True) - m = memoryview(bytearray(b"xyz")) - self.assertIs(readonly_usecase(m), False) - - 
@unittest.skipUnless(sys.version_info >= (3,), - "memoryview.*contiguous doesn't exist on 2.7") - def test_contiguous(self): - m = memoryview(bytearray(b"xyz")) - self.assertIs(contiguous_usecase(m), True) - self.assertIs(c_contiguous_usecase(m), True) - self.assertIs(f_contiguous_usecase(m), True) - for arr in self._arrays(): - m = memoryview(arr) - # Note `arr.flags.contiguous` is wrong (it mimicks c_contiguous) - self.assertIs(contiguous_usecase(m), - arr.flags.f_contiguous or arr.flags.c_contiguous) - self.assertIs(c_contiguous_usecase(m), arr.flags.c_contiguous) - self.assertIs(f_contiguous_usecase(m), arr.flags.f_contiguous) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_builtins.py b/numba/numba/tests/test_builtins.py deleted file mode 100644 index 92fbd6e2e..000000000 --- a/numba/numba/tests/test_builtins.py +++ /dev/null @@ -1,962 +0,0 @@ -from __future__ import print_function - -import itertools -import functools -import sys - -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import jit, typeof, errors, types, utils -from .support import TestCase, tag - - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -forceobj_flags = Flags() -forceobj_flags.set("force_pyobject") - -no_pyobj_flags = Flags() - -nrt_no_pyobj_flags = Flags() -nrt_no_pyobj_flags.set("nrt") - - -def abs_usecase(x): - return abs(x) - -def all_usecase(x, y): - if x == None and y == None: - return all([]) - elif x == None: - return all([y]) - elif y == None: - return all([x]) - else: - return all([x, y]) - -def any_usecase(x, y): - if x == None and y == None: - return any([]) - elif x == None: - return any([y]) - elif y == None: - return any([x]) - else: - return any([x, y]) - -def bool_usecase(x): - return bool(x) - -def chr_usecase(x): - return chr(x) - -def cmp_usecase(x, y): - return cmp(x, y) - -def complex_usecase(x, y): - return complex(x, y) - 
-def divmod_usecase(x, y): - return divmod(x, y) - -def enumerate_usecase(): - result = 0 - for i, j in enumerate((1., 2.5, 3.)): - result += i * j - return result - -def enumerate_start_usecase(): - result = 0 - for i, j in enumerate((1., 2.5, 3.), 42): - result += i * j - return result - -def filter_usecase(x, filter_func): - return filter(filter_func, x) - -def float_usecase(x): - return float(x) - -def format_usecase(x, y): - return x.format(y) - -def globals_usecase(): - return globals() - -# NOTE: hash() is tested in test_hashing - -def hex_usecase(x): - return hex(x) - -def int_usecase(x, base): - return int(x, base=base) - -def iter_next_usecase(x): - it = iter(x) - return next(it), next(it) - -def locals_usecase(x): - y = 5 - return locals()['y'] - -def long_usecase(x, base): - return long(x, base=base) - -def map_usecase(x, map_func): - return map(map_func, x) - - -def max_usecase1(x, y): - return max(x, y) - -def max_usecase2(x, y): - return max([x, y]) - -def max_usecase3(x): - return max(x) - -def max_usecase4(): - return max(()) - - -def min_usecase1(x, y): - return min(x, y) - -def min_usecase2(x, y): - return min([x, y]) - -def min_usecase3(x): - return min(x) - -def min_usecase4(): - return min(()) - - -def oct_usecase(x): - return oct(x) - -def ord_usecase(x): - return ord(x) - -def reduce_usecase(reduce_func, x): - return functools.reduce(reduce_func, x) - -def round_usecase1(x): - return round(x) - -def round_usecase2(x, n): - return round(x, n) - -def sum_usecase(x): - return sum(x) - -def type_unary_usecase(a, b): - return type(a)(b) - -def unichr_usecase(x): - return unichr(x) - -def zip_usecase(): - result = 0 - for i, j in zip((1, 2, 3), (4.5, 6.7)): - result += i * j - return result - -def zip_0_usecase(): - result = 0 - for i in zip(): - result += 1 - return result - -def zip_1_usecase(): - result = 0 - for i, in zip((1, 2)): - result += i - return result - - -def zip_3_usecase(): - result = 0 - for i, j, k in zip((1, 2), (3, 4, 5), (6.7, 
8.9)): - result += i * j * k - return result - - -def zip_first_exhausted(): - iterable = range(7) - n = 3 - it = iter(iterable) - # 1st iterator is shorter - front = list(zip(range(n), it)) - # Make sure that we didn't skip one in `it` - back = list(it) - return front, back - - -def pow_op_usecase(x, y): - return x ** y - - -def pow_usecase(x, y): - return pow(x, y) - - -class TestBuiltins(TestCase): - - def run_nullary_func(self, pyfunc, flags): - cr = compile_isolated(pyfunc, (), flags=flags) - cfunc = cr.entry_point - expected = pyfunc() - self.assertPreciseEqual(cfunc(), expected) - - def test_abs(self, flags=enable_pyobj_flags): - pyfunc = abs_usecase - - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cr.entry_point - for x in [-1, 0, 1]: - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - cr = compile_isolated(pyfunc, (types.float32,), flags=flags) - cfunc = cr.entry_point - for x in [-1.1, 0.0, 1.1]: - self.assertPreciseEqual(cfunc(x), pyfunc(x), prec='single') - - complex_values = [-1.1 + 0.5j, 0.0 + 0j, 1.1 + 3j, - float('inf') + 1j * float('nan'), - float('nan') - 1j * float('inf')] - cr = compile_isolated(pyfunc, (types.complex64,), flags=flags) - cfunc = cr.entry_point - for x in complex_values: - self.assertPreciseEqual(cfunc(x), pyfunc(x), prec='single') - cr = compile_isolated(pyfunc, (types.complex128,), flags=flags) - cfunc = cr.entry_point - for x in complex_values: - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - @tag('important') - def test_abs_npm(self): - self.test_abs(flags=no_pyobj_flags) - - def test_all(self, flags=enable_pyobj_flags): - pyfunc = all_usecase - - cr = compile_isolated(pyfunc, (types.int32,types.int32), flags=flags) - cfunc = cr.entry_point - x_operands = [-1, 0, 1, None] - y_operands = [-1, 0, 1, None] - for x, y in itertools.product(x_operands, y_operands): - self.assertPreciseEqual(cfunc(x, y), pyfunc(x, y)) - - def test_all_npm(self): - with self.assertTypingError(): - 
self.test_all(flags=no_pyobj_flags) - - def test_any(self, flags=enable_pyobj_flags): - pyfunc = any_usecase - - cr = compile_isolated(pyfunc, (types.int32,types.int32), flags=flags) - cfunc = cr.entry_point - x_operands = [-1, 0, 1, None] - y_operands = [-1, 0, 1, None] - for x, y in itertools.product(x_operands, y_operands): - self.assertPreciseEqual(cfunc(x, y), pyfunc(x, y)) - - def test_any_npm(self): - with self.assertTypingError(): - self.test_any(flags=no_pyobj_flags) - - def test_bool(self, flags=enable_pyobj_flags): - pyfunc = bool_usecase - - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cr.entry_point - for x in [-1, 0, 1]: - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - cr = compile_isolated(pyfunc, (types.float64,), flags=flags) - cfunc = cr.entry_point - for x in [0.0, -0.0, 1.5, float('inf'), float('nan')]: - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - cr = compile_isolated(pyfunc, (types.complex128,), flags=flags) - cfunc = cr.entry_point - for x in [complex(0, float('inf')), complex(0, float('nan'))]: - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - def test_bool_npm(self): - self.test_bool(flags=no_pyobj_flags) - - def test_bool_nonnumber(self, flags=enable_pyobj_flags): - pyfunc = bool_usecase - - cr = compile_isolated(pyfunc, (types.string,), flags=flags) - cfunc = cr.entry_point - for x in ['x', '']: - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - cr = compile_isolated(pyfunc, (types.Dummy('list'),), flags=flags) - cfunc = cr.entry_point - for x in [[1], []]: - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - def test_bool_nonnumber_npm(self): - with self.assertTypingError(): - self.test_bool_nonnumber(flags=no_pyobj_flags) - - def test_chr(self, flags=enable_pyobj_flags): - pyfunc = chr_usecase - - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cr.entry_point - for x in range(256): - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - def test_chr_npm(self): - with self.assertTypingError(): - 
self.test_chr(flags=no_pyobj_flags) - - @unittest.skipIf(utils.IS_PY3, "cmp not available as global is Py3") - def test_cmp(self, flags=enable_pyobj_flags): - pyfunc = cmp_usecase - - cr = compile_isolated(pyfunc, (types.int32, types.int32), flags=flags) - cfunc = cr.entry_point - - x_operands = [-1, 0, 1] - y_operands = [-1, 0, 1] - for x, y in itertools.product(x_operands, y_operands): - self.assertPreciseEqual(cfunc(x, y), pyfunc(x, y)) - - @unittest.skipIf(utils.IS_PY3, "cmp not available as global is Py3") - def test_cmp_npm(self): - with self.assertTypingError(): - self.test_cmp(flags=no_pyobj_flags) - - def test_complex(self, flags=enable_pyobj_flags): - pyfunc = complex_usecase - - cr = compile_isolated(pyfunc, (types.int32, types.int32), flags=flags) - cfunc = cr.entry_point - - x_operands = [-1, 0, 1] - y_operands = [-1, 0, 1] - for x, y in itertools.product(x_operands, y_operands): - self.assertPreciseEqual(cfunc(x, y), pyfunc(x, y)) - - @tag('important') - def test_complex_npm(self): - self.test_complex(flags=no_pyobj_flags) - - def test_divmod_ints(self, flags=enable_pyobj_flags): - pyfunc = divmod_usecase - - cr = compile_isolated(pyfunc, (types.int64, types.int64), - flags=flags) - cfunc = cr.entry_point - - def truncate_result(x, bits=64): - # Remove any extraneous bits (since Numba will return - # a 64-bit result by definition) - if x >= 0: - x &= (1 << (bits - 1)) - 1 - return x - - denominators = [1, 3, 7, 15, -1, -3, -7, -15, 2**63 - 1, -2**63] - numerators = denominators + [0] - for x, y, in itertools.product(numerators, denominators): - expected_quot, expected_rem = pyfunc(x, y) - quot, rem = cfunc(x, y) - f = truncate_result - self.assertPreciseEqual((f(quot), f(rem)), - (f(expected_quot), f(expected_rem))) - - for x in numerators: - with self.assertRaises(ZeroDivisionError): - cfunc(x, 0) - - @tag('important') - def test_divmod_ints_npm(self): - self.test_divmod_ints(flags=no_pyobj_flags) - - def test_divmod_floats(self, 
flags=enable_pyobj_flags): - pyfunc = divmod_usecase - - cr = compile_isolated(pyfunc, (types.float64, types.float64), - flags=flags) - cfunc = cr.entry_point - - denominators = [1., 3.5, 1e100, -2., -7.5, -1e101, - np.inf, -np.inf, np.nan] - numerators = denominators + [-0.0, 0.0] - for x, y, in itertools.product(numerators, denominators): - expected_quot, expected_rem = pyfunc(x, y) - quot, rem = cfunc(x, y) - self.assertPreciseEqual((quot, rem), (expected_quot, expected_rem)) - - for x in numerators: - with self.assertRaises(ZeroDivisionError): - cfunc(x, 0.0) - - @tag('important') - def test_divmod_floats_npm(self): - self.test_divmod_floats(flags=no_pyobj_flags) - - def test_enumerate(self, flags=enable_pyobj_flags): - self.run_nullary_func(enumerate_usecase, flags) - - def test_enumerate_npm(self): - self.test_enumerate(flags=no_pyobj_flags) - - def test_enumerate_start(self, flags=enable_pyobj_flags): - self.run_nullary_func(enumerate_start_usecase, flags) - - def test_enumerate_start_npm(self): - self.test_enumerate_start(flags=no_pyobj_flags) - - def test_filter(self, flags=enable_pyobj_flags): - pyfunc = filter_usecase - cr = compile_isolated(pyfunc, (types.Dummy('list'), - types.Dummy('function_ptr')), - flags=flags) - cfunc = cr.entry_point - - filter_func = lambda x: x % 2 - x = [0, 1, 2, 3, 4] - self.assertSequenceEqual(list(cfunc(x, filter_func)), - list(pyfunc(x, filter_func))) - - def test_filter_npm(self): - with self.assertTypingError(): - self.test_filter(flags=no_pyobj_flags) - - def test_float(self, flags=enable_pyobj_flags): - pyfunc = float_usecase - - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cr.entry_point - for x in [-1, 0, 1]: - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - cr = compile_isolated(pyfunc, (types.float32,), flags=flags) - cfunc = cr.entry_point - for x in [-1.1, 0.0, 1.1]: - self.assertPreciseEqual(cfunc(x), pyfunc(x), prec='single') - - cr = compile_isolated(pyfunc, (types.string,), 
flags=flags) - cfunc = cr.entry_point - for x in ['-1.1', '0.0', '1.1']: - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - @tag('important') - def test_float_npm(self): - with self.assertTypingError(): - self.test_float(flags=no_pyobj_flags) - - def test_format(self, flags=enable_pyobj_flags): - pyfunc = format_usecase - - cr = compile_isolated(pyfunc, (types.string, types.int32,), flags=flags) - cfunc = cr.entry_point - x = '{0}' - for y in [-1, 0, 1]: - self.assertPreciseEqual(cfunc(x, y), pyfunc(x, y)) - - cr = compile_isolated(pyfunc, (types.string, - types.float32,), flags=flags) - cfunc = cr.entry_point - x = '{0}' - for y in [-1.1, 0.0, 1.1]: - self.assertPreciseEqual(cfunc(x, y), pyfunc(x, y)) - - cr = compile_isolated(pyfunc, (types.string, - types.string,), flags=flags) - cfunc = cr.entry_point - x = '{0}' - for y in ['a', 'b', 'c']: - self.assertPreciseEqual(cfunc(x, y), pyfunc(x, y)) - - def test_format_npm(self): - with self.assertTypingError(): - self.test_format(flags=no_pyobj_flags) - - def test_globals(self, flags=enable_pyobj_flags): - pyfunc = globals_usecase - cr = compile_isolated(pyfunc, (), flags=flags) - cfunc = cr.entry_point - g = cfunc() - self.assertIs(g, globals()) - - def test_globals_npm(self): - with self.assertTypingError(): - self.test_globals(flags=no_pyobj_flags) - - def test_globals_jit(self, **jit_flags): - # Issue #416: weird behaviour of globals() in combination with - # the @jit decorator. 
- pyfunc = globals_usecase - jitted = jit(**jit_flags)(pyfunc) - self.assertIs(jitted(), globals()) - self.assertIs(jitted(), globals()) - - def test_globals_jit_npm(self): - with self.assertTypingError(): - self.test_globals_jit(nopython=True) - - def test_hex(self, flags=enable_pyobj_flags): - pyfunc = hex_usecase - - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cr.entry_point - for x in [-1, 0, 1]: - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - def test_hex_npm(self): - with self.assertTypingError(): - self.test_hex(flags=no_pyobj_flags) - - def test_int(self, flags=enable_pyobj_flags): - pyfunc = int_usecase - - cr = compile_isolated(pyfunc, (types.string, types.int32), flags=flags) - cfunc = cr.entry_point - - x_operands = ['-1', '0', '1', '10'] - y_operands = [2, 8, 10, 16] - for x, y in itertools.product(x_operands, y_operands): - self.assertPreciseEqual(cfunc(x, y), pyfunc(x, y)) - - @tag('important') - def test_int_npm(self): - with self.assertTypingError(): - self.test_int(flags=no_pyobj_flags) - - def test_iter_next(self, flags=enable_pyobj_flags): - pyfunc = iter_next_usecase - cr = compile_isolated(pyfunc, (types.UniTuple(types.int32, 3),), - flags=flags) - cfunc = cr.entry_point - self.assertPreciseEqual(cfunc((1, 42, 5)), (1, 42)) - - cr = compile_isolated(pyfunc, (types.UniTuple(types.int32, 1),), - flags=flags) - cfunc = cr.entry_point - with self.assertRaises(StopIteration): - cfunc((1,)) - - @tag('important') - def test_iter_next_npm(self): - self.test_iter_next(flags=no_pyobj_flags) - - def test_locals(self, flags=enable_pyobj_flags): - pyfunc = locals_usecase - with self.assertRaises(errors.ForbiddenConstruct): - cr = compile_isolated(pyfunc, (types.int64,), flags=flags) - - def test_locals_forceobj(self): - self.test_locals(flags=forceobj_flags) - - def test_locals_npm(self): - with self.assertTypingError(): - self.test_locals(flags=no_pyobj_flags) - - @unittest.skipIf(utils.IS_PY3, "long is not available as global 
is Py3") - def test_long(self, flags=enable_pyobj_flags): - pyfunc = long_usecase - - cr = compile_isolated(pyfunc, (types.string, types.int64), flags=flags) - cfunc = cr.entry_point - - x_operands = ['-1', '0', '1', '10'] - y_operands = [2, 8, 10, 16] - for x, y in itertools.product(x_operands, y_operands): - self.assertPreciseEqual(cfunc(x, y), pyfunc(x, y)) - - @unittest.skipIf(utils.IS_PY3, "cmp not available as global is Py3") - def test_long_npm(self): - with self.assertTypingError(): - self.test_long(flags=no_pyobj_flags) - - def test_map(self, flags=enable_pyobj_flags): - pyfunc = map_usecase - cr = compile_isolated(pyfunc, (types.Dummy('list'), - types.Dummy('function_ptr')), - flags=flags) - cfunc = cr.entry_point - - map_func = lambda x: x * 2 - x = [0, 1, 2, 3, 4] - self.assertSequenceEqual(list(cfunc(x, map_func)), - list(pyfunc(x, map_func))) - - def test_map_npm(self): - with self.assertTypingError(): - self.test_map(flags=no_pyobj_flags) - - # - # min() and max() - # - - def check_minmax_1(self, pyfunc, flags): - cr = compile_isolated(pyfunc, (types.int32, types.int32), flags=flags) - cfunc = cr.entry_point - - x_operands = [-1, 0, 1] - y_operands = [-1, 0, 1] - for x, y in itertools.product(x_operands, y_operands): - self.assertPreciseEqual(cfunc(x, y), pyfunc(x, y)) - - def test_max_1(self, flags=enable_pyobj_flags): - """ - max(*args) - """ - self.check_minmax_1(max_usecase1, flags) - - def test_min_1(self, flags=enable_pyobj_flags): - """ - min(*args) - """ - self.check_minmax_1(min_usecase1, flags) - - @tag('important') - def test_max_npm_1(self): - self.test_max_1(flags=no_pyobj_flags) - - @tag('important') - def test_min_npm_1(self): - self.test_min_1(flags=no_pyobj_flags) - - def check_minmax_2(self, pyfunc, flags): - cr = compile_isolated(pyfunc, (types.int32, types.int32), flags=flags) - cfunc = cr.entry_point - - x_operands = [-1, 0, 1] - y_operands = [-1, 0, 1] - for x, y in itertools.product(x_operands, y_operands): - 
self.assertPreciseEqual(cfunc(x, y), pyfunc(x, y)) - - def test_max_2(self, flags=enable_pyobj_flags): - """ - max(list) - """ - self.check_minmax_2(max_usecase2, flags) - - def test_min_2(self, flags=enable_pyobj_flags): - """ - min(list) - """ - self.check_minmax_2(min_usecase2, flags) - - def test_max_npm_2(self): - with self.assertTypingError(): - self.test_max_2(flags=no_pyobj_flags) - - def test_min_npm_2(self): - with self.assertTypingError(): - self.test_min_2(flags=no_pyobj_flags) - - def check_minmax_3(self, pyfunc, flags): - def check(argty): - cr = compile_isolated(pyfunc, (argty,), flags=flags) - cfunc = cr.entry_point - # Check that the algorithm matches Python's with a non-total order - tup = (1.5, float('nan'), 2.5) - for val in [tup, tup[::-1]]: - self.assertPreciseEqual(cfunc(val), pyfunc(val)) - - check(types.UniTuple(types.float64, 3)) - check(types.Tuple((types.float32, types.float64, types.float32))) - - def test_max_3(self, flags=enable_pyobj_flags): - """ - max(tuple) - """ - self.check_minmax_3(max_usecase3, flags) - - def test_min_3(self, flags=enable_pyobj_flags): - """ - min(tuple) - """ - self.check_minmax_3(min_usecase3, flags) - - @tag('important') - def test_max_npm_3(self): - self.test_max_3(flags=no_pyobj_flags) - - @tag('important') - def test_min_npm_3(self): - self.test_min_3(flags=no_pyobj_flags) - - def check_min_max_invalid_types(self, pyfunc, flags=enable_pyobj_flags): - cr = compile_isolated(pyfunc, (types.int32, types.Dummy('list')), - flags=flags) - cfunc = cr.entry_point - cfunc(1, [1]) - - def test_max_1_invalid_types(self): - # Heterogeneous ordering is valid in Python 2 - if utils.IS_PY3: - with self.assertRaises(TypeError): - self.check_min_max_invalid_types(max_usecase1) - else: - self.check_min_max_invalid_types(max_usecase1) - - def test_max_1_invalid_types_npm(self): - with self.assertTypingError(): - self.check_min_max_invalid_types(max_usecase1, flags=no_pyobj_flags) - - def test_min_1_invalid_types(self): - # 
Heterogeneous ordering is valid in Python 2 - if utils.IS_PY3: - with self.assertRaises(TypeError): - self.check_min_max_invalid_types(min_usecase1) - else: - self.check_min_max_invalid_types(min_usecase1) - - def test_min_1_invalid_types_npm(self): - with self.assertTypingError(): - self.check_min_max_invalid_types(min_usecase1, flags=no_pyobj_flags) - - # Test that max(1) and min(1) fail - - def check_min_max_unary_non_iterable(self, pyfunc, flags=enable_pyobj_flags): - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cr.entry_point - cfunc(1) - - def test_max_unary_non_iterable(self): - with self.assertRaises(TypeError): - self.check_min_max_unary_non_iterable(max_usecase3) - - def test_max_unary_non_iterable_npm(self): - with self.assertTypingError(): - self.check_min_max_unary_non_iterable(max_usecase3) - - def test_min_unary_non_iterable(self): - with self.assertRaises(TypeError): - self.check_min_max_unary_non_iterable(min_usecase3) - - def test_min_unary_non_iterable_npm(self): - with self.assertTypingError(): - self.check_min_max_unary_non_iterable(min_usecase3) - - # Test that max(()) and min(()) fail - - def check_min_max_empty_tuple(self, pyfunc, func_name): - with self.assertTypingError() as raises: - compile_isolated(pyfunc, (), flags=no_pyobj_flags) - self.assertIn("%s() argument is an empty tuple" % func_name, - str(raises.exception)) - - def test_max_empty_tuple(self): - self.check_min_max_empty_tuple(max_usecase4, "max") - - def test_min_empty_tuple(self): - self.check_min_max_empty_tuple(min_usecase4, "min") - - - def test_oct(self, flags=enable_pyobj_flags): - pyfunc = oct_usecase - - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cr.entry_point - for x in [-8, -1, 0, 1, 8]: - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - def test_oct_npm(self): - with self.assertTypingError(): - self.test_oct(flags=no_pyobj_flags) - - def test_ord(self, flags=enable_pyobj_flags): - pyfunc = ord_usecase - - cr = 
compile_isolated(pyfunc, (types.string,), flags=flags) - cfunc = cr.entry_point - for x in ['a', u'\u2020']: - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - def test_ord_npm(self): - with self.assertTypingError(): - self.test_ord(flags=no_pyobj_flags) - - def test_reduce(self, flags=enable_pyobj_flags): - pyfunc = reduce_usecase - cr = compile_isolated(pyfunc, (types.Dummy('function_ptr'), - types.Dummy('list')), - flags=flags) - cfunc = cr.entry_point - - reduce_func = lambda x, y: x + y - - x = range(10) - self.assertPreciseEqual(cfunc(reduce_func, x), pyfunc(reduce_func, x)) - - x = [x + x/10.0 for x in range(10)] - self.assertPreciseEqual(cfunc(reduce_func, x), pyfunc(reduce_func, x)) - - x = [complex(x, x) for x in range(10)] - self.assertPreciseEqual(cfunc(reduce_func, x), pyfunc(reduce_func, x)) - - def test_reduce_npm(self): - with self.assertTypingError(): - self.test_reduce(flags=no_pyobj_flags) - - # Under Windows, the LLVM "round" intrinsic (used for Python 2) - # mistreats signed zeros. 
- _relax_round = sys.platform == 'win32' and sys.version_info < (3,) - - def test_round1(self, flags=enable_pyobj_flags): - pyfunc = round_usecase1 - - for tp in (types.float64, types.float32): - cr = compile_isolated(pyfunc, (tp,), flags=flags) - cfunc = cr.entry_point - values = [-1.6, -1.5, -1.4, -0.5, 0.0, 0.1, 0.5, 0.6, 1.4, 1.5, 5.0] - if not self._relax_round: - values += [-0.1, -0.0] - for x in values: - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - def test_round1_npm(self): - self.test_round1(flags=no_pyobj_flags) - - def test_round2(self, flags=enable_pyobj_flags): - pyfunc = round_usecase2 - - for tp in (types.float64, types.float32): - prec = 'single' if tp is types.float32 else 'exact' - cr = compile_isolated(pyfunc, (tp, types.int32), flags=flags) - cfunc = cr.entry_point - for x in [0.0, 0.1, 0.125, 0.25, 0.5, 0.75, 1.25, - 1.5, 1.75, 2.25, 2.5, 2.75, 12.5, 15.0, 22.5]: - for n in (-1, 0, 1, 2): - self.assertPreciseEqual(cfunc(x, n), pyfunc(x, n), - prec=prec) - expected = pyfunc(-x, n) - if not (expected == 0.0 and self._relax_round): - self.assertPreciseEqual(cfunc(-x, n), pyfunc(-x, n), - prec=prec) - - @tag('important') - def test_round2_npm(self): - self.test_round2(flags=no_pyobj_flags) - - def test_sum(self, flags=enable_pyobj_flags): - pyfunc = sum_usecase - - cr = compile_isolated(pyfunc, (types.Dummy('list'),), flags=flags) - cfunc = cr.entry_point - - x = range(10) - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - x = [x + x/10.0 for x in range(10)] - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - x = [complex(x, x) for x in range(10)] - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - def test_sum_npm(self): - with self.assertTypingError(): - self.test_sum(flags=no_pyobj_flags) - - def test_type_unary(self): - # Test type(val) and type(val)(other_val) - pyfunc = type_unary_usecase - cfunc = jit(nopython=True)(pyfunc) - - def check(*args): - expected = pyfunc(*args) - self.assertPreciseEqual(cfunc(*args), expected) - - check(1.5, 2) 
- check(1, 2.5) - check(1.5j, 2) - check(True, 2) - check(2.5j, False) - - @unittest.skipIf(utils.IS_PY3, "unichr not available as global is Py3") - def test_unichr(self, flags=enable_pyobj_flags): - pyfunc = unichr_usecase - - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cr.entry_point - for x in range(0, 1000, 10): - self.assertPreciseEqual(cfunc(x), pyfunc(x)) - - @unittest.skipIf(utils.IS_PY3, "unichr not available as global is Py3") - def test_unichr_npm(self): - with self.assertTypingError(): - self.test_unichr(flags=no_pyobj_flags) - - def test_zip(self, flags=forceobj_flags): - self.run_nullary_func(zip_usecase, flags) - - @tag('important') - def test_zip_npm(self): - self.test_zip(flags=no_pyobj_flags) - - def test_zip_1(self, flags=forceobj_flags): - self.run_nullary_func(zip_1_usecase, flags) - - @tag('important') - def test_zip_1_npm(self): - self.test_zip_1(flags=no_pyobj_flags) - - def test_zip_3(self, flags=forceobj_flags): - self.run_nullary_func(zip_3_usecase, flags) - - @tag('important') - def test_zip_3_npm(self): - self.test_zip_3(flags=no_pyobj_flags) - - def test_zip_0(self, flags=forceobj_flags): - self.run_nullary_func(zip_0_usecase, flags) - - def test_zip_0_npm(self): - self.test_zip_0(flags=no_pyobj_flags) - - def test_zip_first_exhausted(self, flags=forceobj_flags): - """ - Test side effect to the input iterators when a left iterator has been - exhausted before the ones on the right. 
- """ - self.run_nullary_func(zip_first_exhausted, flags) - - @tag('important') - def test_zip_first_exhausted_npm(self): - self.test_zip_first_exhausted(flags=nrt_no_pyobj_flags) - - def test_pow_op_usecase(self): - args = [ - (2, 3), - (2.0, 3), - (2, 3.0), - (2j, 3.0j), - ] - - for x, y in args: - cres = compile_isolated(pow_op_usecase, (typeof(x), typeof(y)), - flags=no_pyobj_flags) - r = cres.entry_point(x, y) - self.assertPreciseEqual(r, pow_op_usecase(x, y)) - - @tag('important') - def test_pow_usecase(self): - args = [ - (2, 3), - (2.0, 3), - (2, 3.0), - (2j, 3.0j), - ] - - for x, y in args: - cres = compile_isolated(pow_usecase, (typeof(x), typeof(y)), - flags=no_pyobj_flags) - r = cres.entry_point(x, y) - self.assertPreciseEqual(r, pow_usecase(x, y)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_casting.py b/numba/numba/tests/test_casting.py deleted file mode 100644 index 4fd5ce821..000000000 --- a/numba/numba/tests/test_casting.py +++ /dev/null @@ -1,102 +0,0 @@ -from numba import unittest_support as unittest -import numpy as np -from numba.compiler import compile_isolated -from numba import types, njit -import struct - - -def float_to_int(x): - return types.int32(x) - - -def int_to_float(x): - return types.float64(x) / 2 - - -def float_to_unsigned(x): - return types.uint32(x) - - -def float_to_complex(x): - return types.complex128(x) - - -class TestCasting(unittest.TestCase): - def test_float_to_int(self): - pyfunc = float_to_int - cr = compile_isolated(pyfunc, [types.float32]) - cfunc = cr.entry_point - - self.assertEqual(cr.signature.return_type, types.int32) - self.assertEqual(cfunc(12.3), pyfunc(12.3)) - self.assertEqual(cfunc(12.3), int(12.3)) - self.assertEqual(cfunc(-12.3), pyfunc(-12.3)) - self.assertEqual(cfunc(-12.3), int(-12.3)) - - def test_int_to_float(self): - pyfunc = int_to_float - cr = compile_isolated(pyfunc, [types.int64]) - cfunc = cr.entry_point - - self.assertEqual(cr.signature.return_type, 
types.float64) - self.assertEqual(cfunc(321), pyfunc(321)) - self.assertEqual(cfunc(321), 321. / 2) - - def test_float_to_unsigned(self): - pyfunc = float_to_unsigned - cr = compile_isolated(pyfunc, [types.float32]) - cfunc = cr.entry_point - - self.assertEqual(cr.signature.return_type, types.uint32) - self.assertEqual(cfunc(3.21), pyfunc(3.21)) - self.assertEqual(cfunc(3.21), struct.unpack('I', struct.pack('i', - 3))[0]) - - def test_float_to_complex(self): - pyfunc = float_to_complex - cr = compile_isolated(pyfunc, [types.float64]) - cfunc = cr.entry_point - self.assertEqual(cr.signature.return_type, types.complex128) - self.assertEqual(cfunc(-3.21), pyfunc(-3.21)) - self.assertEqual(cfunc(-3.21), -3.21 + 0j) - - def test_array_to_array(self): - """Make sure this compiles. - - Cast C to A array - """ - @njit("f8(f8[:])") - def inner(x): - return x[0] - - inner.disable_compile() - - @njit("f8(f8[::1])") - def driver(x): - return inner(x) - - x = np.array([1234], dtype=np.float64) - self.assertEqual(driver(x), x[0]) - self.assertEqual(len(inner.overloads), 1) - - def test_optional_to_optional(self): - """ - Test error due mishandling of Optional to Optional casting - - Related issue: https://github.com/numba/numba/issues/1718 - """ - # Attempt to cast optional(intp) to optional(float64) - opt_int = types.Optional(types.intp) - opt_flt = types.Optional(types.float64) - sig = opt_flt(opt_int) - - @njit(sig) - def foo(a): - return a - - self.assertEqual(foo(2), 2) - self.assertIsNone(foo(None)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_cffi.py b/numba/numba/tests/test_cffi.py deleted file mode 100644 index 26589830a..000000000 --- a/numba/numba/tests/test_cffi.py +++ /dev/null @@ -1,199 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import array -import numpy as np -import sys - -from numba import unittest_support as unittest -from numba import jit, cffi_support, types, errors -from 
numba.compiler import compile_isolated, Flags -from numba.tests.support import TestCase, tag - -import numba.tests.cffi_usecases as mod - - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -no_pyobj_flags = Flags() - - -@unittest.skipUnless(cffi_support.SUPPORTED, - "CFFI not supported -- please install the cffi module") -class TestCFFI(TestCase): - - # Need to run the tests serially because of race conditions in - # cffi's OOL mode. - _numba_parallel_test_ = False - - def setUp(self): - mod.init() - mod.init_ool() - - def test_type_map(self): - signature = cffi_support.map_type(mod.ffi.typeof(mod.cffi_sin)) - self.assertEqual(len(signature.args), 1) - self.assertEqual(signature.args[0], types.double) - - def _test_function(self, pyfunc, flags=enable_pyobj_flags): - cres = compile_isolated(pyfunc, [types.double], flags=flags) - cfunc = cres.entry_point - - for x in [-1.2, -1, 0, 0.1, 3.14]: - self.assertPreciseEqual(pyfunc(x), cfunc(x)) - - def test_sin_function(self): - self._test_function(mod.use_cffi_sin) - - def test_bool_function_ool(self): - pyfunc = mod.use_cffi_boolean_true - cres = compile_isolated(pyfunc, (), flags=no_pyobj_flags) - cfunc = cres.entry_point - self.assertEqual(pyfunc(), True) - self.assertEqual(cfunc(), True) - - @tag('important') - def test_sin_function_npm(self): - self._test_function(mod.use_cffi_sin, flags=no_pyobj_flags) - - def test_sin_function_ool(self, flags=enable_pyobj_flags): - self._test_function(mod.use_cffi_sin_ool) - - def test_sin_function_npm_ool(self): - self._test_function(mod.use_cffi_sin_ool, flags=no_pyobj_flags) - - def test_two_funcs(self): - # Check that two constant functions don't get mixed up. 
- self._test_function(mod.use_two_funcs) - - def test_two_funcs_ool(self): - self._test_function(mod.use_two_funcs_ool) - - def test_function_pointer(self): - pyfunc = mod.use_func_pointer - cfunc = jit(nopython=True)(pyfunc) - for (fa, fb, x) in [ - (mod.cffi_sin, mod.cffi_cos, 1.0), - (mod.cffi_sin, mod.cffi_cos, -1.0), - (mod.cffi_cos, mod.cffi_sin, 1.0), - (mod.cffi_cos, mod.cffi_sin, -1.0), - (mod.cffi_sin_ool, mod.cffi_cos_ool, 1.0), - (mod.cffi_sin_ool, mod.cffi_cos_ool, -1.0), - (mod.cffi_cos_ool, mod.cffi_sin_ool, 1.0), - (mod.cffi_cos_ool, mod.cffi_sin_ool, -1.0), - (mod.cffi_sin, mod.cffi_cos_ool, 1.0), - (mod.cffi_sin, mod.cffi_cos_ool, -1.0), - (mod.cffi_cos, mod.cffi_sin_ool, 1.0), - (mod.cffi_cos, mod.cffi_sin_ool, -1.0)]: - expected = pyfunc(fa, fb, x) - got = cfunc(fa, fb, x) - self.assertEqual(got, expected) - # A single specialization was compiled for all calls - self.assertEqual(len(cfunc.overloads), 1, cfunc.overloads) - - def test_user_defined_symbols(self): - pyfunc = mod.use_user_defined_symbols - cfunc = jit(nopython=True)(pyfunc) - self.assertEqual(pyfunc(), cfunc()) - - def check_vector_sin(self, cfunc, x, y): - cfunc(x, y) - np.testing.assert_allclose(y, np.sin(x)) - - def _test_from_buffer_numpy_array(self, pyfunc, dtype): - x = np.arange(10).astype(dtype) - y = np.zeros_like(x) - cfunc = jit(nopython=True)(pyfunc) - self.check_vector_sin(cfunc, x, y) - - @tag('important') - def test_from_buffer_float32(self): - self._test_from_buffer_numpy_array(mod.vector_sin_float32, np.float32) - - def test_from_buffer_float64(self): - self._test_from_buffer_numpy_array(mod.vector_sin_float64, np.float64) - - def test_from_buffer_struct(self): - n = 10 - x = np.arange(n) + np.arange(n * 2, n * 3) * 1j - y = np.zeros(n) - real_cfunc = jit(nopython=True)(mod.vector_extract_real) - real_cfunc(x, y) - np.testing.assert_equal(x.real, y) - imag_cfunc = jit(nopython=True)(mod.vector_extract_imag) - imag_cfunc(x, y) - np.testing.assert_equal(x.imag, y) - - 
@unittest.skipIf(sys.version_info < (3,), - "buffer protocol on array.array needs Python 3+") - def test_from_buffer_pyarray(self): - pyfunc = mod.vector_sin_float32 - cfunc = jit(nopython=True)(pyfunc) - x = array.array("f", range(10)) - y = array.array("f", [0] * len(x)) - self.check_vector_sin(cfunc, x, y) - - def test_from_buffer_error(self): - pyfunc = mod.vector_sin_float32 - cfunc = jit(nopython=True)(pyfunc) - # Non-contiguous array - x = np.arange(10).astype(np.float32)[::2] - y = np.zeros_like(x) - with self.assertRaises(errors.TypingError) as raises: - cfunc(x, y) - self.assertIn("from_buffer() unsupported on non-contiguous buffers", - str(raises.exception)) - - def test_from_buffer_numpy_multi_array(self): - c1 = np.array([1, 2], order='C', dtype=np.float32) - c1_zeros = np.zeros_like(c1) - c2 = np.array([[1, 2], [3, 4]], order='C', dtype=np.float32) - c2_zeros = np.zeros_like(c2) - f1 = np.array([1, 2], order='F', dtype=np.float32) - f1_zeros = np.zeros_like(f1) - f2 = np.array([[1, 2], [3, 4]], order='F', dtype=np.float32) - f2_zeros = np.zeros_like(f2) - f2_copy = f2.copy('K') - pyfunc = mod.vector_sin_float32 - cfunc = jit(nopython=True)(pyfunc) - # No exception because of C layout and single dimension - self.check_vector_sin(cfunc, c1, c1_zeros) - # No exception because of C layout - cfunc(c2, c2_zeros) - sin_c2 = np.sin(c2) - sin_c2[1] = [0, 0] # Reset to zero, since cfunc only processes one row - np.testing.assert_allclose(c2_zeros, sin_c2) - # No exception because of single dimension - self.check_vector_sin(cfunc, f1, f1_zeros) - # Exception because multi-dimensional with F layout - with self.assertRaises(errors.TypingError) as raises: - cfunc(f2, f2_zeros) - np.testing.assert_allclose(f2, f2_copy) - self.assertIn("from_buffer() only supports multidimensional arrays with C layout", - str(raises.exception)) - - def test_indirect_multiple_use(self): - """ - Issue #2263 - - Linkage error due to multiple definition of global tracking symbol. 
- """ - my_sin = mod.cffi_sin - - # Use two jit functions that references `my_sin` to ensure multiple - # modules - @jit(nopython=True) - def inner(x): - return my_sin(x) - - @jit(nopython=True) - def foo(x): - return inner(x) + my_sin(x + 1) - - # Error occurs when foo is being compiled - x = 1.123 - self.assertEqual(foo(x), my_sin(x) + my_sin(x + 1)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_cfunc.py b/numba/numba/tests/test_cfunc.py deleted file mode 100644 index d5d5f3054..000000000 --- a/numba/numba/tests/test_cfunc.py +++ /dev/null @@ -1,370 +0,0 @@ -""" -Tests for @cfunc and friends. -""" - -from __future__ import division, print_function, absolute_import - -import ctypes -import os -import subprocess -import sys - -import numpy as np - -from numba import unittest_support as unittest -from numba import cfunc, carray, farray, types, typing, utils -from numba import cffi_support -from .support import TestCase, tag, captured_stderr -from .test_dispatcher import BaseCacheTest - - -def add_usecase(a, b): - return a + b - -def div_usecase(a, b): - c = a / b - return c - -def square_usecase(a): - return a ** 2 - -add_sig = "float64(float64, float64)" - -div_sig = "float64(int64, int64)" - -square_sig = "float64(float64)" - -def objmode_usecase(a, b): - object() - return a + b - -# Test functions for carray() and farray() - -CARRAY_USECASE_OUT_LEN = 8 - -def make_cfarray_usecase(func): - - def cfarray_usecase(in_ptr, out_ptr, m, n): - # Tuple shape - in_ = func(in_ptr, (m, n)) - # Integer shape - out = func(out_ptr, CARRAY_USECASE_OUT_LEN) - out[0] = in_.ndim - out[1:3] = in_.shape - out[3:5] = in_.strides - out[5] = in_.flags.c_contiguous - out[6] = in_.flags.f_contiguous - s = 0 - for i, j in np.ndindex(m, n): - s += in_[i, j] * (i - j) - out[7] = s - - return cfarray_usecase - -carray_usecase = make_cfarray_usecase(carray) -farray_usecase = make_cfarray_usecase(farray) - - -def make_cfarray_dtype_usecase(func): - # Same 
as make_cfarray_usecase(), but with explicit dtype. - - def cfarray_usecase(in_ptr, out_ptr, m, n): - # Tuple shape - in_ = func(in_ptr, (m, n), dtype=np.float32) - # Integer shape - out = func(out_ptr, CARRAY_USECASE_OUT_LEN, np.float32) - out[0] = in_.ndim - out[1:3] = in_.shape - out[3:5] = in_.strides - out[5] = in_.flags.c_contiguous - out[6] = in_.flags.f_contiguous - s = 0 - for i, j in np.ndindex(m, n): - s += in_[i, j] * (i - j) - out[7] = s - - return cfarray_usecase - -carray_dtype_usecase = make_cfarray_dtype_usecase(carray) -farray_dtype_usecase = make_cfarray_dtype_usecase(farray) - -carray_float32_usecase_sig = types.void(types.CPointer(types.float32), - types.CPointer(types.float32), - types.intp, types.intp) - -carray_float64_usecase_sig = types.void(types.CPointer(types.float64), - types.CPointer(types.float64), - types.intp, types.intp) - -carray_voidptr_usecase_sig = types.void(types.voidptr, types.voidptr, - types.intp, types.intp) - - -class TestCFunc(TestCase): - - @tag('important') - def test_basic(self): - """ - Basic usage and properties of a cfunc. - """ - f = cfunc(add_sig)(add_usecase) - - self.assertEqual(f.__name__, "add_usecase") - self.assertEqual(f.__qualname__, "add_usecase") - self.assertIs(f.__wrapped__, add_usecase) - - symbol = f.native_name - self.assertIsInstance(symbol, str) - self.assertIn("add_usecase", symbol) - - addr = f.address - self.assertIsInstance(addr, utils.INT_TYPES) - - ct = f.ctypes - self.assertEqual(ctypes.cast(ct, ctypes.c_void_p).value, addr) - - self.assertPreciseEqual(ct(2.0, 3.5), 5.5) - - @tag('important') - @unittest.skipUnless(cffi_support.SUPPORTED, - "CFFI not supported -- please install the cffi module") - def test_cffi(self): - from . 
import cffi_usecases - ffi, lib = cffi_usecases.load_inline_module() - - f = cfunc(square_sig)(square_usecase) - - res = lib._numba_test_funcptr(f.cffi) - self.assertPreciseEqual(res, 2.25) # 1.5 ** 2 - - def test_locals(self): - # By forcing the intermediate result into an integer, we - # truncate the ultimate function result - f = cfunc(div_sig, locals={'c': types.int64})(div_usecase) - self.assertPreciseEqual(f.ctypes(8, 3), 2.0) - - @tag('important') - def test_errors(self): - f = cfunc(div_sig)(div_usecase) - - with captured_stderr() as err: - self.assertPreciseEqual(f.ctypes(5, 2), 2.5) - self.assertEqual(err.getvalue(), "") - - with captured_stderr() as err: - res = f.ctypes(5, 0) - # This is just a side effect of Numba zero-initializing - # stack variables, and could change in the future. - self.assertPreciseEqual(res, 0.0) - err = err.getvalue() - if sys.version_info >= (3,): - self.assertIn("Exception ignored", err) - self.assertIn("ZeroDivisionError: division by zero", err) - else: - self.assertIn("ZeroDivisionError('division by zero',)", err) - self.assertIn(" ignored", err) - - def test_llvm_ir(self): - f = cfunc(add_sig)(add_usecase) - ir = f.inspect_llvm() - self.assertIn(f.native_name, ir) - self.assertIn("fadd double", ir) - - def test_object_mode(self): - """ - Object mode is currently unsupported. - """ - with self.assertRaises(NotImplementedError): - cfunc(add_sig, forceobj=True)(add_usecase) - with self.assertTypingError() as raises: - cfunc(add_sig)(objmode_usecase) - self.assertIn("Untyped global name 'object'", str(raises.exception)) - - -class TestCFuncCache(BaseCacheTest): - - here = os.path.dirname(__file__) - usecases_file = os.path.join(here, "cfunc_cache_usecases.py") - modname = "cfunc_caching_test_fodder" - - def run_in_separate_process(self): - # Cached functions can be run from a distinct process. 
- code = """if 1: - import sys - - sys.path.insert(0, %(tempdir)r) - mod = __import__(%(modname)r) - mod.self_test() - - f = mod.add_usecase - assert f.cache_hits == 1 - f = mod.outer - assert f.cache_hits == 1 - f = mod.div_usecase - assert f.cache_hits == 1 - """ % dict(tempdir=self.tempdir, modname=self.modname) - - popen = subprocess.Popen([sys.executable, "-c", code], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = popen.communicate() - if popen.returncode != 0: - raise AssertionError("process failed with code %s: stderr follows\n%s\n" - % (popen.returncode, err.decode())) - - def check_module(self, mod): - mod.self_test() - - @tag('important') - def test_caching(self): - self.check_pycache(0) - mod = self.import_module() - self.check_pycache(6) # 3 index, 3 data - - self.assertEqual(mod.add_usecase.cache_hits, 0) - self.assertEqual(mod.outer.cache_hits, 0) - self.assertEqual(mod.add_nocache_usecase.cache_hits, 0) - self.assertEqual(mod.div_usecase.cache_hits, 0) - self.check_module(mod) - - # Reload module to hit the cache - mod = self.import_module() - self.check_pycache(6) # 3 index, 3 data - - self.assertEqual(mod.add_usecase.cache_hits, 1) - self.assertEqual(mod.outer.cache_hits, 1) - self.assertEqual(mod.add_nocache_usecase.cache_hits, 0) - self.assertEqual(mod.div_usecase.cache_hits, 1) - self.check_module(mod) - - self.run_in_separate_process() - - -class TestCArray(TestCase): - """ - Tests for carray() and farray(). 
- """ - - def run_carray_usecase(self, pointer_factory, func): - a = np.arange(10, 16).reshape((2, 3)).astype(np.float32) - out = np.empty(CARRAY_USECASE_OUT_LEN, dtype=np.float32) - func(pointer_factory(a), pointer_factory(out), *a.shape) - return out - - def check_carray_usecase(self, pointer_factory, pyfunc, cfunc): - expected = self.run_carray_usecase(pointer_factory, pyfunc) - got = self.run_carray_usecase(pointer_factory, cfunc) - self.assertPreciseEqual(expected, got) - - def make_voidptr(self, arr): - return arr.ctypes.data_as(ctypes.c_void_p) - - def make_float32_pointer(self, arr): - return arr.ctypes.data_as(ctypes.POINTER(ctypes.c_float)) - - def make_float64_pointer(self, arr): - return arr.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) - - def check_carray_farray(self, func, order): - def eq(got, expected): - # Same layout, dtype, shape, etc. - self.assertPreciseEqual(got, expected) - # Same underlying data - self.assertEqual(got.ctypes.data, expected.ctypes.data) - - base = np.arange(6).reshape((2, 3)).astype(np.float32).copy(order=order) - - # With typed pointer and implied dtype - a = func(self.make_float32_pointer(base), base.shape) - eq(a, base) - # Integer shape - a = func(self.make_float32_pointer(base), base.size) - eq(a, base.ravel('K')) - - # With typed pointer and explicit dtype - a = func(self.make_float32_pointer(base), base.shape, base.dtype) - eq(a, base) - a = func(self.make_float32_pointer(base), base.shape, np.float32) - eq(a, base) - - # With voidptr and explicit dtype - a = func(self.make_voidptr(base), base.shape, base.dtype) - eq(a, base) - a = func(self.make_voidptr(base), base.shape, np.int32) - eq(a, base.view(np.int32)) - - # voidptr without dtype - with self.assertRaises(TypeError): - func(self.make_voidptr(base), base.shape) - # Invalid pointer type - with self.assertRaises(TypeError): - func(base.ctypes.data, base.shape) - # Mismatching dtype - with self.assertRaises(TypeError) as raises: - 
func(self.make_float32_pointer(base), base.shape, np.int32) - self.assertIn("mismatching dtype 'int32' for pointer", - str(raises.exception)) - - @tag('important') - def test_carray(self): - """ - Test pure Python carray(). - """ - self.check_carray_farray(carray, 'C') - - def test_farray(self): - """ - Test pure Python farray(). - """ - self.check_carray_farray(farray, 'F') - - def make_carray_sigs(self, formal_sig): - """ - Generate a bunch of concrete signatures by varying the width - and signedness of size arguments (see issue #1923). - """ - for actual_size in (types.intp, types.int32, types.intc, - types.uintp, types.uint32, types.uintc): - args = tuple(actual_size if a == types.intp else a - for a in formal_sig.args) - yield formal_sig.return_type(*args) - - def check_numba_carray_farray(self, usecase, dtype_usecase): - # With typed pointers and implicit dtype - pyfunc = usecase - for sig in self.make_carray_sigs(carray_float32_usecase_sig): - f = cfunc(sig)(pyfunc) - self.check_carray_usecase(self.make_float32_pointer, pyfunc, f.ctypes) - - # With typed pointers and explicit (matching) dtype - pyfunc = dtype_usecase - for sig in self.make_carray_sigs(carray_float32_usecase_sig): - f = cfunc(sig)(pyfunc) - self.check_carray_usecase(self.make_float32_pointer, pyfunc, f.ctypes) - # With typed pointers and mismatching dtype - with self.assertTypingError() as raises: - f = cfunc(carray_float64_usecase_sig)(pyfunc) - self.assertIn("mismatching dtype 'float32' for pointer type 'float64*'", - str(raises.exception)) - - # With voidptr - pyfunc = dtype_usecase - for sig in self.make_carray_sigs(carray_voidptr_usecase_sig): - f = cfunc(sig)(pyfunc) - self.check_carray_usecase(self.make_float32_pointer, pyfunc, f.ctypes) - - @tag('important') - def test_numba_carray(self): - """ - Test Numba-compiled carray() against pure Python carray() - """ - self.check_numba_carray_farray(carray_usecase, carray_dtype_usecase) - - def test_numba_farray(self): - """ - Test 
Numba-compiled farray() against pure Python farray() - """ - self.check_numba_carray_farray(farray_usecase, farray_dtype_usecase) - - -if __name__ == "__main__": - unittest.main() diff --git a/numba/numba/tests/test_cgutils.py b/numba/numba/tests/test_cgutils.py deleted file mode 100644 index 78e23e73c..000000000 --- a/numba/numba/tests/test_cgutils.py +++ /dev/null @@ -1,123 +0,0 @@ -from __future__ import print_function - -import contextlib -import ctypes -import struct -import sys - -import llvmlite.llvmpy.core as lc -import numpy as np - -import numba.unittest_support as unittest -from numba import cgutils, types, typing -from numba.targets import cpu -from .support import TestCase - - -machine_int = lc.Type.int(types.intp.bitwidth) - -def machine_const(n): - return lc.Constant.int(machine_int, n) - - -class StructureTestCase(TestCase): - - def setUp(self): - typing_context = typing.Context() - self.context = cpu.CPUContext(typing_context) - - @contextlib.contextmanager - def compile_function(self, nargs): - llvm_fnty = lc.Type.function(machine_int, [machine_int] * nargs) - ctypes_fnty = ctypes.CFUNCTYPE(ctypes.c_size_t, - * (ctypes.c_size_t,) * nargs) - module = self.context.create_module("") - - function = module.get_or_insert_function(llvm_fnty, - name=self.id()) - assert function.is_declaration - entry_block = function.append_basic_block('entry') - builder = lc.Builder(entry_block) - - first = [True] - - def call_func(*args): - codegen = self.context.codegen() - library = codegen.create_library("test_module.%s" % self.id()) - library.add_ir_module(module) - cptr = library.get_pointer_to_function(function.name) - cfunc = ctypes_fnty(cptr) - return cfunc(*args) - - yield self.context, builder, function.args, call_func - - - def get_bytearray_addr(self, ba): - assert isinstance(ba, bytearray) - ba_as_string = ctypes.pythonapi.PyByteArray_AsString - ba_as_string.argtypes = [ctypes.py_object] - ba_as_string.restype = ctypes.c_void_p - return ba_as_string(ba) - - 
def test_compile_function(self): - # Simple self-test for compile_function() - with self.compile_function(2) as (context, builder, args, call): - res = builder.add(args[0], args[1]) - builder.ret(res) - self.assertEqual(call(5, -2), 3) - self.assertEqual(call(4, 2), 6) - - @contextlib.contextmanager - def run_struct_access(self, struct_class, buf, offset=0): - with self.compile_function(1) as (context, builder, args, call): - inst = struct_class(context, builder) - sptr = builder.add(args[0], machine_const(offset)) - sptr = builder.inttoptr(sptr, lc.Type.pointer(inst._type)) - inst = struct_class(context, builder, ref=sptr) - - yield context, builder, args, inst - - builder.ret(lc.Constant.int(machine_int, 0)) - call(self.get_bytearray_addr(buf)) - - @contextlib.contextmanager - def run_simple_struct_test(self, struct_class, struct_fmt, struct_args): - # By using a too large buffer and a non-zero offset, we also check - # that surrounding memory isn't touched. - buf = bytearray(b'!') * 40 - expected = buf[:] - offset = 8 - - with self.run_struct_access(struct_class, buf, offset) \ - as (context, builder, args, inst): - yield context, builder, inst - - self.assertNotEqual(buf, expected) - struct.pack_into(struct_fmt, expected, offset, *struct_args) - self.assertEqual(buf, expected) - - def test_int_fields(self): - class S(cgutils.Structure): - _fields = [('a', types.int32), - ('b', types.uint16)] - - fmt = "=iH" - with self.run_simple_struct_test(S, fmt, (0x12345678, 0xABCD)) \ - as (context, builder, inst): - inst.a = lc.Constant.int(lc.Type.int(32), 0x12345678) - inst.b = lc.Constant.int(lc.Type.int(16), 0xABCD) - - def test_float_fields(self): - class S(cgutils.Structure): - _fields = [('a', types.float64), - ('b', types.float32)] - - fmt = "=df" - with self.run_simple_struct_test(S, fmt, (1.23, 4.56)) \ - as (context, builder, inst): - inst.a = lc.Constant.real(lc.Type.double(), 1.23) - inst.b = lc.Constant.real(lc.Type.float(), 4.56) - - -if __name__ == 
'__main__': - unittest.main() diff --git a/numba/numba/tests/test_chained_assign.py b/numba/numba/tests/test_chained_assign.py deleted file mode 100644 index d23929fea..000000000 --- a/numba/numba/tests/test_chained_assign.py +++ /dev/null @@ -1,153 +0,0 @@ -from __future__ import print_function - -from numba import jit -import numba.unittest_support as unittest -import numpy as np -import copy -from .support import MemoryLeakMixin - - -try: - xrange -except NameError: - xrange = range - - -@jit -def inc(a): - for i in xrange(len(a)): - a[i] += 1 - return a - -@jit -def inc1(a): - a[0] += 1 - return a[0] - -@jit -def inc2(a): - a[0] += 1 - return a[0], a[0] + 1 - - -def chain1(a): - x = y = z = inc(a) - return x + y + z - - -def chain2(v): - a = np.zeros(2) - a[0] = x = a[1] = v - return a[0] + a[1] + (x / 2) - - -def unpack1(x, y): - a, b = x, y - return a + b / 2 - - -def unpack2(x, y): - a, b = c, d = inc1(x), inc1(y) - return a + c / 2, b + d / 2 - - -def chain3(x, y): - a = (b, c) = (inc1(x), inc1(y)) - (d, e) = f = (inc1(x), inc1(y)) - return (a[0] + b / 2 + d + f[0]), (a[1] + c + e / 2 + f[1]) - - -def unpack3(x): - a, b = inc2(x) - return a + b / 2 - - -def unpack4(x): - a, b = c, d = inc2(x) - return a + c / 2, b + d / 2 - - -def unpack5(x): - a = b, c = inc2(x) - d, e = f = inc2(x) - return (a[0] + b / 2 + d + f[0]), (a[1] + c + e / 2 + f[1]) - - -def unpack6(x, y): - (a, b), (c, d) = (x, y), (y + 1, x + 1) - return a + c / 2, b / 2 + d - - -class TestChainedAssign(MemoryLeakMixin, unittest.TestCase): - def test_chain1(self): - args = [ - [np.arange(2)], - [np.arange(4, dtype=np.double)], - ] - self._test_template(chain1, args) - - def test_chain2(self): - args = [ - [3], - [3.0], - ] - self._test_template(chain2, args) - - def test_unpack1(self): - args = [ - [1, 3.0], - [1.0, 3], - ] - self._test_template(unpack1, args) - - def test_unpack2(self): - args = [ - [np.array([2]), np.array([4.0])], - [np.array([2.0]), np.array([4])], - ] - 
self._test_template(unpack2, args) - - def test_chain3(self): - args = [ - [np.array([0]), np.array([1.5])], - [np.array([0.5]), np.array([1])], - ] - self._test_template(chain3, args) - - def test_unpack3(self): - args = [ - [np.array([1])], - [np.array([1.0])], - ] - self._test_template(unpack3, args) - - def test_unpack4(self): - args = [ - [np.array([1])], - [np.array([1.0])], - ] - self._test_template(unpack4, args) - - def test_unpack5(self): - args = [ - [np.array([2])], - [np.array([2.0])], - ] - self._test_template(unpack5, args) - - def test_unpack6(self): - args1 = 3.0, 2 - args2 = 3.0, 2.0 - self._test_template(unpack6, [args1, args2]) - - def _test_template(self, pyfunc, argcases): - cfunc = jit(pyfunc) - for args in argcases: - a1 = copy.deepcopy(args) - a2 = copy.deepcopy(args) - np.testing.assert_allclose(pyfunc(*a1), cfunc(*a2)) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/tests/test_closure.py b/numba/numba/tests/test_closure.py deleted file mode 100644 index c1375166a..000000000 --- a/numba/numba/tests/test_closure.py +++ /dev/null @@ -1,455 +0,0 @@ -from __future__ import print_function - -import sys - -# import numpy in two ways, both uses needed -import numpy as np -import numpy - -import numba.unittest_support as unittest -from numba import njit, jit, testing, utils -from numba.errors import (NotDefinedError, TypingError, LoweringError, - UnsupportedError) -from .support import TestCase, tag -from numba.six import exec_ - - -class TestClosure(TestCase): - - def run_jit_closure_variable(self, **jitargs): - Y = 10 - - def add_Y(x): - return x + Y - - c_add_Y = jit('i4(i4)', **jitargs)(add_Y) - self.assertEqual(c_add_Y(1), 11) - - # Like globals in Numba, the value of the closure is captured - # at time of JIT - Y = 12 # should not affect function - self.assertEqual(c_add_Y(1), 11) - - def test_jit_closure_variable(self): - self.run_jit_closure_variable(forceobj=True) - - def test_jit_closure_variable_npm(self): - 
self.run_jit_closure_variable(nopython=True) - - def run_rejitting_closure(self, **jitargs): - Y = 10 - - def add_Y(x): - return x + Y - - c_add_Y = jit('i4(i4)', **jitargs)(add_Y) - self.assertEqual(c_add_Y(1), 11) - - # Redo the jit - Y = 12 - c_add_Y_2 = jit('i4(i4)', **jitargs)(add_Y) - self.assertEqual(c_add_Y_2(1), 13) - Y = 13 # should not affect function - self.assertEqual(c_add_Y_2(1), 13) - - self.assertEqual(c_add_Y(1), 11) # Test first function again - - def test_rejitting_closure(self): - self.run_rejitting_closure(forceobj=True) - - def test_rejitting_closure_npm(self): - self.run_rejitting_closure(nopython=True) - - def run_jit_multiple_closure_variables(self, **jitargs): - Y = 10 - Z = 2 - - def add_Y_mult_Z(x): - return (x + Y) * Z - - c_add_Y_mult_Z = jit('i4(i4)', **jitargs)(add_Y_mult_Z) - self.assertEqual(c_add_Y_mult_Z(1), 22) - - def test_jit_multiple_closure_variables(self): - self.run_jit_multiple_closure_variables(forceobj=True) - - def test_jit_multiple_closure_variables_npm(self): - self.run_jit_multiple_closure_variables(nopython=True) - - def run_jit_inner_function(self, **jitargs): - def mult_10(a): - return a * 10 - - c_mult_10 = jit('intp(intp)', **jitargs)(mult_10) - c_mult_10.disable_compile() - - def do_math(x): - return c_mult_10(x + 4) - - c_do_math = jit('intp(intp)', **jitargs)(do_math) - c_do_math.disable_compile() - - with self.assertRefCount(c_do_math, c_mult_10): - self.assertEqual(c_do_math(1), 50) - - def test_jit_inner_function(self): - self.run_jit_inner_function(forceobj=True) - - def test_jit_inner_function_npm(self): - self.run_jit_inner_function(nopython=True) - - @testing.allow_interpreter_mode - def test_return_closure(self): - - def outer(x): - - def inner(): - return x + 1 - - return inner - - cfunc = jit(outer) - self.assertEqual(cfunc(10)(), outer(10)()) - - -class TestInlinedClosure(TestCase): - """ - Tests for (partial) closure support in njit. 
The support is partial - because it only works for closures that can be successfully inlined - at compile time. - """ - - @tag('important') - def test_inner_function(self): - - def outer(x): - - def inner(x): - return x * x - - return inner(x) + inner(x) - - cfunc = njit(outer) - self.assertEqual(cfunc(10), outer(10)) - - @tag('important') - def test_inner_function_with_closure(self): - - def outer(x): - y = x + 1 - - def inner(x): - return x * x + y - - return inner(x) + inner(x) - - cfunc = njit(outer) - self.assertEqual(cfunc(10), outer(10)) - - @tag('important') - def test_inner_function_with_closure_2(self): - - def outer(x): - y = x + 1 - - def inner(x): - return x * y - - y = inner(x) - return y + inner(x) - - cfunc = njit(outer) - self.assertEqual(cfunc(10), outer(10)) - - @unittest.skipIf(utils.PYVERSION < (3, 0), "needs Python 3") - def test_inner_function_with_closure_3(self): - - code = """ - def outer(x): - y = x + 1 - z = 0 - - def inner(x): - nonlocal z - z += x * x - return z + y - - return inner(x) + inner(x) + z - """ - ns = {} - exec_(code.strip(), ns) - - cfunc = njit(ns['outer']) - self.assertEqual(cfunc(10), ns['outer'](10)) - - @tag('important') - def test_inner_function_nested(self): - - def outer(x): - - def inner(y): - - def innermost(z): - return x + y + z - - s = 0 - for i in range(y): - s += innermost(i) - return s - - return inner(x * x) - - cfunc = njit(outer) - self.assertEqual(cfunc(10), outer(10)) - - @tag('important') - def test_bulk_use_cases(self): - """ Tests the large number of use cases defined below """ - - # jitted function used in some tests - @njit - def fib3(n): - if n < 2: - return n - return fib3(n - 1) + fib3(n - 2) - - def outer1(x): - """ Test calling recursive function from inner """ - def inner(x): - return fib3(x) - return inner(x) - - def outer2(x): - """ Test calling recursive function from closure """ - z = x + 1 - - def inner(x): - return x + fib3(z) - return inner(x) - - def outer3(x): - """ Test recursive 
inner """ - def inner(x): - if x + y < 2: - return 10 - else: - inner(x - 1) - return inner(x) - - def outer4(x): - """ Test recursive closure """ - y = x + 1 - - def inner(x): - if x + y < 2: - return 10 - else: - inner(x - 1) - return inner(x) - - def outer5(x): - """ Test nested closure """ - y = x + 1 - - def inner1(x): - z = y + x + 2 - - def inner2(x): - return x + z - - return inner2(x) + y - - return inner1(x) - - def outer6(x): - """ Test closure with list comprehension in body """ - y = x + 1 - - def inner1(x): - z = y + x + 2 - return [t for t in range(z)] - return inner1(x) - - _OUTER_SCOPE_VAR = 9 - - def outer7(x): - """ Test use of outer scope var, no closure """ - z = x + 1 - return x + z + _OUTER_SCOPE_VAR - - _OUTER_SCOPE_VAR = 9 - - def outer8(x): - """ Test use of outer scope var, with closure """ - z = x + 1 - - def inner(x): - return x + z + _OUTER_SCOPE_VAR - return inner(x) - - def outer9(x): - """ Test closure assignment""" - z = x + 1 - - def inner(x): - return x + z - f = inner - return f(x) - - def outer10(x): - """ Test two inner, one calls other """ - z = x + 1 - - def inner(x): - return x + z - - def inner2(x): - return inner(x) - - return inner2(x) - - def outer11(x): - """ return the closure """ - z = x + 1 - - def inner(x): - return x + z - return inner - - def outer12(x): - """ closure with kwarg""" - z = x + 1 - - def inner(x, kw=7): - return x + z + kw - return inner(x) - - def outer13(x, kw=7): - """ outer with kwarg no closure""" - z = x + 1 + kw - return z - - def outer14(x, kw=7): - """ outer with kwarg used in closure""" - z = x + 1 - - def inner(x): - return x + z + kw - return inner(x) - - def outer15(x, kw=7): - """ outer with kwarg as arg to closure""" - z = x + 1 - - def inner(x, kw): - return x + z + kw - return inner(x, kw) - - def outer16(x): - """ closure is generator, consumed locally """ - z = x + 1 - - def inner(x): - yield x + z - - return list(inner(x)) - - def outer17(x): - """ closure is generator, returned 
""" - z = x + 1 - - def inner(x): - yield x + z - - return inner(x) - - def outer18(x): - """ closure is generator, consumed in loop """ - z = x + 1 - - def inner(x): - yield x + z - - for i in inner(x): - t = i - - return t - - def outer19(x): - """ closure as arg to another closure """ - z1 = x + 1 - z2 = x + 2 - - def inner(x): - return x + z1 - - def inner2(f, x): - return f(x) + z2 - - return inner2(inner, x) - - def outer20(x): - #""" Test calling numpy in closure """ - z = x + 1 - - def inner(x): - return x + numpy.cos(z) - return inner(x) - - def outer21(x): - #""" Test calling numpy import as in closure """ - z = x + 1 - - def inner(x): - return x + np.cos(z) - return inner(x) - - # functions to test that are expected to pass - f = [outer1, outer2, outer5, outer6, outer7, outer8, - outer9, outer10, outer12, outer13, outer14, - outer15, outer19, outer20, outer21] - for ref in f: - cfunc = njit(ref) - var = 10 - self.assertEqual(cfunc(var), ref(var)) - - # test functions that are expected to fail - with self.assertRaises(NotImplementedError) as raises: - cfunc = jit(nopython=True)(outer3) - cfunc(var) - msg = "Unsupported use of op_LOAD_CLOSURE encountered" - self.assertIn(msg, str(raises.exception)) - - with self.assertRaises(NotImplementedError) as raises: - cfunc = jit(nopython=True)(outer4) - cfunc(var) - msg = "Unsupported use of op_LOAD_CLOSURE encountered" - self.assertIn(msg, str(raises.exception)) - - with self.assertRaises(UnsupportedError) as raises: - cfunc = jit(nopython=True)(outer11) - cfunc(var) - msg = "make_function" - self.assertIn(msg, str(raises.exception)) - - with self.assertRaises(TypingError) as raises: - cfunc = jit(nopython=True)(outer16) - cfunc(var) - msg = "with argument(s) of type(s): (none)" - self.assertIn(msg, str(raises.exception)) - - with self.assertRaises(LoweringError) as raises: - cfunc = jit(nopython=True)(outer17) - cfunc(var) - msg = "'NoneType' object has no attribute 'yield_points'" - self.assertIn(msg, 
str(raises.exception)) - - with self.assertRaises(TypingError) as raises: - cfunc = jit(nopython=True)(outer18) - cfunc(var) - msg = "Invalid use of getiter with parameters (none)" - self.assertIn(msg, str(raises.exception)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_codegen.py b/numba/numba/tests/test_codegen.py deleted file mode 100644 index e0251efaf..000000000 --- a/numba/numba/tests/test_codegen.py +++ /dev/null @@ -1,207 +0,0 @@ -""" -Tests for numba.targets.codegen. -""" - -from __future__ import print_function - -import warnings -import base64 -import ctypes -import pickle -import subprocess -import sys -import weakref - -import llvmlite.binding as ll - -import numba.unittest_support as unittest -from numba import utils -from numba.targets.codegen import JITCPUCodegen -from .support import TestCase - - -asm_sum = r""" - define i32 @sum(i32 %.1, i32 %.2) { - %.3 = add i32 %.1, %.2 - ret i32 %.3 - } - """ - -# Note we're using a rather mangled function name to check that it -# is compatible with object serialization. - -asm_sum_inner = """ - define i32 @"__main__.ising_element_update$1.array(int8,_2d,_C).int64.int64"(i32 %.1, i32 %.2) { - %.3 = add i32 %.1, %.2 - ret i32 %.3 - } -""" - -asm_sum_outer = """ - declare i32 @"__main__.ising_element_update$1.array(int8,_2d,_C).int64.int64"(i32 %.1, i32 %.2) - - define i32 @sum(i32 %.1, i32 %.2) { - %.3 = call i32 @"__main__.ising_element_update$1.array(int8,_2d,_C).int64.int64"(i32 %.1, i32 %.2) - ret i32 %.3 - } -""" - -ctypes_sum_ty = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, ctypes.c_int) - - -class JITCPUCodegenTestCase(TestCase): - """ - Test the JIT code generation. 
- """ - - def setUp(self): - self.codegen = JITCPUCodegen('test_codegen') - - def tearDown(self): - del self.codegen - - def compile_module(self, asm, linking_asm=None): - library = self.codegen.create_library('compiled_module') - ll_module = ll.parse_assembly(asm) - ll_module.verify() - library.add_llvm_module(ll_module) - if linking_asm: - linking_library = self.codegen.create_library('linking_module') - ll_module = ll.parse_assembly(linking_asm) - ll_module.verify() - linking_library.add_llvm_module(ll_module) - library.add_linking_library(linking_library) - return library - - @classmethod - def _check_unserialize_sum(cls, state): - codegen = JITCPUCodegen('other_codegen') - library = codegen.unserialize_library(state) - ptr = library.get_pointer_to_function("sum") - assert ptr, ptr - cfunc = ctypes_sum_ty(ptr) - res = cfunc(2, 3) - assert res == 5, res - - def test_get_pointer_to_function(self): - library = self.compile_module(asm_sum) - ptr = library.get_pointer_to_function("sum") - self.assertIsInstance(ptr, utils.integer_types) - cfunc = ctypes_sum_ty(ptr) - self.assertEqual(cfunc(2, 3), 5) - # Note: With llvm3.9.1, deleting `library` will cause memory error in - # the following code during running of optimization passes in - # LLVM. The reason of the error is unclear. The error is known to - # replicate on osx64 and linux64. - - # Same, but with dependency on another library - library2 = self.compile_module(asm_sum_outer, asm_sum_inner) - ptr = library2.get_pointer_to_function("sum") - self.assertIsInstance(ptr, utils.integer_types) - cfunc = ctypes_sum_ty(ptr) - self.assertEqual(cfunc(2, 3), 5) - - def test_magic_tuple(self): - tup = self.codegen.magic_tuple() - pickle.dumps(tup) - cg2 = JITCPUCodegen('xxx') - self.assertEqual(cg2.magic_tuple(), tup) - - # Serialization tests. 
- - def _check_serialize_unserialize(self, state): - self._check_unserialize_sum(state) - - def _check_unserialize_other_process(self, state): - arg = base64.b64encode(pickle.dumps(state, -1)) - code = """if 1: - import base64 - import pickle - import sys - from numba.tests.test_codegen import %(test_class)s - - state = pickle.loads(base64.b64decode(sys.argv[1])) - %(test_class)s._check_unserialize_sum(state) - """ % dict(test_class=self.__class__.__name__) - subprocess.check_call([sys.executable, '-c', code, arg.decode()]) - - def test_serialize_unserialize_bitcode(self): - library = self.compile_module(asm_sum_outer, asm_sum_inner) - state = library.serialize_using_bitcode() - self._check_serialize_unserialize(state) - - def test_unserialize_other_process_bitcode(self): - library = self.compile_module(asm_sum_outer, asm_sum_inner) - state = library.serialize_using_bitcode() - self._check_unserialize_other_process(state) - - def test_serialize_unserialize_object_code(self): - library = self.compile_module(asm_sum_outer, asm_sum_inner) - library.enable_object_caching() - state = library.serialize_using_object_code() - self._check_serialize_unserialize(state) - - def test_unserialize_other_process_object_code(self): - library = self.compile_module(asm_sum_outer, asm_sum_inner) - library.enable_object_caching() - state = library.serialize_using_object_code() - self._check_unserialize_other_process(state) - - def test_cache_disabled_inspection(self): - """ - """ - library = self.compile_module(asm_sum_outer, asm_sum_inner) - library.enable_object_caching() - state = library.serialize_using_object_code() - - # exercise the valid behavior - with warnings.catch_warnings(record=True) as w: - old_llvm = library.get_llvm_str() - old_asm = library.get_asm_str() - library.get_function_cfg('sum') - self.assertEqual(len(w), 0) - - # unserialize - codegen = JITCPUCodegen('other_codegen') - library = codegen.unserialize_library(state) - - # the inspection methods would warn and 
give incorrect result - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - self.assertNotEqual(old_llvm, library.get_llvm_str()) - self.assertEqual(len(w), 1) - self.assertIn("Inspection disabled", str(w[0].message)) - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - self.assertNotEqual(library.get_asm_str(), old_asm) - self.assertEqual(len(w), 1) - self.assertIn("Inspection disabled", str(w[0].message)) - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - with self.assertRaises(NameError) as raises: - library.get_function_cfg('sum') - self.assertEqual(len(w), 1) - self.assertIn("Inspection disabled", str(w[0].message)) - self.assertIn("sum", str(raises.exception)) - - # Lifetime tests - - @unittest.expectedFailure # MCJIT removeModule leaks and it is disabled - def test_library_lifetime(self): - library = self.compile_module(asm_sum_outer, asm_sum_inner) - # Exercise code generation - library.enable_object_caching() - library.serialize_using_bitcode() - library.serialize_using_object_code() - u = weakref.ref(library) - v = weakref.ref(library._final_module) - del library - # Both the library and its backing LLVM module are collected - self.assertIs(u(), None) - self.assertIs(v(), None) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_compile_cache.py b/numba/numba/tests/test_compile_cache.py deleted file mode 100644 index 1eb62db0c..000000000 --- a/numba/numba/tests/test_compile_cache.py +++ /dev/null @@ -1,133 +0,0 @@ -from __future__ import division - -import numba.unittest_support as unittest - -import llvmlite.llvmpy.core as lc - -import numpy as np - -from numba import compiler, types, typing -from numba.targets import callconv, cpu - - -class TestCompileCache(unittest.TestCase): - ''' - Tests that the caching in BaseContext.compile_internal() works correctly by - checking the state of the cache when it is used by the 
CPUContext. - ''' - - def _context_builder_sig_args(self): - typing_context = typing.Context() - context = cpu.CPUContext(typing_context) - module = lc.Module("test_module") - - sig = typing.signature(types.int32, types.int32) - llvm_fnty = context.call_conv.get_function_type(sig.return_type, - sig.args) - function = module.get_or_insert_function(llvm_fnty, name='test_fn') - args = context.call_conv.get_arguments(function) - assert function.is_declaration - entry_block = function.append_basic_block('entry') - builder = lc.Builder(entry_block) - - return context, builder, sig, args - - def test_cache(self): - def times2(i): - return 2*i - - def times3(i): - return i*3 - - context, builder, sig, args = self._context_builder_sig_args() - - # Ensure the cache is empty to begin with - self.assertEqual(0, len(context.cached_internal_func)) - - # After one compile, it should contain one entry - context.compile_internal(builder, times2, sig, args) - self.assertEqual(1, len(context.cached_internal_func)) - - # After a second compilation of the same thing, it should still contain - # one entry - context.compile_internal(builder, times2, sig, args) - self.assertEqual(1, len(context.cached_internal_func)) - - # After compilation of another function, the cache should have grown by - # one more. 
- context.compile_internal(builder, times3, sig, args) - self.assertEqual(2, len(context.cached_internal_func)) - - sig2 = typing.signature(types.float64, types.float64) - llvm_fnty2 = context.call_conv.get_function_type(sig2.return_type, - sig2.args) - function2 = builder.module.get_or_insert_function(llvm_fnty2, - name='test_fn_2') - args2 = context.call_conv.get_arguments(function2) - assert function2.is_declaration - entry_block2 = function2.append_basic_block('entry') - builder2 = lc.Builder(entry_block2) - - # Ensure that the same function with a different signature does not - # reuse an entry from the cache in error - context.compile_internal(builder2, times3, sig2, args2) - self.assertEqual(3, len(context.cached_internal_func)) - - def test_closures(self): - """ - Caching must not mix up closures reusing the same code object. - """ - def make_closure(x, y): - def f(z): - return y + z - return f - - context, builder, sig, args = self._context_builder_sig_args() - - # Closures with distinct cell contents must each be compiled. - clo11 = make_closure(1, 1) - clo12 = make_closure(1, 2) - clo22 = make_closure(2, 2) - res1 = context.compile_internal(builder, clo11, sig, args) - self.assertEqual(1, len(context.cached_internal_func)) - res2 = context.compile_internal(builder, clo12, sig, args) - self.assertEqual(2, len(context.cached_internal_func)) - # Same cell contents as above (first parameter isn't captured) - res3 = context.compile_internal(builder, clo22, sig, args) - self.assertEqual(2, len(context.cached_internal_func)) - - def test_error_model(self): - """ - Caching must not mix up different error models. 
- """ - def inv(x): - return 1.0 / x - - inv_sig = typing.signature(types.float64, types.float64) - - def compile_inv(context): - return context.compile_subroutine(builder, inv, inv_sig) - - context, builder, sig, args = self._context_builder_sig_args() - - py_error_model = callconv.create_error_model('python', context) - np_error_model = callconv.create_error_model('numpy', context) - - py_context1 = context.subtarget(error_model=py_error_model) - py_context2 = context.subtarget(error_model=py_error_model) - np_context = context.subtarget(error_model=np_error_model) - - # Note the parent context's cache is shared by subtargets - self.assertEqual(0, len(context.cached_internal_func)) - # Compiling with the same error model reuses the same cache slot - compile_inv(py_context1) - self.assertEqual(1, len(context.cached_internal_func)) - compile_inv(py_context2) - self.assertEqual(1, len(context.cached_internal_func)) - # Compiling with another error model creates a new cache slot - compile_inv(np_context) - self.assertEqual(2, len(context.cached_internal_func)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_complex.py b/numba/numba/tests/test_complex.py deleted file mode 100644 index bd830fd63..000000000 --- a/numba/numba/tests/test_complex.py +++ /dev/null @@ -1,317 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import cmath -import itertools -import math -import sys - -from numba import unittest_support as unittest -from numba.compiler import compile_isolated, Flags, utils -from numba import types -from .support import TestCase, tag -from .complex_usecases import * - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -no_pyobj_flags = Flags() - - -class BaseComplexTest(object): - - def basic_values(self): - reals = [-0.0, +0.0, 1, -1, +1.5, -3.5, - float('-inf'), float('+inf')] - if sys.platform != 'win32': - reals += [float('nan')] - return [complex(x, y) for x, y in 
itertools.product(reals, reals)] - - def more_values(self): - reals = [-0.0, +0.0, 1, -1, -math.pi, +math.pi, - float('-inf'), float('+inf')] - if sys.platform != 'win32': - reals += [float('nan')] - return [complex(x, y) for x, y in itertools.product(reals, reals)] - - def non_nan_values(self): - reals = [-0.0, +0.0, 1, -1, -math.pi, +math.pi, - float('inf'), float('-inf')] - return [complex(x, y) for x, y in itertools.product(reals, reals)] - - def run_unary(self, pyfunc, x_types, x_values, ulps=1, abs_tol=None, - flags=enable_pyobj_flags): - for tx in x_types: - cr = compile_isolated(pyfunc, [tx], flags=flags) - cfunc = cr.entry_point - prec = 'single' if tx in (types.float32, types.complex64) else 'double' - for vx in x_values: - try: - expected = pyfunc(vx) - except ValueError as e: - self.assertIn("math domain error", str(e)) - continue - got = cfunc(vx) - msg = 'for input %r with prec %r' % (vx, prec) - self.assertPreciseEqual(got, expected, prec=prec, - ulps=ulps, abs_tol=abs_tol, msg=msg) - - def run_binary(self, pyfunc, value_types, values, ulps=1, - flags=enable_pyobj_flags): - for tx, ty in value_types: - cr = compile_isolated(pyfunc, [tx, ty], flags=flags) - cfunc = cr.entry_point - prec = ('single' - if set([tx, ty]) & set([types.float32, types.complex64]) - else 'double') - for vx, vy in values: - try: - expected = pyfunc(vx, vy) - except ValueError as e: - self.assertIn("math domain error", str(e)) - continue - except ZeroDivisionError: - continue - got = cfunc(vx, vy) - msg = 'for input %r with prec %r' % ((vx, vy), prec) - self.assertPreciseEqual(got, expected, prec=prec, - ulps=ulps, msg=msg) - - -class TestComplex(BaseComplexTest, TestCase): - - def test_real(self, flags=enable_pyobj_flags): - self.run_unary(real_usecase, [types.complex64, types.complex128], - self.basic_values(), flags=flags) - self.run_unary(real_usecase, [types.int8, types.int64], - [1, 0, -3], flags=flags) - self.run_unary(real_usecase, [types.float32, types.float64], - 
[1.5, -0.5], flags=flags) - - def test_real_npm(self): - self.test_real(flags=no_pyobj_flags) - - def test_imag(self, flags=enable_pyobj_flags): - self.run_unary(imag_usecase, [types.complex64, types.complex128], - self.basic_values(), flags=flags) - self.run_unary(imag_usecase, [types.int8, types.int64], - [1, 0, -3], flags=flags) - self.run_unary(imag_usecase, [types.float32, types.float64], - [1.5, -0.5], flags=flags) - - def test_imag_npm(self): - self.test_imag(flags=no_pyobj_flags) - - def test_conjugate(self, flags=enable_pyobj_flags): - self.run_unary(conjugate_usecase, [types.complex64, types.complex128], - self.basic_values(), flags=flags) - self.run_unary(conjugate_usecase, [types.int8, types.int64], - [1, 0, -3], flags=flags) - self.run_unary(conjugate_usecase, [types.float32, types.float64], - [1.5, -0.5], flags=flags) - - def test_conjugate_npm(self): - self.test_conjugate(flags=no_pyobj_flags) - - def test_div(self, flags=enable_pyobj_flags): - """ - Test complex.__div__ implementation with non-trivial values. - """ - # XXX Fold into test_operator? - values = list(itertools.product(self.more_values(), self.more_values())) - value_types = [(types.complex128, types.complex128), - (types.complex64, types.complex64)] - self.run_binary(div_usecase, value_types, values, flags=flags) - - @tag('important') - def test_div_npm(self): - self.test_div(flags=no_pyobj_flags) - - -class TestCMath(BaseComplexTest, TestCase): - """ - Tests for cmath module support. - """ - - def check_predicate_func(self, pyfunc, flags): - self.run_unary(pyfunc, [types.complex128, types.complex64], - self.basic_values(), flags=flags) - - def check_unary_func(self, pyfunc, flags, ulps=1, abs_tol=None, - values=None): - self.run_unary(pyfunc, [types.complex128], - values or self.more_values(), flags=flags, ulps=ulps, - abs_tol=abs_tol) - # Avoid discontinuities around pi when in single precision. 
- self.run_unary(pyfunc, [types.complex64], - values or self.basic_values(), flags=flags, ulps=ulps, - abs_tol=abs_tol) - - # Conversions - - def test_phase(self): - self.check_unary_func(phase_usecase, enable_pyobj_flags) - - def test_phase_npm(self): - self.check_unary_func(phase_usecase, no_pyobj_flags) - - def test_polar(self): - self.check_unary_func(polar_usecase, enable_pyobj_flags) - - def test_polar_npm(self): - self.check_unary_func(polar_usecase, no_pyobj_flags) - - def test_rect(self, flags=enable_pyobj_flags): - def do_test(tp, seed_values): - values = [(z.real, z.imag) for z in seed_values - if not math.isinf(z.imag) or z.real == 0] - self.run_binary(rect_usecase, [(tp, tp)], values, flags=flags) - do_test(types.float64, self.more_values()) - # Avoid discontinuities around pi when in single precision. - do_test(types.float32, self.basic_values()) - - def test_rect_npm(self): - self.test_rect(flags=no_pyobj_flags) - - # Classification - - def test_isnan(self, flags=enable_pyobj_flags): - self.check_predicate_func(isnan_usecase, enable_pyobj_flags) - - @tag('important') - def test_isnan_npm(self): - self.check_predicate_func(isnan_usecase, no_pyobj_flags) - - def test_isinf(self, flags=enable_pyobj_flags): - self.check_predicate_func(isinf_usecase, enable_pyobj_flags) - - @tag('important') - def test_isinf_npm(self): - self.check_predicate_func(isinf_usecase, no_pyobj_flags) - - @unittest.skipIf(utils.PYVERSION < (3, 2), "needs Python 3.2+") - def test_isfinite(self, flags=enable_pyobj_flags): - self.check_predicate_func(isfinite_usecase, enable_pyobj_flags) - - @unittest.skipIf(utils.PYVERSION < (3, 2), "needs Python 3.2+") - def test_isfinite_npm(self): - self.check_predicate_func(isfinite_usecase, no_pyobj_flags) - - # Power and logarithms - - def test_exp(self): - self.check_unary_func(exp_usecase, enable_pyobj_flags, ulps=2) - - def test_exp_npm(self): - # Aggressive optimization fixes the following subnormal float problem. 
- ## The two tests are failing due to subnormal float problems. - ## We are seeing (6.9532198665326e-310+2.1221202807e-314j) != 0j - self.check_unary_func(exp_usecase, no_pyobj_flags, ulps=2) - - def test_log(self): - self.check_unary_func(log_usecase, enable_pyobj_flags) - - def test_log_npm(self): - self.check_unary_func(log_usecase, no_pyobj_flags) - - def test_log_base(self, flags=enable_pyobj_flags): - values = list(itertools.product(self.more_values(), self.more_values())) - value_types = [(types.complex128, types.complex128), - (types.complex64, types.complex64)] - self.run_binary(log_base_usecase, value_types, values, flags=flags, - ulps=3) - - def test_log_base_npm(self): - self.test_log_base(flags=no_pyobj_flags) - - def test_log10(self): - self.check_unary_func(log10_usecase, enable_pyobj_flags) - - def test_log10_npm(self): - self.check_unary_func(log10_usecase, no_pyobj_flags) - - def test_sqrt(self): - self.check_unary_func(sqrt_usecase, enable_pyobj_flags) - - def test_sqrt_npm(self): - self.check_unary_func(sqrt_usecase, no_pyobj_flags) - - # Trigonometric functions - - def test_acos(self): - self.check_unary_func(acos_usecase, enable_pyobj_flags, ulps=2) - - def test_acos_npm(self): - self.check_unary_func(acos_usecase, no_pyobj_flags, ulps=2) - - def test_asin(self): - self.check_unary_func(asin_usecase, enable_pyobj_flags, ulps=2) - - def test_asin_npm(self): - self.check_unary_func(asin_usecase, no_pyobj_flags, ulps=2) - - def test_atan(self): - self.check_unary_func(atan_usecase, enable_pyobj_flags, ulps=2,) - - def test_atan_npm(self): - self.check_unary_func(atan_usecase, no_pyobj_flags, ulps=2,) - - def test_cos(self): - self.check_unary_func(cos_usecase, enable_pyobj_flags, ulps=2) - - def test_cos_npm(self): - self.check_unary_func(cos_usecase, no_pyobj_flags, ulps=2) - - def test_sin(self): - # See test_sinh. 
- self.check_unary_func(sin_usecase, enable_pyobj_flags, abs_tol='eps') - - def test_sin_npm(self): - self.check_unary_func(sin_usecase, no_pyobj_flags, abs_tol='eps') - - def test_tan(self): - self.check_unary_func(tan_usecase, enable_pyobj_flags, ulps=2) - - def test_tan_npm(self): - self.check_unary_func(tan_usecase, enable_pyobj_flags, ulps=2) - - # Hyperbolic functions - - def test_acosh(self): - self.check_unary_func(acosh_usecase, enable_pyobj_flags) - - def test_acosh_npm(self): - self.check_unary_func(acosh_usecase, no_pyobj_flags) - - def test_asinh(self): - self.check_unary_func(asinh_usecase, enable_pyobj_flags, ulps=2) - - def test_asinh_npm(self): - self.check_unary_func(asinh_usecase, no_pyobj_flags, ulps=2) - - def test_atanh(self): - self.check_unary_func(atanh_usecase, enable_pyobj_flags, ulps=2) - - def test_atanh_npm(self): - self.check_unary_func(atanh_usecase, no_pyobj_flags, ulps=2) - - def test_cosh(self): - self.check_unary_func(cosh_usecase, enable_pyobj_flags, ulps=2) - - def test_cosh_npm(self): - self.check_unary_func(cosh_usecase, no_pyobj_flags, ulps=2) - - def test_sinh(self): - self.check_unary_func(sinh_usecase, enable_pyobj_flags, abs_tol='eps') - - def test_sinh_npm(self): - self.check_unary_func(sinh_usecase, no_pyobj_flags, abs_tol='eps') - - def test_tanh(self): - self.check_unary_func(tanh_usecase, enable_pyobj_flags, ulps=2) - - def test_tanh_npm(self): - self.check_unary_func(tanh_usecase, enable_pyobj_flags, ulps=2) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_comprehension.py b/numba/numba/tests/test_comprehension.py deleted file mode 100644 index 519cbf3c8..000000000 --- a/numba/numba/tests/test_comprehension.py +++ /dev/null @@ -1,451 +0,0 @@ -from __future__ import print_function - -import numba.unittest_support as unittest - -import sys - -# deliberately imported twice for different use cases -import numpy as np -import numpy - -from numba.compiler import compile_isolated -from 
numba import types, utils, jit, types -from numba.errors import TypingError, LoweringError -from .support import tag - -from .test_parfors import _windows_py27, _32bit - -PARALLEL_SUPPORTED = not (_windows_py27 or _32bit) - -def comp_list(n): - l = [i for i in range(n)] - s = 0 - for i in l: - s += i - return s - - -class TestListComprehension(unittest.TestCase): - - @tag('important') - def test_comp_list(self): - pyfunc = comp_list - cres = compile_isolated(pyfunc, [types.intp]) - cfunc = cres.entry_point - self.assertEqual(cfunc(5), pyfunc(5)) - self.assertEqual(cfunc(0), pyfunc(0)) - self.assertEqual(cfunc(-1), pyfunc(-1)) - - @tag('important') - def test_bulk_use_cases(self): - """ Tests the large number of use cases defined below """ - - # jitted function used in some tests - @jit(nopython=True) - def fib3(n): - if n < 2: - return n - return fib3(n - 1) + fib3(n - 2) - - def list1(x): - """ Test basic list comprehension """ - return [i for i in range(1, len(x) - 1)] - - def list2(x): - """ Test conditional list comprehension """ - return [y for y in x if y < 2] - - def list3(x): - """ Test ternary list comprehension """ - return [y if y < 2 else -1 for y in x] - - def list4(x): - """ Test list comprehension to np.array ctor """ - return np.array([1, 2, 3]) - - # expected fail, unsupported type in sequence - def list5(x): - """ Test nested list comprehension to np.array ctor """ - return np.array([np.array([z for z in x]) for y in x]) - - def list6(x): - """ Test use of inner function in list comprehension """ - def inner(x): - return x + 1 - return [inner(z) for z in x] - - def list7(x): - """ Test use of closure in list comprehension """ - y = 3 - - def inner(x): - return x + y - return [inner(z) for z in x] - - def list8(x): - """ Test use of list comprehension as arg to inner function """ - l = [z + 1 for z in x] - - def inner(x): - return x[0] + 1 - q = inner(l) - return q - - def list9(x): - """ Test use of list comprehension access in closure """ - l = 
[z + 1 for z in x] - - def inner(x): - return x[0] + l[1] - return inner(x) - - def list10(x): - """ Test use of list comprehension access in closure and as arg """ - l = [z + 1 for z in x] - - def inner(x): - return [y + l[0] for y in x] - return inner(l) - - # expected fail, nested mem managed object - def list11(x): - """ Test scalar array construction in list comprehension """ - l = [np.array(z) for z in x] - return l - - def list12(x): - """ Test scalar type conversion construction in list comprehension """ - l = [np.float64(z) for z in x] - return l - - def list13(x): - """ Test use of explicit numpy scalar ctor reference in list comprehension """ - l = [numpy.float64(z) for z in x] - return l - - def list14(x): - """ Test use of python scalar ctor reference in list comprehension """ - l = [float(z) for z in x] - return l - - def list15(x): - """ Test use of python scalar ctor reference in list comprehension followed by np array construction from the list""" - l = [float(z) for z in x] - return np.array(l) - - def list16(x): - """ Test type unification from np array ctors consuming list comprehension """ - l1 = [float(z) for z in x] - l2 = [z for z in x] - ze = np.array(l1) - oe = np.array(l2) - return ze + oe - - def list17(x): - """ Test complex list comprehension including math calls """ - return [(a, b, c) - for a in x for b in x for c in x if np.sqrt(a**2 + b**2) == c] - - _OUTER_SCOPE_VAR = 9 - - def list18(x): - """ Test loop list with outer scope var as conditional""" - z = [] - for i in x: - if i < _OUTER_SCOPE_VAR: - z.append(i) - return z - - _OUTER_SCOPE_VAR = 9 - - def list19(x): - """ Test list comprehension with outer scope as conditional""" - return [i for i in x if i < _OUTER_SCOPE_VAR] - - def list20(x): - """ Test return empty list """ - return [i for i in x if i == -1000] - - def list21(x): - """ Test call a jitted function in a list comprehension """ - return [fib3(i) for i in x] - - def list22(x): - """ Test create two lists 
comprehensions and a third walking the first two """ - a = [y - 1 for y in x] - b = [y + 1 for y in x] - return [x for x in a for y in b if x == y] - - def list23(x): - """ Test operation on comprehension generated list """ - z = [y for y in x] - z.append(1) - return z - - def list24(x): - """ Test type promotion """ - z = [float(y) if y > 3 else y for y in x] - return z - - # functions to test that are expected to pass - f = [list1, list2, list3, list4, - list6, list7, list8, list9, list10, list11, - list12, list13, list14, list15, - list16, list17, list18, list19, list20, - list21, list23, list24] - - if utils.PYVERSION >= (3, 0): - f.append(list22) - - var = [1, 2, 3, 4, 5] - for ref in f: - try: - cfunc = jit(nopython=True)(ref) - self.assertEqual(cfunc(var), ref(var)) - except ValueError: # likely np array returned - try: - np.testing.assert_allclose(cfunc(var), ref(var)) - except BaseException: - raise - - # test functions that are expected to fail - with self.assertRaises(TypingError) as raises: - cfunc = jit(nopython=True)(list5) - cfunc(var) - # TODO: we can't really assert the error message for the above - # Also, test_nested_array is a similar case (but without list) that works. 
- - if sys.maxsize > 2 ** 32: - bits = 64 - else: - bits = 32 - - if utils.PYVERSION < (3, 0): - with self.assertRaises(TypingError) as raises: - cfunc = jit(nopython=True)(list22) - cfunc(var) - msg = "Cannot unify reflected list(int%d) and int%d" % (bits, bits) - self.assertIn(msg, str(raises.exception)) - -class TestArrayComprehension(unittest.TestCase): - - def check(self, pyfunc, *args, **kwargs): - """A generic check function that run both pyfunc, and jitted pyfunc, - and compare results.""" - run_parallel = kwargs['run_parallel'] if 'run_parallel' in kwargs else False - assert_allocate_list = kwargs['assert_allocate_list'] if 'assert_allocate_list' in kwargs else False - assert_dtype = kwargs['assert_dtype'] if 'assert_dtype' in kwargs else None - cfunc = jit(nopython=True,parallel=run_parallel)(pyfunc) - pyres = pyfunc(*args) - cres = cfunc(*args) - np.testing.assert_array_equal(pyres, cres) - if assert_dtype: - self.assertEqual(cres[1].dtype, assert_dtype) - if assert_allocate_list: - self.assertIn('allocate list', cfunc.inspect_llvm(cfunc.signatures[0])) - else: - self.assertNotIn('allocate list', cfunc.inspect_llvm(cfunc.signatures[0])) - if run_parallel: - self.assertIn('@do_scheduling', cfunc.inspect_llvm(cfunc.signatures[0])) - - @tag('important') - def test_comp_with_array_1(self): - def comp_with_array_1(n): - m = n * 2 - l = np.array([i + m for i in range(n)]) - return l - - self.check(comp_with_array_1, 5) - if PARALLEL_SUPPORTED: - self.check(comp_with_array_1, 5, run_parallel=True) - - @tag('important') - def test_comp_with_array_2(self): - def comp_with_array_2(n, threshold): - A = np.arange(-n, n) - return np.array([ x * x if x < threshold else x * 2 for x in A ]) - - self.check(comp_with_array_2, 5, 0) - - @tag('important') - def test_comp_with_array_noinline(self): - def comp_with_array_noinline(n): - m = n * 2 - l = np.array([i + m for i in range(n)]) - return l - - import numba.inline_closurecall as ic - try: - ic.enable_inline_arraycall = 
False - self.check(comp_with_array_noinline, 5, assert_allocate_list=True) - finally: - ic.enable_inline_arraycall = True - - @tag('important') - def test_comp_nest_with_array(self): - def comp_nest_with_array(n): - l = np.array([[i * j for j in range(n)] for i in range(n)]) - return l - - self.check(comp_nest_with_array, 5) - if PARALLEL_SUPPORTED: - self.check(comp_nest_with_array, 5, run_parallel=True) - - @tag('important') - def test_comp_nest_with_array_3(self): - def comp_nest_with_array_3(n): - l = np.array([[[i * j * k for k in range(n)] for j in range(n)] for i in range(n)]) - return l - - self.check(comp_nest_with_array_3, 5) - if PARALLEL_SUPPORTED: - self.check(comp_nest_with_array_3, 5, run_parallel=True) - - @tag('important') - def test_comp_nest_with_array_noinline(self): - def comp_nest_with_array_noinline(n): - l = np.array([[i * j for j in range(n)] for i in range(n)]) - return l - - import numba.inline_closurecall as ic - try: - ic.enable_inline_arraycall = False - self.check(comp_nest_with_array_noinline, 5, - assert_allocate_list=True) - finally: - ic.enable_inline_arraycall = True - - @tag('important') - def test_comp_with_array_range(self): - def comp_with_array_range(m, n): - l = np.array([i for i in range(m, n)]) - return l - - self.check(comp_with_array_range, 5, 10) - - @tag('important') - def test_comp_with_array_range_and_step(self): - def comp_with_array_range_and_step(m, n): - l = np.array([i for i in range(m, n, 2)]) - return l - - self.check(comp_with_array_range_and_step, 5, 10) - - @tag('important') - def test_comp_with_array_conditional(self): - def comp_with_array_conditional(n): - l = np.array([i for i in range(n) if i % 2 == 1]) - return l - # arraycall inline would not happen when conditional is present - self.check(comp_with_array_conditional, 10, assert_allocate_list=True) - - @tag('important') - def test_comp_nest_with_array_conditional(self): - def comp_nest_with_array_conditional(n): - l = np.array([[i * j for j in 
range(n)] for i in range(n) if i % 2 == 1]) - return l - self.check(comp_nest_with_array_conditional, 5, - assert_allocate_list=True) - - @tag('important') - def test_comp_nest_with_dependency(self): - def comp_nest_with_dependency(n): - l = np.array([[i * j for j in range(i+1)] for i in range(n)]) - return l - # test is expected to fail - with self.assertRaises(TypingError) as raises: - self.check(comp_nest_with_dependency, 5) - self.assertIn('Cannot resolve setitem', str(raises.exception)) - - @tag('important') - def test_no_array_comp(self): - def no_array_comp1(n): - l = [1,2,3,4] - a = np.array(l) - return a - # const 1D array is actually inlined - self.check(no_array_comp1, 10, assert_allocate_list=False) - def no_array_comp2(n): - l = [1,2,3,4] - a = np.array(l) - l.append(5) - return a - self.check(no_array_comp2, 10, assert_allocate_list=True) - - @tag('important') - def test_nested_array(self): - def nested_array(n): - l = np.array([ np.array([x for x in range(n)]) for y in range(n)]) - return l - - self.check(nested_array, 10) - - @tag('important') - def test_nested_array_with_const(self): - def nested_array(n): - l = np.array([ np.array([x for x in range(3)]) for y in range(4)]) - return l - - self.check(nested_array, 0) - - @tag('important') - def test_array_comp_with_iter(self): - def array_comp(a): - l = np.array([ x * x for x in a ]) - return l - # with list iterator - l = [1,2,3,4,5] - self.check(array_comp, l) - # with array iterator - self.check(array_comp, np.array(l)) - - def test_array_comp_with_dtype(self): - def array_comp(n): - l = np.array([i for i in range(n)], dtype=np.complex64) - return l - - self.check(array_comp, 10, assert_dtype=np.complex64) - - def test_array_comp_inferred_dtype(self): - def array_comp(n): - l = np.array([i * 1j for i in range(n)]) - return l - - self.check(array_comp, 10) - - def test_array_comp_inferred_dtype_nested(self): - def array_comp(n): - l = np.array([[i * j for j in range(n)] for i in range(n)]) - 
return l - - self.check(array_comp, 10) - - def test_array_comp_inferred_dtype_nested_sum(self): - def array_comp(n): - l = np.array([[i * j for j in range(n)] for i in range(n)]) - # checks that operations on the inferred array - return l - - self.check(array_comp, 10) - - def test_array_comp_inferred_dtype_outside_setitem(self): - def array_comp(n, v): - arr = np.array([i for i in range(n)]) - # the following should not change the dtype - arr[0] = v - return arr - - # float to int cast is valid - v = 1.2 - self.check(array_comp, 10, v, assert_dtype=np.intp) - # complex to int cast is invalid - with self.assertRaises(TypingError) as raises: - cfunc = jit(nopython=True)(array_comp) - cfunc(10, 2.3j) - self.assertIn("setitem: array({}, 1d, C)[0] = complex128".format(types.intp), - str(raises.exception)) - - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_config.py b/numba/numba/tests/test_config.py deleted file mode 100644 index de8371b8b..000000000 --- a/numba/numba/tests/test_config.py +++ /dev/null @@ -1,107 +0,0 @@ -import os -import tempfile -import unittest -from .support import TestCase, temp_directory, override_env_config -from numba import config - -try: - import yaml - _HAVE_YAML = True -except ImportError: - _HAVE_YAML = False - -_skip_msg = "pyyaml needed for configuration file tests" -needs_yaml = unittest.skipIf(not _HAVE_YAML, _skip_msg) - - -@needs_yaml -class TestConfig(TestCase): - - # Disable parallel testing due to envvars modification - _numba_parallel_test_ = False - - def setUp(self): - # use support.temp_directory, it can do the clean up - self.tmppath = temp_directory('config_tmp') - super(TestConfig, self).setUp() - - def mock_cfg_location(self): - """ - Creates a mock launch location. - Returns the location path. 
- """ - return tempfile.mkdtemp(dir=self.tmppath) - - def inject_mock_cfg(self, location, cfg): - """ - Injects a mock configuration at 'location' - """ - tmpcfg = os.path.join(location, config._config_fname) - with open(tmpcfg, 'wt') as f: - yaml.dump(cfg, f, default_flow_style=False) - - def get_settings(self): - """ - Gets the current numba config settings - """ - store = dict() - for x in dir(config): - if x.isupper(): - store[x] = getattr(config, x) - return store - - def create_config_effect(self, cfg): - """ - Returns a config "original" from a location with no config file - and then the impact of applying the supplied cfg dictionary as - a config file at a location in the returned "current". - """ - - # store original cwd - original_cwd = os.getcwd() - - # create mock launch location - launch_dir = self.mock_cfg_location() - - # switch cwd to the mock launch location, get and store settings - os.chdir(launch_dir) - # use override to ensure that the config is zero'd out with respect - # to any existing settings - with override_env_config('_', '_'): - original = self.get_settings() - - # inject new config into a file in the mock launch location - self.inject_mock_cfg(launch_dir, cfg) - - try: - # override something but don't change the value, this is to refresh - # the config and make sure the injected config file is read - with override_env_config('_', '_'): - current = self.get_settings() - finally: - # switch back to original dir with no new config - os.chdir(original_cwd) - return original, current - - def test_config(self): - # ensure a non empty settings file does impact config and that the - # case of the key makes no difference - key = 'COLOR_SCHEME' - for case in [str.upper, str.lower]: - orig, curr = self.create_config_effect({case(key): 'light_bg'}) - self.assertTrue(orig != curr) - self.assertTrue(orig[key] != curr[key]) - self.assertEqual(curr[key], 'light_bg') - # check that just the color scheme is the cause of difference - orig.pop(key) - 
curr.pop(key) - self.assertEqual(orig, curr) - - def test_empty_config(self): - # ensure an empty settings file does not impact config - orig, curr = self.create_config_effect({}) - self.assertEqual(orig, curr) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_conversion.py b/numba/numba/tests/test_conversion.py deleted file mode 100644 index 365d72b22..000000000 --- a/numba/numba/tests/test_conversion.py +++ /dev/null @@ -1,221 +0,0 @@ -from __future__ import print_function - -import array -import gc -import itertools -import sys - -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import types, jit, numpy_support -from .support import TestCase - - -def identity(x): - return x - -def addition(x, y): - return x + y - -def equality(x, y): - return x == y - -def foobar(x, y, z): - return x - - -class TestConversion(TestCase): - """ - Testing Python to Native conversion - """ - - def test_complex_identity(self): - pyfunc = identity - cres = compile_isolated(pyfunc, [types.complex64], - return_type=types.complex64) - - xs = [1.0j, (1+1j), (-1-1j), (1+0j)] - for x in xs: - self.assertEqual(cres.entry_point(x), x) - for x in np.complex64(xs): - self.assertEqual(cres.entry_point(x), x) - - cres = compile_isolated(pyfunc, [types.complex128], - return_type=types.complex128) - - xs = [1.0j, (1+1j), (-1-1j), (1+0j)] - for x in xs: - self.assertEqual(cres.entry_point(x), x) - for x in np.complex128(xs): - self.assertEqual(cres.entry_point(x), x) - - def test_complex_addition(self): - pyfunc = addition - cres = compile_isolated(pyfunc, [types.complex64, types.complex64], - return_type=types.complex64) - - xs = [1.0j, (1+1j), (-1-1j), (1+0j)] - for x in xs: - y = x - self.assertEqual(cres.entry_point(x, y), x + y) - for x in np.complex64(xs): - y = x - self.assertEqual(cres.entry_point(x, y), x + y) - - - cres = compile_isolated(pyfunc, [types.complex128, 
types.complex128], - return_type=types.complex128) - - xs = [1.0j, (1+1j), (-1-1j), (1+0j)] - for x in xs: - y = x - self.assertEqual(cres.entry_point(x, y), x + y) - for x in np.complex128(xs): - y = x - self.assertEqual(cres.entry_point(x, y), x + y) - - def test_boolean_as_int(self): - pyfunc = equality - cres = compile_isolated(pyfunc, [types.boolean, types.intp]) - cfunc = cres.entry_point - - xs = True, False - ys = -1, 0, 1 - - for xs, ys in itertools.product(xs, ys): - self.assertEqual(pyfunc(xs, ys), cfunc(xs, ys)) - - def test_boolean_as_float(self): - pyfunc = equality - cres = compile_isolated(pyfunc, [types.boolean, types.float64]) - cfunc = cres.entry_point - - xs = True, False - ys = -1, 0, 1 - - for xs, ys in itertools.product(xs, ys): - self.assertEqual(pyfunc(xs, ys), cfunc(xs, ys)) - - def test_boolean_eq_boolean(self): - pyfunc = equality - cres = compile_isolated(pyfunc, [types.boolean, types.boolean]) - cfunc = cres.entry_point - - xs = True, False - ys = True, False - - for xs, ys in itertools.product(xs, ys): - self.assertEqual(pyfunc(xs, ys), cfunc(xs, ys)) - - # test when a function parameters are jitted as unsigned types - # the function is called with negative parameters the Python error - # that it generates is correctly handled -- a Python error is returned to the user - # For more info, see the comment in Include/longobject.h for _PyArray_AsByteArray - # which PyLong_AsUnsignedLongLong calls - def test_negative_to_unsigned(self): - def f(x): - return x - with self.assertRaises(OverflowError): - jit('uintp(uintp)', nopython=True)(f)(-5) - - # test the switch logic in callwraper.py:build_wrapper() works for more than one argument - # and where the error occurs - def test_multiple_args_negative_to_unsigned(self): - pyfunc = foobar - cres = compile_isolated(pyfunc, [types.uint64, types.uint64, types.uint64], - return_type=types.uint64) - cfunc = cres.entry_point - test_fail_args = ((-1, 0, 1), (0, -1, 1), (0, 1, -1)) - with 
self.assertRaises(OverflowError): - for a, b, c in test_fail_args: - cfunc(a, b, c) - - # test switch logic of callwraper.py:build_wrapper() with records as function parameters - def test_multiple_args_records(self): - pyfunc = foobar - - mystruct_dt = np.dtype([('p', np.float64), - ('row', np.float64), - ('col', np.float64)]) - mystruct = numpy_support.from_dtype(mystruct_dt) - - cres = compile_isolated(pyfunc, [mystruct[:], types.uint64, types.uint64], - return_type=mystruct[:]) - cfunc = cres.entry_point - - st1 = np.recarray(3, dtype=mystruct_dt) - - st1.p = np.arange(st1.size) + 1 - st1.row = np.arange(st1.size) + 1 - st1.col = np.arange(st1.size) + 1 - - with self.assertRefCount(st1): - test_fail_args = ((st1, -1, 1), (st1, 1, -1)) - - for a, b, c in test_fail_args: - with self.assertRaises(OverflowError): - cfunc(a, b, c) - - del test_fail_args, a, b, c - gc.collect() - - # test switch logic of callwraper.py:build_wrapper() with no function parameters - def test_with_no_parameters(self): - def f(): - pass - self.assertEqual(f(), jit('()', nopython=True)(f)()) - - def check_argument_cleanup(self, typ, obj): - """ - Check that argument cleanup doesn't leak references. 
- """ - def f(x, y): - pass - - def _objects(obj): - objs = [obj] - if isinstance(obj, tuple): - for v in obj: - objs += _objects(v) - return objs - - objects = _objects(obj) - - cres = compile_isolated(f, (typ, types.uint32)) - with self.assertRefCount(*objects): - cres.entry_point(obj, 1) - with self.assertRefCount(*objects): - with self.assertRaises(OverflowError): - cres.entry_point(obj, -1) - - cres = compile_isolated(f, (types.uint32, typ)) - with self.assertRefCount(*objects): - cres.entry_point(1, obj) - with self.assertRefCount(*objects): - with self.assertRaises(OverflowError): - cres.entry_point(-1, obj) - - def test_cleanup_buffer(self): - mem = memoryview(bytearray(b"xyz")) - self.check_argument_cleanup(types.Buffer(types.intc, 1, 'C'), mem) - - def test_cleanup_record(self): - dtype = np.dtype([('x', np.float64), ('y', np.float64)]) - recarr = np.zeros(1, dtype=dtype) - self.check_argument_cleanup(numpy_support.from_dtype(dtype), recarr[0]) - - def test_cleanup_tuple(self): - mem = memoryview(bytearray(b"xyz")) - tp = types.UniTuple(types.Buffer(types.intc, 1, 'C'), 2) - self.check_argument_cleanup(tp, (mem, mem)) - - def test_cleanup_optional(self): - mem = memoryview(bytearray(b"xyz")) - tp = types.Optional(types.Buffer(types.intc, 1, 'C')) - self.check_argument_cleanup(tp, mem) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_copy_propagate.py b/numba/numba/tests/test_copy_propagate.py deleted file mode 100644 index 4a6434c19..000000000 --- a/numba/numba/tests/test_copy_propagate.py +++ /dev/null @@ -1,108 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from numba import compiler, typing -from numba.targets import cpu -from numba import types -from numba.targets.registry import cpu_target -from numba import config -from numba.annotations import type_annotations -from numba.ir_utils import copy_propagate, apply_copy_propagate, get_name_var_table -from numba import 
ir -from numba import unittest_support as unittest - -def test_will_propagate(b, z, w): - x = 3 - if b > 0: - y = z + w - else: - y = 0 - a = 2 * x - return a < b - -def test_wont_propagate(b, z, w): - x = 3 - if b > 0: - y = z + w - x = 1 - else: - y = 0 - a = 2 * x - return a < b - -def null_func(a,b,c,d): - False - -def inListVar(list_var, var): - for i in list_var: - if i.name == var: - return True - return False - -def findAssign(func_ir, var): - for label, block in func_ir.blocks.items(): - for i, inst in enumerate(block.body): - if isinstance(inst, ir.Assign) and inst.target.name!=var: - all_var = inst.list_vars() - if inListVar(all_var, var): - return True - - return False - -class TestCopyPropagate(unittest.TestCase): - def test1(self): - typingctx = typing.Context() - targetctx = cpu.CPUContext(typingctx) - test_ir = compiler.run_frontend(test_will_propagate) - #print("Num blocks = ", len(test_ir.blocks)) - #print(test_ir.dump()) - with cpu_target.nested_context(typingctx, targetctx): - typingctx.refresh() - targetctx.refresh() - args = (types.int64, types.int64, types.int64) - typemap, return_type, calltypes = compiler.type_inference_stage(typingctx, test_ir, args, None) - #print("typemap = ", typemap) - #print("return_type = ", return_type) - type_annotation = type_annotations.TypeAnnotation( - func_ir=test_ir, - typemap=typemap, - calltypes=calltypes, - lifted=(), - lifted_from=None, - args=args, - return_type=return_type, - html_output=config.HTML) - in_cps, out_cps = copy_propagate(test_ir.blocks, typemap) - apply_copy_propagate(test_ir.blocks, in_cps, get_name_var_table(test_ir.blocks), typemap, calltypes) - - self.assertFalse(findAssign(test_ir, "x")) - - def test2(self): - typingctx = typing.Context() - targetctx = cpu.CPUContext(typingctx) - test_ir = compiler.run_frontend(test_wont_propagate) - #print("Num blocks = ", len(test_ir.blocks)) - #print(test_ir.dump()) - with cpu_target.nested_context(typingctx, targetctx): - typingctx.refresh() - 
targetctx.refresh() - args = (types.int64, types.int64, types.int64) - typemap, return_type, calltypes = compiler.type_inference_stage(typingctx, test_ir, args, None) - type_annotation = type_annotations.TypeAnnotation( - func_ir=test_ir, - typemap=typemap, - calltypes=calltypes, - lifted=(), - lifted_from=None, - args=args, - return_type=return_type, - html_output=config.HTML) - in_cps, out_cps = copy_propagate(test_ir.blocks, typemap) - apply_copy_propagate(test_ir.blocks, in_cps, get_name_var_table(test_ir.blocks), typemap, calltypes) - - self.assertTrue(findAssign(test_ir, "x")) - -if __name__ == "__main__": - unittest.main() diff --git a/numba/numba/tests/test_ctypes.py b/numba/numba/tests/test_ctypes.py deleted file mode 100644 index 747aa195e..000000000 --- a/numba/numba/tests/test_ctypes.py +++ /dev/null @@ -1,236 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import sys -import threading - -import numpy as np - -from numba.ctypes_support import * - -from numba import unittest_support as unittest -from numba.compiler import compile_isolated -from numba import jit, types, errors -from numba.typing import ctypes_utils -from .support import MemoryLeakMixin, tag, TestCase -from .ctypes_usecases import * - - -class TestCTypesTypes(TestCase): - - def _conversion_tests(self, check): - check(c_double, types.float64) - check(c_int, types.intc) - check(c_uint16, types.uint16) - check(c_size_t, types.uintp) - check(c_ssize_t, types.intp) - - check(c_void_p, types.voidptr) - check(POINTER(c_float), types.CPointer(types.float32)) - check(POINTER(POINTER(c_float)), - types.CPointer(types.CPointer(types.float32))) - - check(None, types.void) - - @tag('important') - def test_from_ctypes(self): - """ - Test converting a ctypes type to a Numba type. 
- """ - def check(cty, ty): - got = ctypes_utils.from_ctypes(cty) - self.assertEqual(got, ty) - - self._conversion_tests(check) - - # An unsupported type - with self.assertRaises(TypeError) as raises: - ctypes_utils.from_ctypes(c_wchar_p) - self.assertIn("Unsupported ctypes type", str(raises.exception)) - - @tag('important') - def test_to_ctypes(self): - """ - Test converting a Numba type to a ctypes type. - """ - def check(cty, ty): - got = ctypes_utils.to_ctypes(ty) - self.assertEqual(got, cty) - - self._conversion_tests(check) - - # An unsupported type - with self.assertRaises(TypeError) as raises: - ctypes_utils.to_ctypes(types.ellipsis) - self.assertIn("Cannot convert Numba type '...' to ctypes type", - str(raises.exception)) - - -class TestCTypesUseCases(MemoryLeakMixin, TestCase): - - @tag('important') - def test_c_sin(self): - pyfunc = use_c_sin - cres = compile_isolated(pyfunc, [types.double]) - cfunc = cres.entry_point - x = 3.14 - self.assertEqual(pyfunc(x), cfunc(x)) - - def test_two_funcs(self): - # Check that two constant functions don't get mixed up. 
- pyfunc = use_two_funcs - cres = compile_isolated(pyfunc, [types.double]) - cfunc = cres.entry_point - x = 3.14 - self.assertEqual(pyfunc(x), cfunc(x)) - - @unittest.skipUnless(is_windows, "Windows-specific test") - def test_stdcall(self): - # Just check that it doesn't crash - cres = compile_isolated(use_c_sleep, [types.uintc]) - cfunc = cres.entry_point - cfunc(1) - - def test_ctype_wrapping(self): - pyfunc = use_ctype_wrapping - cres = compile_isolated(pyfunc, [types.double]) - cfunc = cres.entry_point - x = 3.14 - self.assertEqual(pyfunc(x), cfunc(x)) - - def test_ctype_voidptr(self): - pyfunc = use_c_pointer - # pyfunc will segfault if called - cres = compile_isolated(pyfunc, [types.int32]) - cfunc = cres.entry_point - x = 123 - self.assertEqual(cfunc(x), x + 1) - - def test_function_pointer(self): - pyfunc = use_func_pointer - cfunc = jit(nopython=True)(pyfunc) - for (fa, fb, x) in [ - (c_sin, c_cos, 1.0), - (c_sin, c_cos, -1.0), - (c_cos, c_sin, 1.0), - (c_cos, c_sin, -1.0)]: - expected = pyfunc(fa, fb, x) - got = cfunc(fa, fb, x) - self.assertEqual(got, expected) - # A single specialization was compiled for all calls - self.assertEqual(len(cfunc.overloads), 1, cfunc.overloads) - - def test_untyped_function(self): - with self.assertRaises(TypeError) as raises: - compile_isolated(use_c_untyped, [types.double]) - self.assertIn("ctypes function '_numba_test_exp' doesn't define its argument types", - str(raises.exception)) - - def test_python_call_back(self): - mydct = {'what': 1232121} - - def call_me_maybe(arr): - return mydct[arr[0].decode('ascii')] - - # Create a callback into the python interpreter - py_call_back = CFUNCTYPE(c_int, py_object)(call_me_maybe) - - def pyfunc(a): - what = py_call_back(a) - return what - - cfunc = jit(nopython=True, nogil=True)(pyfunc) - arr = np.array(["what"], dtype='S10') - self.assertEqual(pyfunc(arr), cfunc(arr)) - - def test_python_call_back_threaded(self): - def pyfunc(a, repeat): - out = 0 - for _ in range(repeat): - 
out += py_call_back(a) - return out - - cfunc = jit(nopython=True, nogil=True)(pyfunc) - - arr = np.array(["what"], dtype='S10') - repeat = 1000 - - expected = pyfunc(arr, repeat) - outputs = [] - - # Warm up - cfunc(arr, repeat) - - # Test the function in multiple threads to exercise the - # GIL ensure/release code - - def run(func, arr, repeat): - outputs.append(func(arr, repeat)) - - threads = [threading.Thread(target=run, args=(cfunc, arr, repeat)) - for _ in range(10)] - - # Start threads - for th in threads: - th.start() - - # End threads - for th in threads: - th.join() - - # Check results - for got in outputs: - self.assertEqual(expected, got) - - @tag('important') - def test_passing_array_ctypes_data(self): - """ - Test the ".ctypes.data" attribute of an array can be passed - as a "void *" parameter. - """ - def pyfunc(arr): - return c_take_array_ptr(arr.ctypes.data) - - cfunc = jit(nopython=True, nogil=True)(pyfunc) - - arr = np.arange(5) - - expected = pyfunc(arr) - got = cfunc(arr) - - self.assertEqual(expected, got) - - def check_array_ctypes(self, pyfunc): - cfunc = jit(nopython=True)(pyfunc) - - arr = np.linspace(0, 10, 5) - expected = arr ** 2.0 - got = cfunc(arr) - self.assertPreciseEqual(expected, got) - return cfunc - - @tag('important') - def test_passing_array_ctypes_voidptr(self): - """ - Test the ".ctypes" attribute of an array can be passed - as a "void *" parameter. - """ - self.check_array_ctypes(use_c_vsquare) - - @tag('important') - def test_passing_array_ctypes_voidptr_pass_ptr(self): - """ - Test the ".ctypes" attribute of an array can be passed - as a pointer parameter of the right type. - """ - cfunc = self.check_array_ctypes(use_c_vcube) - - # Non-compatible pointers are not accepted (here float32* vs. 
float64*) - with self.assertRaises(errors.TypingError) as raises: - cfunc(np.float32([0.0])) - self.assertIn("Invalid use of ExternalFunctionPointer", - str(raises.exception)) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/tests/test_dataflow.py b/numba/numba/tests/test_dataflow.py deleted file mode 100644 index dc40db123..000000000 --- a/numba/numba/tests/test_dataflow.py +++ /dev/null @@ -1,207 +0,0 @@ -from __future__ import print_function - -import warnings - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba.utils import PYVERSION -from numba import types, errors -from .support import TestCase, CompilationCache - - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -force_pyobj_flags = Flags() -force_pyobj_flags.set("force_pyobject") - -no_pyobj_flags = Flags() - - -def assignments(a): - b = c = str(a) - return b + c - - -def assignments2(a): - b = c = d = str(a) - return b + c + d - - -# Use cases for issue #503 - -def var_propagate1(a, b): - c = (a if a > b else b) + 5 - return c - - -def var_propagate2(a, b): - c = 5 + (a if a > b else b + 12) / 2.0 - return c - - -def var_propagate3(a, b): - c = 5 + (a > b and a or b) - return c - - -def var_propagate4(a, b): - c = 5 + (a - 1 and b + 1) or (a + 1 and b - 1) - return c - - -# Issue #480 -def chained_compare(a): - return 1 < a < 3 - - -# Issue #591 -def stack_effect_error(x): - i = 2 - c = 1 - if i == x: - for i in range(3): - c = i - return i + c - -# Some more issues with stack effect and blocks -def for_break(n, x): - for i in range(n): - n = 0 - if i == x: - break - else: - n = i - return i, n - -# Issue #571 -def var_swapping(a, b, c, d, e): - a, b = b, a - c, d, e = e, c, d - a, b, c, d = b, c, d, a - return a + b + c + d +e - -def unsupported_op_code(): - # needs unsupported "MAKE_FUNCTION" opcode - def f(): - pass - return f - -class TestDataFlow(TestCase): - - def setUp(self): - self.cache = 
CompilationCache() - # All tests here should run without warnings - self.w_cm = warnings.catch_warnings() - self.w_cm.__enter__() - warnings.simplefilter("error") - - def tearDown(self): - self.w_cm.__exit__(None, None, None) - - def test_assignments(self, flags=force_pyobj_flags): - pyfunc = assignments - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cr.entry_point - for x in [-1, 0, 1]: - self.assertPreciseEqual(pyfunc(x), cfunc(x)) - - def test_assignments2(self, flags=force_pyobj_flags): - pyfunc = assignments2 - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cr.entry_point - for x in [-1, 0, 1]: - self.assertPreciseEqual(pyfunc(x), cfunc(x)) - - if flags is force_pyobj_flags: - cfunc("a") - - # The dataflow analysis must be good enough for native mode - # compilation to succeed, hence the no_pyobj_flags in the following tests. - - def run_propagate_func(self, pyfunc, args): - cr = self.cache.compile(pyfunc, (types.int32, types.int32), - flags=no_pyobj_flags) - cfunc = cr.entry_point - self.assertPreciseEqual(cfunc(*args), pyfunc(*args)) - - def test_var_propagate1(self): - self.run_propagate_func(var_propagate1, (2, 3)) - self.run_propagate_func(var_propagate1, (3, 2)) - - def test_var_propagate2(self): - self.run_propagate_func(var_propagate2, (2, 3)) - self.run_propagate_func(var_propagate2, (3, 2)) - - def test_var_propagate3(self): - self.run_propagate_func(var_propagate3, (2, 3)) - self.run_propagate_func(var_propagate3, (3, 2)) - self.run_propagate_func(var_propagate3, (2, 0)) - self.run_propagate_func(var_propagate3, (-1, 0)) - self.run_propagate_func(var_propagate3, (0, 2)) - self.run_propagate_func(var_propagate3, (0, -1)) - - def test_var_propagate4(self): - self.run_propagate_func(var_propagate4, (1, 1)) - self.run_propagate_func(var_propagate4, (1, 0)) - self.run_propagate_func(var_propagate4, (1, -1)) - self.run_propagate_func(var_propagate4, (0, 1)) - self.run_propagate_func(var_propagate4, (0, 0)) - 
self.run_propagate_func(var_propagate4, (0, -1)) - self.run_propagate_func(var_propagate4, (-1, 1)) - self.run_propagate_func(var_propagate4, (-1, 0)) - self.run_propagate_func(var_propagate4, (-1, -1)) - - def test_chained_compare(self, flags=force_pyobj_flags): - pyfunc = chained_compare - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cr.entry_point - for x in [0, 1, 2, 3, 4]: - self.assertPreciseEqual(pyfunc(x), cfunc(x)) - - def test_chained_compare_npm(self): - self.test_chained_compare(no_pyobj_flags) - - def test_stack_effect_error(self, flags=force_pyobj_flags): - # Issue #591: POP_BLOCK must undo all stack pushes done inside - # the block. - pyfunc = stack_effect_error - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cr.entry_point - for x in (0, 1, 2, 3): - self.assertPreciseEqual(pyfunc(x), cfunc(x)) - - def test_stack_effect_error_npm(self): - self.test_stack_effect_error(no_pyobj_flags) - - def test_var_swapping(self, flags=force_pyobj_flags): - pyfunc = var_swapping - cr = compile_isolated(pyfunc, (types.int32,) * 5, flags=flags) - cfunc = cr.entry_point - args = tuple(range(0, 10, 2)) - self.assertPreciseEqual(pyfunc(*args), cfunc(*args)) - - def test_var_swapping_npm(self): - self.test_var_swapping(no_pyobj_flags) - - def test_for_break(self, flags=force_pyobj_flags): - # BREAK_LOOP must unwind the current inner syntax block. 
- pyfunc = for_break - cr = compile_isolated(pyfunc, (types.intp, types.intp), flags=flags) - cfunc = cr.entry_point - for (n, x) in [(4, 2), (4, 6)]: - self.assertPreciseEqual(pyfunc(n, x), cfunc(n, x)) - - def test_for_break_npm(self): - self.test_for_break(no_pyobj_flags) - - def test_unsupported_op_code(self, flags=force_pyobj_flags): - pyfunc = unsupported_op_code - with self.assertRaises(errors.UnsupportedError) as raises: - cr = compile_isolated(pyfunc, (), flags=flags) - msg="make_function" - self.assertIn(msg, str(raises.exception)) - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/tests/test_datamodel.py b/numba/numba/tests/test_datamodel.py deleted file mode 100644 index 816104e2f..000000000 --- a/numba/numba/tests/test_datamodel.py +++ /dev/null @@ -1,205 +0,0 @@ -from __future__ import print_function, absolute_import - -from llvmlite import ir, binding as ll - -from numba import types -from numba import unittest_support as unittest -from numba import datamodel -from numba.datamodel.testing import test_factory - - -class TestBool(test_factory()): - fe_type = types.boolean - - -class TestPyObject(test_factory()): - fe_type = types.pyobject - - -class TestInt8(test_factory()): - fe_type = types.int8 - - -class TestInt16(test_factory()): - fe_type = types.int16 - - -class TestInt32(test_factory()): - fe_type = types.int32 - - -class TestInt64(test_factory()): - fe_type = types.int64 - - -class TestUInt8(test_factory()): - fe_type = types.uint8 - - -class TestUInt16(test_factory()): - fe_type = types.uint16 - - -class TestUInt32(test_factory()): - fe_type = types.uint32 - - -class TestUInt64(test_factory()): - fe_type = types.uint64 - - -class TestFloat(test_factory()): - fe_type = types.float32 - - -class TestDouble(test_factory()): - fe_type = types.float64 - - -class TestComplex(test_factory()): - fe_type = types.complex64 - - -class TestDoubleComplex(test_factory()): - fe_type = types.complex128 - - -class 
TestPointerOfInt32(test_factory()): - fe_type = types.CPointer(types.int32) - - -class TestUniTupleOf2xInt32(test_factory()): - fe_type = types.UniTuple(types.int32, 2) - - -class TestUniTupleEmpty(test_factory()): - fe_type = types.UniTuple(types.int32, 0) - - -class TestTupleInt32Float32(test_factory()): - fe_type = types.Tuple([types.int32, types.float32]) - - -class TestTupleEmpty(test_factory()): - fe_type = types.Tuple([]) - - -class Test1DArrayOfInt32(test_factory()): - fe_type = types.Array(types.int32, 1, 'C') - - -class Test2DArrayOfComplex128(test_factory()): - fe_type = types.Array(types.complex128, 2, 'C') - - -class Test0DArrayOfInt32(test_factory()): - fe_type = types.Array(types.int32, 0, 'C') - - -class TestArgInfo(unittest.TestCase): - - def _test_as_arguments(self, fe_args): - """ - Test round-tripping types *fe_args* through the default data model's - argument conversion and unpacking logic. - """ - dmm = datamodel.default_manager - fi = datamodel.ArgPacker(dmm, fe_args) - - module = ir.Module() - fnty = ir.FunctionType(ir.VoidType(), []) - function = ir.Function(module, fnty, name="test_arguments") - builder = ir.IRBuilder() - builder.position_at_end(function.append_basic_block()) - - args = [ir.Constant(dmm.lookup(t).get_value_type(), None) - for t in fe_args] - - # Roundtrip - values = fi.as_arguments(builder, args) - asargs = fi.from_arguments(builder, values) - - self.assertEqual(len(asargs), len(fe_args)) - valtys = tuple([v.type for v in values]) - self.assertEqual(valtys, fi.argument_types) - - expect_types = [a.type for a in args] - got_types = [a.type for a in asargs] - - self.assertEqual(expect_types, got_types) - - # Assign names (check this doesn't raise) - fi.assign_names(values, ["arg%i" for i in range(len(fe_args))]) - - builder.ret_void() - - ll.parse_assembly(str(module)) - - def test_int32_array_complex(self): - fe_args = [types.int32, - types.Array(types.int32, 1, 'C'), - types.complex64] - self._test_as_arguments(fe_args) - 
- def test_two_arrays(self): - fe_args = [types.Array(types.int32, 1, 'C')] * 2 - self._test_as_arguments(fe_args) - - def test_two_0d_arrays(self): - fe_args = [types.Array(types.int32, 0, 'C')] * 2 - self._test_as_arguments(fe_args) - - def test_tuples(self): - fe_args = [types.UniTuple(types.int32, 2), - types.UniTuple(types.int32, 3)] - self._test_as_arguments(fe_args) - # Tuple of struct-likes - arrty = types.Array(types.int32, 1, 'C') - fe_args = [types.UniTuple(arrty, 2), - types.UniTuple(arrty, 3)] - self._test_as_arguments(fe_args) - # Nested tuple - fe_args = [types.UniTuple(types.UniTuple(types.int32, 2), 3)] - self._test_as_arguments(fe_args) - - def test_empty_tuples(self): - # Empty tuple - fe_args = [types.UniTuple(types.int16, 0), - types.Tuple(()), - types.int32] - self._test_as_arguments(fe_args) - - def test_nested_empty_tuples(self): - fe_args = [types.int32, - types.UniTuple(types.Tuple(()), 2), - types.int64] - self._test_as_arguments(fe_args) - - -class TestMemInfo(unittest.TestCase): - def setUp(self): - self.dmm = datamodel.default_manager - - def test_number(self): - ty = types.int32 - dm = self.dmm[ty] - self.assertFalse(dm.contains_nrt_meminfo()) - - def test_array(self): - ty = types.int32[:] - dm = self.dmm[ty] - self.assertTrue(dm.contains_nrt_meminfo()) - - def test_tuple_of_number(self): - ty = types.UniTuple(dtype=types.int32, count=2) - dm = self.dmm[ty] - self.assertFalse(dm.contains_nrt_meminfo()) - - def test_tuple_of_array(self): - ty = types.UniTuple(dtype=types.int32[:], count=2) - dm = self.dmm[ty] - self.assertTrue(dm.contains_nrt_meminfo()) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_debug.py b/numba/numba/tests/test_debug.py deleted file mode 100644 index a349afeea..000000000 --- a/numba/numba/tests/test_debug.py +++ /dev/null @@ -1,314 +0,0 @@ -from __future__ import print_function, absolute_import - -import os -import platform -import re -import textwrap -import warnings - 
-import numpy as np - -from .support import (TestCase, override_config, override_env_config, - captured_stdout, forbid_codegen) -from numba import unittest_support as unittest -from numba import jit, jitclass, types -from numba.compiler import compile_isolated, Flags -from numba.targets.cpu import ParallelOptions -from numba.errors import NumbaWarning -from numba import compiler, prange -from .test_parfors import skip_unsupported -from .matmul_usecase import needs_blas - -def simple_nopython(somearg): - retval = somearg + 1 - return retval - -def simple_gen(x, y): - yield x - yield y - - -class SimpleClass(object): - def __init__(self): - self.h = 5 - -simple_class_spec = [('h', types.int32)] - -def simple_class_user(obj): - return obj.h - -def unsupported_parfor(a, b): - return np.dot(a, b) # dot as gemm unsupported - -def supported_parfor(n): - a = np.ones(n) - for i in prange(n): - a[i] = a[i] + np.sin(i) - return a - -force_parallel_flags = Flags() -force_parallel_flags.set("auto_parallel", ParallelOptions(True)) -force_parallel_flags.set('nrt') - -class DebugTestBase(TestCase): - - all_dumps = set(['bytecode', 'cfg', 'ir', 'typeinfer', 'llvm', - 'func_opt_llvm', 'optimized_llvm', 'assembly']) - - def assert_fails(self, *args, **kwargs): - self.assertRaises(AssertionError, *args, **kwargs) - - def check_debug_output(self, out, dump_names): - enabled_dumps = dict.fromkeys(self.all_dumps, False) - for name in dump_names: - assert name in enabled_dumps - enabled_dumps[name] = True - for name, enabled in sorted(enabled_dumps.items()): - check_meth = getattr(self, '_check_dump_%s' % name) - if enabled: - check_meth(out) - else: - self.assert_fails(check_meth, out) - - def _check_dump_bytecode(self, out): - self.assertIn('BINARY_ADD', out) - - def _check_dump_cfg(self, out): - self.assertIn('CFG dominators', out) - - def _check_dump_ir(self, out): - self.assertIn('--IR DUMP: %s--' % self.func_name, out) - - def _check_dump_typeinfer(self, out): - 
self.assertIn('--propagate--', out) - - def _check_dump_llvm(self, out): - self.assertIn('--LLVM DUMP', out) - if compiler.Flags.OPTIONS['auto_parallel'].enabled == False: - self.assertIn('%"retval" = alloca', out) - - def _check_dump_func_opt_llvm(self, out): - self.assertIn('--FUNCTION OPTIMIZED DUMP %s' % self.func_name, out) - # allocas have been optimized away - self.assertIn('add nsw i64 %arg.somearg, 1', out) - - def _check_dump_optimized_llvm(self, out): - self.assertIn('--OPTIMIZED DUMP %s' % self.func_name, out) - self.assertIn('add nsw i64 %arg.somearg, 1', out) - - def _check_dump_assembly(self, out): - self.assertIn('--ASSEMBLY %s' % self.func_name, out) - if platform.machine() in ('x86_64', 'AMD64', 'i386', 'i686'): - self.assertIn('xorl', out) - - -class FunctionDebugTestBase(DebugTestBase): - - func_name = 'simple_nopython' - - def compile_simple_nopython(self): - with captured_stdout() as out: - cres = compile_isolated(simple_nopython, (types.int64,)) - # Sanity check compiled function - self.assertPreciseEqual(cres.entry_point(2), 3) - return out.getvalue() - - -class TestFunctionDebugOutput(FunctionDebugTestBase): - - def test_dump_bytecode(self): - with override_config('DUMP_BYTECODE', True): - out = self.compile_simple_nopython() - self.check_debug_output(out, ['bytecode']) - - def test_dump_ir(self): - with override_config('DUMP_IR', True): - out = self.compile_simple_nopython() - self.check_debug_output(out, ['ir']) - - def test_dump_cfg(self): - with override_config('DUMP_CFG', True): - out = self.compile_simple_nopython() - self.check_debug_output(out, ['cfg']) - - def test_dump_llvm(self): - with override_config('DUMP_LLVM', True): - out = self.compile_simple_nopython() - self.check_debug_output(out, ['llvm']) - - def test_dump_func_opt_llvm(self): - with override_config('DUMP_FUNC_OPT', True): - out = self.compile_simple_nopython() - self.check_debug_output(out, ['func_opt_llvm']) - - def test_dump_optimized_llvm(self): - with 
override_config('DUMP_OPTIMIZED', True): - out = self.compile_simple_nopython() - self.check_debug_output(out, ['optimized_llvm']) - - def test_dump_assembly(self): - with override_config('DUMP_ASSEMBLY', True): - out = self.compile_simple_nopython() - self.check_debug_output(out, ['assembly']) - - -class TestGeneratorDebugOutput(DebugTestBase): - - func_name = 'simple_gen' - - def compile_simple_gen(self): - with captured_stdout() as out: - cres = compile_isolated(simple_gen, (types.int64, types.int64)) - # Sanity check compiled function - self.assertPreciseEqual(list(cres.entry_point(2, 5)), [2, 5]) - return out.getvalue() - - def test_dump_ir_generator(self): - with override_config('DUMP_IR', True): - out = self.compile_simple_gen() - self.check_debug_output(out, ['ir']) - self.assertIn('--GENERATOR INFO: %s' % self.func_name, out) - expected_gen_info = textwrap.dedent(""" - generator state variables: ['x', 'y'] - yield point #1: live variables = ['y'], weak live variables = ['x'] - yield point #2: live variables = [], weak live variables = ['y'] - """) - self.assertIn(expected_gen_info, out) - - -class TestDisableJIT(DebugTestBase): - """ - Test the NUMBA_DISABLE_JIT environment variable. - """ - - def test_jit(self): - with override_config('DISABLE_JIT', True): - with forbid_codegen(): - cfunc = jit(nopython=True)(simple_nopython) - self.assertPreciseEqual(cfunc(2), 3) - - def test_jitclass(self): - with override_config('DISABLE_JIT', True): - with forbid_codegen(): - SimpleJITClass = jitclass(simple_class_spec)(SimpleClass) - - obj = SimpleJITClass() - self.assertPreciseEqual(obj.h, 5) - - cfunc = jit(nopython=True)(simple_class_user) - self.assertPreciseEqual(cfunc(obj), 5) - - -class TestEnvironmentOverride(FunctionDebugTestBase): - """ - Test that environment variables are reloaded by Numba when modified. 
- """ - - # mutates env with os.environ so must be run serially - _numba_parallel_test_ = False - - def test_debug(self): - out = self.compile_simple_nopython() - self.assertFalse(out) - with override_env_config('NUMBA_DEBUG', '1'): - out = self.compile_simple_nopython() - # Note that all variables dependent on NUMBA_DEBUG are - # updated too. - self.check_debug_output(out, ['ir', 'typeinfer', - 'llvm', 'func_opt_llvm', - 'optimized_llvm', 'assembly']) - out = self.compile_simple_nopython() - self.assertFalse(out) - -class TestParforsDebug(TestCase): - """ - Tests debug options associated with parfors - """ - - # mutates env with os.environ so must be run serially - _numba_parallel_test_ = False - - def check_parfors_warning(self, warn_list): - msg = ("parallel=True was specified but no transformation for parallel" - " execution was possible.") - warning_found = False - for w in warn_list: - if msg in str(w.message): - warning_found = True - break - self.assertTrue(warning_found, "Warning message should be found.") - - @needs_blas - @skip_unsupported - def test_warns(self): - """ - Test that using parallel=True on a function that does not have parallel - semantics warns if NUMBA_WARNINGS is set. - """ - with override_env_config('NUMBA_WARNINGS', '1'): - arr_ty = types.Array(types.float64, 2, "C") - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always", NumbaWarning) - cres = compile_isolated(unsupported_parfor, (arr_ty, arr_ty), - flags=force_parallel_flags) - self.check_parfors_warning(w) - - @skip_unsupported - def test_array_debug_opt_stats(self): - """ - Test that NUMBA_DEBUG_ARRAY_OPT_STATS produces valid output - """ - # deliberately trigger a compilation loop to increment the - # Parfor class state, this is to ensure the test works based - # on indices computed based on this state and not hard coded - # indices. 
- cres = compile_isolated(supported_parfor, (types.int64,), - flags=force_parallel_flags) - - with override_env_config('NUMBA_DEBUG_ARRAY_OPT_STATS', '1'): - with captured_stdout() as out: - cres = compile_isolated(supported_parfor, (types.int64,), - flags=force_parallel_flags) - - # grab the various parts out the output - output = out.getvalue().split('\n') - parallel_loop_output = \ - [x for x in output if 'is produced from pattern' in x] - fuse_output = \ - [x for x in output if 'is fused into' in x] - after_fusion_output = \ - [x for x in output if 'After fusion, function' in x] - - # Parfor's have a shared state index, grab the current value - # as it will be used as an offset for all loop messages - parfor_state = int(re.compile(r'#([0-9]+)').search( - parallel_loop_output[0]).group(1)) - bounds = range(parfor_state, - parfor_state + len(parallel_loop_output)) - - # Check the Parallel for-loop is produced from - # works first - pattern = ('ones function', ('prange', 'user')) - fmt = 'Parallel for-loop #{} is produced from pattern \'{}\' at' - for i, trials, lpattern in zip(bounds, parallel_loop_output, - pattern): - to_match = fmt.format(i, lpattern) - self.assertIn(to_match, trials) - - # Check the fusion statements are correct - pattern = (parfor_state + 1, parfor_state + 0) - fmt = 'Parallel for-loop #{} is fused into for-loop #{}.' - for trials in fuse_output: - to_match = fmt.format(*pattern) - self.assertIn(to_match, trials) - - # Check the post fusion statements are correct - pattern = (supported_parfor.__name__, 1, set([parfor_state])) - fmt = 'After fusion, function {} has {} parallel for-loop(s) #{}.' 
- for trials in after_fusion_output: - to_match = fmt.format(*pattern) - self.assertIn(to_match, trials) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_debuginfo.py b/numba/numba/tests/test_debuginfo.py deleted file mode 100644 index 0fe3e2f0b..000000000 --- a/numba/numba/tests/test_debuginfo.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import print_function, absolute_import - -import re - -from .support import TestCase, override_config, tag -from numba import unittest_support as unittest -from numba import jit, types - - -class TestDebugInfo(TestCase): - """ - These tests only checks the compiled assembly for debuginfo. - """ - def _getasm(self, fn, sig): - fn.compile(sig) - return fn.inspect_asm(sig) - - def _check(self, fn, sig, expect): - asm = self._getasm(fn, sig=sig) - m = re.search(r"\.section.+debug", asm, re.I) - got = m is not None - self.assertEqual(expect, got, msg='debug info not found in:\n%s' % asm) - - def test_no_debuginfo_in_asm(self): - @jit(nopython=True, debug=False) - def foo(x): - return x - - self._check(foo, sig=(types.int32,), expect=False) - - @tag('important') - def test_debuginfo_in_asm(self): - @jit(nopython=True, debug=True) - def foo(x): - return x - - self._check(foo, sig=(types.int32,), expect=True) - - def test_environment_override(self): - with override_config('DEBUGINFO_DEFAULT', 1): - # Using default value - @jit(nopython=True) - def foo(x): - return x - self._check(foo, sig=(types.int32,), expect=True) - - # User override default - @jit(nopython=True, debug=False) - def bar(x): - return x - self._check(bar, sig=(types.int32,), expect=False) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_del.py b/numba/numba/tests/test_del.py deleted file mode 100644 index 8791f40e0..000000000 --- a/numba/numba/tests/test_del.py +++ /dev/null @@ -1,39 +0,0 @@ -from __future__ import print_function - -import re - -from numba.compiler import compile_isolated 
-from .support import TestCase -import numba.unittest_support as unittest -from numba import testing - - -def del_ref_func(x): - del x - return x - - -class TestLists(TestCase): - - @testing.allow_interpreter_mode - def test_del_ref_func(self): - pyfunc = del_ref_func - cr = compile_isolated(pyfunc, ()) - cfunc = cr.entry_point - - errmsg = "local variable 'x' referenced before assignment" - with self.assertRaises(UnboundLocalError) as raised: - pyfunc(1) - - if re.search(str(raised.exception), errmsg) is None: - self.fail("unexpected exception: {0}".format(raised.exception)) - - with self.assertRaises(UnboundLocalError) as raised: - cfunc(1) - - if re.search(str(raised.exception), errmsg) is None: - self.fail("unexpected exception: {0}".format(raised.exception)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_deprecations.py b/numba/numba/tests/test_deprecations.py deleted file mode 100644 index 8d913ef25..000000000 --- a/numba/numba/tests/test_deprecations.py +++ /dev/null @@ -1,25 +0,0 @@ -from __future__ import print_function, absolute_import -import warnings -from numba import jit, autojit, vectorize -import numba.unittest_support as unittest - - -def dummy(): pass - - -def stub_vec(a): - return a - - -class TestDeprecation(unittest.TestCase): - - def test_autojit(self): - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - autojit(dummy) - self.assertEqual(len(w), 1) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/tests/test_dicts.py b/numba/numba/tests/test_dicts.py deleted file mode 100644 index 3dfc51cc4..000000000 --- a/numba/numba/tests/test_dicts.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import print_function - -import numba.unittest_support as unittest -from .support import TestCase, force_pyobj_flags - - -def build_map(): - return {0: 1, 2: 3} - -def build_map_from_local_vars(): - # There used to be a crash due to wrong IR generation for 
STORE_MAP - x = TestCase - return {0: x, x: 1} - - -class DictTestCase(TestCase): - - def test_build_map(self, flags=force_pyobj_flags): - self.run_nullary_func(build_map, flags=flags) - - def test_build_map_from_local_vars(self, flags=force_pyobj_flags): - self.run_nullary_func(build_map_from_local_vars, flags=flags) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_dispatcher.py b/numba/numba/tests/test_dispatcher.py deleted file mode 100644 index df50109ca..000000000 --- a/numba/numba/tests/test_dispatcher.py +++ /dev/null @@ -1,1487 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import errno -import multiprocessing -import os -import shutil -import subprocess -import sys -import threading -import warnings -import inspect - -try: - import jinja2 -except ImportError: - jinja2 = None - -try: - import pygments -except ImportError: - pygments = None - -import numpy as np - -from numba import unittest_support as unittest -from numba import utils, jit, generated_jit, types, typeof -from numba import _dispatcher -from numba.compiler import compile_isolated -from numba.errors import NumbaWarning -from .support import (TestCase, tag, temp_directory, import_dynamic, - override_env_config, capture_cache_log, captured_stdout) -from numba.targets import codegen -from numba.caching import _UserWideCacheLocator - -import llvmlite.binding as ll - - -def dummy(x): - return x - - -def add(x, y): - return x + y - - -def addsub(x, y, z): - return x - y + z - - -def addsub_defaults(x, y=2, z=3): - return x - y + z - - -def star_defaults(x, y=2, *z): - return x, y, z - - -def generated_usecase(x, y=5): - if isinstance(x, types.Complex): - def impl(x, y): - return x + y - else: - def impl(x, y): - return x - y - return impl - -def bad_generated_usecase(x, y=5): - if isinstance(x, types.Complex): - def impl(x): - return x - else: - def impl(x, y=6): - return x - y - return impl - - -class BaseTest(TestCase): - - jit_args 
= dict(nopython=True) - - def compile_func(self, pyfunc): - def check(*args, **kwargs): - expected = pyfunc(*args, **kwargs) - result = f(*args, **kwargs) - self.assertPreciseEqual(result, expected) - f = jit(**self.jit_args)(pyfunc) - return f, check - -def check_access_is_preventable(): - # This exists to check whether it is possible to prevent access to - # a file/directory through the use of `chmod 500`. If a user has - # elevated rights (e.g. root) then writes are likely to be possible - # anyway. Tests that require functioning access prevention are - # therefore skipped based on the result of this check. - tempdir = temp_directory('test_cache') - test_dir = (os.path.join(tempdir, 'writable_test')) - os.mkdir(test_dir) - # assume access prevention is not possible - ret = False - # check a write is possible - with open(os.path.join(test_dir, 'write_ok'), 'wt') as f: - f.write('check1') - # now forbid access - os.chmod(test_dir, 0o500) - try: - with open(os.path.join(test_dir, 'write_forbidden'), 'wt') as f: - f.write('check2') - except (OSError, IOError) as e: - # Check that the cause of the exception is due to access/permission - # as per https://github.com/conda/conda/blob/4.5.0/conda/gateways/disk/permissions.py#L35-L37 - eno = getattr(e, 'errno', None) - if eno in (errno.EACCES, errno.EPERM): - # errno reports access/perm fail so access prevention via - # `chmod 500` works for this user. 
- ret = True - finally: - os.chmod(test_dir, 0o775) - shutil.rmtree(test_dir) - return ret - -_access_preventable = check_access_is_preventable() -_access_msg = "Cannot create a directory to which writes are preventable" -skip_bad_access = unittest.skipUnless(_access_preventable, _access_msg) - - -class TestDispatcher(BaseTest): - - def test_dyn_pyfunc(self): - @jit - def foo(x): - return x - - foo(1) - [cr] = foo.overloads.values() - # __module__ must be match that of foo - self.assertEqual(cr.entry_point.__module__, foo.py_func.__module__) - - def test_no_argument(self): - @jit - def foo(): - return 1 - - # Just make sure this doesn't crash - foo() - - def test_coerce_input_types(self): - # Issue #486: do not allow unsafe conversions if we can still - # compile other specializations. - c_add = jit(nopython=True)(add) - self.assertPreciseEqual(c_add(123, 456), add(123, 456)) - self.assertPreciseEqual(c_add(12.3, 45.6), add(12.3, 45.6)) - self.assertPreciseEqual(c_add(12.3, 45.6j), add(12.3, 45.6j)) - self.assertPreciseEqual(c_add(12300000000, 456), add(12300000000, 456)) - - # Now force compilation of only a single specialization - c_add = jit('(i4, i4)', nopython=True)(add) - self.assertPreciseEqual(c_add(123, 456), add(123, 456)) - # Implicit (unsafe) conversion of float to int - self.assertPreciseEqual(c_add(12.3, 45.6), add(12, 45)) - with self.assertRaises(TypeError): - # Implicit conversion of complex to int disallowed - c_add(12.3, 45.6j) - - def test_ambiguous_new_version(self): - """Test compiling new version in an ambiguous case - """ - @jit - def foo(a, b): - return a + b - - INT = 1 - FLT = 1.5 - self.assertAlmostEqual(foo(INT, FLT), INT + FLT) - self.assertEqual(len(foo.overloads), 1) - self.assertAlmostEqual(foo(FLT, INT), FLT + INT) - self.assertEqual(len(foo.overloads), 2) - self.assertAlmostEqual(foo(FLT, FLT), FLT + FLT) - self.assertEqual(len(foo.overloads), 3) - # The following call is ambiguous because (int, int) can resolve - # to (float, 
int) or (int, float) with equal weight. - self.assertAlmostEqual(foo(1, 1), INT + INT) - self.assertEqual(len(foo.overloads), 4, "didn't compile a new " - "version") - - def test_lock(self): - """ - Test that (lazy) compiling from several threads at once doesn't - produce errors (see issue #908). - """ - errors = [] - - @jit - def foo(x): - return x + 1 - - def wrapper(): - try: - self.assertEqual(foo(1), 2) - except BaseException as e: - errors.append(e) - - threads = [threading.Thread(target=wrapper) for i in range(16)] - for t in threads: - t.start() - for t in threads: - t.join() - self.assertFalse(errors) - - def test_explicit_signatures(self): - f = jit("(int64,int64)")(add) - # Approximate match (unsafe conversion) - self.assertPreciseEqual(f(1.5, 2.5), 3) - self.assertEqual(len(f.overloads), 1, f.overloads) - f = jit(["(int64,int64)", "(float64,float64)"])(add) - # Exact signature matches - self.assertPreciseEqual(f(1, 2), 3) - self.assertPreciseEqual(f(1.5, 2.5), 4.0) - # Approximate match (int32 -> float64 is a safe conversion) - self.assertPreciseEqual(f(np.int32(1), 2.5), 3.5) - # No conversion - with self.assertRaises(TypeError) as cm: - f(1j, 1j) - self.assertIn("No matching definition", str(cm.exception)) - self.assertEqual(len(f.overloads), 2, f.overloads) - # A more interesting one... - f = jit(["(float32,float32)", "(float64,float64)"])(add) - self.assertPreciseEqual(f(np.float32(1), np.float32(2**-25)), 1.0) - self.assertPreciseEqual(f(1, 2**-25), 1.0000000298023224) - # Fail to resolve ambiguity between the two best overloads - f = jit(["(float32,float64)", - "(float64,float32)", - "(int64,int64)"])(add) - with self.assertRaises(TypeError) as cm: - f(1.0, 2.0) - # The two best matches are output in the error message, as well - # as the actual argument types. 
- self.assertRegexpMatches( - str(cm.exception), - r"Ambiguous overloading for ]*> \(float64, float64\):\n" - r"\(float32, float64\) -> float64\n" - r"\(float64, float32\) -> float64" - ) - # The integer signature is not part of the best matches - self.assertNotIn("int64", str(cm.exception)) - - def test_signature_mismatch(self): - tmpl = "Signature mismatch: %d argument types given, but function takes 2 arguments" - with self.assertRaises(TypeError) as cm: - jit("()")(add) - self.assertIn(tmpl % 0, str(cm.exception)) - with self.assertRaises(TypeError) as cm: - jit("(intc,)")(add) - self.assertIn(tmpl % 1, str(cm.exception)) - with self.assertRaises(TypeError) as cm: - jit("(intc,intc,intc)")(add) - self.assertIn(tmpl % 3, str(cm.exception)) - # With forceobj=True, an empty tuple is accepted - jit("()", forceobj=True)(add) - with self.assertRaises(TypeError) as cm: - jit("(intc,)", forceobj=True)(add) - self.assertIn(tmpl % 1, str(cm.exception)) - - def test_matching_error_message(self): - f = jit("(intc,intc)")(add) - with self.assertRaises(TypeError) as cm: - f(1j, 1j) - self.assertEqual(str(cm.exception), - "No matching definition for argument type(s) " - "complex128, complex128") - - def test_disabled_compilation(self): - @jit - def foo(a): - return a - - foo.compile("(float32,)") - foo.disable_compile() - with self.assertRaises(RuntimeError) as raises: - foo.compile("(int32,)") - self.assertEqual(str(raises.exception), "compilation disabled") - self.assertEqual(len(foo.signatures), 1) - - def test_disabled_compilation_through_list(self): - @jit(["(float32,)", "(int32,)"]) - def foo(a): - return a - - with self.assertRaises(RuntimeError) as raises: - foo.compile("(complex64,)") - self.assertEqual(str(raises.exception), "compilation disabled") - self.assertEqual(len(foo.signatures), 2) - - def test_disabled_compilation_nested_call(self): - @jit(["(intp,)"]) - def foo(a): - return a - - @jit - def bar(): - foo(1) - foo(np.ones(1)) # no matching definition - - 
with self.assertRaises(TypeError) as raises: - bar() - m = "No matching definition for argument type(s) array(float64, 1d, C)" - self.assertEqual(str(raises.exception), m) - - def test_fingerprint_failure(self): - """ - Failure in computing the fingerprint cannot affect a nopython=False - function. On the other hand, with nopython=True, a ValueError should - be raised to report the failure with fingerprint. - """ - @jit - def foo(x): - return x - - # Empty list will trigger failure in compile_fingerprint - errmsg = 'cannot compute fingerprint of empty list' - with self.assertRaises(ValueError) as raises: - _dispatcher.compute_fingerprint([]) - self.assertIn(errmsg, str(raises.exception)) - # It should work in fallback - self.assertEqual(foo([]), []) - # But, not in nopython=True - strict_foo = jit(nopython=True)(foo.py_func) - with self.assertRaises(ValueError) as raises: - strict_foo([]) - self.assertIn(errmsg, str(raises.exception)) - - # Test in loop lifting context - @jit - def bar(): - object() # force looplifting - x = [] - for i in range(10): - x = foo(x) - return x - - self.assertEqual(bar(), []) - # Make sure it was looplifted - [cr] = bar.overloads.values() - self.assertEqual(len(cr.lifted), 1) - - -class TestSignatureHandling(BaseTest): - """ - Test support for various parameter passing styles. - """ - - @tag('important') - def test_named_args(self): - """ - Test passing named arguments to a dispatcher. 
- """ - f, check = self.compile_func(addsub) - check(3, z=10, y=4) - check(3, 4, 10) - check(x=3, y=4, z=10) - # All calls above fall under the same specialization - self.assertEqual(len(f.overloads), 1) - # Errors - with self.assertRaises(TypeError) as cm: - f(3, 4, y=6, z=7) - self.assertIn("too many arguments: expected 3, got 4", - str(cm.exception)) - with self.assertRaises(TypeError) as cm: - f() - self.assertIn("not enough arguments: expected 3, got 0", - str(cm.exception)) - with self.assertRaises(TypeError) as cm: - f(3, 4, y=6) - self.assertIn("missing argument 'z'", str(cm.exception)) - - def test_default_args(self): - """ - Test omitting arguments with a default value. - """ - f, check = self.compile_func(addsub_defaults) - check(3, z=10, y=4) - check(3, 4, 10) - check(x=3, y=4, z=10) - # Now omitting some values - check(3, z=10) - check(3, 4) - check(x=3, y=4) - check(3) - check(x=3) - # Errors - with self.assertRaises(TypeError) as cm: - f(3, 4, y=6, z=7) - self.assertIn("too many arguments: expected 3, got 4", - str(cm.exception)) - with self.assertRaises(TypeError) as cm: - f() - self.assertIn("not enough arguments: expected at least 1, got 0", - str(cm.exception)) - with self.assertRaises(TypeError) as cm: - f(y=6, z=7) - self.assertIn("missing argument 'x'", str(cm.exception)) - - def test_star_args(self): - """ - Test a compiled function with starargs in the signature. 
- """ - f, check = self.compile_func(star_defaults) - check(4) - check(4, 5) - check(4, 5, 6) - check(4, 5, 6, 7) - check(4, 5, 6, 7, 8) - check(x=4) - check(x=4, y=5) - check(4, y=5) - with self.assertRaises(TypeError) as cm: - f(4, 5, y=6) - self.assertIn("some keyword arguments unexpected", str(cm.exception)) - with self.assertRaises(TypeError) as cm: - f(4, 5, z=6) - self.assertIn("some keyword arguments unexpected", str(cm.exception)) - with self.assertRaises(TypeError) as cm: - f(4, x=6) - self.assertIn("some keyword arguments unexpected", str(cm.exception)) - - -class TestSignatureHandlingObjectMode(TestSignatureHandling): - """ - Sams as TestSignatureHandling, but in object mode. - """ - - jit_args = dict(forceobj=True) - - -class TestGeneratedDispatcher(TestCase): - """ - Tests for @generated_jit. - """ - - @tag('important') - def test_generated(self): - f = generated_jit(nopython=True)(generated_usecase) - self.assertEqual(f(8), 8 - 5) - self.assertEqual(f(x=8), 8 - 5) - self.assertEqual(f(x=8, y=4), 8 - 4) - self.assertEqual(f(1j), 5 + 1j) - self.assertEqual(f(1j, 42), 42 + 1j) - self.assertEqual(f(x=1j, y=7), 7 + 1j) - - def test_signature_errors(self): - """ - Check error reporting when implementation signature doesn't match - generating function signature. 
- """ - f = generated_jit(nopython=True)(bad_generated_usecase) - # Mismatching # of arguments - with self.assertRaises(TypeError) as raises: - f(1j) - self.assertIn("should be compatible with signature '(x, y=5)', but has signature '(x)'", - str(raises.exception)) - # Mismatching defaults - with self.assertRaises(TypeError) as raises: - f(1) - self.assertIn("should be compatible with signature '(x, y=5)', but has signature '(x, y=6)'", - str(raises.exception)) - - -class TestDispatcherMethods(TestCase): - - def test_recompile(self): - closure = 1 - - @jit - def foo(x): - return x + closure - self.assertPreciseEqual(foo(1), 2) - self.assertPreciseEqual(foo(1.5), 2.5) - self.assertEqual(len(foo.signatures), 2) - closure = 2 - self.assertPreciseEqual(foo(1), 2) - # Recompiling takes the new closure into account. - foo.recompile() - # Everything was recompiled - self.assertEqual(len(foo.signatures), 2) - self.assertPreciseEqual(foo(1), 3) - self.assertPreciseEqual(foo(1.5), 3.5) - - def test_recompile_signatures(self): - # Same as above, but with an explicit signature on @jit. - closure = 1 - - @jit("int32(int32)") - def foo(x): - return x + closure - self.assertPreciseEqual(foo(1), 2) - self.assertPreciseEqual(foo(1.5), 2) - closure = 2 - self.assertPreciseEqual(foo(1), 2) - # Recompiling takes the new closure into account. 
- foo.recompile() - self.assertPreciseEqual(foo(1), 3) - self.assertPreciseEqual(foo(1.5), 3) - - @tag('important') - def test_inspect_llvm(self): - # Create a jited function - @jit - def foo(explicit_arg1, explicit_arg2): - return explicit_arg1 + explicit_arg2 - - # Call it in a way to create 3 signatures - foo(1, 1) - foo(1.0, 1) - foo(1.0, 1.0) - - # base call to get all llvm in a dict - llvms = foo.inspect_llvm() - self.assertEqual(len(llvms), 3) - - # make sure the function name shows up in the llvm - for llvm_bc in llvms.values(): - # Look for the function name - self.assertIn("foo", llvm_bc) - - # Look for the argument names - self.assertIn("explicit_arg1", llvm_bc) - self.assertIn("explicit_arg2", llvm_bc) - - def test_inspect_asm(self): - # Create a jited function - @jit - def foo(explicit_arg1, explicit_arg2): - return explicit_arg1 + explicit_arg2 - - # Call it in a way to create 3 signatures - foo(1, 1) - foo(1.0, 1) - foo(1.0, 1.0) - - # base call to get all llvm in a dict - asms = foo.inspect_asm() - self.assertEqual(len(asms), 3) - - # make sure the function name shows up in the llvm - for asm in asms.values(): - # Look for the function name - self.assertTrue("foo" in asm) - - def _check_cfg_display(self, cfg, wrapper=''): - # simple stringify test - if wrapper: - wrapper = "{}{}".format(len(wrapper), wrapper) - module_name = __name__.split('.', 1)[0] - module_len = len(module_name) - prefix = r'^digraph "CFG for \'_ZN{}{}{}'.format(wrapper, module_len, module_name) - self.assertRegexpMatches(str(cfg), prefix) - # .display() requires an optional dependency on `graphviz`. - # just test for the attribute without running it. - self.assertTrue(callable(cfg.display)) - - def test_inspect_cfg(self): - # Exercise the .inspect_cfg(). These are minimal tests and do not fully - # check the correctness of the function. 
- @jit - def foo(the_array): - return the_array.sum() - - # Generate 3 overloads - a1 = np.ones(1) - a2 = np.ones((1, 1)) - a3 = np.ones((1, 1, 1)) - foo(a1) - foo(a2) - foo(a3) - - # Call inspect_cfg() without arguments - cfgs = foo.inspect_cfg() - - # Correct count of overloads - self.assertEqual(len(cfgs), 3) - - # Makes sure all the signatures are correct - [s1, s2, s3] = cfgs.keys() - self.assertEqual(set([s1, s2, s3]), - set(map(lambda x: (typeof(x),), [a1, a2, a3]))) - - for cfg in cfgs.values(): - self._check_cfg_display(cfg) - self.assertEqual(len(list(cfgs.values())), 3) - - # Call inspect_cfg(signature) - cfg = foo.inspect_cfg(signature=foo.signatures[0]) - self._check_cfg_display(cfg) - - def test_inspect_cfg_with_python_wrapper(self): - # Exercise the .inspect_cfg() including the python wrapper. - # These are minimal tests and do not fully check the correctness of - # the function. - @jit - def foo(the_array): - return the_array.sum() - - # Generate 3 overloads - a1 = np.ones(1) - a2 = np.ones((1, 1)) - a3 = np.ones((1, 1, 1)) - foo(a1) - foo(a2) - foo(a3) - - # Call inspect_cfg(signature, show_wrapper="python") - cfg = foo.inspect_cfg(signature=foo.signatures[0], - show_wrapper="python") - self._check_cfg_display(cfg, wrapper='cpython') - - def test_inspect_types(self): - @jit - def foo(a, b): - return a + b - - foo(1, 2) - # Exercise the method - foo.inspect_types(utils.StringIO()) - - @unittest.skipIf(jinja2 is None, "please install the 'jinja2' package") - @unittest.skipIf(pygments is None, "please install the 'pygments' package") - def test_inspect_types_pretty(self): - @jit - def foo(a, b): - return a + b - - foo(1, 2) - - # Exercise the method, dump the output - with captured_stdout(): - ann = foo.inspect_types(pretty=True) - - # ensure HTML is found in the annotation output - for k, v in ann.ann.items(): - span_found = False - for line in v['pygments_lines']: - if 'span' in line[2]: - span_found = True - self.assertTrue(span_found) - - # check 
that file+pretty kwarg combo raises - with self.assertRaises(ValueError) as raises: - foo.inspect_types(file=utils.StringIO(), pretty=True) - - self.assertIn("`file` must be None if `pretty=True`", - str(raises.exception)) - - def test_issue_with_array_layout_conflict(self): - """ - This test an issue with the dispatcher when an array that is both - C and F contiguous is supplied as the first signature. - The dispatcher checks for F contiguous first but the compiler checks - for C contiguous first. This results in an C contiguous code inserted - as F contiguous function. - """ - def pyfunc(A, i, j): - return A[i, j] - - cfunc = jit(pyfunc) - - ary_c_and_f = np.array([[1.]]) - ary_c = np.array([[0., 1.], [2., 3.]], order='C') - ary_f = np.array([[0., 1.], [2., 3.]], order='F') - - exp_c = pyfunc(ary_c, 1, 0) - exp_f = pyfunc(ary_f, 1, 0) - - self.assertEqual(1., cfunc(ary_c_and_f, 0, 0)) - got_c = cfunc(ary_c, 1, 0) - got_f = cfunc(ary_f, 1, 0) - - self.assertEqual(exp_c, got_c) - self.assertEqual(exp_f, got_f) - - -class BaseCacheTest(TestCase): - # This class is also used in test_cfunc.py. - - # The source file that will be copied - usecases_file = None - # Make sure this doesn't conflict with another module - modname = None - - def setUp(self): - self.tempdir = temp_directory('test_cache') - sys.path.insert(0, self.tempdir) - self.modfile = os.path.join(self.tempdir, self.modname + ".py") - self.cache_dir = os.path.join(self.tempdir, "__pycache__") - shutil.copy(self.usecases_file, self.modfile) - self.maxDiff = None - - def tearDown(self): - sys.modules.pop(self.modname, None) - sys.path.remove(self.tempdir) - - def import_module(self): - # Import a fresh version of the test module. All jitted functions - # in the test module will start anew and load overloads from - # the on-disk cache if possible. 
- old = sys.modules.pop(self.modname, None) - if old is not None: - # Make sure cached bytecode is removed - if sys.version_info >= (3,): - cached = [old.__cached__] - else: - if old.__file__.endswith(('.pyc', '.pyo')): - cached = [old.__file__] - else: - cached = [old.__file__ + 'c', old.__file__ + 'o'] - for fn in cached: - try: - os.unlink(fn) - except OSError as e: - if e.errno != errno.ENOENT: - raise - mod = import_dynamic(self.modname) - self.assertEqual(mod.__file__.rstrip('co'), self.modfile) - return mod - - def cache_contents(self): - try: - return [fn for fn in os.listdir(self.cache_dir) - if not fn.endswith(('.pyc', ".pyo"))] - except OSError as e: - if e.errno != errno.ENOENT: - raise - return [] - - def get_cache_mtimes(self): - return dict((fn, os.path.getmtime(os.path.join(self.cache_dir, fn))) - for fn in sorted(self.cache_contents())) - - def check_pycache(self, n): - c = self.cache_contents() - self.assertEqual(len(c), n, c) - - def dummy_test(self): - pass - - -class BaseCacheUsecasesTest(BaseCacheTest): - here = os.path.dirname(__file__) - usecases_file = os.path.join(here, "cache_usecases.py") - modname = "dispatcher_caching_test_fodder" - - def run_in_separate_process(self): - # Cached functions can be run from a distinct process. - # Also stresses issue #1603: uncached function calling cached function - # shouldn't fail compiling. 
- code = """if 1: - import sys - - sys.path.insert(0, %(tempdir)r) - mod = __import__(%(modname)r) - mod.self_test() - """ % dict(tempdir=self.tempdir, modname=self.modname) - - popen = subprocess.Popen([sys.executable, "-c", code], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = popen.communicate() - if popen.returncode != 0: - raise AssertionError("process failed with code %s: stderr follows\n%s\n" - % (popen.returncode, err.decode())) - - def check_module(self, mod): - self.check_pycache(0) - f = mod.add_usecase - self.assertPreciseEqual(f(2, 3), 6) - self.check_pycache(2) # 1 index, 1 data - self.assertPreciseEqual(f(2.5, 3), 6.5) - self.check_pycache(3) # 1 index, 2 data - - f = mod.add_objmode_usecase - self.assertPreciseEqual(f(2, 3), 6) - self.check_pycache(5) # 2 index, 3 data - self.assertPreciseEqual(f(2.5, 3), 6.5) - self.check_pycache(6) # 2 index, 4 data - - mod.self_test() - - def check_hits(self, func, hits, misses=None): - st = func.stats - self.assertEqual(sum(st.cache_hits.values()), hits, st.cache_hits) - if misses is not None: - self.assertEqual(sum(st.cache_misses.values()), misses, - st.cache_misses) - - -class TestCache(BaseCacheUsecasesTest): - - @tag('important') - def test_caching(self): - self.check_pycache(0) - mod = self.import_module() - self.check_pycache(0) - - f = mod.add_usecase - self.assertPreciseEqual(f(2, 3), 6) - self.check_pycache(2) # 1 index, 1 data - self.assertPreciseEqual(f(2.5, 3), 6.5) - self.check_pycache(3) # 1 index, 2 data - self.check_hits(f, 0, 2) - - f = mod.add_objmode_usecase - self.assertPreciseEqual(f(2, 3), 6) - self.check_pycache(5) # 2 index, 3 data - self.assertPreciseEqual(f(2.5, 3), 6.5) - self.check_pycache(6) # 2 index, 4 data - self.check_hits(f, 0, 2) - - f = mod.record_return - rec = f(mod.aligned_arr, 1) - self.assertPreciseEqual(tuple(rec), (2, 43.5)) - rec = f(mod.packed_arr, 1) - self.assertPreciseEqual(tuple(rec), (2, 43.5)) - self.check_pycache(9) # 3 index, 6 data - 
self.check_hits(f, 0, 2) - - f = mod.generated_usecase - self.assertPreciseEqual(f(3, 2), 1) - self.assertPreciseEqual(f(3j, 2), 2 + 3j) - - # Check the code runs ok from another process - self.run_in_separate_process() - - @tag('important') - def test_caching_nrt_pruned(self): - self.check_pycache(0) - mod = self.import_module() - self.check_pycache(0) - - f = mod.add_usecase - self.assertPreciseEqual(f(2, 3), 6) - self.check_pycache(2) # 1 index, 1 data - # NRT pruning may affect cache - self.assertPreciseEqual(f(2, np.arange(3)), 2 + np.arange(3) + 1) - self.check_pycache(3) # 1 index, 2 data - self.check_hits(f, 0, 2) - - def test_inner_then_outer(self): - # Caching inner then outer function is ok - mod = self.import_module() - self.assertPreciseEqual(mod.inner(3, 2), 6) - self.check_pycache(2) # 1 index, 1 data - # Uncached outer function shouldn't fail (issue #1603) - f = mod.outer_uncached - self.assertPreciseEqual(f(3, 2), 2) - self.check_pycache(2) # 1 index, 1 data - mod = self.import_module() - f = mod.outer_uncached - self.assertPreciseEqual(f(3, 2), 2) - self.check_pycache(2) # 1 index, 1 data - # Cached outer will create new cache entries - f = mod.outer - self.assertPreciseEqual(f(3, 2), 2) - self.check_pycache(4) # 2 index, 2 data - self.assertPreciseEqual(f(3.5, 2), 2.5) - self.check_pycache(6) # 2 index, 4 data - - def test_outer_then_inner(self): - # Caching outer then inner function is ok - mod = self.import_module() - self.assertPreciseEqual(mod.outer(3, 2), 2) - self.check_pycache(4) # 2 index, 2 data - self.assertPreciseEqual(mod.outer_uncached(3, 2), 2) - self.check_pycache(4) # same - mod = self.import_module() - f = mod.inner - self.assertPreciseEqual(f(3, 2), 6) - self.check_pycache(4) # same - self.assertPreciseEqual(f(3.5, 2), 6.5) - self.check_pycache(5) # 2 index, 3 data - - def test_no_caching(self): - mod = self.import_module() - - f = mod.add_nocache_usecase - self.assertPreciseEqual(f(2, 3), 6) - self.check_pycache(0) - - def 
test_looplifted(self): - # Loop-lifted functions can't be cached and raise a warning - mod = self.import_module() - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', NumbaWarning) - - f = mod.looplifted - self.assertPreciseEqual(f(4), 6) - self.check_pycache(0) - - self.assertEqual(len(w), 1) - self.assertEqual(str(w[0].message), - 'Cannot cache compiled function "looplifted" ' - 'as it uses lifted loops') - - def test_big_array(self): - # Code references big array globals cannot be cached - mod = self.import_module() - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', NumbaWarning) - - f = mod.use_big_array - np.testing.assert_equal(f(), mod.biggie) - self.check_pycache(0) - - self.assertEqual(len(w), 1) - self.assertIn('Cannot cache compiled function "use_big_array" ' - 'as it uses dynamic globals', str(w[0].message)) - - def test_ctypes(self): - # Functions using a ctypes pointer can't be cached and raise - # a warning. 
- mod = self.import_module() - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', NumbaWarning) - - f = mod.use_c_sin - self.assertPreciseEqual(f(0.0), 0.0) - self.check_pycache(0) - - self.assertEqual(len(w), 1) - self.assertIn('Cannot cache compiled function "use_c_sin"', - str(w[0].message)) - - def test_closure(self): - mod = self.import_module() - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', NumbaWarning) - - f = mod.closure1 - self.assertPreciseEqual(f(3), 6) - f = mod.closure2 - self.assertPreciseEqual(f(3), 8) - self.check_pycache(0) - - self.assertEqual(len(w), 2) - for item in w: - self.assertIn('Cannot cache compiled function "closure"', - str(item.message)) - - def test_cache_reuse(self): - mod = self.import_module() - mod.add_usecase(2, 3) - mod.add_usecase(2.5, 3.5) - mod.add_objmode_usecase(2, 3) - mod.outer_uncached(2, 3) - mod.outer(2, 3) - mod.record_return(mod.packed_arr, 0) - mod.record_return(mod.aligned_arr, 1) - mod.generated_usecase(2, 3) - mtimes = self.get_cache_mtimes() - # Two signatures compiled - self.check_hits(mod.add_usecase, 0, 2) - - mod2 = self.import_module() - self.assertIsNot(mod, mod2) - f = mod2.add_usecase - f(2, 3) - self.check_hits(f, 1, 0) - f(2.5, 3.5) - self.check_hits(f, 2, 0) - f = mod2.add_objmode_usecase - f(2, 3) - self.check_hits(f, 1, 0) - - # The files haven't changed - self.assertEqual(self.get_cache_mtimes(), mtimes) - - self.run_in_separate_process() - self.assertEqual(self.get_cache_mtimes(), mtimes) - - def test_cache_invalidate(self): - mod = self.import_module() - f = mod.add_usecase - self.assertPreciseEqual(f(2, 3), 6) - - # This should change the functions' results - with open(self.modfile, "a") as f: - f.write("\nZ = 10\n") - - mod = self.import_module() - f = mod.add_usecase - self.assertPreciseEqual(f(2, 3), 15) - f = mod.add_objmode_usecase - self.assertPreciseEqual(f(2, 3), 15) - - def test_recompile(self): - # Explicit call 
to recompile() should overwrite the cache - mod = self.import_module() - f = mod.add_usecase - self.assertPreciseEqual(f(2, 3), 6) - - mod = self.import_module() - f = mod.add_usecase - mod.Z = 10 - self.assertPreciseEqual(f(2, 3), 6) - f.recompile() - self.assertPreciseEqual(f(2, 3), 15) - - # Freshly recompiled version is re-used from other imports - mod = self.import_module() - f = mod.add_usecase - self.assertPreciseEqual(f(2, 3), 15) - - def test_same_names(self): - # Function with the same names should still disambiguate - mod = self.import_module() - f = mod.renamed_function1 - self.assertPreciseEqual(f(2), 4) - f = mod.renamed_function2 - self.assertPreciseEqual(f(2), 8) - - def test_frozen(self): - from .dummy_module import function - old_code = function.__code__ - code_obj = compile('pass', 'tests/dummy_module.py', 'exec') - try: - function.__code__ = code_obj - - source = inspect.getfile(function) - # doesn't return anything, since it cannot find the module - # fails unless the executable is frozen - locator = _UserWideCacheLocator.from_function(function, source) - self.assertIsNone(locator) - - sys.frozen = True - # returns a cache locator object, only works when executable is frozen - locator = _UserWideCacheLocator.from_function(function, source) - self.assertIsInstance(locator, _UserWideCacheLocator) - - finally: - function.__code__ = old_code - del sys.frozen - - def _test_pycache_fallback(self): - """ - With a disabled __pycache__, test there is a working fallback - (e.g. on the user-wide cache dir) - """ - mod = self.import_module() - f = mod.add_usecase - # Remove this function's cache files at the end, to avoid accumulation - # accross test calls. 
- self.addCleanup(shutil.rmtree, f.stats.cache_path, ignore_errors=True) - - self.assertPreciseEqual(f(2, 3), 6) - # It's a cache miss since the file was copied to a new temp location - self.check_hits(f, 0, 1) - - # Test re-use - mod2 = self.import_module() - f = mod2.add_usecase - self.assertPreciseEqual(f(2, 3), 6) - self.check_hits(f, 1, 0) - - # The __pycache__ is empty (otherwise the test's preconditions - # wouldn't be met) - self.check_pycache(0) - - @skip_bad_access - @unittest.skipIf(os.name == "nt", - "cannot easily make a directory read-only on Windows") - def test_non_creatable_pycache(self): - # Make it impossible to create the __pycache__ directory - old_perms = os.stat(self.tempdir).st_mode - os.chmod(self.tempdir, 0o500) - self.addCleanup(os.chmod, self.tempdir, old_perms) - - self._test_pycache_fallback() - - @skip_bad_access - @unittest.skipIf(os.name == "nt", - "cannot easily make a directory read-only on Windows") - def test_non_writable_pycache(self): - # Make it impossible to write to the __pycache__ directory - pycache = os.path.join(self.tempdir, '__pycache__') - os.mkdir(pycache) - old_perms = os.stat(pycache).st_mode - os.chmod(pycache, 0o500) - self.addCleanup(os.chmod, pycache, old_perms) - - self._test_pycache_fallback() - - def test_ipython(self): - # Test caching in an IPython session - base_cmd = [sys.executable, '-m', 'IPython'] - base_cmd += ['--quiet', '--quick', '--no-banner', '--colors=NoColor'] - try: - ver = subprocess.check_output(base_cmd + ['--version']) - except subprocess.CalledProcessError as e: - self.skipTest("ipython not available: return code %d" - % e.returncode) - ver = ver.strip().decode() - print("ipython version:", ver) - # Create test input - inputfn = os.path.join(self.tempdir, "ipython_cache_usecase.txt") - with open(inputfn, "w") as f: - f.write(r""" - import os - import sys - - from numba import jit - - # IPython 5 does not support multiline input if stdin isn't - # a tty 
(https://github.com/ipython/ipython/issues/9752) - f = jit(cache=True)(lambda: 42) - - res = f() - # IPython writes on stdout, so use stderr instead - sys.stderr.write(u"cache hits = %d\n" % f.stats.cache_hits[()]) - - # IPython hijacks sys.exit(), bypass it - sys.stdout.flush() - sys.stderr.flush() - os._exit(res) - """) - - def execute_with_input(): - # Feed the test input as stdin, to execute it in REPL context - with open(inputfn, "rb") as stdin: - p = subprocess.Popen(base_cmd, stdin=stdin, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True) - out, err = p.communicate() - if p.returncode != 42: - self.fail("unexpected return code %d\n" - "-- stdout:\n%s\n" - "-- stderr:\n%s\n" - % (p.returncode, out, err)) - return err - - execute_with_input() - # Run a second time and check caching - err = execute_with_input() - self.assertIn("cache hits = 1", err.strip()) - - -class TestCacheWithCpuSetting(BaseCacheUsecasesTest): - # Disable parallel testing due to envvars modification - _numba_parallel_test_ = False - - def check_later_mtimes(self, mtimes_old): - match_count = 0 - for k, v in self.get_cache_mtimes().items(): - if k in mtimes_old: - self.assertGreaterEqual(v, mtimes_old[k]) - match_count += 1 - self.assertGreater(match_count, 0, - msg='nothing to compare') - - def test_user_set_cpu_name(self): - self.check_pycache(0) - mod = self.import_module() - mod.self_test() - cache_size = len(self.cache_contents()) - - mtimes = self.get_cache_mtimes() - # Change CPU name to generic - with override_env_config('NUMBA_CPU_NAME', 'generic'): - self.run_in_separate_process() - - self.check_later_mtimes(mtimes) - self.assertGreater(len(self.cache_contents()), cache_size) - # Check cache index - cache = mod.add_usecase._cache - cache_file = cache._cache_file - cache_index = cache_file._load_index() - self.assertEqual(len(cache_index), 2) - [key_a, key_b] = cache_index.keys() - if key_a[1][1] == ll.get_host_cpu_name(): - key_host, key_generic = 
key_a, key_b - else: - key_host, key_generic = key_b, key_a - self.assertEqual(key_host[1][1], ll.get_host_cpu_name()) - self.assertEqual(key_host[1][2], codegen.get_host_cpu_features()) - self.assertEqual(key_generic[1][1], 'generic') - self.assertEqual(key_generic[1][2], '') - - def test_user_set_cpu_features(self): - self.check_pycache(0) - mod = self.import_module() - mod.self_test() - cache_size = len(self.cache_contents()) - - mtimes = self.get_cache_mtimes() - # Change CPU feature - my_cpu_features = '-sse;-avx' - - system_features = codegen.get_host_cpu_features() - - self.assertNotEqual(system_features, my_cpu_features) - with override_env_config('NUMBA_CPU_FEATURES', my_cpu_features): - self.run_in_separate_process() - self.check_later_mtimes(mtimes) - self.assertGreater(len(self.cache_contents()), cache_size) - # Check cache index - cache = mod.add_usecase._cache - cache_file = cache._cache_file - cache_index = cache_file._load_index() - self.assertEqual(len(cache_index), 2) - [key_a, key_b] = cache_index.keys() - - if key_a[1][2] == system_features: - key_host, key_generic = key_a, key_b - else: - key_host, key_generic = key_b, key_a - - self.assertEqual(key_host[1][1], ll.get_host_cpu_name()) - self.assertEqual(key_host[1][2], system_features) - self.assertEqual(key_generic[1][1], ll.get_host_cpu_name()) - self.assertEqual(key_generic[1][2], my_cpu_features) - - -class TestMultiprocessCache(BaseCacheTest): - - # Nested multiprocessing.Pool raises AssertionError: - # "daemonic processes are not allowed to have children" - _numba_parallel_test_ = False - - here = os.path.dirname(__file__) - usecases_file = os.path.join(here, "cache_usecases.py") - modname = "dispatcher_caching_test_fodder" - - def test_multiprocessing(self): - # Check caching works from multiple processes at once (#2028) - mod = self.import_module() - # Calling a pure Python caller of the JIT-compiled function is - # necessary to reproduce the issue. 
- f = mod.simple_usecase_caller - n = 3 - try: - ctx = multiprocessing.get_context('spawn') - except AttributeError: - ctx = multiprocessing - pool = ctx.Pool(n) - try: - res = sum(pool.imap(f, range(n))) - finally: - pool.close() - self.assertEqual(res, n * (n - 1) // 2) - - -class TestCacheFileCollision(unittest.TestCase): - _numba_parallel_test_ = False - - here = os.path.dirname(__file__) - usecases_file = os.path.join(here, "cache_usecases.py") - modname = "caching_file_loc_fodder" - source_text_1 = """ -from numba import njit -@njit(cache=True) -def bar(): - return 123 -""" - source_text_2 = """ -from numba import njit -@njit(cache=True) -def bar(): - return 321 -""" - - def setUp(self): - self.tempdir = temp_directory('test_cache_file_loc') - sys.path.insert(0, self.tempdir) - self.modname = 'module_name_that_is_unlikely' - self.assertNotIn(self.modname, sys.modules) - self.modname_bar1 = self.modname - self.modname_bar2 = '.'.join([self.modname, 'foo']) - foomod = os.path.join(self.tempdir, self.modname) - os.mkdir(foomod) - with open(os.path.join(foomod, '__init__.py'), 'w') as fout: - print(self.source_text_1, file=fout) - with open(os.path.join(foomod, 'foo.py'), 'w') as fout: - print(self.source_text_2, file=fout) - - def tearDown(self): - sys.modules.pop(self.modname_bar1, None) - sys.modules.pop(self.modname_bar2, None) - sys.path.remove(self.tempdir) - - def import_bar1(self): - return import_dynamic(self.modname_bar1).bar - - def import_bar2(self): - return import_dynamic(self.modname_bar2).bar - - def test_file_location(self): - bar1 = self.import_bar1() - bar2 = self.import_bar2() - # Check that the cache file is named correctly - idxname1 = bar1._cache._cache_file._index_name - idxname2 = bar2._cache._cache_file._index_name - self.assertNotEqual(idxname1, idxname2) - self.assertTrue(idxname1.startswith("__init__.bar-3.py")) - self.assertTrue(idxname2.startswith("foo.bar-3.py")) - - @unittest.skipUnless(hasattr(multiprocessing, 'get_context'), - 
'Test requires multiprocessing.get_context') - def test_no_collision(self): - bar1 = self.import_bar1() - bar2 = self.import_bar2() - with capture_cache_log() as buf: - res1 = bar1() - cachelog = buf.getvalue() - # bar1 should save new index and data - self.assertEqual(cachelog.count('index saved'), 1) - self.assertEqual(cachelog.count('data saved'), 1) - self.assertEqual(cachelog.count('index loaded'), 0) - self.assertEqual(cachelog.count('data loaded'), 0) - with capture_cache_log() as buf: - res2 = bar2() - cachelog = buf.getvalue() - # bar2 should save new index and data - self.assertEqual(cachelog.count('index saved'), 1) - self.assertEqual(cachelog.count('data saved'), 1) - self.assertEqual(cachelog.count('index loaded'), 0) - self.assertEqual(cachelog.count('data loaded'), 0) - self.assertNotEqual(res1, res2) - - try: - # Make sure we can spawn new process without inheriting - # the parent context. - mp = multiprocessing.get_context('spawn') - except ValueError: - print("missing spawn context") - - q = mp.Queue() - # Start new process that calls `cache_file_collision_tester` - proc = mp.Process(target=cache_file_collision_tester, - args=(q, self.tempdir, - self.modname_bar1, - self.modname_bar2)) - proc.start() - # Get results from the process - log1 = q.get() - got1 = q.get() - log2 = q.get() - got2 = q.get() - proc.join() - - # The remote execution result of bar1() and bar2() should match - # the one executed locally. 
- self.assertEqual(got1, res1) - self.assertEqual(got2, res2) - - # The remote should have loaded bar1 from cache - self.assertEqual(log1.count('index saved'), 0) - self.assertEqual(log1.count('data saved'), 0) - self.assertEqual(log1.count('index loaded'), 1) - self.assertEqual(log1.count('data loaded'), 1) - - # The remote should have loaded bar2 from cache - self.assertEqual(log2.count('index saved'), 0) - self.assertEqual(log2.count('data saved'), 0) - self.assertEqual(log2.count('index loaded'), 1) - self.assertEqual(log2.count('data loaded'), 1) - - -def cache_file_collision_tester(q, tempdir, modname_bar1, modname_bar2): - sys.path.insert(0, tempdir) - bar1 = import_dynamic(modname_bar1).bar - bar2 = import_dynamic(modname_bar2).bar - with capture_cache_log() as buf: - r1 = bar1() - q.put(buf.getvalue()) - q.put(r1) - with capture_cache_log() as buf: - r2 = bar2() - q.put(buf.getvalue()) - q.put(r2) - - -class TestDispatcherFunctionBoundaries(TestCase): - def test_pass_dispatcher_as_arg(self): - # Test that a Dispatcher object can be pass as argument - @jit(nopython=True) - def add1(x): - return x + 1 - - @jit(nopython=True) - def bar(fn, x): - return fn(x) - - @jit(nopython=True) - def foo(x): - return bar(add1, x) - - # Check dispatcher as argument inside NPM - inputs = [1, 11.1, np.arange(10)] - expected_results = [x + 1 for x in inputs] - - for arg, expect in zip(inputs, expected_results): - self.assertPreciseEqual(foo(arg), expect) - - # Check dispatcher as argument from python - for arg, expect in zip(inputs, expected_results): - self.assertPreciseEqual(bar(add1, arg), expect) - - def test_dispatcher_as_arg_usecase(self): - @jit(nopython=True) - def maximum(seq, cmpfn): - tmp = seq[0] - for each in seq[1:]: - cmpval = cmpfn(tmp, each) - if cmpval < 0: - tmp = each - return tmp - - got = maximum([1, 2, 3, 4], cmpfn=jit(lambda x, y: x - y)) - self.assertEqual(got, 4) - got = maximum(list(zip(range(5), range(5)[::-1])), - cmpfn=jit(lambda x, y: x[0] - 
y[0])) - self.assertEqual(got, (4, 0)) - got = maximum(list(zip(range(5), range(5)[::-1])), - cmpfn=jit(lambda x, y: x[1] - y[1])) - self.assertEqual(got, (0, 4)) - - def test_dispatcher_cannot_return_to_python(self): - @jit(nopython=True) - def foo(fn): - return fn - - fn = jit(lambda x: x) - - with self.assertRaises(TypeError) as raises: - foo(fn) - self.assertRegexpMatches(str(raises.exception), - "cannot convert native .* to Python object") - - def test_dispatcher_in_sequence_arg(self): - @jit(nopython=True) - def one(x): - return x + 1 - - @jit(nopython=True) - def two(x): - return one(one(x)) - - @jit(nopython=True) - def three(x): - return one(one(one(x))) - - @jit(nopython=True) - def choose(fns, x): - return fns[0](x), fns[1](x), fns[2](x) - - # Tuple case - self.assertEqual(choose((one, two, three), 1), (2, 3, 4)) - # List case - self.assertEqual(choose([one, one, one], 1), (2, 2, 2)) - - -class TestBoxingDefaultError(unittest.TestCase): - # Testing default error at boxing/unboxing - def test_unbox_runtime_error(self): - # Dummy type has no unbox support - def foo(x): - pass - cres = compile_isolated(foo, (types.Dummy("dummy_type"),)) - with self.assertRaises(TypeError) as raises: - # Can pass in whatever and the unbox logic will always raise - # without checking the input value. - cres.entry_point(None) - self.assertEqual(str(raises.exception), "can't unbox dummy_type type") - - def test_box_runtime_error(self): - def foo(): - return unittest # Module type has no boxing logic - cres = compile_isolated(foo, ()) - with self.assertRaises(TypeError) as raises: - # Can pass in whatever and the unbox logic will always raise - # without checking the input value. 
- cres.entry_point() - pat = "cannot convert native Module.* to Python object" - self.assertRegexpMatches(str(raises.exception), pat) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_dummyarray.py b/numba/numba/tests/test_dummyarray.py deleted file mode 100644 index 9911b122b..000000000 --- a/numba/numba/tests/test_dummyarray.py +++ /dev/null @@ -1,201 +0,0 @@ -from __future__ import print_function -import numba.unittest_support as unittest -import itertools -import numpy as np -from numba.dummyarray import Array - - -class TestSlicing(unittest.TestCase): - - def assertSameContig(self, arr, nparr): - attrs = 'C_CONTIGUOUS', 'F_CONTIGUOUS' - for attr in attrs: - if arr.flags[attr] != nparr.flags[attr]: - if arr.size == 0 and nparr.size == 0: - # numpy <=1.7 bug that some empty array are contiguous and - # some are not - pass - else: - self.fail("contiguous flag mismatch:\ngot=%s\nexpect=%s" % - (arr.flags, nparr.flags)) - - #### 1D - - def test_slice0_1d(self): - nparr = np.empty(4) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - self.assertSameContig(arr, nparr) - xx = -2, -1, 0, 1, 2 - for x in xx: - expect = nparr[x:] - got = arr[x:] - self.assertSameContig(got, expect) - self.assertEqual(got.shape, expect.shape) - self.assertEqual(got.strides, expect.strides) - - def test_slice1_1d(self): - nparr = np.empty(4) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - xx = -2, -1, 0, 1, 2 - for x in xx: - expect = nparr[:x] - got = arr[:x] - self.assertSameContig(got, expect) - self.assertEqual(got.shape, expect.shape) - self.assertEqual(got.strides, expect.strides) - - def test_slice2_1d(self): - nparr = np.empty(4) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - xx = -2, -1, 0, 1, 2 - for x, y in itertools.product(xx, xx): - expect = nparr[x:y] - got = arr[x:y] - self.assertSameContig(got, expect) - self.assertEqual(got.shape, 
expect.shape) - self.assertEqual(got.strides, expect.strides) - - #### 2D - - def test_slice0_2d(self): - nparr = np.empty((4, 5)) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - xx = -2, 0, 1, 2 - for x in xx: - expect = nparr[x:] - got = arr[x:] - self.assertSameContig(got, expect) - self.assertEqual(got.shape, expect.shape) - self.assertEqual(got.strides, expect.strides) - - for x, y in itertools.product(xx, xx): - expect = nparr[x:, y:] - got = arr[x:, y:] - self.assertSameContig(got, expect) - self.assertEqual(got.shape, expect.shape) - self.assertEqual(got.strides, expect.strides) - - def test_slice1_2d(self): - nparr = np.empty((4, 5)) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - xx = -2, 0, 2 - for x in xx: - expect = nparr[:x] - got = arr[:x] - self.assertEqual(got.shape, expect.shape) - self.assertEqual(got.strides, expect.strides) - self.assertSameContig(got, expect) - - for x, y in itertools.product(xx, xx): - expect = nparr[:x, :y] - got = arr[:x, :y] - self.assertEqual(got.shape, expect.shape) - self.assertEqual(got.strides, expect.strides) - self.assertSameContig(got, expect) - - def test_slice2_2d(self): - nparr = np.empty((4, 5)) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - xx = -2, 0, 2 - for s, t, u, v in itertools.product(xx, xx, xx, xx): - expect = nparr[s:t, u:v] - got = arr[s:t, u:v] - self.assertSameContig(got, expect) - self.assertEqual(got.shape, expect.shape) - self.assertEqual(got.strides, expect.strides) - - for x, y in itertools.product(xx, xx): - expect = nparr[s:t, u:v] - got = arr[s:t, u:v] - self.assertSameContig(got, expect) - self.assertEqual(got.shape, expect.shape) - self.assertEqual(got.strides, expect.strides) - - -class TestReshape(unittest.TestCase): - def test_reshape_2d2d(self): - nparr = np.empty((4, 5)) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - expect = nparr.reshape(5, 4) - got 
= arr.reshape(5, 4)[0] - self.assertEqual(got.shape, expect.shape) - self.assertEqual(got.strides, expect.strides) - - def test_reshape_2d1d(self): - nparr = np.empty((4, 5)) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - expect = nparr.reshape(5 * 4) - got = arr.reshape(5 * 4)[0] - self.assertEqual(got.shape, expect.shape) - self.assertEqual(got.strides, expect.strides) - - def test_reshape_3d3d(self): - nparr = np.empty((3, 4, 5)) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - expect = nparr.reshape(5, 3, 4) - got = arr.reshape(5, 3, 4)[0] - self.assertEqual(got.shape, expect.shape) - self.assertEqual(got.strides, expect.strides) - - def test_reshape_3d2d(self): - nparr = np.empty((3, 4, 5)) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - expect = nparr.reshape(3 * 4, 5) - got = arr.reshape(3 * 4, 5)[0] - self.assertEqual(got.shape, expect.shape) - self.assertEqual(got.strides, expect.strides) - - def test_reshape_3d1d(self): - nparr = np.empty((3, 4, 5)) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - expect = nparr.reshape(3 * 4 * 5) - got = arr.reshape(3 * 4 * 5)[0] - self.assertEqual(got.shape, expect.shape) - self.assertEqual(got.strides, expect.strides) - -class TestExtent(unittest.TestCase): - def test_extent_1d(self): - nparr = np.empty(4) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - s, e = arr.extent - self.assertEqual(e - s, nparr.size * nparr.dtype.itemsize) - - def test_extent_2d(self): - nparr = np.empty((4, 5)) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - s, e = arr.extent - self.assertEqual(e - s, nparr.size * nparr.dtype.itemsize) - - def test_extent_iter_1d(self): - nparr = np.empty(4) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - [ext] = list(arr.iter_contiguous_extent()) - self.assertEqual(ext, arr.extent) - 
- def test_extent_iter_2d(self): - nparr = np.empty((4, 5)) - arr = Array.from_desc(0, nparr.shape, nparr.strides, - nparr.dtype.itemsize) - [ext] = list(arr.iter_contiguous_extent()) - self.assertEqual(ext, arr.extent) - - self.assertEqual(len(list(arr[::2].iter_contiguous_extent())), 2) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/tests/test_dyn_array.py b/numba/numba/tests/test_dyn_array.py deleted file mode 100644 index 365ee112b..000000000 --- a/numba/numba/tests/test_dyn_array.py +++ /dev/null @@ -1,1495 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import contextlib -import sys -import numpy as np -import random -import threading - -from numba import unittest_support as unittest -from numba.errors import TypingError -from numba import njit -from numba import utils -from numba.numpy_support import version as numpy_version -from .support import MemoryLeakMixin, TestCase, tag - - -nrtjit = njit(_nrt=True, nogil=True) - - -def np_concatenate1(a, b, c): - return np.concatenate((a, b, c)) - -def np_concatenate2(a, b, c, axis): - return np.concatenate((a, b, c), axis=axis) - -def np_stack1(a, b, c): - return np.stack((a, b, c)) - -def np_stack2(a, b, c, axis): - return np.stack((a, b, c), axis=axis) - -def np_hstack(a, b, c): - return np.hstack((a, b, c)) - -def np_vstack(a, b, c): - return np.vstack((a, b, c)) - -def np_dstack(a, b, c): - return np.dstack((a, b, c)) - -def np_column_stack(a, b, c): - return np.column_stack((a, b, c)) - - -class BaseTest(TestCase): - - def check_outputs(self, pyfunc, argslist, exact=True): - cfunc = nrtjit(pyfunc) - for args in argslist: - expected = pyfunc(*args) - ret = cfunc(*args) - self.assertEqual(ret.size, expected.size) - self.assertEqual(ret.dtype, expected.dtype) - self.assertStridesEqual(ret, expected) - if exact: - np.testing.assert_equal(expected, ret) - else: - np.testing.assert_allclose(expected, ret) - - -class NrtRefCtTest(MemoryLeakMixin): - def 
assert_array_nrt_refct(self, arr, expect): - self.assertEqual(arr.base.refcount, expect) - - -class TestDynArray(NrtRefCtTest, TestCase): - - def test_empty_0d(self): - @nrtjit - def foo(): - arr = np.empty(()) - arr[()] = 42 - return arr - - arr = foo() - self.assert_array_nrt_refct(arr, 1) - np.testing.assert_equal(42, arr) - self.assertEqual(arr.size, 1) - self.assertEqual(arr.shape, ()) - self.assertEqual(arr.dtype, np.dtype(np.float64)) - self.assertEqual(arr.strides, ()) - arr.fill(123) # test writability - np.testing.assert_equal(123, arr) - del arr - - def test_empty_1d(self): - @nrtjit - def foo(n): - arr = np.empty(n) - for i in range(n): - arr[i] = i - - return arr - - n = 3 - arr = foo(n) - self.assert_array_nrt_refct(arr, 1) - np.testing.assert_equal(np.arange(n), arr) - self.assertEqual(arr.size, n) - self.assertEqual(arr.shape, (n,)) - self.assertEqual(arr.dtype, np.dtype(np.float64)) - self.assertEqual(arr.strides, (np.dtype(np.float64).itemsize,)) - arr.fill(123) # test writability - np.testing.assert_equal(123, arr) - del arr - - def test_empty_2d(self): - def pyfunc(m, n): - arr = np.empty((m, n), np.int32) - for i in range(m): - for j in range(n): - arr[i, j] = i + j - - return arr - - cfunc = nrtjit(pyfunc) - m = 4 - n = 3 - expected_arr = pyfunc(m, n) - got_arr = cfunc(m, n) - self.assert_array_nrt_refct(got_arr, 1) - np.testing.assert_equal(expected_arr, got_arr) - - self.assertEqual(expected_arr.size, got_arr.size) - self.assertEqual(expected_arr.shape, got_arr.shape) - self.assertEqual(expected_arr.strides, got_arr.strides) - - del got_arr - - @tag('important') - def test_empty_3d(self): - def pyfunc(m, n, p): - arr = np.empty((m, n, p), np.int32) - for i in range(m): - for j in range(n): - for k in range(p): - arr[i, j, k] = i + j + k - - return arr - - cfunc = nrtjit(pyfunc) - m = 4 - n = 3 - p = 2 - expected_arr = pyfunc(m, n, p) - got_arr = cfunc(m, n, p) - self.assert_array_nrt_refct(got_arr, 1) - np.testing.assert_equal(expected_arr, 
got_arr) - - self.assertEqual(expected_arr.size, got_arr.size) - self.assertEqual(expected_arr.shape, got_arr.shape) - self.assertEqual(expected_arr.strides, got_arr.strides) - - del got_arr - - @tag('important') - def test_empty_2d_sliced(self): - def pyfunc(m, n, p): - arr = np.empty((m, n), np.int32) - for i in range(m): - for j in range(n): - arr[i, j] = i + j - - return arr[p] - - cfunc = nrtjit(pyfunc) - m = 4 - n = 3 - p = 2 - expected_arr = pyfunc(m, n, p) - got_arr = cfunc(m, n, p) - self.assert_array_nrt_refct(got_arr, 1) - np.testing.assert_equal(expected_arr, got_arr) - - self.assertEqual(expected_arr.size, got_arr.size) - self.assertEqual(expected_arr.shape, got_arr.shape) - self.assertEqual(expected_arr.strides, got_arr.strides) - - del got_arr - - @tag('important') - def test_return_global_array(self): - y = np.ones(4, dtype=np.float32) - initrefct = sys.getrefcount(y) - - def return_external_array(): - return y - - cfunc = nrtjit(return_external_array) - out = cfunc() - - # out reference by cfunc - self.assertEqual(initrefct + 1, sys.getrefcount(y)) - - np.testing.assert_equal(y, out) - np.testing.assert_equal(y, np.ones(4, dtype=np.float32)) - np.testing.assert_equal(out, np.ones(4, dtype=np.float32)) - - del out - # out is only referenced by cfunc - self.assertEqual(initrefct + 1, sys.getrefcount(y)) - - del cfunc - # y is no longer referenced by cfunc - self.assertEqual(initrefct, sys.getrefcount(y)) - - @tag('important') - def test_return_global_array_sliced(self): - y = np.ones(4, dtype=np.float32) - - def return_external_array(): - return y[2:] - - cfunc = nrtjit(return_external_array) - out = cfunc() - self.assertIsNone(out.base) - - yy = y[2:] - np.testing.assert_equal(yy, out) - np.testing.assert_equal(yy, np.ones(2, dtype=np.float32)) - np.testing.assert_equal(out, np.ones(2, dtype=np.float32)) - - def test_array_pass_through(self): - def pyfunc(y): - return y - - arr = np.ones(4, dtype=np.float32) - - cfunc = nrtjit(pyfunc) - expected = 
cfunc(arr) - got = pyfunc(arr) - - np.testing.assert_equal(expected, arr) - np.testing.assert_equal(expected, got) - self.assertIs(expected, arr) - self.assertIs(expected, got) - - @tag('important') - def test_array_pass_through_sliced(self): - def pyfunc(y): - return y[y.size // 2:] - - arr = np.ones(4, dtype=np.float32) - - initrefct = sys.getrefcount(arr) - - cfunc = nrtjit(pyfunc) - got = cfunc(arr) - self.assertEqual(initrefct + 1, sys.getrefcount(arr)) - expected = pyfunc(arr) - self.assertEqual(initrefct + 2, sys.getrefcount(arr)) - - np.testing.assert_equal(expected, arr[arr.size // 2]) - np.testing.assert_equal(expected, got) - - del expected - self.assertEqual(initrefct + 1, sys.getrefcount(arr)) - del got - self.assertEqual(initrefct, sys.getrefcount(arr)) - - def test_ufunc_with_allocated_output(self): - - def pyfunc(a, b): - out = np.empty(a.shape) - np.add(a, b, out) - return out - - cfunc = nrtjit(pyfunc) - - # 1D case - arr_a = np.random.random(10) - arr_b = np.random.random(10) - - np.testing.assert_equal(pyfunc(arr_a, arr_b), - cfunc(arr_a, arr_b)) - - self.assert_array_nrt_refct(cfunc(arr_a, arr_b), 1) - - # 2D case - arr_a = np.random.random(10).reshape(2, 5) - arr_b = np.random.random(10).reshape(2, 5) - - np.testing.assert_equal(pyfunc(arr_a, arr_b), - cfunc(arr_a, arr_b)) - - self.assert_array_nrt_refct(cfunc(arr_a, arr_b), 1) - - # 3D case - arr_a = np.random.random(70).reshape(2, 5, 7) - arr_b = np.random.random(70).reshape(2, 5, 7) - - np.testing.assert_equal(pyfunc(arr_a, arr_b), - cfunc(arr_a, arr_b)) - - self.assert_array_nrt_refct(cfunc(arr_a, arr_b), 1) - - def test_allocation_mt(self): - """ - This test exercises the array allocation in multithreaded usecase. - This stress the freelist inside NRT. 
- """ - - def pyfunc(inp): - out = np.empty(inp.size) - - # Zero fill - for i in range(out.size): - out[i] = 0 - - for i in range(inp[0]): - # Allocate inside a loop - tmp = np.empty(inp.size) - # Write to tmp - for j in range(tmp.size): - tmp[j] = inp[j] - # out = tmp + i - for j in range(tmp.size): - out[j] += tmp[j] + i - - return out - - cfunc = nrtjit(pyfunc) - size = 10 # small array size so that the computation is short - arr = np.random.randint(1, 10, size) - frozen_arr = arr.copy() - - np.testing.assert_equal(pyfunc(arr), cfunc(arr)) - # Ensure we did not modify the input - np.testing.assert_equal(frozen_arr, arr) - - workers = [] - inputs = [] - outputs = [] - - # Make wrapper to store the output - def wrapped(inp, out): - out[:] = cfunc(inp) - - # Create a lot of worker threads to create contention - for i in range(100): - arr = np.random.randint(1, 10, size) - out = np.empty_like(arr) - thread = threading.Thread(target=wrapped, - args=(arr, out), - name="worker{0}".format(i)) - workers.append(thread) - inputs.append(arr) - outputs.append(out) - - # Launch worker threads - for thread in workers: - thread.start() - - # Join worker threads - for thread in workers: - thread.join() - - # Check result - for inp, out in zip(inputs, outputs): - np.testing.assert_equal(pyfunc(inp), out) - - def test_refct_mt(self): - """ - This test exercises the refct in multithreaded code - """ - - def pyfunc(n, inp): - out = np.empty(inp.size) - for i in range(out.size): - out[i] = inp[i] + 1 - # Use swap to trigger many refct ops - for i in range(n): - out, inp = inp, out - return out - - cfunc = nrtjit(pyfunc) - size = 10 - input = np.arange(size, dtype=np.float) - expected_refct = sys.getrefcount(input) - swapct = random.randrange(1000) - expected = pyfunc(swapct, input) - np.testing.assert_equal(expected, cfunc(swapct, input)) - # The following checks can discover a reference count error - del expected - self.assertEqual(expected_refct, sys.getrefcount(input)) - - workers 
= [] - outputs = [] - swapcts = [] - - # Make wrapper to store the output - def wrapped(n, input, out): - out[:] = cfunc(n, input) - - # Create worker threads - for i in range(100): - out = np.empty(size) - # All thread shares the same input - swapct = random.randrange(1000) - thread = threading.Thread(target=wrapped, - args=(swapct, input, out), - name="worker{0}".format(i)) - workers.append(thread) - outputs.append(out) - swapcts.append(swapct) - - # Launch worker threads - for thread in workers: - thread.start() - - # Join worker threads - for thread in workers: - thread.join() - - # Check result - for swapct, out in zip(swapcts, outputs): - np.testing.assert_equal(pyfunc(swapct, input), out) - - del outputs, workers - # The following checks can discover a reference count error - self.assertEqual(expected_refct, sys.getrefcount(input)) - - def test_swap(self): - - def pyfunc(x, y, t): - """Swap array x and y for t number of times - """ - for i in range(t): - x, y = y, x - - return x, y - - - cfunc = nrtjit(pyfunc) - - x = np.random.random(100) - y = np.random.random(100) - - t = 100 - - initrefct = sys.getrefcount(x), sys.getrefcount(y) - expect, got = pyfunc(x, y, t), cfunc(x, y, t) - self.assertIsNone(got[0].base) - self.assertIsNone(got[1].base) - np.testing.assert_equal(expect, got) - del expect, got - self.assertEqual(initrefct, (sys.getrefcount(x), sys.getrefcount(y))) - - def test_return_tuple_of_array(self): - - def pyfunc(x): - y = np.empty(x.size) - for i in range(y.size): - y[i] = x[i] + 1 - return x, y - - cfunc = nrtjit(pyfunc) - - x = np.random.random(5) - initrefct = sys.getrefcount(x) - expected_x, expected_y = pyfunc(x) - got_x, got_y = cfunc(x) - self.assertIs(x, expected_x) - self.assertIs(x, got_x) - np.testing.assert_equal(expected_x, got_x) - np.testing.assert_equal(expected_y, got_y) - del expected_x, got_x - self.assertEqual(initrefct, sys.getrefcount(x)) - - self.assertEqual(sys.getrefcount(expected_y), sys.getrefcount(got_y)) - - def 
test_return_tuple_of_array_created(self): - - def pyfunc(x): - y = np.empty(x.size) - for i in range(y.size): - y[i] = x[i] + 1 - out = y, y - return out - - cfunc = nrtjit(pyfunc) - - x = np.random.random(5) - expected_x, expected_y = pyfunc(x) - got_x, got_y = cfunc(x) - np.testing.assert_equal(expected_x, got_x) - np.testing.assert_equal(expected_y, got_y) - # getrefcount owns 1, got_y owns 1 - self.assertEqual(2, sys.getrefcount(got_y)) - # getrefcount owns 1, got_y owns 1 - self.assertEqual(2, sys.getrefcount(got_y)) - - def test_issue_with_return_leak(self): - """ - Dispatcher returns a new reference. - It need to workaround it for now. - """ - @nrtjit - def inner(out): - return out - - def pyfunc(x): - return inner(x) - - cfunc = nrtjit(pyfunc) - - arr = np.arange(10) - old_refct = sys.getrefcount(arr) - - self.assertEqual(old_refct, sys.getrefcount(pyfunc(arr))) - self.assertEqual(old_refct, sys.getrefcount(cfunc(arr))) - self.assertEqual(old_refct, sys.getrefcount(arr)) - - -class ConstructorBaseTest(NrtRefCtTest): - - def check_0d(self, pyfunc): - cfunc = nrtjit(pyfunc) - expected = pyfunc() - ret = cfunc() - self.assert_array_nrt_refct(ret, 1) - self.assertEqual(ret.size, expected.size) - self.assertEqual(ret.shape, expected.shape) - self.assertEqual(ret.dtype, expected.dtype) - self.assertEqual(ret.strides, expected.strides) - self.check_result_value(ret, expected) - # test writability - expected = np.empty_like(ret) # np.full_like was not added until Numpy 1.8 - expected.fill(123) - ret.fill(123) - np.testing.assert_equal(ret, expected) - - def check_1d(self, pyfunc): - cfunc = nrtjit(pyfunc) - n = 3 - expected = pyfunc(n) - ret = cfunc(n) - self.assert_array_nrt_refct(ret, 1) - self.assertEqual(ret.size, expected.size) - self.assertEqual(ret.shape, expected.shape) - self.assertEqual(ret.dtype, expected.dtype) - self.assertEqual(ret.strides, expected.strides) - self.check_result_value(ret, expected) - # test writability - expected = np.empty_like(ret) 
# np.full_like was not added until Numpy 1.8 - expected.fill(123) - ret.fill(123) - np.testing.assert_equal(ret, expected) - # errors - with self.assertRaises(ValueError) as cm: - cfunc(-1) - self.assertEqual(str(cm.exception), "negative dimensions not allowed") - - def check_2d(self, pyfunc): - cfunc = nrtjit(pyfunc) - m, n = 2, 3 - expected = pyfunc(m, n) - ret = cfunc(m, n) - self.assert_array_nrt_refct(ret, 1) - self.assertEqual(ret.size, expected.size) - self.assertEqual(ret.shape, expected.shape) - self.assertEqual(ret.dtype, expected.dtype) - self.assertEqual(ret.strides, expected.strides) - self.check_result_value(ret, expected) - # test writability - expected = np.empty_like(ret) # np.full_like was not added until Numpy 1.8 - expected.fill(123) - ret.fill(123) - np.testing.assert_equal(ret, expected) - # errors - with self.assertRaises(ValueError) as cm: - cfunc(2, -1) - self.assertEqual(str(cm.exception), "negative dimensions not allowed") - - -class TestNdZeros(ConstructorBaseTest, TestCase): - - def setUp(self): - super(TestNdZeros, self).setUp() - self.pyfunc = np.zeros - - def check_result_value(self, ret, expected): - np.testing.assert_equal(ret, expected) - - def test_0d(self): - pyfunc = self.pyfunc - def func(): - return pyfunc(()) - self.check_0d(func) - - def test_1d(self): - pyfunc = self.pyfunc - def func(n): - return pyfunc(n) - self.check_1d(func) - - def test_1d_dtype(self): - pyfunc = self.pyfunc - def func(n): - return pyfunc(n, np.int32) - self.check_1d(func) - - def test_1d_dtype_instance(self): - # dtype as numpy dtype, not as scalar class - pyfunc = self.pyfunc - _dtype = np.dtype('int32') - def func(n): - return pyfunc(n, _dtype) - self.check_1d(func) - - def test_2d(self): - pyfunc = self.pyfunc - def func(m, n): - return pyfunc((m, n)) - self.check_2d(func) - - @tag('important') - def test_2d_dtype_kwarg(self): - pyfunc = self.pyfunc - def func(m, n): - return pyfunc((m, n), dtype=np.complex64) - self.check_2d(func) - - -class 
TestNdOnes(TestNdZeros): - - def setUp(self): - super(TestNdOnes, self).setUp() - self.pyfunc = np.ones - - -@unittest.skipIf(numpy_version < (1, 8), "test requires Numpy 1.8 or later") -class TestNdFull(ConstructorBaseTest, TestCase): - - def check_result_value(self, ret, expected): - np.testing.assert_equal(ret, expected) - - def test_0d(self): - def func(): - return np.full((), 4.5) - self.check_0d(func) - - def test_1d(self): - def func(n): - return np.full(n, 4.5) - self.check_1d(func) - - def test_1d_dtype(self): - def func(n): - return np.full(n, 4.5, np.bool_) - self.check_1d(func) - - def test_1d_dtype_instance(self): - dtype = np.dtype('bool') - def func(n): - return np.full(n, 4.5, dtype) - self.check_1d(func) - - def test_2d(self): - def func(m, n): - return np.full((m, n), 4.5) - self.check_2d(func) - - def test_2d_dtype_kwarg(self): - def func(m, n): - return np.full((m, n), 1 + 4.5j, dtype=np.complex64) - self.check_2d(func) - - def test_2d_dtype_from_type(self): - # tests issue #2862 - def func(m, n): - return np.full((m, n), np.int32(1)) - self.check_2d(func) - - # tests meta issues from #2862, that np < 1.12 always - # returns float64. Complex uses `.real`, imaginary part dropped - def func(m, n): - return np.full((m, n), np.complex128(1)) - self.check_2d(func) - - # and that if a dtype is specified, this influences the return type - def func(m, n): - return np.full((m, n), 1, dtype=np.int8) - self.check_2d(func) - - -class ConstructorLikeBaseTest(object): - - def mutate_array(self, arr): - try: - arr.fill(42) - except (TypeError, ValueError): - # Try something else (e.g. 
Numpy 1.6 with structured dtypes) - fill_value = b'x' * arr.dtype.itemsize - arr.fill(fill_value) - - def check_like(self, pyfunc, dtype): - def check_arr(arr): - expected = pyfunc(arr) - ret = cfunc(arr) - self.assertEqual(ret.size, expected.size) - self.assertEqual(ret.dtype, expected.dtype) - self.assertStridesEqual(ret, expected) - self.check_result_value(ret, expected) - # test writability - self.mutate_array(ret) - self.mutate_array(expected) - np.testing.assert_equal(ret, expected) - - orig = np.linspace(0, 5, 6).astype(dtype) - cfunc = nrtjit(pyfunc) - - for shape in (6, (2, 3), (1, 2, 3), (3, 1, 2), ()): - if shape == (): - arr = orig[-1:].reshape(()) - else: - arr = orig.reshape(shape) - check_arr(arr) - # Non-contiguous array - if arr.ndim > 0: - check_arr(arr[::2]) - # Check new array doesn't inherit readonly flag - arr.flags['WRITEABLE'] = False - # verify read-only - with self.assertRaises(ValueError): - arr[0] = 1 - check_arr(arr) - - # Scalar argument => should produce a 0-d array - check_arr(orig[0]) - - -class TestNdEmptyLike(ConstructorLikeBaseTest, TestCase): - - def setUp(self): - super(TestNdEmptyLike, self).setUp() - self.pyfunc = np.empty_like - - def check_result_value(self, ret, expected): - pass - - def test_like(self): - pyfunc = self.pyfunc - def func(arr): - return pyfunc(arr) - self.check_like(func, np.float64) - - def test_like_structured(self): - dtype = np.dtype([('a', np.int16), ('b', np.float32)]) - pyfunc = self.pyfunc - def func(arr): - return pyfunc(arr) - self.check_like(func, dtype) - - def test_like_dtype(self): - pyfunc = self.pyfunc - def func(arr): - return pyfunc(arr, np.int32) - self.check_like(func, np.float64) - - def test_like_dtype_instance(self): - dtype = np.dtype('int32') - pyfunc = self.pyfunc - def func(arr): - return pyfunc(arr, dtype) - self.check_like(func, np.float64) - - def test_like_dtype_structured(self): - dtype = np.dtype([('a', np.int16), ('b', np.float32)]) - pyfunc = self.pyfunc - def func(arr): - 
return pyfunc(arr, dtype) - self.check_like(func, np.float64) - - def test_like_dtype_kwarg(self): - pyfunc = self.pyfunc - def func(arr): - return pyfunc(arr, dtype=np.int32) - self.check_like(func, np.float64) - - -class TestNdZerosLike(TestNdEmptyLike): - - def setUp(self): - super(TestNdZerosLike, self).setUp() - self.pyfunc = np.zeros_like - - def check_result_value(self, ret, expected): - np.testing.assert_equal(ret, expected) - - def test_like_structured(self): - super(TestNdZerosLike, self).test_like_structured() - - def test_like_dtype_structured(self): - super(TestNdZerosLike, self).test_like_dtype_structured() - - -class TestNdOnesLike(TestNdZerosLike): - - def setUp(self): - super(TestNdOnesLike, self).setUp() - self.pyfunc = np.ones_like - self.expected_value = 1 - - # Not supported yet. - - @unittest.expectedFailure - def test_like_structured(self): - super(TestNdOnesLike, self).test_like_structured() - - @unittest.expectedFailure - def test_like_dtype_structured(self): - super(TestNdOnesLike, self).test_like_dtype_structured() - - -@unittest.skipIf(numpy_version < (1, 8), "test requires Numpy 1.8 or later") -class TestNdFullLike(ConstructorLikeBaseTest, TestCase): - - def check_result_value(self, ret, expected): - np.testing.assert_equal(ret, expected) - - def test_like(self): - def func(arr): - return np.full_like(arr, 3.5) - self.check_like(func, np.float64) - - # Not supported yet. 
- @unittest.expectedFailure - def test_like_structured(self): - dtype = np.dtype([('a', np.int16), ('b', np.float32)]) - def func(arr): - return np.full_like(arr, 4.5) - self.check_like(func, dtype) - - def test_like_dtype(self): - def func(arr): - return np.full_like(arr, 4.5, np.bool_) - self.check_like(func, np.float64) - - def test_like_dtype_instance(self): - dtype = np.dtype('bool') - def func(arr): - return np.full_like(arr, 4.5, dtype) - self.check_like(func, np.float64) - - def test_like_dtype_kwarg(self): - def func(arr): - return np.full_like(arr, 4.5, dtype=np.bool_) - self.check_like(func, np.float64) - - -class TestNdIdentity(BaseTest): - - def check_identity(self, pyfunc): - self.check_outputs(pyfunc, [(3,)]) - - def test_identity(self): - def func(n): - return np.identity(n) - self.check_identity(func) - - def test_identity_dtype(self): - for dtype in (np.complex64, np.int16, np.bool_, np.dtype('bool')): - def func(n): - return np.identity(n, dtype) - self.check_identity(func) - - -class TestNdEye(BaseTest): - - def test_eye_n(self): - def func(n): - return np.eye(n) - self.check_outputs(func, [(1,), (3,)]) - - def test_eye_n_m(self): - def func(n, m): - return np.eye(n, m) - self.check_outputs(func, [(1, 2), (3, 2), (0, 3)]) - - def check_eye_n_m_k(self, func): - self.check_outputs(func, [(1, 2, 0), - (3, 4, 1), - (3, 4, -1), - (4, 3, -2), - (4, 3, -5), - (4, 3, 5)]) - - def test_eye_n_m_k(self): - def func(n, m, k): - return np.eye(n, m, k) - self.check_eye_n_m_k(func) - - def test_eye_n_m_k_dtype(self): - def func(n, m, k): - return np.eye(N=n, M=m, k=k, dtype=np.int16) - self.check_eye_n_m_k(func) - - def test_eye_n_m_k_dtype_instance(self): - dtype = np.dtype('int16') - def func(n, m, k): - return np.eye(N=n, M=m, k=k, dtype=dtype) - self.check_eye_n_m_k(func) - - -class TestNdDiag(TestCase): - - def setUp(self): - v = np.array([1, 2, 3]) - hv = np.array([[1, 2, 3]]) - vv = np.transpose(hv) - self.vectors = [v, hv, vv] - a3x4 = 
np.arange(12).reshape(3, 4) - a4x3 = np.arange(12).reshape(4, 3) - self.matricies = [a3x4, a4x3] - def func(q): - return np.diag(q) - self.py = func - self.jit = nrtjit(func) - - def func_kwarg(q, k=0): - return np.diag(q, k=k) - self.py_kw = func_kwarg - self.jit_kw = nrtjit(func_kwarg) - - def check_diag(self, pyfunc, nrtfunc, *args, **kwargs): - expected = pyfunc(*args, **kwargs) - computed = nrtfunc(*args, **kwargs) - self.assertEqual(computed.size, expected.size) - self.assertEqual(computed.dtype, expected.dtype) - # NOTE: stride not tested as np returns a RO view, nb returns new data - np.testing.assert_equal(expected, computed) - - # create a diag matrix from a vector - def test_diag_vect_create(self): - for d in self.vectors: - self.check_diag(self.py, self.jit, d) - - # create a diag matrix from a vector at a given offset - def test_diag_vect_create_kwarg(self): - for k in range(-10, 10): - for d in self.vectors: - self.check_diag(self.py_kw, self.jit_kw, d, k=k) - - # extract the diagonal - def test_diag_extract(self): - for d in self.matricies: - self.check_diag(self.py, self.jit, d) - - # extract a diagonal at a given offset - def test_diag_extract_kwarg(self): - for k in range(-4, 4): - for d in self.matricies: - self.check_diag(self.py_kw, self.jit_kw, d, k=k) - - # check error handling - def test_error_handling(self): - d = np.array([[[1.]]]) - cfunc = nrtjit(self.py) - - # missing arg - with self.assertRaises(TypeError): - cfunc() - - # > 2d - with self.assertRaises(TypingError): - cfunc(d) - with self.assertRaises(TypingError): - dfunc = nrtjit(self.py_kw) - dfunc(d, k=3) - -class TestNdArange(BaseTest): - - def test_linspace_2(self): - def pyfunc(n, m): - return np.linspace(n, m) - self.check_outputs(pyfunc, - [(0, 4), (1, 100), (-3.5, 2.5), (-3j, 2+3j), - (2, 1), (1+0.5j, 1.5j)], exact=False) - - def test_linspace_3(self): - def pyfunc(n, m, p): - return np.linspace(n, m, p) - self.check_outputs(pyfunc, - [(0, 4, 9), (1, 4, 3), (-3.5, 2.5, 8), - 
(-3j, 2+3j, 7), (2, 1, 0), - (1+0.5j, 1.5j, 5), (1, 1e100, 1)], - exact=False) - - -class TestNpyEmptyKeyword(TestCase): - def _test_with_dtype_kw(self, dtype): - def pyfunc(shape): - return np.empty(shape, dtype=dtype) - - shapes = [1, 5, 9] - - cfunc = nrtjit(pyfunc) - for s in shapes: - expected = pyfunc(s) - got = cfunc(s) - self.assertEqual(expected.dtype, got.dtype) - self.assertEqual(expected.shape, got.shape) - - def test_with_dtype_kws(self): - for dtype in [np.int32, np.float32, np.complex64, np.dtype('complex64')]: - self._test_with_dtype_kw(dtype) - - def _test_with_shape_and_dtype_kw(self, dtype): - def pyfunc(shape): - return np.empty(shape=shape, dtype=dtype) - - shapes = [1, 5, 9] - - cfunc = nrtjit(pyfunc) - for s in shapes: - expected = pyfunc(s) - got = cfunc(s) - self.assertEqual(expected.dtype, got.dtype) - self.assertEqual(expected.shape, got.shape) - - def test_with_shape_and_dtype_kws(self): - for dtype in [np.int32, np.float32, np.complex64, np.dtype('complex64')]: - self._test_with_shape_and_dtype_kw(dtype) - - def test_empty_no_args(self): - - def pyfunc(): - return np.empty() - - cfunc = nrtjit(pyfunc) - - # Trigger the compilation - # That will cause a TypingError due to missing shape argument - with self.assertRaises(TypingError): - cfunc() - - -class TestNpArray(MemoryLeakMixin, BaseTest): - - def test_0d(self): - def pyfunc(arg): - return np.array(arg) - - cfunc = nrtjit(pyfunc) - got = cfunc(42) - self.assertPreciseEqual(got, np.array(42, dtype=np.intp)) - got = cfunc(2.5) - self.assertPreciseEqual(got, np.array(2.5)) - - def test_0d_with_dtype(self): - def pyfunc(arg): - return np.array(arg, dtype=np.int16) - - self.check_outputs(pyfunc, [(42,), (3.5,)]) - - def test_1d(self): - def pyfunc(arg): - return np.array(arg) - - cfunc = nrtjit(pyfunc) - # A list - got = cfunc([2, 3, 42]) - self.assertPreciseEqual(got, np.intp([2, 3, 42])) - # A heterogeneous tuple - got = cfunc((1.0, 2.5j, 42)) - self.assertPreciseEqual(got, 
np.array([1.0, 2.5j, 42])) - # An empty tuple - got = cfunc(()) - self.assertPreciseEqual(got, np.float64(())) - - def test_1d_with_dtype(self): - def pyfunc(arg): - return np.array(arg, dtype=np.float32) - - self.check_outputs(pyfunc, - [([2, 42],), - ([3.5, 1.0],), - ((1, 3.5, 42),), - ((),), - ]) - - @tag('important') - def test_2d(self): - def pyfunc(arg): - return np.array(arg) - - cfunc = nrtjit(pyfunc) - # A list of tuples - got = cfunc([(1, 2), (3, 4)]) - self.assertPreciseEqual(got, np.intp([[1, 2], [3, 4]])) - got = cfunc([(1, 2.5), (3, 4.5)]) - self.assertPreciseEqual(got, np.float64([[1, 2.5], [3, 4.5]])) - # A tuple of lists - got = cfunc(([1, 2], [3, 4])) - self.assertPreciseEqual(got, np.intp([[1, 2], [3, 4]])) - got = cfunc(([1, 2], [3.5, 4.5])) - self.assertPreciseEqual(got, np.float64([[1, 2], [3.5, 4.5]])) - # A tuple of tuples - got = cfunc(((1.5, 2), (3.5, 4.5))) - self.assertPreciseEqual(got, np.float64([[1.5, 2], [3.5, 4.5]])) - got = cfunc(((), ())) - self.assertPreciseEqual(got, np.float64(((), ()))) - - def test_2d_with_dtype(self): - def pyfunc(arg): - return np.array(arg, dtype=np.int32) - - cfunc = nrtjit(pyfunc) - got = cfunc([(1, 2.5), (3, 4.5)]) - self.assertPreciseEqual(got, np.int32([[1, 2], [3, 4]])) - - def test_raises(self): - - def pyfunc(arg): - return np.array(arg) - - cfunc = nrtjit(pyfunc) - - @contextlib.contextmanager - def check_raises(msg): - with self.assertRaises(TypingError) as raises: - yield - self.assertIn(msg, str(raises.exception)) - - with check_raises(('array(float64, 1d, C) not allowed in a ' - 'homogeneous sequence')): - cfunc(np.array([1.])) - - with check_raises(('type (int64, reflected list(int64)) does ' - 'not have a regular shape')): - cfunc((np.int64(1), [np.int64(2)])) - - with check_raises(("cannot convert (int64, Record([('a', ' RequestError.internal_error: - return x - RequestError.not_found - else: - return x + Shape.circle - - -def vectorize_usecase(x): - if x != RequestError.not_found: - return 
RequestError['internal_error'] - else: - return RequestError.dummy - - -class BaseEnumTest(object): - - def test_compare(self): - pyfunc = compare_usecase - cfunc = jit(nopython=True)(pyfunc) - - for args in self.pairs: - self.assertPreciseEqual(pyfunc(*args), cfunc(*args)) - - def test_return(self): - """ - Passing and returning enum members. - """ - pyfunc = return_usecase - cfunc = jit(nopython=True)(pyfunc) - - for pair in self.pairs: - for pred in (True, False): - args = pair + (pred,) - self.assertIs(pyfunc(*args), cfunc(*args)) - - def check_constant_usecase(self, pyfunc): - cfunc = jit(nopython=True)(pyfunc) - - for arg in self.values: - self.assertPreciseEqual(pyfunc(arg), cfunc(arg)) - - def test_constant(self): - self.check_constant_usecase(getattr_usecase) - self.check_constant_usecase(getitem_usecase) - self.check_constant_usecase(make_constant_usecase(self.values[0])) - - -class TestEnum(BaseEnumTest, TestCase): - """ - Tests for Enum classes and members. - """ - values = [Color.red, Color.green] - - pairs = [ - (Color.red, Color.red), - (Color.red, Color.green), - (Shake.mint, Shake.vanilla), - (Planet.VENUS, Planet.MARS), - (Planet.EARTH, Planet.EARTH), - ] - - def test_identity(self): - """ - Enum with equal values should not compare identical - """ - pyfunc = identity_usecase - cfunc = jit(nopython=True)(pyfunc) - args = (Color.blue, Color.green, Shape.square) - self.assertPreciseEqual(pyfunc(*args), cfunc(*args)) - - -class TestIntEnum(BaseEnumTest, TestCase): - """ - Tests for IntEnum classes and members. 
- """ - values = [Shape.circle, Shape.square] - - pairs = [ - (Shape.circle, Shape.circle), - (Shape.circle, Shape.square), - (RequestError.not_found, RequestError.not_found), - (RequestError.internal_error, RequestError.not_found), - ] - - def test_int_coerce(self): - pyfunc = int_coerce_usecase - cfunc = jit(nopython=True)(pyfunc) - - for arg in [300, 450, 550]: - self.assertPreciseEqual(pyfunc(arg), cfunc(arg)) - - def test_vectorize(self): - cfunc = vectorize(nopython=True)(vectorize_usecase) - arg = np.array([2, 404, 500, 404]) - sol = np.array([vectorize_usecase(i) for i in arg], dtype=arg.dtype) - self.assertPreciseEqual(sol, cfunc(arg)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_errorhandling.py b/numba/numba/tests/test_errorhandling.py deleted file mode 100644 index d04541cf7..000000000 --- a/numba/numba/tests/test_errorhandling.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -Unspecified error handling tests -""" -from __future__ import division - -from numba import jit, njit -from numba import unittest_support as unittest -from numba import errors, utils -import numpy as np - - -class TestErrorHandlingBeforeLowering(unittest.TestCase): - - expected_msg = ("Numba encountered the use of a language feature it does " - "not support in this context: %s") - - def test_unsupported_make_function_lambda(self): - def func(x): - f = lambda x: x # requires `make_function` - - for pipeline in jit, njit: - with self.assertRaises(errors.UnsupportedError) as raises: - pipeline(func)(1) - - expected = self.expected_msg % "" - self.assertIn(expected, str(raises.exception)) - - def test_unsupported_make_function_listcomp(self): - try: - @jit - def func(x): - a = [i for i in x] - return undefined_global # force error - - with self.assertRaises(errors.UnsupportedError) as raises: - func([1]) - - expected = self.expected_msg % "" - self.assertIn(expected, str(raises.exception)) - except NameError: #py27 cannot handle the undefined global - 
self.assertTrue(utils.PY2) - - def test_unsupported_make_function_dictcomp(self): - @jit - def func(): - return {i:0 for i in range(1)} - - with self.assertRaises(errors.UnsupportedError) as raises: - func() - - expected = self.expected_msg % "" - self.assertIn(expected, str(raises.exception)) - - def test_unsupported_make_function_return_inner_func(self): - def func(x): - """ return the closure """ - z = x + 1 - - def inner(x): - return x + z - return inner - - for pipeline in jit, njit: - with self.assertRaises(errors.UnsupportedError) as raises: - pipeline(func)(1) - - expected = self.expected_msg % \ - "" - self.assertIn(expected, str(raises.exception)) - - -class TestUnsupportedReporting(unittest.TestCase): - - def test_unsupported_numpy_function(self): - # np.asarray(list) currently unsupported - @njit - def func(): - np.asarray([1,2,3]) - - with self.assertRaises(errors.TypingError) as raises: - func() - - expected = "Use of unsupported NumPy function 'numpy.asarray'" - self.assertIn(expected, str(raises.exception)) - - -class TestMiscErrorHandling(unittest.TestCase): - - def test_use_of_exception_for_flow_control(self): - # constant inference uses exceptions with no Loc specified to determine - # flow control, this asserts that the construction of the lowering - # error context handler works in the case of an exception with no Loc - # specified. See issue #3135. 
- @njit - def fn(x): - return 10**x - - a = np.array([1.0],dtype=np.float64) - fn(a) # should not raise - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_errormodels.py b/numba/numba/tests/test_errormodels.py deleted file mode 100644 index aafa743d4..000000000 --- a/numba/numba/tests/test_errormodels.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Test setting/overriding error models -""" -from __future__ import division - -from numba import jit -from numba import unittest_support as unittest - - -class TestErrorModel(unittest.TestCase): - - def test_div_by_zero_python(self): - @jit # python model is the default - def model_python(val): - return 1 / val - - with self.assertRaises(ZeroDivisionError): - model_python(0) - - def test_div_by_zero_numpy(self): - @jit(error_model='numpy') - def model_numpy(val): - return 1 / val - - self.assertEqual(model_numpy(0), float('inf')) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_exceptions.py b/numba/numba/tests/test_exceptions.py deleted file mode 100644 index 932e971d4..000000000 --- a/numba/numba/tests/test_exceptions.py +++ /dev/null @@ -1,203 +0,0 @@ - -import numpy as np -import sys - -from numba.compiler import compile_isolated, Flags -from numba import jit, types -from numba import unittest_support as unittest -from .support import TestCase, tag - - -force_pyobj_flags = Flags() -force_pyobj_flags.set("force_pyobject") - -no_pyobj_flags = Flags() - - -class MyError(Exception): - pass - -class OtherError(Exception): - pass - - -def raise_class(exc): - def raiser(i): - if i == 1: - raise exc - elif i == 2: - raise ValueError - elif i == 3: - # The exception type is looked up on a module (issue #1624) - raise np.linalg.LinAlgError - return i - return raiser - -def raise_instance(exc, arg): - def raiser(i): - if i == 1: - raise exc(arg, 1) - elif i == 2: - raise ValueError(arg, 2) - elif i == 3: - raise np.linalg.LinAlgError(arg, 3) - return i - return 
raiser - -def reraise(): - raise - -def outer_function(inner): - def outer(i): - if i == 3: - raise OtherError("bar", 3) - return inner(i) - return outer - -def assert_usecase(i): - assert i == 1, "bar" - - -class TestRaising(TestCase): - - def test_unituple_index_error(self): - def pyfunc(a, i): - return a.shape[i] - - cres = compile_isolated(pyfunc, (types.Array(types.int32, 1, 'A'), - types.int32)) - - cfunc = cres.entry_point - a = np.empty(2) - - self.assertEqual(cfunc(a, 0), pyfunc(a, 0)) - - with self.assertRaises(IndexError) as cm: - cfunc(a, 2) - self.assertEqual(str(cm.exception), "tuple index out of range") - - def check_raise_class(self, flags): - pyfunc = raise_class(MyError) - cres = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cres.entry_point - self.assertEqual(cfunc(0), 0) - - with self.assertRaises(MyError) as cm: - cfunc(1) - self.assertEqual(cm.exception.args, ()) - with self.assertRaises(ValueError) as cm: - cfunc(2) - self.assertEqual(cm.exception.args, ()) - with self.assertRaises(np.linalg.LinAlgError) as cm: - cfunc(3) - self.assertEqual(cm.exception.args, ()) - - @tag('important') - def test_raise_class_nopython(self): - self.check_raise_class(flags=no_pyobj_flags) - - def test_raise_class_objmode(self): - self.check_raise_class(flags=force_pyobj_flags) - - def check_raise_instance(self, flags): - pyfunc = raise_instance(MyError, "some message") - cres = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cres.entry_point - self.assertEqual(cfunc(0), 0) - - with self.assertRaises(MyError) as cm: - cfunc(1) - self.assertEqual(cm.exception.args, ("some message", 1)) - with self.assertRaises(ValueError) as cm: - cfunc(2) - self.assertEqual(cm.exception.args, ("some message", 2)) - with self.assertRaises(np.linalg.LinAlgError) as cm: - cfunc(3) - self.assertEqual(cm.exception.args, ("some message", 3)) - - def test_raise_instance_objmode(self): - self.check_raise_instance(flags=force_pyobj_flags) - - 
@tag('important') - def test_raise_instance_nopython(self): - self.check_raise_instance(flags=no_pyobj_flags) - - def check_raise_nested(self, **jit_args): - """ - Check exception propagation from nested functions. - """ - inner_pyfunc = raise_instance(MyError, "some message") - inner_cfunc = jit(**jit_args)(inner_pyfunc) - cfunc = jit(**jit_args)(outer_function(inner_cfunc)) - - with self.assertRaises(MyError) as cm: - cfunc(1) - self.assertEqual(cm.exception.args, ("some message", 1)) - with self.assertRaises(ValueError) as cm: - cfunc(2) - self.assertEqual(cm.exception.args, ("some message", 2)) - with self.assertRaises(OtherError) as cm: - cfunc(3) - self.assertEqual(cm.exception.args, ("bar", 3)) - - def test_raise_nested(self): - self.check_raise_nested(forceobj=True) - - @tag('important') - def test_raise_nested_npm(self): - self.check_raise_nested(nopython=True) - - def check_reraise(self, flags): - pyfunc = reraise - cres = compile_isolated(pyfunc, (), flags=flags) - cfunc = cres.entry_point - with self.assertRaises(ZeroDivisionError): - try: - 1/0 - except ZeroDivisionError as e: - cfunc() - - def test_reraise_objmode(self): - self.check_reraise(flags=force_pyobj_flags) - - @tag('important') - def test_reraise_nopython(self): - self.check_reraise(flags=no_pyobj_flags) - - def check_raise_invalid_class(self, cls, flags): - pyfunc = raise_class(cls) - cres = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cres.entry_point - with self.assertRaises(TypeError) as cm: - cfunc(1) - self.assertEqual(str(cm.exception), - "exceptions must derive from BaseException") - - def test_raise_invalid_class_objmode(self): - self.check_raise_invalid_class(int, flags=force_pyobj_flags) - self.check_raise_invalid_class(1, flags=force_pyobj_flags) - - def test_raise_invalid_class_nopython(self): - with self.assertTypingError(): - self.check_raise_invalid_class(int, flags=no_pyobj_flags) - with self.assertTypingError(): - self.check_raise_invalid_class(1, 
flags=no_pyobj_flags) - - def check_assert_statement(self, flags): - pyfunc = assert_usecase - cres = compile_isolated(pyfunc, (types.int32,), flags=flags) - cfunc = cres.entry_point - cfunc(1) - with self.assertRaises(AssertionError) as cm: - cfunc(2) - self.assertEqual(str(cm.exception), "bar") - - def test_assert_statement_objmode(self): - self.check_assert_statement(flags=force_pyobj_flags) - - def test_assert_statement_nopython(self): - self.check_assert_statement(flags=no_pyobj_flags) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_extended_arg.py b/numba/numba/tests/test_extended_arg.py deleted file mode 100644 index 16e5ecc79..000000000 --- a/numba/numba/tests/test_extended_arg.py +++ /dev/null @@ -1,48 +0,0 @@ -from __future__ import print_function - -import numba.unittest_support as unittest - -import dis -import struct -import sys - -from numba import jit, utils -from .support import TestCase, tweak_code - - -class TestExtendedArg(TestCase): - """ - Test support for the EXTENDED_ARG opcode. - """ - - def get_extended_arg_load_const(self): - """ - Get a function with a EXTENDED_ARG opcode before a LOAD_CONST opcode. 
- """ - def f(): - x = 5 - return x - - b = bytearray(f.__code__.co_code) - consts = f.__code__.co_consts - if utils.PYVERSION >= (3, 6): - bytecode_len = 0xff - bytecode_format = " return an array of the same shape - if all(ty.layout == 'C' for ty in (cond, x, y)): - def where_impl(cond, x, y): - """ - Fast implementation for C-contiguous arrays - """ - shape = cond.shape - if x.shape != shape or y.shape != shape: - raise ValueError("all inputs should have the same shape") - res = np.empty_like(x) - cf = cond.flat - xf = x.flat - yf = y.flat - rf = res.flat - for i in range(cond.size): - rf[i] = xf[i] if cf[i] else yf[i] - return res - else: - def where_impl(cond, x, y): - """ - Generic implementation for other arrays - """ - shape = cond.shape - if x.shape != shape or y.shape != shape: - raise ValueError("all inputs should have the same shape") - res = np.empty_like(x) - for idx, c in np.ndenumerate(cond): - res[idx] = x[idx] if c else y[idx] - return res - - return where_impl - -# We can define another overload function for the same function, they -# will be tried in turn until one succeeds. - -@overload(where) -def overload_where_scalars(cond, x, y): - """ - Implement where() for scalars. - """ - if not isinstance(cond, types.Array): - if x != y: - raise errors.TypingError("x and y should have the same type") - - def where_impl(cond, x, y): - """ - Scalar where() => return a 0-dim array - """ - scal = x if cond else y - # Can't use full_like() on Numpy < 1.8 - arr = np.empty_like(scal) - arr[()] = scal - return arr - - return where_impl - -# ----------------------------------------------------------------------- -# Overload an already defined built-in function, extending it for new types. 
- -@overload(len) -def overload_len_dummy(arg): - if isinstance(arg, MyDummyType): - def len_impl(arg): - return 13 - - return len_impl - - -@overload_method(MyDummyType, 'length') -def overload_method_length(arg): - def imp(arg): - return len(arg) - return imp - - -def cache_overload_method_usecase(x): - return x.length() - - -def call_func1_nullary(): - return func1() - -def call_func1_unary(x): - return func1(x) - -def len_usecase(x): - return len(x) - -def print_usecase(x): - print(x) - -def getitem_usecase(x, key): - return x[key] - -def npyufunc_usecase(x): - return np.cos(np.sin(x)) - -def get_data_usecase(x): - return x._data - -def get_index_usecase(x): - return x._index - -def is_monotonic_usecase(x): - return x.is_monotonic_increasing - -def make_series_usecase(data, index): - return Series(data, index) - -def clip_usecase(x, lo, hi): - return x.clip(lo, hi) - - -# ----------------------------------------------------------------------- - -def return_non_boxable(): - return np - - -@overload(return_non_boxable) -def overload_return_non_boxable(): - def imp(): - return np - return imp - - -def non_boxable_ok_usecase(sz): - mod = return_non_boxable() - return mod.arange(sz) - - -def non_boxable_bad_usecase(): - return return_non_boxable() - - -class TestLowLevelExtending(TestCase): - """ - Test the low-level two-tier extension API. - """ - - # We check with both @jit and compile_isolated(), to exercise the - # registration logic. 
- - def test_func1(self): - pyfunc = call_func1_nullary - cfunc = jit(nopython=True)(pyfunc) - self.assertPreciseEqual(cfunc(), 42) - pyfunc = call_func1_unary - cfunc = jit(nopython=True)(pyfunc) - self.assertPreciseEqual(cfunc(None), 42) - self.assertPreciseEqual(cfunc(18.0), 6.0) - - def test_func1_isolated(self): - pyfunc = call_func1_nullary - cr = compile_isolated(pyfunc, ()) - self.assertPreciseEqual(cr.entry_point(), 42) - pyfunc = call_func1_unary - cr = compile_isolated(pyfunc, (types.float64,)) - self.assertPreciseEqual(cr.entry_point(18.0), 6.0) - - def test_cast_mydummy(self): - pyfunc = get_dummy - cr = compile_isolated(pyfunc, (), types.float64) - self.assertPreciseEqual(cr.entry_point(), 42.0) - - -class TestPandasLike(TestCase): - """ - Test implementing a pandas-like Index object. - Also stresses most of the high-level API. - """ - - def test_index_len(self): - i = Index(np.arange(3)) - cfunc = jit(nopython=True)(len_usecase) - self.assertPreciseEqual(cfunc(i), 3) - - def test_index_getitem(self): - i = Index(np.int32([42, 8, -5])) - cfunc = jit(nopython=True)(getitem_usecase) - self.assertPreciseEqual(cfunc(i, 1), 8) - ii = cfunc(i, slice(1, None)) - self.assertIsInstance(ii, Index) - self.assertEqual(list(ii), [8, -5]) - - def test_index_ufunc(self): - """ - Check Numpy ufunc on an Index object. 
- """ - i = Index(np.int32([42, 8, -5])) - cfunc = jit(nopython=True)(npyufunc_usecase) - ii = cfunc(i) - self.assertIsInstance(ii, Index) - self.assertPreciseEqual(ii._data, np.cos(np.sin(i._data))) - - def test_index_get_data(self): - # The _data attribute is exposed with make_attribute_wrapper() - i = Index(np.int32([42, 8, -5])) - cfunc = jit(nopython=True)(get_data_usecase) - data = cfunc(i) - self.assertIs(data, i._data) - - def test_index_is_monotonic(self): - # The is_monotonic_increasing attribute is exposed with - # overload_attribute() - cfunc = jit(nopython=True)(is_monotonic_usecase) - for values, expected in [([8, 42, 5], False), - ([5, 8, 42], True), - ([], True)]: - i = Index(np.int32(values)) - got = cfunc(i) - self.assertEqual(got, expected) - - @tag('important') - def test_series_len(self): - i = Index(np.int32([2, 4, 3])) - s = Series(np.float64([1.5, 4.0, 2.5]), i) - cfunc = jit(nopython=True)(len_usecase) - self.assertPreciseEqual(cfunc(s), 3) - - @tag('important') - def test_series_get_index(self): - i = Index(np.int32([2, 4, 3])) - s = Series(np.float64([1.5, 4.0, 2.5]), i) - cfunc = jit(nopython=True)(get_index_usecase) - got = cfunc(s) - self.assertIsInstance(got, Index) - self.assertIs(got._data, i._data) - - def test_series_ufunc(self): - """ - Check Numpy ufunc on an Series object. 
- """ - i = Index(np.int32([42, 8, -5])) - s = Series(np.int64([1, 2, 3]), i) - cfunc = jit(nopython=True)(npyufunc_usecase) - ss = cfunc(s) - self.assertIsInstance(ss, Series) - self.assertIsInstance(ss._index, Index) - self.assertIs(ss._index._data, i._data) - self.assertPreciseEqual(ss._values, np.cos(np.sin(s._values))) - - @tag('important') - def test_series_constructor(self): - i = Index(np.int32([42, 8, -5])) - d = np.float64([1.5, 4.0, 2.5]) - cfunc = jit(nopython=True)(make_series_usecase) - got = cfunc(d, i) - self.assertIsInstance(got, Series) - self.assertIsInstance(got._index, Index) - self.assertIs(got._index._data, i._data) - self.assertIs(got._values, d) - - @tag('important') - def test_series_clip(self): - i = Index(np.int32([42, 8, -5])) - s = Series(np.float64([1.5, 4.0, 2.5]), i) - cfunc = jit(nopython=True)(clip_usecase) - ss = cfunc(s, 1.6, 3.0) - self.assertIsInstance(ss, Series) - self.assertIsInstance(ss._index, Index) - self.assertIs(ss._index._data, i._data) - self.assertPreciseEqual(ss._values, np.float64([1.6, 3.0, 2.5])) - - -class TestHighLevelExtending(TestCase): - """ - Test the high-level combined API. - """ - - @tag('important') - def test_where(self): - """ - Test implementing a function with @overload. - """ - pyfunc = call_where - cfunc = jit(nopython=True)(pyfunc) - - def check(*args, **kwargs): - expected = np_where(*args, **kwargs) - got = cfunc(*args, **kwargs) - self.assertPreciseEqual - - check(x=3, cond=True, y=8) - check(True, 3, 8) - check(np.bool_([True, False, True]), np.int32([1, 2, 3]), - np.int32([4, 5, 5])) - - # The typing error is propagated - with self.assertRaises(errors.TypingError) as raises: - cfunc(np.bool_([]), np.int32([]), np.int64([])) - self.assertIn("x and y should have the same dtype", - str(raises.exception)) - - @tag('important') - def test_len(self): - """ - Test re-implementing len() for a custom type with @overload. 
- """ - cfunc = jit(nopython=True)(len_usecase) - self.assertPreciseEqual(cfunc(MyDummy()), 13) - self.assertPreciseEqual(cfunc([4, 5]), 2) - - def test_print(self): - """ - Test re-implementing print() for a custom type with @overload. - """ - cfunc = jit(nopython=True)(print_usecase) - with captured_stdout(): - cfunc(MyDummy()) - self.assertEqual(sys.stdout.getvalue(), "hello!\n") - - def test_no_cpython_wrapper(self): - """ - Test overloading whose return value cannot be represented in CPython. - """ - # Test passing Module type from a @overload implementation to ensure - # that the *no_cpython_wrapper* flag works - ok_cfunc = jit(nopython=True)(non_boxable_ok_usecase) - n = 10 - got = ok_cfunc(n) - expect = non_boxable_ok_usecase(n) - np.testing.assert_equal(expect, got) - # Verify that the Module type cannot be returned to CPython - bad_cfunc = jit(nopython=True)(non_boxable_bad_usecase) - with self.assertRaises(TypeError) as raises: - bad_cfunc() - errmsg = str(raises.exception) - expectmsg = "cannot convert native Module" - self.assertIn(expectmsg, errmsg) - - -def _assert_cache_stats(cfunc, expect_hit, expect_misses): - hit = cfunc._cache_hits[cfunc.signatures[0]] - if hit != expect_hit: - raise AssertionError('cache not used') - miss = cfunc._cache_misses[cfunc.signatures[0]] - if miss != expect_misses: - raise AssertionError('cache not used') - - -class TestOverloadMethodCaching(TestCase): - # Nested multiprocessing.Pool raises AssertionError: - # "daemonic processes are not allowed to have children" - _numba_parallel_test_ = False - - def test_caching_overload_method(self): - self._cache_dir = temp_directory(self.__class__.__name__) - with override_config('CACHE_DIR', self._cache_dir): - self.run_caching_overload_method() - - def run_caching_overload_method(self): - cfunc = jit(nopython=True, cache=True)(cache_overload_method_usecase) - self.assertPreciseEqual(cfunc(MyDummy()), 13) - _assert_cache_stats(cfunc, 0, 1) - llvmir = 
cfunc.inspect_llvm((mydummy_type,)) - # Ensure the inner method is not a declaration - decls = [ln for ln in llvmir.splitlines() - if ln.startswith('declare') and 'overload_method_length' in ln] - self.assertEqual(len(decls), 0) - # Test in a separate process - try: - ctx = multiprocessing.get_context('spawn') - except AttributeError: - ctx = multiprocessing - q = ctx.Queue() - p = ctx.Process(target=run_caching_overload_method, - args=(q, self._cache_dir)) - p.start() - q.put(MyDummy()) - p.join() - # Ensure subprocess exited normally - self.assertEqual(p.exitcode, 0) - res = q.get(timeout=1) - self.assertEqual(res, 13) - - -def run_caching_overload_method(q, cache_dir): - """ - Used by TestOverloadMethodCaching.test_caching_overload_method - """ - with override_config('CACHE_DIR', cache_dir): - arg = q.get() - cfunc = jit(nopython=True, cache=True)(cache_overload_method_usecase) - res = cfunc(arg) - q.put(res) - # Check cache stat - _assert_cache_stats(cfunc, 1, 0) - -class TestIntrinsic(TestCase): - def test_ll_pointer_cast(self): - """ - Usecase test: custom reinterpret cast to turn int values to pointers - """ - from ctypes import CFUNCTYPE, POINTER, c_float, c_int - - # Use intrinsic to make a reinterpret_cast operation - def unsafe_caster(result_type): - assert isinstance(result_type, types.CPointer) - - @intrinsic - def unsafe_cast(typingctx, src): - self.assertIsInstance(typingctx, typing.Context) - if isinstance(src, types.Integer): - sig = result_type(types.uintp) - - # defines the custom code generation - def codegen(context, builder, signature, args): - [src] = args - rtype = signature.return_type - llrtype = context.get_value_type(rtype) - return builder.inttoptr(src, llrtype) - - return sig, codegen - - return unsafe_cast - - # make a nopython function to use our cast op. - # this is not usable from cpython due to the returning of a pointer. 
- def unsafe_get_ctypes_pointer(src): - raise NotImplementedError("not callable from python") - - @overload(unsafe_get_ctypes_pointer) - def array_impl_unsafe_get_ctypes_pointer(arrtype): - if isinstance(arrtype, types.Array): - unsafe_cast = unsafe_caster(types.CPointer(arrtype.dtype)) - - def array_impl(arr): - return unsafe_cast(src=arr.ctypes.data) - return array_impl - - # the ctype wrapped function for use in nopython mode - def my_c_fun_raw(ptr, n): - for i in range(n): - print(ptr[i]) - - prototype = CFUNCTYPE(None, POINTER(c_float), c_int) - my_c_fun = prototype(my_c_fun_raw) - - # Call our pointer-cast in a @jit compiled function and use - # the pointer in a ctypes function - @jit(nopython=True) - def foo(arr): - ptr = unsafe_get_ctypes_pointer(arr) - my_c_fun(ptr, arr.size) - - # Test - arr = np.arange(10, dtype=np.float32) - with captured_stdout() as buf: - foo(arr) - got = buf.getvalue().splitlines() - buf.close() - expect = list(map(str, arr)) - self.assertEqual(expect, got) - - def test_serialization(self): - """ - Test serialization of intrinsic objects - """ - # define a intrinsic - @intrinsic - def identity(context, x): - def codegen(context, builder, signature, args): - return args[0] - - sig = x(x) - return sig, codegen - - # use in a jit function - @jit(nopython=True) - def foo(x): - return identity(x) - - self.assertEqual(foo(1), 1) - - # get serialization memo - memo = _Intrinsic._memo - memo_size = len(memo) - - # pickle foo and check memo size - serialized_foo = pickle.dumps(foo) - # increases the memo size - memo_size += 1 - self.assertEqual(memo_size, len(memo)) - # unpickle - foo_rebuilt = pickle.loads(serialized_foo) - self.assertEqual(memo_size, len(memo)) - # check rebuilt foo - self.assertEqual(foo(1), foo_rebuilt(1)) - - # pickle identity directly - serialized_identity = pickle.dumps(identity) - # memo size unchanged - self.assertEqual(memo_size, len(memo)) - # unpickle - identity_rebuilt = pickle.loads(serialized_identity) - # must 
be the same object - self.assertIs(identity, identity_rebuilt) - # memo size unchanged - self.assertEqual(memo_size, len(memo)) - - def test_deserialization(self): - """ - Test deserialization of intrinsic - """ - def defn(context, x): - def codegen(context, builder, signature, args): - return args[0] - - return x(x), codegen - - memo = _Intrinsic._memo - memo_size = len(memo) - # invoke _Intrinsic indirectly to avoid registration which keeps an - # internal reference inside the compiler - original = _Intrinsic('foo', defn) - self.assertIs(original._defn, defn) - pickled = pickle.dumps(original) - # by pickling, a new memo entry is created - memo_size += 1 - self.assertEqual(memo_size, len(memo)) - del original # remove original before unpickling - # by deleting, the memo entry is removed - memo_size -= 1 - self.assertEqual(memo_size, len(memo)) - - rebuilt = pickle.loads(pickled) - # verify that the rebuilt object is different - self.assertIsNot(rebuilt._defn, defn) - - # the second rebuilt object is the same as the first - second = pickle.loads(pickled) - self.assertIs(rebuilt._defn, second._defn) - - -class TestRegisterJitable(unittest.TestCase): - def test_no_flags(self): - @register_jitable - def foo(x, y): - return x + y - - def bar(x, y): - return foo(x, y) - - cbar = jit(nopython=True)(bar) - - expect = bar(1, 2) - got = cbar(1, 2) - self.assertEqual(expect, got) - - def test_flags_no_nrt(self): - @register_jitable(_nrt=False) - def foo(n): - return np.arange(n) - - def bar(n): - return foo(n) - - self.assertEqual(bar(3).tolist(), [0, 1, 2]) - - cbar = jit(nopython=True)(bar) - with self.assertRaises(errors.TypingError) as raises: - cbar(2) - msg = "Only accept returning of array passed into the function as argument" - self.assertIn(msg, str(raises.exception)) - - -class TestImportCythonFunction(unittest.TestCase): - @unittest.skipIf(sc is None, "Only run if SciPy >= 0.19 is installed") - def test_getting_function(self): - addr = 
get_cython_function_address("scipy.special.cython_special", "j0") - functype = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_double) - _j0 = functype(addr) - j0 = jit(nopython=True)(lambda x: _j0(x)) - self.assertEqual(j0(0), 1) - - def test_missing_module(self): - with self.assertRaises(ImportError) as raises: - addr = get_cython_function_address("fakemodule", "fakefunction") - # The quotes are not there in Python 2 - msg = "No module named '?fakemodule'?" - match = re.match(msg, str(raises.exception)) - self.assertIsNotNone(match) - - @unittest.skipIf(sc is None, "Only run if SciPy >= 0.19 is installed") - def test_missing_function(self): - with self.assertRaises(ValueError) as raises: - addr = get_cython_function_address("scipy.special.cython_special", "foo") - msg = "No function 'foo' found in __pyx_capi__ of 'scipy.special.cython_special'" - self.assertEqual(msg, str(raises.exception)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_extending_types.py b/numba/numba/tests/test_extending_types.py deleted file mode 100644 index aca619a67..000000000 --- a/numba/numba/tests/test_extending_types.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -Test extending types via the numba.extending.* API. 
-""" - -from numba import njit -from numba import types -from numba import cgutils -from numba.errors import TypingError -from numba.extending import lower_builtin -from numba.extending import models, register_model -from numba.extending import make_attribute_wrapper -from numba.extending import type_callable -from numba.extending import overload - -from numba import unittest_support as unittest - - -class TestExtTypDummy(unittest.TestCase): - - def setUp(self): - class Dummy(object): - def __init__(self, value): - self.value = value - - class DummyType(types.Type): - def __init__(self): - super(DummyType, self).__init__(name='Dummy') - - dummy_type = DummyType() - - @register_model(DummyType) - class DummyModel(models.StructModel): - def __init__(self, dmm, fe_type): - members = [ - ('value', types.intp), - ] - models.StructModel.__init__(self, dmm, fe_type, members) - - make_attribute_wrapper(DummyType, 'value', 'value') - - @type_callable(Dummy) - def type_dummy(context): - def typer(value): - return dummy_type - return typer - - @lower_builtin(Dummy, types.intp) - def impl_dummy(context, builder, sig, args): - typ = sig.return_type - [value] = args - dummy = cgutils.create_struct_proxy(typ)(context, builder) - dummy.value = value - return dummy._getvalue() - - # Store attributes - self.Dummy = Dummy - self.DummyType = DummyType - - def _add_float_overload(self): - @overload(float) - def dummy_to_float(x): - if isinstance(x, self.DummyType): - def codegen(x): - return float(x.value) - return codegen - else: - raise TypeError('cannot type float({})'.format(x)) - - def test_overload_float(self): - self._add_float_overload() - Dummy = self.Dummy - - @njit - def foo(x): - return float(Dummy(x)) - - self.assertEqual(foo(123), float(123)) - - def test_overload_float_error_msg(self): - self._add_float_overload() - - @njit - def foo(x): - return float(x) - - with self.assertRaises(TypingError) as raises: - foo(1j) - - self.assertIn("TypeError: float() does not support 
complex", - str(raises.exception)) - self.assertIn("TypeError: cannot type float(complex128)", - str(raises.exception)) diff --git a/numba/numba/tests/test_fancy_indexing.py b/numba/numba/tests/test_fancy_indexing.py deleted file mode 100644 index fd8be585b..000000000 --- a/numba/numba/tests/test_fancy_indexing.py +++ /dev/null @@ -1,231 +0,0 @@ -from __future__ import print_function - -import itertools - -import numpy as np - -import numba.unittest_support as unittest -from numba import types, jit, typeof -from numba.errors import TypingError -from .support import MemoryLeakMixin, TestCase, tag - - -def getitem_usecase(a, b): - return a[b] - -def setitem_usecase(a, idx, b): - a[idx] = b - -def np_take(A, indices): - return np.take(A, indices) - -def np_take_kws(A, indices, axis): - return np.take(A, indices, axis=axis) - -class TestFancyIndexing(MemoryLeakMixin, TestCase): - - def generate_advanced_indices(self, N, many=True): - choices = [np.int16([0, N - 1, -2])] - if many: - choices += [np.uint16([0, 1, N - 1]), - np.bool_([0, 1, 1, 0])] - return choices - - def generate_basic_index_tuples(self, N, maxdim, many=True): - """ - Generate basic index tuples with 0 to *maxdim* items. - """ - # Note integers can be considered advanced indices in certain - # cases, so we avoid them here. - # See "Combining advanced and basic indexing" - # in http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html - if many: - choices = [slice(None, None, None), - slice(1, N - 1, None), - slice(0, None, 2), - slice(N - 1, None, -2), - slice(-N + 1, -1, None), - slice(-1, -N, -2), - ] - else: - choices = [slice(0, N - 1, None), - slice(-1, -N, -2)] - for ndim in range(maxdim + 1): - for tup in itertools.product(choices, repeat=ndim): - yield tup - - def generate_advanced_index_tuples(self, N, maxdim, many=True): - """ - Generate advanced index tuples by generating basic index tuples - and adding a single advanced index item. 
- """ - # (Note Numba doesn't support advanced indices with more than - # one advanced index array at the moment) - choices = list(self.generate_advanced_indices(N, many=many)) - for i in range(maxdim + 1): - for tup in self.generate_basic_index_tuples(N, maxdim - 1, many): - for adv in choices: - yield tup[:i] + (adv,) + tup[i:] - - def generate_advanced_index_tuples_with_ellipsis(self, N, maxdim, many=True): - """ - Same as generate_advanced_index_tuples(), but also insert an - ellipsis at various points. - """ - for tup in self.generate_advanced_index_tuples(N, maxdim, many): - for i in range(len(tup) + 1): - yield tup[:i] + (Ellipsis,) + tup[i:] - - def check_getitem_indices(self, arr, indices): - pyfunc = getitem_usecase - cfunc = jit(nopython=True)(pyfunc) - orig = arr.copy() - orig_base = arr.base or arr - - for index in indices: - expected = pyfunc(arr, index) - # Sanity check: if a copy wasn't made, this wasn't advanced - # but basic indexing, and shouldn't be tested here. - assert expected.base is not orig_base - got = cfunc(arr, index) - # Note Numba may not return the same array strides and - # contiguity as Numpy - self.assertEqual(got.shape, expected.shape) - self.assertEqual(got.dtype, expected.dtype) - np.testing.assert_equal(got, expected) - # Check a copy was *really* returned by Numba - if got.size: - got.fill(42) - np.testing.assert_equal(arr, orig) - - def test_getitem_tuple(self): - # Test many variations of advanced indexing with a tuple index - N = 4 - ndim = 3 - arr = np.arange(N ** ndim).reshape((N,) * ndim).astype(np.int32) - indices = self.generate_advanced_index_tuples(N, ndim) - - self.check_getitem_indices(arr, indices) - - def test_getitem_tuple_and_ellipsis(self): - # Same, but also insert an ellipsis at a random point - N = 4 - ndim = 3 - arr = np.arange(N ** ndim).reshape((N,) * ndim).astype(np.int32) - indices = self.generate_advanced_index_tuples_with_ellipsis(N, ndim, - many=False) - - self.check_getitem_indices(arr, indices) - 
- @tag('important') - def test_getitem_array(self): - # Test advanced indexing with a single array index - N = 4 - ndim = 3 - arr = np.arange(N ** ndim).reshape((N,) * ndim).astype(np.int32) - indices = self.generate_advanced_indices(N) - self.check_getitem_indices(arr, indices) - - def check_setitem_indices(self, arr, indices): - pyfunc = setitem_usecase - cfunc = jit(nopython=True)(pyfunc) - - for index in indices: - src = arr[index] - expected = np.zeros_like(arr) - got = np.zeros_like(arr) - pyfunc(expected, index, src) - cfunc(got, index, src) - # Note Numba may not return the same array strides and - # contiguity as Numpy - self.assertEqual(got.shape, expected.shape) - self.assertEqual(got.dtype, expected.dtype) - np.testing.assert_equal(got, expected) - - def test_setitem_tuple(self): - # Test many variations of advanced indexing with a tuple index - N = 4 - ndim = 3 - arr = np.arange(N ** ndim).reshape((N,) * ndim).astype(np.int32) - indices = self.generate_advanced_index_tuples(N, ndim) - self.check_setitem_indices(arr, indices) - - def test_setitem_tuple_and_ellipsis(self): - # Same, but also insert an ellipsis at a random point - N = 4 - ndim = 3 - arr = np.arange(N ** ndim).reshape((N,) * ndim).astype(np.int32) - indices = self.generate_advanced_index_tuples_with_ellipsis(N, ndim, - many=False) - - self.check_setitem_indices(arr, indices) - - def test_setitem_array(self): - # Test advanced indexing with a single array index - N = 4 - ndim = 3 - arr = np.arange(N ** ndim).reshape((N,) * ndim).astype(np.int32) + 10 - indices = self.generate_advanced_indices(N) - self.check_setitem_indices(arr, indices) - - - def test_np_take(self): - # shorter version of array.take test in test_array_methods - pyfunc = np_take - cfunc = jit(nopython=True)(pyfunc) - - def check(arr, ind): - expected = pyfunc(arr, ind) - got = cfunc(arr, ind) - self.assertPreciseEqual(expected, got) - if hasattr(expected, 'order'): - self.assertEqual(expected.order == got.order) - - # need 
to check: - # 1. scalar index - # 2. 1d array index - # 3. nd array index - # 4. reflected list - # 5. tuples - - test_indices = [] - test_indices.append(1) - test_indices.append(np.array([1, 5, 1, 11, 3])) - test_indices.append(np.array([[[1], [5]], [[1], [11]]])) - test_indices.append([1, 5, 1, 11, 3]) - test_indices.append((1, 5, 1)) - test_indices.append(((1, 5, 1), (11, 3, 2))) - - for dt in [np.int64, np.complex128]: - A = np.arange(12, dtype=dt).reshape((4, 3)) - for ind in test_indices: - check(A, ind) - - #check illegal access raises - szA = A.size - illegal_indices = [szA, -szA - 1, np.array(szA), np.array(-szA - 1), - [szA], [-szA - 1]] - for x in illegal_indices: - with self.assertRaises(IndexError): - cfunc(A, x) # oob raises - - # check float indexing raises - with self.assertRaises(TypingError): - cfunc(A, [1.7]) - - # check unsupported arg raises - with self.assertRaises(TypingError): - take_kws = jit(nopython=True)(np_take_kws) - take_kws(A, 1, 1) - - # check kwarg unsupported raises - with self.assertRaises(TypingError): - take_kws = jit(nopython=True)(np_take_kws) - take_kws(A, 1, axis=1) - - #exceptions leak refs - self.disable_leak_check() - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_fastmath.py b/numba/numba/tests/test_fastmath.py deleted file mode 100644 index d36b6c359..000000000 --- a/numba/numba/tests/test_fastmath.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import print_function, absolute_import - -import math -import numpy as np - -from numba import unittest_support as unittest -from numba.tests.support import captured_stdout, override_config -from numba import njit, vectorize, guvectorize - - -class TestFastMath(unittest.TestCase): - def test_jit(self): - def foo(x): - return x + math.sin(x) - fastfoo = njit(fastmath=True)(foo) - slowfoo = njit(foo) - self.assertEqual(fastfoo(0.5), slowfoo(0.5)) - fastllvm = fastfoo.inspect_llvm(fastfoo.signatures[0]) - slowllvm = 
slowfoo.inspect_llvm(slowfoo.signatures[0]) - # Ensure fast attribute in fast version only - self.assertIn('fadd fast', fastllvm) - self.assertIn('call fast', fastllvm) - self.assertNotIn('fadd fast', slowllvm) - self.assertNotIn('call fast', slowllvm) - - def test_vectorize(self): - def foo(x): - return x + math.sin(x) - fastfoo = vectorize(fastmath=True)(foo) - slowfoo = vectorize(foo) - x = np.random.random(8).astype(np.float32) - # capture the optimized llvm to check for fast flag - with override_config('DUMP_OPTIMIZED', True): - with captured_stdout() as slow_cap: - expect = slowfoo(x) - slowllvm = slow_cap.getvalue() - with captured_stdout() as fast_cap: - got = fastfoo(x) - fastllvm = fast_cap.getvalue() - np.testing.assert_almost_equal(expect, got) - self.assertIn('fadd fast', fastllvm) - self.assertIn('call fast', fastllvm) - self.assertNotIn('fadd fast', slowllvm) - self.assertNotIn('call fast', slowllvm) - - def test_guvectorize(self): - def foo(x, out): - out[0] = x + math.sin(x) - x = np.random.random(8).astype(np.float32) - with override_config('DUMP_OPTIMIZED', True): - types = ['(float32, float32[:])'] - sig = '()->()' - with captured_stdout() as fast_cap: - fastfoo = guvectorize(types, sig, fastmath=True)(foo) - fastllvm = fast_cap.getvalue() - with captured_stdout() as slow_cap: - slowfoo = guvectorize(types, sig)(foo) - slowllvm = slow_cap.getvalue() - expect = slowfoo(x) - got = fastfoo(x) - np.testing.assert_almost_equal(expect, got) - self.assertIn('fadd fast', fastllvm) - self.assertIn('call fast', fastllvm) - self.assertNotIn('fadd fast', slowllvm) - self.assertNotIn('call fast', slowllvm) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_flow_control.py b/numba/numba/tests/test_flow_control.py deleted file mode 100644 index c064fcc85..000000000 --- a/numba/numba/tests/test_flow_control.py +++ /dev/null @@ -1,854 +0,0 @@ -from __future__ import print_function - -import itertools - -import 
numba.unittest_support as unittest -from numba.controlflow import CFGraph, Loop -from numba.compiler import compile_isolated, Flags -from numba import types -from .support import TestCase, tag - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -forceobj_flags = Flags() -forceobj_flags.set("force_pyobject") - -no_pyobj_flags = Flags() -no_pyobj_flags.set("nrt") - - -def for_loop_usecase1(x, y): - result = 0 - for i in range(x): - result += i - return result - -def for_loop_usecase2(x, y): - result = 0 - for i, j in enumerate(range(x, y, -1)): - result += i * j - return result - -def for_loop_usecase3(x, y): - result = 0 - lst = [x, y] - for i in lst: - result += i - return result - -def for_loop_usecase4(x, y): - result = 0 - for i in range(10): - for j in range(10): - result += 1 - return result - -def for_loop_usecase5(x, y): - result = 0 - for i in range(x): - result += 1 - if result > y: - break - return result - -def for_loop_usecase6(x, y): - result = 0 - for i in range(x): - if i > y: - continue - result += 1 - return result - -def for_loop_usecase7(x, y): - for i in range(x): - x = 0 - for j in range(x): - return 1 - else: - pass - return 0 - -def for_loop_usecase8(x, y): - result = 0 - for i in range(x, y, y - x + 1): - result += 1 - return result - -def for_loop_usecase9(x, y): - z = 0 - for i in range(x): - x = 0 - for j in range(x): - if j == x / 2: - z += j - break - else: - z += y - - return z - -def for_loop_usecase10(x, y): - for i in range(x): - if i == y: - z = y - break - else: - z = i * 2 - return z - - -def while_loop_usecase1(x, y): - result = 0 - i = 0 - while i < x: - result += i - i += 1 - return result - -def while_loop_usecase2(x, y): - result = 0 - while result != x: - result += 1 - return result - -def while_loop_usecase3(x, y): - result = 0 - i = 0 - j = 0 - while i < x: - while j < y: - result += i + j - i += 1 - j += 1 - return result - -def while_loop_usecase4(x, y): - result = 0 - while True: - result += 1 
- if result > x: - break - return result - -def while_loop_usecase5(x, y): - result = 0 - while result < x: - if result > y: - result += 2 - continue - result += 1 - return result - -def ifelse_usecase1(x, y): - if x > 0: - pass - elif y > 0: - pass - else: - pass - return True - -def ifelse_usecase2(x, y): - if x > y: - return 1 - elif x == 0 or y == 0: - return 2 - else: - return 3 - -def ifelse_usecase3(x, y): - if x > 0: - if y > 0: - return 1 - elif y < 0: - return 1 - else: - return 0 - elif x < 0: - return 1 - else: - return 0 - -def ifelse_usecase4(x, y): - if x == y: - return 1 - -def ternary_ifelse_usecase1(x, y): - return True if x > y else False - - -class TestFlowControl(TestCase): - - def run_test(self, pyfunc, x_operands, y_operands, - flags=enable_pyobj_flags): - cr = compile_isolated(pyfunc, (types.intp, types.intp), flags=flags) - cfunc = cr.entry_point - for x, y in itertools.product(x_operands, y_operands): - pyerr = None - cerr = None - try: - pyres = pyfunc(x, y) - except Exception as e: - pyerr = e - - try: - cres = cfunc(x, y) - except Exception as e: - if pyerr is None: - raise - cerr = e - self.assertEqual(type(pyerr), type(cerr)) - else: - if pyerr is not None: - self.fail("Invalid for pure-python but numba works\n" + - pyerr) - self.assertEqual(pyres, cres) - - def test_for_loop1(self, flags=enable_pyobj_flags): - self.run_test(for_loop_usecase1, [-10, 0, 10], [0], flags=flags) - - @tag('important') - def test_for_loop1_npm(self): - self.test_for_loop1(flags=no_pyobj_flags) - - def test_for_loop2(self, flags=enable_pyobj_flags): - self.run_test(for_loop_usecase2, [-10, 0, 10], [-10, 0, 10], - flags=flags) - - @tag('important') - def test_for_loop2_npm(self): - self.test_for_loop2(flags=no_pyobj_flags) - - def test_for_loop3(self, flags=enable_pyobj_flags): - """ - List requires pyobject - """ - self.run_test(for_loop_usecase3, [1], [2], - flags=flags) - - def test_for_loop3_npm(self): - self.test_for_loop3(flags=no_pyobj_flags) - - def 
test_for_loop4(self, flags=enable_pyobj_flags): - self.run_test(for_loop_usecase4, [10], [10], flags=flags) - - @tag('important') - def test_for_loop4_npm(self): - self.test_for_loop4(flags=no_pyobj_flags) - - def test_for_loop5(self, flags=enable_pyobj_flags): - self.run_test(for_loop_usecase5, [100], [50], flags=flags) - - @tag('important') - def test_for_loop5_npm(self): - self.test_for_loop5(flags=no_pyobj_flags) - - def test_for_loop6(self, flags=enable_pyobj_flags): - self.run_test(for_loop_usecase6, [100], [50], flags=flags) - - @tag('important') - def test_for_loop6_npm(self): - self.test_for_loop6(flags=no_pyobj_flags) - - def test_for_loop7(self, flags=enable_pyobj_flags): - self.run_test(for_loop_usecase7, [5], [0], flags=flags) - - @tag('important') - def test_for_loop7_npm(self): - self.test_for_loop7(flags=no_pyobj_flags) - - def test_for_loop8(self, flags=enable_pyobj_flags): - self.run_test(for_loop_usecase8, [0, 1], [0, 2, 10], flags=flags) - - @tag('important') - def test_for_loop8_npm(self): - self.test_for_loop8(flags=no_pyobj_flags) - - def test_for_loop9(self, flags=enable_pyobj_flags): - self.run_test(for_loop_usecase9, [0, 1], [0, 2, 10], flags=flags) - - @tag('important') - def test_for_loop9_npm(self): - self.test_for_loop9(flags=no_pyobj_flags) - - def test_for_loop10(self, flags=enable_pyobj_flags): - self.run_test(for_loop_usecase10, [5], [2, 7], flags=flags) - - @tag('important') - def test_for_loop10_npm(self): - self.test_for_loop10(flags=no_pyobj_flags) - - def test_while_loop1(self, flags=enable_pyobj_flags): - self.run_test(while_loop_usecase1, [10], [0], flags=flags) - - def test_while_loop1_npm(self): - self.test_while_loop1(flags=no_pyobj_flags) - - def test_while_loop2(self, flags=enable_pyobj_flags): - self.run_test(while_loop_usecase2, [10], [0], flags=flags) - - def test_while_loop2_npm(self): - self.test_while_loop2(flags=no_pyobj_flags) - - def test_while_loop3(self, flags=enable_pyobj_flags): - 
self.run_test(while_loop_usecase3, [10], [10], flags=flags) - - @tag('important') - def test_while_loop3_npm(self): - self.test_while_loop3(flags=no_pyobj_flags) - - def test_while_loop4(self, flags=enable_pyobj_flags): - self.run_test(while_loop_usecase4, [10], [0], flags=flags) - - @tag('important') - def test_while_loop4_npm(self): - self.test_while_loop4(flags=no_pyobj_flags) - - def test_while_loop5(self, flags=enable_pyobj_flags): - self.run_test(while_loop_usecase5, [0, 5, 10], [0, 5, 10], flags=flags) - - @tag('important') - def test_while_loop5_npm(self): - self.test_while_loop5(flags=no_pyobj_flags) - - def test_ifelse1(self, flags=enable_pyobj_flags): - self.run_test(ifelse_usecase1, [-1, 0, 1], [-1, 0, 1], flags=flags) - - def test_ifelse1_npm(self): - self.test_ifelse1(flags=no_pyobj_flags) - - def test_ifelse2(self, flags=enable_pyobj_flags): - self.run_test(ifelse_usecase2, [-1, 0, 1], [-1, 0, 1], flags=flags) - - @tag('important') - def test_ifelse2_npm(self): - self.test_ifelse2(flags=no_pyobj_flags) - - def test_ifelse3(self, flags=enable_pyobj_flags): - self.run_test(ifelse_usecase3, [-1, 0, 1], [-1, 0, 1], flags=flags) - - @tag('important') - def test_ifelse3_npm(self): - self.test_ifelse3(flags=no_pyobj_flags) - - def test_ifelse4(self, flags=enable_pyobj_flags): - self.run_test(ifelse_usecase4, [-1, 0, 1], [-1, 0, 1], flags=flags) - - @tag('important') - def test_ifelse4_npm(self): - self.test_ifelse4(flags=no_pyobj_flags) - - def test_ternary_ifelse1(self, flags=enable_pyobj_flags): - self.run_test(ternary_ifelse_usecase1, [-1, 0, 1], [-1, 0, 1], - flags=flags) - - @tag('important') - def test_ternary_ifelse1_npm(self): - self.test_ternary_ifelse1(flags=no_pyobj_flags) - - -class TestCFGraph(TestCase): - """ - Test the numba.controlflow.CFGraph class. - """ - - def from_adj_list(self, d, entry_point=0): - """ - Build a CFGraph class from a dict of adjacency lists. 
- """ - g = CFGraph() - # Need to add all nodes before adding edges - for node in d: - g.add_node(node) - for node, dests in d.items(): - for dest in dests: - g.add_edge(node, dest) - return g - - def loopless1(self): - """ - A simple CFG corresponding to the following code structure: - - c = (... if ... else ...) + ... - return b + c - """ - g = self.from_adj_list({0: [18, 12], 12: [21], 18: [21], 21: []}) - g.set_entry_point(0) - g.process() - return g - - def loopless1_dead_nodes(self): - """ - Same as loopless1(), but with added dead blocks (some of them - in a loop). - """ - g = self.from_adj_list( - {0: [18, 12], - 12: [21], - 18: [21], - 21: [], - 91: [12, 0], - 92: [91, 93], - 93: [92], - 94: [], - }) - g.set_entry_point(0) - g.process() - return g - - def loopless2(self): - """ - A loopless CFG corresponding to the following code structure: - - c = (... if ... else ...) + ... - if c: - return ... - else: - return ... - - Note there are two exit points, and the entry point has been - changed to a non-zero value. - """ - g = self.from_adj_list( - {99: [18, 12], 12: [21], 18: [21], 21: [42, 34], 34: [], 42: []}) - g.set_entry_point(99) - g.process() - return g - - def multiple_loops(self): - """ - A CFG with multiple nested loops: - - for y in b: - for x in a: - # This loop has two back edges - if b: - continue - else: - continue - for z in c: - if z: - return ... 
- """ - g = self.from_adj_list( - {0: [7], - 7: [10, 60], - 10: [13], - 13: [20], - 20: [56, 23], - 23: [32, 44], - 32: [20], - 44: [20], - 56: [57], - 57: [7], - 60: [61], - 61: [68], - 68: [87, 71], - 71: [80, 68], - 80: [], - 87: [88], - 88: []} - ) - g.set_entry_point(0) - g.process() - return g - - def multiple_exits(self): - """ - A CFG with three loop exits, one of which is also a function - exit point, and another function exit point: - - for x in a: - if a: - return b - elif b: - break - return c - """ - g = self.from_adj_list( - {0: [7], - 7: [10, 36], - 10: [19, 23], - 19: [], - 23: [29, 7], - 29: [37], - 36: [37], - 37: [] - }) - g.set_entry_point(0) - g.process() - return g - - def infinite_loop1(self): - """ - A CFG with a infinite loop and an alternate exit point: - - if c: - return - while True: - if a: - ... - else: - ... - """ - g = self.from_adj_list( - {0: [10, 6], 6: [], 10: [13], 13: [26, 19], 19: [13], 26: [13]}) - g.set_entry_point(0) - g.process() - return g - - def infinite_loop2(self): - """ - A CFG with no exit point at all: - - while True: - if a: - ... - else: - ... 
- """ - g = self.from_adj_list({0: [3], 3: [16, 9], 9: [3], 16: [3]}) - g.set_entry_point(0) - g.process() - return g - - def test_simple_properties(self): - g = self.loopless1() - self.assertEqual(sorted(g.successors(0)), [(12, None), (18, None)]) - self.assertEqual(sorted(g.successors(21)), []) - self.assertEqual(sorted(g.predecessors(0)), []) - self.assertEqual(sorted(g.predecessors(21)), [(12, None), (18, None)]) - - def test_exit_points(self): - g = self.loopless1() - self.assertEqual(sorted(g.exit_points()), [21]) - g = self.loopless1_dead_nodes() - self.assertEqual(sorted(g.exit_points()), [21]) - g = self.loopless2() - self.assertEqual(sorted(g.exit_points()), [34, 42]) - g = self.multiple_loops() - self.assertEqual(sorted(g.exit_points()), [80, 88]) - g = self.infinite_loop1() - self.assertEqual(sorted(g.exit_points()), [6]) - g = self.infinite_loop2() - self.assertEqual(sorted(g.exit_points()), []) - g = self.multiple_exits() - self.assertEqual(sorted(g.exit_points()), [19, 37]) - - def test_dead_nodes(self): - g = self.loopless1() - self.assertEqual(len(g.dead_nodes()), 0) - self.assertEqual(sorted(g.nodes()), - [0, 12, 18, 21]) - g = self.loopless2() - self.assertEqual(len(g.dead_nodes()), 0) - self.assertEqual(sorted(g.nodes()), - [12, 18, 21, 34, 42, 99]) - g = self.multiple_loops() - self.assertEqual(len(g.dead_nodes()), 0) - g = self.infinite_loop1() - self.assertEqual(len(g.dead_nodes()), 0) - g = self.multiple_exits() - self.assertEqual(len(g.dead_nodes()), 0) - # Only this example has dead nodes - g = self.loopless1_dead_nodes() - self.assertEqual(sorted(g.dead_nodes()), - [91, 92, 93, 94]) - self.assertEqual(sorted(g.nodes()), - [0, 12, 18, 21]) - - def test_descendents(self): - g = self.loopless2() - d = g.descendents(34) - self.assertEqual(sorted(d), []) - d = g.descendents(42) - self.assertEqual(sorted(d), []) - d = g.descendents(21) - self.assertEqual(sorted(d), [34, 42]) - d = g.descendents(99) - self.assertEqual(sorted(d), [12, 18, 21, 34, 
42]) - g = self.infinite_loop1() - d = g.descendents(26) - self.assertEqual(sorted(d), []) - d = g.descendents(19) - self.assertEqual(sorted(d), []) - d = g.descendents(13) - self.assertEqual(sorted(d), [19, 26]) - d = g.descendents(10) - self.assertEqual(sorted(d), [13, 19, 26]) - d = g.descendents(6) - self.assertEqual(sorted(d), []) - d = g.descendents(0) - self.assertEqual(sorted(d), [6, 10, 13, 19, 26]) - - def test_topo_order(self): - g = self.loopless1() - self.assertIn(g.topo_order(), - ([0, 12, 18, 21], [0, 18, 12, 21])) - g = self.loopless2() - self.assertIn(g.topo_order(), - ([99, 18, 12, 21, 34, 42], [99, 12, 18, 21, 34, 42])) - g = self.infinite_loop2() - self.assertIn(g.topo_order(), - ([0, 3, 9, 16], [0, 3, 16, 9])) - g = self.infinite_loop1() - self.assertIn(g.topo_order(), - ([0, 6, 10, 13, 19, 26], [0, 6, 10, 13, 26, 19], - [0, 10, 13, 19, 26, 6], [0, 10, 13, 26, 19, 6])) - - def test_topo_sort(self): - def check_topo_sort(nodes, expected): - self.assertIn(list(g.topo_sort(nodes)), expected) - self.assertIn(list(g.topo_sort(nodes[::-1])), expected) - self.assertIn(list(g.topo_sort(nodes, reverse=True))[::-1], - expected) - self.assertIn(list(g.topo_sort(nodes[::-1], reverse=True))[::-1], - expected) - self.random.shuffle(nodes) - self.assertIn(list(g.topo_sort(nodes)), expected) - self.assertIn(list(g.topo_sort(nodes, reverse=True))[::-1], - expected) - - g = self.loopless2() - check_topo_sort([21, 99, 12, 34], ([99, 12, 21, 34],)) - # NOTE: topo_sort() is not stable - check_topo_sort([18, 12, 42, 99], - ([99, 12, 18, 42], [99, 18, 12, 42])) - g = self.multiple_exits() - check_topo_sort([19, 10, 7, 36], - ([7, 10, 19, 36], [7, 10, 36, 19], [7, 36, 10, 19])) - - def check_dominators(self, got, expected): - self.assertEqual(sorted(got), sorted(expected)) - for node in sorted(got): - self.assertEqual(sorted(got[node]), sorted(expected[node]), - "mismatch for %r" % (node,)) - - def test_dominators_loopless(self): - def eq_(d, l): - 
self.assertEqual(sorted(doms[d]), l) - for g in [self.loopless1(), self.loopless1_dead_nodes()]: - doms = g.dominators() - eq_(0, [0]) - eq_(12, [0, 12]) - eq_(18, [0, 18]) - eq_(21, [0, 21]) - g = self.loopless2() - doms = g.dominators() - eq_(99, [99]) - eq_(12, [12, 99]) - eq_(18, [18, 99]) - eq_(21, [21, 99]) - eq_(34, [21, 34, 99]) - eq_(42, [21, 42, 99]) - - def test_dominators_loops(self): - g = self.multiple_exits() - doms = g.dominators() - self.check_dominators(doms, - {0: [0], - 7: [0, 7], - 10: [0, 7, 10], - 19: [0, 7, 10, 19], - 23: [0, 7, 10, 23], - 29: [0, 7, 10, 23, 29], - 36: [0, 7, 36], - 37: [0, 7, 37], - }) - g = self.multiple_loops() - doms = g.dominators() - self.check_dominators(doms, - {0: [0], - 7: [0, 7], - 10: [0, 10, 7], - 13: [0, 10, 13, 7], - 20: [0, 10, 20, 13, 7], - 23: [0, 20, 23, 7, 10, 13], - 32: [32, 0, 20, 23, 7, 10, 13], - 44: [0, 20, 23, 7, 10, 44, 13], - 56: [0, 20, 7, 56, 10, 13], - 57: [0, 20, 7, 56, 57, 10, 13], - 60: [0, 60, 7], - 61: [0, 60, 61, 7], - 68: [0, 68, 60, 61, 7], - 71: [0, 68, 71, 7, 60, 61], - 80: [80, 0, 68, 71, 7, 60, 61], - 87: [0, 68, 87, 7, 60, 61], - 88: [0, 68, 87, 88, 7, 60, 61] - }) - g = self.infinite_loop1() - doms = g.dominators() - self.check_dominators(doms, - {0: [0], - 6: [0, 6], - 10: [0, 10], - 13: [0, 10, 13], - 19: [0, 10, 19, 13], - 26: [0, 10, 13, 26], - }) - - def test_post_dominators_loopless(self): - def eq_(d, l): - self.assertEqual(sorted(doms[d]), l) - for g in [self.loopless1(), self.loopless1_dead_nodes()]: - doms = g.post_dominators() - eq_(0, [0, 21]) - eq_(12, [12, 21]) - eq_(18, [18, 21]) - eq_(21, [21]) - g = self.loopless2() - doms = g.post_dominators() - eq_(34, [34]) - eq_(42, [42]) - eq_(21, [21]) - eq_(18, [18, 21]) - eq_(12, [12, 21]) - eq_(99, [21, 99]) - - def test_post_dominators_loops(self): - g = self.multiple_exits() - doms = g.post_dominators() - self.check_dominators(doms, - {0: [0, 7], - 7: [7], - 10: [10], - 19: [19], - 23: [23], - 29: [29, 37], - 36: [36, 
37], - 37: [37], - }) - g = self.multiple_loops() - doms = g.post_dominators() - self.check_dominators(doms, - {0: [0, 60, 68, 61, 7], - 7: [60, 68, 61, 7], - 10: [68, 7, 10, 13, 20, 56, 57, 60, 61], - 13: [68, 7, 13, 20, 56, 57, 60, 61], - 20: [20, 68, 7, 56, 57, 60, 61], - 23: [68, 7, 20, 23, 56, 57, 60, 61], - 32: [32, 68, 7, 20, 56, 57, 60, 61], - 44: [68, 7, 44, 20, 56, 57, 60, 61], - 56: [68, 7, 56, 57, 60, 61], - 57: [57, 60, 68, 61, 7], - 60: [60, 68, 61], - 61: [68, 61], - 68: [68], - 71: [71], - 80: [80], - 87: [88, 87], - 88: [88] - }) - - def test_post_dominators_infinite_loops(self): - # Post-dominators with infinite loops need special care - # (the ordinary algorithm won't work). - g = self.infinite_loop1() - doms = g.post_dominators() - self.check_dominators(doms, - {0: [0], - 6: [6], - 10: [10, 13], - 13: [13], - 19: [19], - 26: [26], - }) - g = self.infinite_loop2() - doms = g.post_dominators() - self.check_dominators(doms, - {0: [0, 3], - 3: [3], - 9: [9], - 16: [16], - }) - - def test_backbone_loopless(self): - for g in [self.loopless1(), self.loopless1_dead_nodes()]: - self.assertEqual(sorted(g.backbone()), [0, 21]) - g = self.loopless2() - self.assertEqual(sorted(g.backbone()), [21, 99]) - - def test_backbone_loops(self): - g = self.multiple_loops() - self.assertEqual(sorted(g.backbone()), [0, 7, 60, 61, 68]) - g = self.infinite_loop1() - self.assertEqual(sorted(g.backbone()), [0]) - g = self.infinite_loop2() - self.assertEqual(sorted(g.backbone()), [0, 3]) - - def test_loops(self): - for g in [self.loopless1(), self.loopless1_dead_nodes(), - self.loopless2()]: - self.assertEqual(len(g.loops()), 0) - - g = self.multiple_loops() - # Loop headers - self.assertEqual(sorted(g.loops()), [7, 20, 68]) - outer1 = g.loops()[7] - inner1 = g.loops()[20] - outer2 = g.loops()[68] - self.assertEqual(outer1.header, 7) - self.assertEqual(sorted(outer1.entries), [0]) - self.assertEqual(sorted(outer1.exits), [60]) - self.assertEqual(sorted(outer1.body), - [7, 
10, 13, 20, 23, 32, 44, 56, 57]) - self.assertEqual(inner1.header, 20) - self.assertEqual(sorted(inner1.entries), [13]) - self.assertEqual(sorted(inner1.exits), [56]) - self.assertEqual(sorted(inner1.body), [20, 23, 32, 44]) - self.assertEqual(outer2.header, 68) - self.assertEqual(sorted(outer2.entries), [61]) - self.assertEqual(sorted(outer2.exits), [80, 87]) - self.assertEqual(sorted(outer2.body), [68, 71]) - for node in [0, 60, 61, 80, 87, 88]: - self.assertEqual(g.in_loops(node), []) - for node in [7, 10, 13, 56, 57]: - self.assertEqual(g.in_loops(node), [outer1]) - for node in [20, 23, 32, 44]: - self.assertEqual(g.in_loops(node), [inner1, outer1]) - for node in [68, 71]: - self.assertEqual(g.in_loops(node), [outer2]) - - g = self.infinite_loop1() - # Loop headers - self.assertEqual(sorted(g.loops()), [13]) - loop = g.loops()[13] - self.assertEqual(loop.header, 13) - self.assertEqual(sorted(loop.entries), [10]) - self.assertEqual(sorted(loop.exits), []) - self.assertEqual(sorted(loop.body), [13, 19, 26]) - for node in [0, 6, 10]: - self.assertEqual(g.in_loops(node), []) - for node in [13, 19, 26]: - self.assertEqual(g.in_loops(node), [loop]) - - g = self.infinite_loop2() - # Loop headers - self.assertEqual(sorted(g.loops()), [3]) - loop = g.loops()[3] - self.assertEqual(loop.header, 3) - self.assertEqual(sorted(loop.entries), [0]) - self.assertEqual(sorted(loop.exits), []) - self.assertEqual(sorted(loop.body), [3, 9, 16]) - for node in [0]: - self.assertEqual(g.in_loops(node), []) - for node in [3, 9, 16]: - self.assertEqual(g.in_loops(node), [loop]) - - g = self.multiple_exits() - # Loop headers - self.assertEqual(sorted(g.loops()), [7]) - loop = g.loops()[7] - self.assertEqual(loop.header, 7) - self.assertEqual(sorted(loop.entries), [0]) - self.assertEqual(sorted(loop.exits), [19, 29, 36]) - self.assertEqual(sorted(loop.body), [7, 10, 23]) - for node in [0, 19, 29, 36]: - self.assertEqual(g.in_loops(node), []) - for node in [7, 10, 23]: - 
self.assertEqual(g.in_loops(node), [loop]) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_func_interface.py b/numba/numba/tests/test_func_interface.py deleted file mode 100644 index 454a2fd93..000000000 --- a/numba/numba/tests/test_func_interface.py +++ /dev/null @@ -1,45 +0,0 @@ -from __future__ import print_function - -import numba.unittest_support as unittest -from numba import jit - - -class TestFuncInterface(unittest.TestCase): - def test_jit_function_docstring(self): - - def add(x, y): - '''Return sum of two numbers''' - return x + y - - c_add = jit(add) - self.assertEqual(c_add.__doc__, 'Return sum of two numbers') - - def test_jit_function_name(self): - - def add(x, y): - return x + y - - c_add = jit(add) - self.assertEqual(c_add.__name__, 'add') - - def test_jit_function_module(self): - - def add(x, y): - return x + y - - c_add = jit(add) - # Expected answer depends on how you run this test. - # Compare to python function instead. - self.assertEqual(c_add.__module__, add.__module__) - - def test_jit_function_code_object(self): - def add(x, y): - return x + y - - c_add = jit(add) - self.assertEqual(c_add.__code__, add.__code__) - self.assertEqual(c_add.func_code, add.__code__) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_func_lifetime.py b/numba/numba/tests/test_func_lifetime.py deleted file mode 100644 index 02aca26ff..000000000 --- a/numba/numba/tests/test_func_lifetime.py +++ /dev/null @@ -1,168 +0,0 @@ - -from __future__ import print_function, absolute_import - -import gc -import weakref - -from numba import unittest_support as unittest -from numba.utils import IS_PY3 -from numba import jit, types -from .support import TestCase - - -class Dummy(object): - - def __add__(self, other): - return other + 5 - - -def global_usecase1(x): - return x + 1 - -def global_usecase2(): - return global_obj + 1 - - -class TestFuncLifetime(TestCase): - """ - Test the lifetime of compiled 
function objects and their dependencies. - """ - - def get_impl(self, dispatcher): - """ - Get the single implementation (a C function object) of a dispatcher. - """ - self.assertEqual(len(dispatcher.overloads), 1) - cres = list(dispatcher.overloads.values())[0] - return cres.entry_point - - def check_local_func_lifetime(self, **jitargs): - def f(x): - return x + 1 - - c_f = jit('int32(int32)', **jitargs)(f) - self.assertPreciseEqual(c_f(1), 2) - - cfunc = self.get_impl(c_f) - - # Since we can't take a weakref to a C function object - # (see http://bugs.python.org/issue22116), ensure it's - # collected by taking a weakref to its __self__ instead - # (a _dynfunc._Closure object). - refs = [weakref.ref(obj) for obj in (f, c_f, cfunc.__self__)] - obj = f = c_f = cfunc = None - gc.collect() - self.assertEqual([wr() for wr in refs], [None] * len(refs)) - - def test_local_func_lifetime(self): - self.check_local_func_lifetime(forceobj=True) - - def test_local_func_lifetime_npm(self): - self.check_local_func_lifetime(nopython=True) - - def check_global_func_lifetime(self, **jitargs): - c_f = jit(**jitargs)(global_usecase1) - self.assertPreciseEqual(c_f(1), 2) - - cfunc = self.get_impl(c_f) - - wr = weakref.ref(c_f) - refs = [weakref.ref(obj) for obj in (c_f, cfunc.__self__)] - obj = c_f = cfunc = None - gc.collect() - self.assertEqual([wr() for wr in refs], [None] * len(refs)) - - def test_global_func_lifetime(self): - self.check_global_func_lifetime(forceobj=True) - - def test_global_func_lifetime_npm(self): - self.check_global_func_lifetime(nopython=True) - - def check_global_obj_lifetime(self, **jitargs): - # Since global objects can be recorded for typing purposes, - # check that they are not kept around after they are removed - # from the globals. 
- global global_obj - global_obj = Dummy() - - c_f = jit(**jitargs)(global_usecase2) - self.assertPreciseEqual(c_f(), 6) - - refs = [weakref.ref(obj) for obj in (c_f, global_obj)] - obj = c_f = global_obj = None - gc.collect() - self.assertEqual([wr() for wr in refs], [None] * len(refs)) - - def test_global_obj_lifetime(self): - self.check_global_obj_lifetime(forceobj=True) - - def check_inner_function_lifetime(self, **jitargs): - """ - When a jitted function calls into another jitted function, check - that everything is collected as desired. - """ - def mult_10(a): - return a * 10 - - c_mult_10 = jit('intp(intp)', **jitargs)(mult_10) - c_mult_10.disable_compile() - - def do_math(x): - return c_mult_10(x + 4) - - c_do_math = jit('intp(intp)', **jitargs)(do_math) - c_do_math.disable_compile() - - self.assertEqual(c_do_math(1), 50) - - wrs = [weakref.ref(obj) for obj in - (mult_10, c_mult_10, do_math, c_do_math, - self.get_impl(c_mult_10).__self__, - self.get_impl(c_do_math).__self__, - )] - obj = mult_10 = c_mult_10 = do_math = c_do_math = None - gc.collect() - self.assertEqual([w() for w in wrs], [None] * len(wrs)) - - @unittest.skipUnless(IS_PY3, "py3 only; known leak in py2") - def test_inner_function_lifetime(self): - self.check_inner_function_lifetime(forceobj=True) - - def test_inner_function_lifetime_npm(self): - self.check_inner_function_lifetime(nopython=True) - - -class TestLifeTimeIssue(TestCase): - def test_double_free(self): - from numba import njit - import numpy as np - - # This is the function that causes the crash - - @njit - def is_point_in_polygons(point, polygons): - num_polygons = polygons.shape[0] - if num_polygons != 0: - # An extra decref is inserted in this block - intentionally_unused_variable = polygons[0] - return 0 - - # This function creates some NRT objects for the previous function - # to corrupt. 
- - @njit - def dummy(): - return np.empty(10, dtype=np.int64) - - polygons = np.array([[[0, 1]]]) - points = np.array([[-1.5, 0.5]]) - a = dummy() - is_point_in_polygons(points[0], polygons) - b = dummy() - # Crash happens at second call - is_point_in_polygons(points[0], polygons) - c = dummy() - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_generators.py b/numba/numba/tests/test_generators.py deleted file mode 100644 index a0bcfcc6a..000000000 --- a/numba/numba/tests/test_generators.py +++ /dev/null @@ -1,643 +0,0 @@ -from __future__ import print_function - -import sys -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import jit, njit, types -from .support import TestCase, MemoryLeakMixin, tag -from numba import testing -from numba.datamodel.testing import test_factory - - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -forceobj_flags = Flags() -forceobj_flags.set("force_pyobject") - -no_pyobj_flags = Flags() - - -def make_consumer(gen_func): - def consumer(x): - res = 0.0 - for y in gen_func(x): - res += y - return res - - return consumer - - -def gen1(x): - for i in range(x): - yield i - - -def gen2(x): - for i in range(x): - yield i - for j in range(1, 3): - yield i + j - - -def gen3(x): - # Polymorphic yield types must be unified - yield x - yield x + 1.5 - yield x + 1j - - -def gen4(x, y, z): - for i in range(3): - yield z - yield y + z - return - yield x - - -def gen5(): - # The bytecode for this generator doesn't contain any YIELD_VALUE - # (it's optimized away). We fail typing it, since the yield type - # is entirely undefined. 
- if 0: - yield 1 - - -def gen6(a, b): - # Infinite loop: exercise computation of state variables - x = a + 1 - while True: - y = b + 2 - yield x + y - - -def gen7(arr): - # Array variable in generator state - for i in range(arr.size): - yield arr[i] - - -# Optional arguments and boolean state members -def gen8(x=1, y=2, b=False): - bb = not b - yield x - if bb: - yield y - if b: - yield x + y - - -def genobj(x): - object() - yield x - - -def return_generator_expr(x): - return (i * 2 for i in x) - - -def gen_ndindex(shape): - for ind in np.ndindex(shape): - yield ind - - -def gen_flat(arr): - for val in arr.flat: - yield val - - -def gen_ndenumerate(arr): - for tup in np.ndenumerate(arr): - yield tup - - -def gen_bool(): - yield True - -class TestGenerators(MemoryLeakMixin, TestCase): - def check_generator(self, pygen, cgen): - self.assertEqual(next(cgen), next(pygen)) - # Use list comprehensions to make sure we trash the generator's - # former C stack. - expected = [x for x in pygen] - got = [x for x in cgen] - self.assertEqual(expected, got) - with self.assertRaises(StopIteration): - next(cgen) - - def check_gen1(self, flags=no_pyobj_flags): - pyfunc = gen1 - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - pygen = pyfunc(8) - cgen = cr.entry_point(8) - self.check_generator(pygen, cgen) - - @tag('important') - def test_gen1(self): - self.check_gen1() - - def test_gen1_objmode(self): - self.check_gen1(flags=forceobj_flags) - - def check_gen2(self, flags=no_pyobj_flags): - pyfunc = gen2 - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - pygen = pyfunc(8) - cgen = cr.entry_point(8) - self.check_generator(pygen, cgen) - - @tag('important') - def test_gen2(self): - self.check_gen2() - - def test_gen2_objmode(self): - self.check_gen2(flags=forceobj_flags) - - def check_gen3(self, flags=no_pyobj_flags): - pyfunc = gen3 - cr = compile_isolated(pyfunc, (types.int32,), flags=flags) - pygen = pyfunc(8) - cgen = cr.entry_point(8) - 
self.check_generator(pygen, cgen) - - @tag('important') - def test_gen3(self): - self.check_gen3() - - def test_gen3_objmode(self): - self.check_gen3(flags=forceobj_flags) - - def check_gen4(self, flags=no_pyobj_flags): - pyfunc = gen4 - cr = compile_isolated(pyfunc, (types.int32,) * 3, flags=flags) - pygen = pyfunc(5, 6, 7) - cgen = cr.entry_point(5, 6, 7) - self.check_generator(pygen, cgen) - - @tag('important') - def test_gen4(self): - self.check_gen4() - - def test_gen4_objmode(self): - self.check_gen4(flags=forceobj_flags) - - def test_gen5(self): - with self.assertTypingError() as cm: - cr = compile_isolated(gen5, ()) - self.assertIn("Cannot type generator: it does not yield any value", - str(cm.exception)) - - def test_gen5_objmode(self): - cr = compile_isolated(gen5, (), flags=forceobj_flags) - cgen = cr.entry_point() - self.assertEqual(list(cgen), []) - with self.assertRaises(StopIteration): - next(cgen) - - def check_gen6(self, flags=no_pyobj_flags): - pyfunc = gen6 - cr = compile_isolated(pyfunc, (types.int32,) * 2, flags=flags) - cgen = cr.entry_point(5, 6) - l = [] - for i in range(3): - l.append(next(cgen)) - self.assertEqual(l, [14] * 3) - - @tag('important') - def test_gen6(self): - self.check_gen6() - - def test_gen6_objmode(self): - self.check_gen6(flags=forceobj_flags) - - def check_gen7(self, flags=no_pyobj_flags): - pyfunc = gen7 - cr = compile_isolated(pyfunc, (types.Array(types.float64, 1, 'C'),), - flags=flags) - arr = np.linspace(1, 10, 7) - pygen = pyfunc(arr.copy()) - cgen = cr.entry_point(arr) - self.check_generator(pygen, cgen) - - @tag('important') - def test_gen7(self): - self.check_gen7() - - def test_gen7_objmode(self): - self.check_gen7(flags=forceobj_flags) - - def check_gen8(self, **jit_args): - pyfunc = gen8 - cfunc = jit(**jit_args)(pyfunc) - - def check(*args, **kwargs): - self.check_generator(pyfunc(*args, **kwargs), - cfunc(*args, **kwargs)) - - check(2, 3) - check(4) - check(y=5) - check(x=6, b=True) - - @tag('important') - 
def test_gen8(self): - self.check_gen8(nopython=True) - - def test_gen8_objmode(self): - self.check_gen8(forceobj=True) - - def check_gen9(self, flags=no_pyobj_flags): - pyfunc = gen_bool - cr = compile_isolated(pyfunc, (), flags=flags) - pygen = pyfunc() - cgen = cr.entry_point() - self.check_generator(pygen, cgen) - - @tag('important') - def test_gen9(self): - self.check_gen9(flags=no_pyobj_flags) - - def test_gen9_objmode(self): - self.check_gen9(flags=forceobj_flags) - - def check_consume_generator(self, gen_func): - cgen = jit(nopython=True)(gen_func) - cfunc = jit(nopython=True)(make_consumer(cgen)) - pyfunc = make_consumer(gen_func) - expected = pyfunc(5) - got = cfunc(5) - self.assertPreciseEqual(got, expected) - - def test_consume_gen1(self): - self.check_consume_generator(gen1) - - def test_consume_gen2(self): - self.check_consume_generator(gen2) - - @tag('important') - def test_consume_gen3(self): - self.check_consume_generator(gen3) - - # Check generator storage of some types - - def check_ndindex(self, flags=no_pyobj_flags): - pyfunc = gen_ndindex - cr = compile_isolated(pyfunc, (types.UniTuple(types.intp, 2),), - flags=flags) - shape = (2, 3) - pygen = pyfunc(shape) - cgen = cr.entry_point(shape) - self.check_generator(pygen, cgen) - - def test_ndindex(self): - self.check_ndindex() - - def test_ndindex_objmode(self): - self.check_ndindex(flags=forceobj_flags) - - def check_np_flat(self, pyfunc, flags=no_pyobj_flags): - cr = compile_isolated(pyfunc, (types.Array(types.int32, 2, "C"),), - flags=flags) - arr = np.arange(6, dtype=np.int32).reshape((2, 3)) - self.check_generator(pyfunc(arr), cr.entry_point(arr)) - cr = compile_isolated(pyfunc, (types.Array(types.int32, 2, "A"),), - flags=flags) - arr = arr.T - self.check_generator(pyfunc(arr), cr.entry_point(arr)) - - def test_np_flat(self): - self.check_np_flat(gen_flat) - - def test_np_flat_objmode(self): - self.check_np_flat(gen_flat, flags=forceobj_flags) - - def test_ndenumerate(self): - 
self.check_np_flat(gen_ndenumerate) - - def test_ndenumerate_objmode(self): - self.check_np_flat(gen_ndenumerate, flags=forceobj_flags) - - -class TestGenExprs(MemoryLeakMixin, TestCase): - @testing.allow_interpreter_mode - def test_return_generator_expr(self): - pyfunc = return_generator_expr - cr = compile_isolated(pyfunc, ()) - cfunc = cr.entry_point - self.assertEqual(sum(cfunc([1, 2, 3])), sum(pyfunc([1, 2, 3]))) - - -def nrt_gen0(ary): - for elem in ary: - yield elem - - -def nrt_gen1(ary1, ary2): - for e1, e2 in zip(ary1, ary2): - yield e1 - yield e2 - - -class TestNrtArrayGen(MemoryLeakMixin, TestCase): - def test_nrt_gen0(self): - pygen = nrt_gen0 - cgen = jit(nopython=True)(pygen) - - py_ary = np.arange(10) - c_ary = py_ary.copy() - - py_res = list(pygen(py_ary)) - c_res = list(cgen(c_ary)) - - np.testing.assert_equal(py_ary, c_ary) - self.assertEqual(py_res, c_res) - # Check reference count - self.assertEqual(sys.getrefcount(py_ary), - sys.getrefcount(c_ary)) - - def test_nrt_gen1(self): - pygen = nrt_gen1 - cgen = jit(nopython=True)(pygen) - - py_ary1 = np.arange(10) - py_ary2 = py_ary1 + 100 - - c_ary1 = py_ary1.copy() - c_ary2 = py_ary2.copy() - - py_res = list(pygen(py_ary1, py_ary2)) - c_res = list(cgen(c_ary1, c_ary2)) - - np.testing.assert_equal(py_ary1, c_ary1) - np.testing.assert_equal(py_ary2, c_ary2) - self.assertEqual(py_res, c_res) - # Check reference count - self.assertEqual(sys.getrefcount(py_ary1), - sys.getrefcount(c_ary1)) - self.assertEqual(sys.getrefcount(py_ary2), - sys.getrefcount(c_ary2)) - - def test_combine_gen0_gen1(self): - """ - Issue #1163 is observed when two generator with NRT object arguments - is ran in sequence. The first one does a invalid free and corrupts - the NRT memory subsystem. The second generator is likely to segfault - due to corrupted NRT data structure (an invalid MemInfo). 
- """ - self.test_nrt_gen0() - self.test_nrt_gen1() - - def test_nrt_gen0_stop_iteration(self): - """ - Test cleanup on StopIteration - """ - pygen = nrt_gen0 - cgen = jit(nopython=True)(pygen) - - py_ary = np.arange(1) - c_ary = py_ary.copy() - - py_iter = pygen(py_ary) - c_iter = cgen(c_ary) - - py_res = next(py_iter) - c_res = next(c_iter) - - with self.assertRaises(StopIteration): - py_res = next(py_iter) - - with self.assertRaises(StopIteration): - c_res = next(c_iter) - - del py_iter - del c_iter - - np.testing.assert_equal(py_ary, c_ary) - self.assertEqual(py_res, c_res) - # Check reference count - self.assertEqual(sys.getrefcount(py_ary), - sys.getrefcount(c_ary)) - - def test_nrt_gen0_no_iter(self): - """ - Test cleanup for a initialized but never iterated (never call next()) - generator. - """ - pygen = nrt_gen0 - cgen = jit(nopython=True)(pygen) - - py_ary = np.arange(1) - c_ary = py_ary.copy() - - py_iter = pygen(py_ary) - c_iter = cgen(c_ary) - - del py_iter - del c_iter - - np.testing.assert_equal(py_ary, c_ary) - - # Check reference count - self.assertEqual(sys.getrefcount(py_ary), - sys.getrefcount(c_ary)) - - -# TODO: fix nested generator and MemoryLeakMixin -class TestNrtNestedGen(TestCase): - def test_nrt_nested_gen(self): - - def gen0(arr): - for i in range(arr.size): - yield arr - - def factory(gen0): - def gen1(arr): - out = np.zeros_like(arr) - for x in gen0(arr): - out = out + x - return out, arr - - return gen1 - - py_arr = np.arange(10) - c_arr = py_arr.copy() - py_res, py_old = factory(gen0)(py_arr) - c_gen = jit(nopython=True)(factory(jit(nopython=True)(gen0))) - c_res, c_old = c_gen(c_arr) - - self.assertIsNot(py_arr, c_arr) - self.assertIs(py_old, py_arr) - self.assertIs(c_old, c_arr) - - np.testing.assert_equal(py_res, c_res) - - self.assertEqual(sys.getrefcount(py_res), - sys.getrefcount(c_res)) - - # The below test will fail due to generator finalizer not invoked. - # This kept a reference of the c_old. 
- # - # self.assertEqual(sys.getrefcount(py_old), - # sys.getrefcount(c_old)) - - @unittest.expectedFailure - def test_nrt_nested_gen_refct(self): - def gen0(arr): - yield arr - - def factory(gen0): - def gen1(arr): - for out in gen0(arr): - return out - - return gen1 - - py_arr = np.arange(10) - c_arr = py_arr.copy() - py_old = factory(gen0)(py_arr) - c_gen = jit(nopython=True)(factory(jit(nopython=True)(gen0))) - c_old = c_gen(c_arr) - - self.assertIsNot(py_arr, c_arr) - self.assertIs(py_old, py_arr) - self.assertIs(c_old, c_arr) - - self.assertEqual(sys.getrefcount(py_old), - sys.getrefcount(c_old)) - - def test_nrt_nested_nopython_gen(self): - """ - Test nesting three generators - """ - - def factory(decor=lambda x: x): - @decor - def foo(a, n): - for i in range(n): - yield a[i] - a[i] += i - - @decor - def bar(n): - a = np.arange(n) - for i in foo(a, n): - yield i * 2 - for i in range(a.size): - yield a[i] - - @decor - def cat(n): - for i in bar(n): - yield i + i - - return cat - - py_gen = factory() - c_gen = factory(jit(nopython=True)) - - py_res = list(py_gen(10)) - c_res = list(c_gen(10)) - - self.assertEqual(py_res, c_res) - - -class TestGeneratorWithNRT(MemoryLeakMixin, TestCase): - def test_issue_1254(self): - """ - Missing environment for returning array - """ - - @jit(nopython=True) - def random_directions(n): - for i in range(n): - vec = np.empty(3) - vec[:] = 12 - yield vec - - outputs = list(random_directions(5)) - self.assertEqual(len(outputs), 5) - - expect = np.empty(3) - expect[:] = 12 - for got in outputs: - np.testing.assert_equal(expect, got) - - def test_issue_1265(self): - """ - Double-free for locally allocated, non escaping NRT objects - """ - - def py_gen(rmin, rmax, nr): - a = np.linspace(rmin, rmax, nr) - yield a[0] - yield a[1] - - c_gen = jit(nopython=True)(py_gen) - - py_res = list(py_gen(-2, 2, 100)) - c_res = list(c_gen(-2, 2, 100)) - - self.assertEqual(py_res, c_res) - - def py_driver(args): - rmin, rmax, nr = args - points = 
np.empty(nr, dtype=np.complex128) - for i, c in enumerate(py_gen(rmin, rmax, nr)): - points[i] = c - - return points - - @jit(nopython=True) - def c_driver(args): - rmin, rmax, nr = args - points = np.empty(nr, dtype=np.complex128) - for i, c in enumerate(c_gen(rmin, rmax, nr)): - points[i] = c - - return points - - n = 2 - patches = (-2, -1, n) - - py_res = py_driver(patches) - # The error will cause a segfault here - c_res = c_driver(patches) - - np.testing.assert_equal(py_res, c_res) - - def test_issue_1808(self): - """ - Incorrect return data model - """ - magic = 0xdeadbeef - - @njit - def generator(): - yield magic - - @njit - def get_generator(): - return generator() - - @njit - def main(): - out = 0 - for x in get_generator(): - out += x - - return out - - self.assertEqual(main(), magic) - - -class TestGeneratorModel(test_factory()): - fe_type = types.Generator(gen_func=None, yield_type=types.int32, - arg_types=[types.int64, types.float32], - state_types=[types.intp, types.intp[::1]], - has_finalizer=False) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_gil.py b/numba/numba/tests/test_gil.py deleted file mode 100644 index 30b68ef91..000000000 --- a/numba/numba/tests/test_gil.py +++ /dev/null @@ -1,185 +0,0 @@ -from __future__ import print_function - -import ctypes -import ctypes.util -import os -import sys -import threading -import warnings - -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import errors, jit -from .support import TestCase, tag - - -# This CPython API function is a portable way to get the current thread id. 
-PyThread_get_thread_ident = ctypes.pythonapi.PyThread_get_thread_ident -PyThread_get_thread_ident.restype = ctypes.c_long -PyThread_get_thread_ident.argtypes = [] - -# A way of sleeping from nopython code -if os.name == 'nt': - sleep = ctypes.windll.kernel32.Sleep - sleep.argtypes = [ctypes.c_uint] - sleep.restype = None - sleep_factor = 1 # milliseconds -else: - sleep = ctypes.CDLL(ctypes.util.find_library("c")).usleep - sleep.argtypes = [ctypes.c_uint] - sleep.restype = ctypes.c_int - sleep_factor = 1000 # microseconds - - -def f(a, indices): - # If run from one thread at a time, the function will always fill the - # array with identical values. - # If run from several threads at a time, the function will probably - # fill the array with differing values. - for idx in indices: - # Let another thread run - sleep(10 * sleep_factor) - a[idx] = PyThread_get_thread_ident() - -f_sig = "void(int64[:], intp[:])" - -def lifted_f(a, indices): - """ - Same as f(), but inside a lifted loop - """ - object() # Force object mode - for idx in indices: - # Let another thread run - sleep(10 * sleep_factor) - a[idx] = PyThread_get_thread_ident() - -def object_f(a, indices): - """ - Same as f(), but in object mode - """ - for idx in indices: - # Let another thread run - sleep(10 * sleep_factor) - object() # Force object mode - a[idx] = PyThread_get_thread_ident() - - -class TestGILRelease(TestCase): - - def make_test_array(self, n_members): - return np.arange(n_members, dtype=np.int64) - - def run_in_threads(self, func, n_threads): - # Run the function in parallel over an array and collect results. - threads = [] - # Warm up compilation, since we don't want that to interfere with - # the test proper. - func(self.make_test_array(1), np.arange(1, dtype=np.intp)) - arr = self.make_test_array(50) - for i in range(n_threads): - # Ensure different threads write into the array in different - # orders. 
- indices = np.arange(arr.size, dtype=np.intp) - np.random.shuffle(indices) - t = threading.Thread(target=func, args=(arr, indices)) - threads.append(t) - for t in threads: - t.start() - for t in threads: - t.join() - return arr - - def check_gil_held(self, func): - arr = self.run_in_threads(func, n_threads=4) - distinct = set(arr) - self.assertEqual(len(distinct), 1, distinct) - - def check_gil_released(self, func): - for n_threads in (4, 12, 32): - # Try harder each time. On an empty machine 4 threads seems - # sufficient, but in some contexts (e.g. Travis CI) we need more. - arr = self.run_in_threads(func, n_threads) - distinct = set(arr) - try: - self.assertGreater(len(distinct), 1, distinct) - except AssertionError as e: - failure = e - else: - return - raise failure - - def test_gil_held(self): - """ - Test the GIL is held by default, by checking serialized runs - produce deterministic results. - """ - cfunc = jit(f_sig, nopython=True)(f) - self.check_gil_held(cfunc) - - @tag('important') - def test_gil_released(self): - """ - Test releasing the GIL, by checking parallel runs produce - unpredictable results. - """ - cfunc = jit(f_sig, nopython=True, nogil=True)(f) - self.check_gil_released(cfunc) - - def test_gil_released_inside_lifted_loop(self): - """ - Test the GIL can by released by a lifted loop even though the - surrounding code uses object mode. - """ - cfunc = jit(f_sig, nogil=True)(lifted_f) - self.check_gil_released(cfunc) - - def test_gil_released_by_caller(self): - """ - Releasing the GIL in the caller is sufficient to have it - released in a callee. - """ - compiled_f = jit(f_sig, nopython=True)(f) - @jit(f_sig, nopython=True, nogil=True) - def caller(a, i): - compiled_f(a, i) - self.check_gil_released(caller) - - def test_gil_released_by_caller_and_callee(self): - """ - Same, but with both caller and callee asking to release the GIL. 
- """ - compiled_f = jit(f_sig, nopython=True, nogil=True)(f) - @jit(f_sig, nopython=True, nogil=True) - def caller(a, i): - compiled_f(a, i) - self.check_gil_released(caller) - - def test_gil_ignored_by_callee(self): - """ - When only the callee asks to release the GIL, it gets ignored. - """ - compiled_f = jit(f_sig, nopython=True, nogil=True)(f) - @jit(f_sig, nopython=True) - def caller(a, i): - compiled_f(a, i) - self.check_gil_held(caller) - - def test_object_mode(self): - """ - When the function is compiled in object mode, a warning is - printed out. - """ - with warnings.catch_warnings(record=True) as wlist: - warnings.simplefilter('always', errors.NumbaWarning) - cfunc = jit(f_sig, nogil=True)(object_f) - self.assertTrue(any(w.category is errors.NumbaWarning - and "Code running in object mode won't allow parallel execution" in str(w.message) - for w in wlist), wlist) - # Just check it doesn't crash. - self.run_in_threads(cfunc, 2) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_globals.py b/numba/numba/tests/test_globals.py deleted file mode 100644 index cefaf8d8e..000000000 --- a/numba/numba/tests/test_globals.py +++ /dev/null @@ -1,171 +0,0 @@ -from __future__ import print_function, division, absolute_import -import numpy as np -from numba import jit -from numba import unittest_support as unittest -from numba.tests import usecases - -X = np.arange(10) - - -def global_ndarray_func(x): - y = x + X.shape[0] - return y - - -# Create complex array with real and imaginary parts of distinct value -cplx_X = np.arange(10, dtype=np.complex128) -tmp = np.arange(10, dtype=np.complex128) -cplx_X += (tmp+10)*1j - - -def global_cplx_arr_copy(a): - for i in range(len(a)): - a[i] = cplx_X[i] - - -# Create a recarray with fields of distinct value -x_dt = np.dtype([('a', np.int32), ('b', np.float32)]) -rec_X = np.recarray(10, dtype=x_dt) -for i in range(len(rec_X)): - rec_X[i].a = i - rec_X[i].b = i + 0.5 - - -def 
global_rec_arr_copy(a): - for i in range(len(a)): - a[i] = rec_X[i] - - -def global_rec_arr_extract_fields(a, b): - for i in range(len(a)): - a[i] = rec_X[i].a - b[i] = rec_X[i].b - - -# Create additional global recarray -y_dt = np.dtype([('c', np.int16), ('d', np.float64)]) -rec_Y = np.recarray(10, dtype=y_dt) -for i in range(len(rec_Y)): - rec_Y[i].c = i + 10 - rec_Y[i].d = i + 10.5 - - -def global_two_rec_arrs(a, b, c, d): - for i in range(len(a)): - a[i] = rec_X[i].a - b[i] = rec_X[i].b - c[i] = rec_Y[i].c - d[i] = rec_Y[i].d - - -# Test a global record -record_only_X = np.recarray(1, dtype=x_dt)[0] -record_only_X.a = 1 -record_only_X.b = 1.5 - -@jit(nopython=True) -def global_record_func(x): - return x.a == record_only_X.a - - -@jit(nopython=True) -def global_module_func(x, y): - return usecases.andornopython(x, y) - - -class TestGlobals(unittest.TestCase): - - def check_global_ndarray(self, **jitargs): - # (see github issue #448) - ctestfunc = jit(**jitargs)(global_ndarray_func) - self.assertEqual(ctestfunc(1), 11) - - def test_global_ndarray(self): - # This also checks we can access an unhashable global value - # (see issue #697) - self.check_global_ndarray(forceobj=True) - - def test_global_ndarray_npm(self): - self.check_global_ndarray(nopython=True) - - - def check_global_complex_arr(self, **jitargs): - # (see github issue #897) - ctestfunc = jit(**jitargs)(global_cplx_arr_copy) - arr = np.zeros(len(cplx_X), dtype=np.complex128) - ctestfunc(arr) - np.testing.assert_equal(arr, cplx_X) - - def test_global_complex_arr(self): - self.check_global_complex_arr(forceobj=True) - - def test_global_complex_arr_npm(self): - self.check_global_complex_arr(nopython=True) - - - def check_global_rec_arr(self, **jitargs): - # (see github issue #897) - ctestfunc = jit(**jitargs)(global_rec_arr_copy) - arr = np.zeros(rec_X.shape, dtype=x_dt) - ctestfunc(arr) - np.testing.assert_equal(arr, rec_X) - - def test_global_rec_arr(self): - self.check_global_rec_arr(forceobj=True) - 
- def test_global_rec_arr_npm(self): - self.check_global_rec_arr(nopython=True) - - - def check_global_rec_arr_extract(self, **jitargs): - # (see github issue #897) - ctestfunc = jit(**jitargs)(global_rec_arr_extract_fields) - arr1 = np.zeros(rec_X.shape, dtype=np.int32) - arr2 = np.zeros(rec_X.shape, dtype=np.float32) - ctestfunc(arr1, arr2) - np.testing.assert_equal(arr1, rec_X.a) - np.testing.assert_equal(arr2, rec_X.b) - - def test_global_rec_arr_extract(self): - self.check_global_rec_arr_extract(forceobj=True) - - def test_global_rec_arr_extract_npm(self): - self.check_global_rec_arr_extract(nopython=True) - - - def check_two_global_rec_arrs(self, **jitargs): - # (see github issue #897) - ctestfunc = jit(**jitargs)(global_two_rec_arrs) - arr1 = np.zeros(rec_X.shape, dtype=np.int32) - arr2 = np.zeros(rec_X.shape, dtype=np.float32) - arr3 = np.zeros(rec_Y.shape, dtype=np.int16) - arr4 = np.zeros(rec_Y.shape, dtype=np.float64) - ctestfunc(arr1, arr2, arr3, arr4) - np.testing.assert_equal(arr1, rec_X.a) - np.testing.assert_equal(arr2, rec_X.b) - np.testing.assert_equal(arr3, rec_Y.c) - np.testing.assert_equal(arr4, rec_Y.d) - - def test_two_global_rec_arrs(self): - self.check_two_global_rec_arrs(forceobj=True) - - def test_two_global_rec_arrs_npm(self): - self.check_two_global_rec_arrs(nopython=True) - - def test_global_module(self): - # (see github issue #1059) - res = global_module_func(5, 6) - self.assertEqual(True, res) - - def test_global_record(self): - # (see github issue #1081) - x = np.recarray(1, dtype=x_dt)[0] - x.a = 1 - res = global_record_func(x) - self.assertEqual(True, res) - x.a = 2 - res = global_record_func(x) - self.assertEqual(False, res) - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_hashing.py b/numba/numba/tests/test_hashing.py deleted file mode 100644 index d4a8919ea..000000000 --- a/numba/numba/tests/test_hashing.py +++ /dev/null @@ -1,178 +0,0 @@ -""" -Test hashing of various supported types. 
-""" -from __future__ import print_function - -import numba.unittest_support as unittest - -from collections import defaultdict - -import numpy as np - -from numba import jit, types, utils -import numba.unittest_support as unittest -from .support import TestCase, tag, CompilationCache - - -def hash_usecase(x): - return hash(x) - - -class BaseTest(TestCase): - - def setUp(self): - self.cfunc = jit(nopython=True)(hash_usecase) - - def check_collection(self, values): - cfunc = self.cfunc - values = list(values) - hashes = [cfunc(x) for x in values] - for x in hashes: - self.assertIsInstance(x, utils.INT_TYPES) - - def check_distribution(hashes): - distinct = set(hashes) - if len(distinct) < 0.95 * len(values): - # Display hash collisions, for ease of debugging - counter = defaultdict(list) - for v, h in zip(values, hashes): - counter[h].append(v) - collisions = [(h, v) for h, v in counter.items() - if len(v) > 1] - collisions = "\n".join("%s: %s" % (h, v) - for h, v in sorted(collisions)) - self.fail("too many hash collisions: \n%s" % collisions) - - check_distribution(hashes) - - def int_samples(self, typ=np.int64): - for start in (0, -50, 60000, 1<<32): - info = np.iinfo(typ) - if not info.min <= start <= info.max: - continue - n = 100 - yield range(start, start + n) - yield range(start, start + 100 * n, 100) - yield range(start, start + 128 * n, 128) - - def float_samples(self, typ): - info = np.finfo(typ) - - for start in (0, 10, info.max ** 0.5, info.max / 1000.0): - n = 100 - min_step = max(info.tiny, start * info.resolution) - for step in (1.2, min_step ** 0.5, min_step): - if step < min_step: - continue - a = np.linspace(start, start + n * step, n) - a = a.astype(typ) - yield a - yield -a - yield a + a.mean() - - # Infs, nans, zeros - a = typ([0.0, 0.5, -0.0, -1.0, float('inf'), -float('inf'), float('nan')]) - yield a - - def complex_samples(self, typ, float_ty): - for real in self.float_samples(float_ty): - for imag in self.float_samples(float_ty): - # Ensure 
equal sizes - real = real[:len(imag)] - imag = imag[:len(real)] - a = real + typ(1j) * imag - yield a - - -class TestNumberHashing(BaseTest): - """ - Test hashing of number types. - """ - - def check_ints(self, typ): - def check_values(values): - values = sorted(set(typ(x) for x in values)) - self.check_collection(values) - - for a in self.int_samples(typ): - check_values(a) - - def check_floats(self, typ): - for a in self.float_samples(typ): - self.assertEqual(a.dtype, np.dtype(typ)) - self.check_collection(a) - - def check_complex(self, typ, float_ty): - for a in self.complex_samples(typ, float_ty): - self.assertEqual(a.dtype, np.dtype(typ)) - self.check_collection(a) - - @tag('important') - def test_ints(self): - self.check_ints(np.int8) - self.check_ints(np.uint16) - self.check_ints(np.int32) - self.check_ints(np.uint64) - - @tag('important') - def test_floats(self): - self.check_floats(np.float32) - self.check_floats(np.float64) - - @tag('important') - def test_complex(self): - self.check_complex(np.complex64, np.float32) - self.check_complex(np.complex128, np.float64) - - def test_bool(self): - self.check_collection([False, True]) - - -class TestTupleHashing(BaseTest): - """ - Test hashing of tuples. - """ - - def check_tuples(self, value_generator, split): - for values in value_generator: - tuples = [split(a) for a in values] - self.check_collection(tuples) - - def test_homogeneous_tuples(self): - typ = np.uint64 - def split2(i): - """ - Split i's bits into 2 integers. - """ - i = typ(i) - return (i & typ(0x5555555555555555), - i & typ(0xaaaaaaaaaaaaaaaa), - ) - - def split3(i): - """ - Split i's bits into 3 integers. 
- """ - i = typ(i) - return (i & typ(0x2492492492492492), - i & typ(0x4924924924924924), - i & typ(0x9249249249249249), - ) - - self.check_tuples(self.int_samples(), split2) - self.check_tuples(self.int_samples(), split3) - - @tag('important') - def test_heterogeneous_tuples(self): - modulo = 2**63 - - def split(i): - a = i & 0x5555555555555555 - b = (i & 0xaaaaaaaa) ^ ((i >> 32) & 0xaaaaaaaa) - return np.int64(a), np.float64(b * 0.0001) - - self.check_tuples(self.int_samples(), split) - - -if __name__ == "__main__": - unittest.main() diff --git a/numba/numba/tests/test_import.py b/numba/numba/tests/test_import.py deleted file mode 100644 index cbabf9de2..000000000 --- a/numba/numba/tests/test_import.py +++ /dev/null @@ -1,52 +0,0 @@ -from __future__ import print_function, absolute_import - -import subprocess -import sys - -from numba import unittest_support as unittest -from .support import TestCase - - -class TestNumbaImport(TestCase): - """ - Test behaviour of importing Numba. - """ - - def test_laziness(self): - """ - Importing top-level numba features should not import too many modules. - """ - # A heuristic set of modules that shouldn't be imported immediately - blacklist = [ - 'cffi', - 'distutils', - 'numba.cuda', - 'numba.hsa', - 'numba.targets.mathimpl', - 'numba.targets.randomimpl', - 'numba.tests', - 'numba.typing.collections', - 'numba.typing.listdecl', - 'numba.typing.npdatetime', - ] - # Sanity check the modules still exist... 
- for mod in blacklist: - if mod not in ('cffi', 'numba.hsa'): - __import__(mod) - - code = """if 1: - from numba import jit, types, vectorize - import sys - print(list(sys.modules)) - """ - - popen = subprocess.Popen([sys.executable, "-c", code], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = popen.communicate() - if popen.returncode != 0: - raise AssertionError("process failed with code %s: stderr follows\n%s\n" - % (popen.returncode, err.decode())) - - modlist = set(eval(out.strip())) - unexpected = set(blacklist) & set(modlist) - self.assertFalse(unexpected, "some modules unexpectedly imported") diff --git a/numba/numba/tests/test_indexing.py b/numba/numba/tests/test_indexing.py deleted file mode 100644 index 3b075bbd6..000000000 --- a/numba/numba/tests/test_indexing.py +++ /dev/null @@ -1,1147 +0,0 @@ -from __future__ import print_function - -import decimal -import itertools - -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import types, utils, njit, errors, typeof, numpy_support -from .support import TestCase, tag - - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -Noflags = Flags() -Noflags.set("nrt") - - -def slicing_1d_usecase(a, start, stop, step): - return a[start:stop:step] - -def slicing_1d_usecase2(a, start, stop, step): - b = a[start:stop:step] - total = 0 - for i in range(b.shape[0]): - total += b[i] * (i + 1) - return total - -def slicing_1d_usecase3(a, start, stop): - b = a[start:stop] - total = 0 - for i in range(b.shape[0]): - total += b[i] * (i + 1) - return total - -def slicing_1d_usecase4(a): - b = a[:] - total = 0 - for i in range(b.shape[0]): - total += b[i] * (i + 1) - return total - -def slicing_1d_usecase5(a, start): - b = a[start:] - total = 0 - for i in range(b.shape[0]): - total += b[i] * (i + 1) - return total - -def slicing_1d_usecase6(a, stop): - b = a[:stop] - total = 0 - for i in range(b.shape[0]): - total 
+= b[i] * (i + 1) - return total - -def slicing_1d_usecase7(a, start): - # Omitted stop with negative step (issue #1690) - b = a[start::-2] - total = 0 - for i in range(b.shape[0]): - total += b[i] * (i + 1) - return total - -def slicing_1d_usecase8(a, start): - # Omitted start with negative step - b = a[::-2] - total = 0 - for i in range(b.shape[0]): - total += b[i] * (i + 1) - return total - - -def slicing_2d_usecase(a, start1, stop1, step1, start2, stop2, step2): - # The index is a homogeneous tuple of slices - return a[start1:stop1:step1, start2:stop2:step2] - -def slicing_2d_usecase3(a, start1, stop1, step1, index): - # The index is a heterogeneous tuple - return a[start1:stop1:step1, index] - -def slicing_3d_usecase(a, index0, start1, index2): - b = a[index0, start1:, index2] - total = 0 - for i in range(b.shape[0]): - total += b[i] * (i + 1) - return total - -def slicing_3d_usecase2(a, index0, stop1, index2): - b = a[index0, :stop1, index2] - total = 0 - for i in range(b.shape[0]): - total += b[i] * (i + 1) - return total - -def partial_1d_usecase(a, index): - b = a[index] - total = 0 - for i in range(b.shape[0]): - total += b[i] * (i + 1) - return total - -def integer_indexing_1d_usecase(a, i): - return a[i] - -def integer_indexing_2d_usecase(a, i1, i2): - return a[i1,i2] - -def integer_indexing_2d_usecase2(a, i1, i2): - return a[i1][i2] - -def ellipsis_usecase1(a, i, j): - return a[i:j, ...] 
- -def ellipsis_usecase2(a, i, j): - return a[..., i:j] - -def ellipsis_usecase3(a, i, j): - return a[i, ..., j] - -def none_index_usecase(a): - return a[None] - -def empty_tuple_usecase(a): - return a[()] - - -@njit -def setitem_usecase(a, index, value): - a[index] = value - - -@njit -def setitem_broadcast_usecase(a, value): - a[:] = value - - -def slicing_1d_usecase_set(a, b, start, stop, step): - a[start:stop:step] = b - return a - -def slicing_1d_usecase_add(a, b, start, stop): - # NOTE: uses the ROT_FOUR opcode on Python 2, only on the [start:stop] - # with inplace operator form. - a[start:stop] += b - return a - -def slicing_2d_usecase_set(a, b, start, stop, step, start2, stop2, step2): - a[start:stop:step,start2:stop2:step2] = b - return a - - -class TestGetItem(TestCase): - """ - Test basic indexed load from an array (returning a view or a scalar). - Note fancy indexing is tested in test_fancy_indexing. - """ - - def test_1d_slicing(self, flags=enable_pyobj_flags): - pyfunc = slicing_1d_usecase - arraytype = types.Array(types.int32, 1, 'C') - argtys = (arraytype, types.int32, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(10, dtype='i4') - for indices in [(0, 10, 1), - (2, 3, 1), - (10, 0, 1), - (0, 10, -1), - (0, 10, 2), - (9, 0, -1), - (-5, -2, 1), - (0, -1, 1), - ]: - expected = pyfunc(a, *indices) - self.assertPreciseEqual(cfunc(a, *indices), expected) - - def test_1d_slicing_npm(self): - self.test_1d_slicing(flags=Noflags) - - def test_1d_slicing2(self, flags=enable_pyobj_flags): - pyfunc = slicing_1d_usecase2 - arraytype = types.Array(types.int32, 1, 'C') - argtys = (arraytype, types.int32, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(10, dtype='i4') - - args = [(0, 10, 1), - (2, 3, 1), - (10, 0, 1), - (0, 10, -1), - (0, 10, 2)] - - for arg in args: - self.assertEqual(pyfunc(a, *arg), cfunc(a, *arg)) - 
- - # Any - arraytype = types.Array(types.int32, 1, 'A') - argtys = (arraytype, types.int32, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(20, dtype='i4')[::2] - self.assertFalse(a.flags['C_CONTIGUOUS']) - self.assertFalse(a.flags['F_CONTIGUOUS']) - - args = [(0, 10, 1), - (2, 3, 1), - (10, 0, 1), - (0, 10, -1), - (0, 10, 2)] - - for arg in args: - self.assertEqual(pyfunc(a, *arg), cfunc(a, *arg)) - - def test_1d_slicing2_npm(self): - self.test_1d_slicing2(flags=Noflags) - - def test_1d_slicing3(self, flags=enable_pyobj_flags): - pyfunc = slicing_1d_usecase3 - arraytype = types.Array(types.int32, 1, 'C') - argtys = (arraytype, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(10, dtype='i4') - - args = [(3, 10), - (2, 3), - (10, 0), - (0, 10), - (5, 10)] - - for arg in args: - self.assertEqual(pyfunc(a, *arg), cfunc(a, *arg)) - - - # Any - arraytype = types.Array(types.int32, 1, 'A') - argtys = (arraytype, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(20, dtype='i4')[::2] - self.assertFalse(a.flags['C_CONTIGUOUS']) - self.assertFalse(a.flags['F_CONTIGUOUS']) - - for arg in args: - self.assertEqual(pyfunc(a, *arg), cfunc(a, *arg)) - - def test_1d_slicing3_npm(self): - self.test_1d_slicing3(flags=Noflags) - - def test_1d_slicing4(self, flags=enable_pyobj_flags): - pyfunc = slicing_1d_usecase4 - arraytype = types.Array(types.int32, 1, 'C') - argtys = (arraytype,) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(10, dtype='i4') - self.assertEqual(pyfunc(a), cfunc(a)) - - # Any - arraytype = types.Array(types.int32, 1, 'A') - argtys = (arraytype,) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(20, dtype='i4')[::2] - 
self.assertFalse(a.flags['C_CONTIGUOUS']) - self.assertFalse(a.flags['F_CONTIGUOUS']) - self.assertEqual(pyfunc(a), cfunc(a)) - - def test_1d_slicing4_npm(self): - self.test_1d_slicing4(flags=Noflags) - - def check_1d_slicing_with_arg(self, pyfunc, flags): - args = list(range(-9, 10)) - - arraytype = types.Array(types.int32, 1, 'C') - argtys = (arraytype, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(10, dtype='i4') - for arg in args: - self.assertEqual(pyfunc(a, arg), cfunc(a, arg)) - - # Any - arraytype = types.Array(types.int32, 1, 'A') - argtys = (arraytype, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(20, dtype='i4')[::2] - self.assertFalse(a.flags['C_CONTIGUOUS']) - self.assertFalse(a.flags['F_CONTIGUOUS']) - for arg in args: - self.assertEqual(pyfunc(a, arg), cfunc(a, arg)) - - def test_1d_slicing5(self, flags=enable_pyobj_flags): - pyfunc = slicing_1d_usecase5 - self.check_1d_slicing_with_arg(pyfunc, flags) - - def test_1d_slicing5_npm(self): - self.test_1d_slicing5(flags=Noflags) - - def test_1d_slicing6(self, flags=enable_pyobj_flags): - pyfunc = slicing_1d_usecase6 - self.check_1d_slicing_with_arg(pyfunc, flags) - - def test_1d_slicing6_npm(self): - self.test_1d_slicing6(flags=Noflags) - - def test_1d_slicing7(self, flags=enable_pyobj_flags): - pyfunc = slicing_1d_usecase7 - self.check_1d_slicing_with_arg(pyfunc, flags) - - def test_1d_slicing7_npm(self): - self.test_1d_slicing7(flags=Noflags) - - def test_1d_slicing8(self, flags=enable_pyobj_flags): - pyfunc = slicing_1d_usecase8 - self.check_1d_slicing_with_arg(pyfunc, flags) - - def test_1d_slicing8_npm(self): - self.test_1d_slicing8(flags=Noflags) - - def test_2d_slicing(self, flags=enable_pyobj_flags): - """ - arr_2d[a:b:c] - """ - pyfunc = slicing_1d_usecase - arraytype = types.Array(types.int32, 2, 'C') - argtys = (arraytype, types.int32, types.int32, types.int32) - cr = 
compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(100, dtype='i4').reshape(10, 10) - for args in [(0, 10, 1), (2, 3, 1), (10, 0, 1), - (0, 10, -1), (0, 10, 2)]: - self.assertPreciseEqual(pyfunc(a, *args), cfunc(a, *args), - msg="for args %s" % (args,)) - - def test_2d_slicing_npm(self): - self.test_2d_slicing(flags=Noflags) - - def test_2d_slicing2(self, flags=enable_pyobj_flags): - """ - arr_2d[a:b:c, d:e:f] - """ - # C layout - pyfunc = slicing_2d_usecase - arraytype = types.Array(types.int32, 2, 'C') - argtys = (arraytype, types.int32, types.int32, types.int32, - types.int32, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(100, dtype='i4').reshape(10, 10) - - indices = [(0, 10, 1), - (2, 3, 1), - (10, 0, 1), - (0, 10, -1), - (0, 10, 2), - (10, 0, -1), - (9, 0, -2), - (-5, -2, 1), - (0, -1, 1), - ] - args = [tup1 + tup2 - for (tup1, tup2) in itertools.product(indices, indices)] - for arg in args: - expected = pyfunc(a, *arg) - self.assertPreciseEqual(cfunc(a, *arg), expected) - - # Any layout - arraytype = types.Array(types.int32, 2, 'A') - argtys = (arraytype, types.int32, types.int32, types.int32, - types.int32, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(400, dtype='i4').reshape(20, 20)[::2, ::2] - - for arg in args: - expected = pyfunc(a, *arg) - self.assertPreciseEqual(cfunc(a, *arg), expected) - - def test_2d_slicing2_npm(self): - self.test_2d_slicing2(flags=Noflags) - - def test_2d_slicing3(self, flags=enable_pyobj_flags): - """ - arr_2d[a:b:c, d] - """ - # C layout - pyfunc = slicing_2d_usecase3 - arraytype = types.Array(types.int32, 2, 'C') - argtys = (arraytype, types.int32, types.int32, types.int32, - types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(100, dtype='i4').reshape(10, 10) - - args = [ - (0, 10, 
1, 0), - (2, 3, 1, 1), - (10, 0, -1, 8), - (9, 0, -2, 4), - (0, 10, 2, 3), - (0, -1, 3, 1), - ] - for arg in args: - expected = pyfunc(a, *arg) - self.assertPreciseEqual(cfunc(a, *arg), expected) - - # Any layout - arraytype = types.Array(types.int32, 2, 'A') - argtys = (arraytype, types.int32, types.int32, types.int32, - types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(400, dtype='i4').reshape(20, 20)[::2, ::2] - - for arg in args: - expected = pyfunc(a, *arg) - self.assertPreciseEqual(cfunc(a, *arg), expected) - - def test_2d_slicing3_npm(self): - self.test_2d_slicing3(flags=Noflags) - - def test_3d_slicing(self, flags=enable_pyobj_flags): - # C layout - pyfunc = slicing_3d_usecase - arraytype = types.Array(types.int32, 3, 'C') - argtys = (arraytype, types.int32, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(1000, dtype='i4').reshape(10, 10, 10) - - args = [ - (0, 9, 1), - (2, 3, 1), - (9, 0, 1), - (0, 9, -1), - (0, 9, 2), - ] - for arg in args: - self.assertEqual(pyfunc(a, *arg), cfunc(a, *arg)) - - # Any layout - arraytype = types.Array(types.int32, 3, 'A') - argtys = (arraytype, types.int32, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(2000, dtype='i4')[::2].reshape(10, 10, 10) - - for arg in args: - self.assertEqual(pyfunc(a, *arg), cfunc(a, *arg)) - - @tag('important') - def test_3d_slicing_npm(self): - self.test_3d_slicing(flags=Noflags) - - def test_3d_slicing2(self, flags=enable_pyobj_flags): - # C layout - pyfunc = slicing_3d_usecase2 - arraytype = types.Array(types.int32, 3, 'C') - argtys = (arraytype, types.int32, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(1000, dtype='i4').reshape(10, 10, 10) - - args = [ - (0, 9, 1), - (2, 3, 1), - (9, 0, 1), - (0, 9, -1), - (0, 
9, 2), - ] - for arg in args: - self.assertEqual(pyfunc(a, *arg), cfunc(a, *arg)) - - # Any layout - arraytype = types.Array(types.int32, 3, 'A') - argtys = (arraytype, types.int32, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - a = np.arange(2000, dtype='i4')[::2].reshape(10, 10, 10) - - for arg in args: - self.assertEqual(pyfunc(a, *arg), cfunc(a, *arg)) - - def test_3d_slicing2_npm(self): - self.test_3d_slicing2(flags=Noflags) - - def test_1d_integer_indexing(self, flags=enable_pyobj_flags): - # C layout - pyfunc = integer_indexing_1d_usecase - arraytype = types.Array(types.int32, 1, 'C') - cr = compile_isolated(pyfunc, (arraytype, types.int32), flags=flags) - cfunc = cr.entry_point - - a = np.arange(10, dtype='i4') - self.assertEqual(pyfunc(a, 0), cfunc(a, 0)) - self.assertEqual(pyfunc(a, 9), cfunc(a, 9)) - self.assertEqual(pyfunc(a, -1), cfunc(a, -1)) - - # Any layout - arraytype = types.Array(types.int32, 1, 'A') - cr = compile_isolated(pyfunc, (arraytype, types.int32), flags=flags) - cfunc = cr.entry_point - - a = np.arange(10, dtype='i4')[::2] - self.assertFalse(a.flags['C_CONTIGUOUS']) - self.assertFalse(a.flags['F_CONTIGUOUS']) - self.assertEqual(pyfunc(a, 0), cfunc(a, 0)) - self.assertEqual(pyfunc(a, 2), cfunc(a, 2)) - self.assertEqual(pyfunc(a, -1), cfunc(a, -1)) - - # Using a 0-d array as integer index - arraytype = types.Array(types.int32, 1, 'C') - indextype = types.Array(types.int16, 0, 'C') - cr = compile_isolated(pyfunc, (arraytype, indextype), flags=flags) - cfunc = cr.entry_point - - a = np.arange(3, 13, dtype=np.int32) - for i in (0, 9, -2): - idx = np.array(i).astype(np.int16) - assert idx.ndim == 0 - self.assertEqual(pyfunc(a, idx), cfunc(a, idx)) - - def test_1d_integer_indexing_npm(self): - self.test_1d_integer_indexing(flags=Noflags) - - def test_integer_indexing_1d_for_2d(self, flags=enable_pyobj_flags): - # Test partial (1d) indexing of a 2d array - pyfunc = 
integer_indexing_1d_usecase - arraytype = types.Array(types.int32, 2, 'C') - cr = compile_isolated(pyfunc, (arraytype, types.int32), flags=flags) - cfunc = cr.entry_point - - a = np.arange(100, dtype='i4').reshape(10, 10) - self.assertPreciseEqual(pyfunc(a, 0), cfunc(a, 0)) - self.assertPreciseEqual(pyfunc(a, 9), cfunc(a, 9)) - self.assertPreciseEqual(pyfunc(a, -1), cfunc(a, -1)) - - arraytype = types.Array(types.int32, 2, 'A') - cr = compile_isolated(pyfunc, (arraytype, types.int32), flags=flags) - cfunc = cr.entry_point - - a = np.arange(20, dtype='i4').reshape(5, 4)[::2] - self.assertPreciseEqual(pyfunc(a, 0), cfunc(a, 0)) - - @tag('important') - def test_integer_indexing_1d_for_2d_npm(self): - self.test_integer_indexing_1d_for_2d(flags=Noflags) - - def test_2d_integer_indexing(self, flags=enable_pyobj_flags, - pyfunc=integer_indexing_2d_usecase): - # C layout - a = np.arange(100, dtype='i4').reshape(10, 10) - arraytype = types.Array(types.int32, 2, 'C') - cr = compile_isolated(pyfunc, (arraytype, types.int32, types.int32), - flags=flags) - cfunc = cr.entry_point - - self.assertEqual(pyfunc(a, 0, 3), cfunc(a, 0, 3)) - self.assertEqual(pyfunc(a, 9, 9), cfunc(a, 9, 9)) - self.assertEqual(pyfunc(a, -2, -1), cfunc(a, -2, -1)) - - # Any layout - a = np.arange(100, dtype='i4').reshape(10, 10)[::2, ::2] - self.assertFalse(a.flags['C_CONTIGUOUS']) - self.assertFalse(a.flags['F_CONTIGUOUS']) - - arraytype = types.Array(types.int32, 2, 'A') - cr = compile_isolated(pyfunc, (arraytype, types.int32, types.int32), - flags=flags) - cfunc = cr.entry_point - - self.assertEqual(pyfunc(a, 0, 1), cfunc(a, 0, 1)) - self.assertEqual(pyfunc(a, 2, 2), cfunc(a, 2, 2)) - self.assertEqual(pyfunc(a, -2, -1), cfunc(a, -2, -1)) - - # With 0-d arrays as integer indices - a = np.arange(100, dtype='i4').reshape(10, 10) - arraytype = types.Array(types.int32, 2, 'C') - indextype = types.Array(types.int32, 0, 'C') - cr = compile_isolated(pyfunc, (arraytype, indextype, indextype), - flags=flags) - 
cfunc = cr.entry_point - - for i, j in [(0, 3), (8, 9), (-2, -1)]: - i = np.array(i).astype(np.int32) - j = np.array(j).astype(np.int32) - self.assertEqual(pyfunc(a, i, j), cfunc(a, i, j)) - - @tag('important') - def test_2d_integer_indexing_npm(self): - self.test_2d_integer_indexing(flags=Noflags) - - def test_2d_integer_indexing2(self): - self.test_2d_integer_indexing(pyfunc=integer_indexing_2d_usecase2) - self.test_2d_integer_indexing(flags=Noflags, - pyfunc=integer_indexing_2d_usecase2) - - def test_2d_integer_indexing_via_call(self): - @njit - def index1(X, i0): - return X[i0] - @njit - def index2(X, i0, i1): - return index1(X[i0], i1) - a = np.arange(10).reshape(2, 5) - self.assertEqual(index2(a, 0, 0), a[0][0]) - self.assertEqual(index2(a, 1, 1), a[1][1]) - self.assertEqual(index2(a, -1, -1), a[-1][-1]) - - def test_2d_float_indexing(self, flags=enable_pyobj_flags): - a = np.arange(100, dtype='i4').reshape(10, 10) - pyfunc = integer_indexing_2d_usecase - arraytype = types.Array(types.int32, 2, 'C') - cr = compile_isolated(pyfunc, (arraytype, types.float32, types.int32), - flags=flags) - cfunc = cr.entry_point - - self.assertEqual(pyfunc(a, 0, 0), cfunc(a, 0, 0)) - self.assertEqual(pyfunc(a, 9, 9), cfunc(a, 9, 9)) - self.assertEqual(pyfunc(a, -1, -1), cfunc(a, -1, -1)) - - def test_partial_1d_indexing(self, flags=enable_pyobj_flags): - pyfunc = partial_1d_usecase - - def check(arr, arraytype): - cr = compile_isolated(pyfunc, (arraytype, types.int32), - flags=flags) - cfunc = cr.entry_point - self.assertEqual(pyfunc(arr, 0), cfunc(arr, 0)) - n = arr.shape[0] - 1 - self.assertEqual(pyfunc(arr, n), cfunc(arr, n)) - self.assertEqual(pyfunc(arr, -1), cfunc(arr, -1)) - - a = np.arange(12, dtype='i4').reshape((4, 3)) - arraytype = types.Array(types.int32, 2, 'C') - check(a, arraytype) - - a = np.arange(12, dtype='i4').reshape((3, 4)).T - arraytype = types.Array(types.int32, 2, 'F') - check(a, arraytype) - - a = np.arange(12, dtype='i4').reshape((3, 4))[::2] - 
arraytype = types.Array(types.int32, 2, 'A') - check(a, arraytype) - - def check_ellipsis(self, pyfunc, flags): - def compile_func(arr): - cr = compile_isolated(pyfunc, (typeof(arr), types.intp, types.intp), - flags=flags) - return cr.entry_point - - def run(a): - bounds = (0, 1, 2, -1, -2) - cfunc = compile_func(a) - for i, j in itertools.product(bounds, bounds): - x = cfunc(a, i, j) - self.assertPreciseEqual(pyfunc(a, i, j), cfunc(a, i, j)) - - run(np.arange(16, dtype='i4').reshape(4, 4)) - run(np.arange(27, dtype='i4').reshape(3, 3, 3)) - - def test_ellipsis1(self, flags=enable_pyobj_flags): - self.check_ellipsis(ellipsis_usecase1, flags) - - def test_ellipsis1_npm(self): - self.test_ellipsis1(flags=Noflags) - - def test_ellipsis2(self, flags=enable_pyobj_flags): - self.check_ellipsis(ellipsis_usecase2, flags) - - def test_ellipsis2_npm(self): - self.test_ellipsis2(flags=Noflags) - - def test_ellipsis3(self, flags=enable_pyobj_flags): - self.check_ellipsis(ellipsis_usecase3, flags) - - def test_ellipsis3_npm(self): - self.test_ellipsis3(flags=Noflags) - - def test_ellipsis_issue1498(self): - # This is an issue due to incorrect layout inferred for when - # ellpsis is used and ndenumerate is specializing on the layout. - @njit - def udt(arr): - out = np.zeros_like(arr) - i = 0 - for index, val in np.ndenumerate(arr[..., i]): - out[index][i] = val - - return out - - py_func = udt.py_func - - outersize = 4 - innersize = 4 - arr = np.arange(outersize * innersize).reshape(outersize, innersize) - got = udt(arr) - expected = py_func(arr) - np.testing.assert_equal(got, expected) - - def test_ellipsis_issue1499(self): - # This tests an issue when ndarray.__getitem__ recv a tuple of - # constants. The lowering is mishandling the constant value creation. 
- @njit - def udt(arr): - return arr[..., 0] - - arr = np.arange(3) - got = udt(arr) - expected = udt.py_func(arr) - np.testing.assert_equal(got, expected) - - def test_none_index(self, flags=enable_pyobj_flags): - pyfunc = none_index_usecase - arraytype = types.Array(types.int32, 2, 'C') - # TODO should be enable to handle this in NoPython mode - cr = compile_isolated(pyfunc, (arraytype,), flags=flags) - cfunc = cr.entry_point - - a = np.arange(100, dtype='i4').reshape(10, 10) - self.assertPreciseEqual(pyfunc(a), cfunc(a)) - - def test_none_index_npm(self): - with self.assertTypingError(): - self.test_none_index(flags=Noflags) - - def test_empty_tuple_indexing(self, flags=enable_pyobj_flags): - pyfunc = empty_tuple_usecase - arraytype = types.Array(types.int32, 0, 'C') - cr = compile_isolated(pyfunc, (arraytype,), flags=flags) - cfunc = cr.entry_point - - a = np.arange(1, dtype='i4').reshape(()) - self.assertPreciseEqual(pyfunc(a), cfunc(a)) - - def test_empty_tuple_indexing_npm(self): - self.test_empty_tuple_indexing(flags=Noflags) - - -class TestSetItem(TestCase): - """ - Test basic indexed store into an array. - Note fancy indexing is tested in test_fancy_indexing. 
- """ - - def test_conversion_setitem(self, flags=enable_pyobj_flags): - """ this used to work, and was used in one of the tutorials """ - from numba import jit - - def pyfunc(array): - for index in range(len(array)): - array[index] = index % decimal.Decimal(100) - - cfunc = jit("void(i8[:])")(pyfunc) - - udt = np.arange(100, dtype='i1') - control = udt.copy() - pyfunc(control) - cfunc(udt) - self.assertPreciseEqual(udt, control) - - def test_1d_slicing_set(self, flags=enable_pyobj_flags): - """ - 1d to 1d slice assignment - """ - pyfunc = slicing_1d_usecase_set - # Note heterogeneous types for the source and destination arrays - # (int16[:] -> int32[:]) - dest_type = types.Array(types.int32, 1, 'C') - src_type = types.Array(types.int16, 1, 'A') - argtys = (dest_type, src_type, types.int32, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - N = 10 - arg = np.arange(N, dtype='i2') + 40 - bounds = [0, 2, N - 2, N, N + 1, N + 3, - -2, -N + 2, -N, -N - 1, -N - 3] - def make_dest(): - return np.zeros_like(arg, dtype='i4') - for start, stop in itertools.product(bounds, bounds): - for step in (1, 2, -1, -2): - args = start, stop, step - index = slice(*args) - pyleft = pyfunc(make_dest(), arg[index], *args) - cleft = cfunc(make_dest(), arg[index], *args) - self.assertPreciseEqual(pyleft, cleft) - - # Mismatching input size and slice length - with self.assertRaises(ValueError): - cfunc(np.zeros_like(arg), arg, 0, 0, 1) - - def check_1d_slicing_set_sequence(self, flags, seqty, seq): - """ - Generic sequence to 1d slice assignment - """ - pyfunc = slicing_1d_usecase_set - dest_type = types.Array(types.int32, 1, 'C') - argtys = (dest_type, seqty, types.int32, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - N = 10 - k = len(seq) - arg = np.arange(N, dtype=np.int32) - args = (seq, 1, -N + k + 1, 1) - expected = pyfunc(arg.copy(), *args) - got = cfunc(arg.copy(), 
*args) - self.assertPreciseEqual(expected, got) - - if numpy_support.version != (1, 7): - # Numpy 1.7 doesn't always raise an error here (object mode) - args = (seq, 1, -N + k, 1) - with self.assertRaises(ValueError) as raises: - cfunc(arg.copy(), *args) - - def test_1d_slicing_set_tuple(self, flags=enable_pyobj_flags): - """ - Tuple to 1d slice assignment - """ - self.check_1d_slicing_set_sequence( - flags, types.UniTuple(types.int16, 2), (8, -42)) - - def test_1d_slicing_set_list(self, flags=enable_pyobj_flags): - """ - List to 1d slice assignment - """ - self.check_1d_slicing_set_sequence( - flags, types.List(types.int16), [8, -42]) - - def test_1d_slicing_broadcast(self, flags=enable_pyobj_flags): - """ - scalar to 1d slice assignment - """ - pyfunc = slicing_1d_usecase_set - arraytype = types.Array(types.int32, 1, 'C') - # Note heterogeneous types for the source scalar and the destination - # array (int16 -> int32[:]) - argtys = (arraytype, types.int16, types.int32, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - N = 10 - arg = np.arange(N, dtype='i4') - val = 42 - bounds = [0, 2, N - 2, N, N + 1, N + 3, - -2, -N + 2, -N, -N - 1, -N - 3] - for start, stop in itertools.product(bounds, bounds): - for step in (1, 2, -1, -2): - args = val, start, stop, step - pyleft = pyfunc(arg.copy(), *args) - cleft = cfunc(arg.copy(), *args) - self.assertPreciseEqual(pyleft, cleft) - - def test_1d_slicing_add(self, flags=enable_pyobj_flags): - pyfunc = slicing_1d_usecase_add - arraytype = types.Array(types.int32, 1, 'C') - argtys = (arraytype, arraytype, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - arg = np.arange(10, dtype='i4') - for test in ((0, 10), (2, 5)): - pyleft = pyfunc(np.zeros_like(arg), arg[slice(*test)], *test) - cleft = cfunc(np.zeros_like(arg), arg[slice(*test)], *test) - self.assertPreciseEqual(pyleft, cleft) - - def 
test_1d_slicing_set_npm(self): - self.test_1d_slicing_set(flags=Noflags) - - def test_1d_slicing_set_list_npm(self): - self.test_1d_slicing_set_list(flags=Noflags) - - def test_1d_slicing_set_tuple_npm(self): - self.test_1d_slicing_set_tuple(flags=Noflags) - - def test_1d_slicing_broadcast_npm(self): - self.test_1d_slicing_broadcast(flags=Noflags) - - def test_1d_slicing_add_npm(self): - self.test_1d_slicing_add(flags=Noflags) - - @tag('important') - def test_2d_slicing_set(self, flags=enable_pyobj_flags): - """ - 2d to 2d slice assignment - """ - pyfunc = slicing_2d_usecase_set - arraytype = types.Array(types.int32, 2, 'A') - argtys = (arraytype, arraytype, types.int32, types.int32, types.int32, - types.int32, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - arg = np.arange(10*10, dtype='i4').reshape(10,10) - tests = [ - (0, 10, 1, 0, 10, 1), - (2, 3, 1, 2, 3, 1), - (10, 0, 1, 10, 0, 1), - (0, 10, -1, 0, 10, -1), - (0, 10, 2, 0, 10, 2), - ] - for test in tests: - pyleft = pyfunc(np.zeros_like(arg), arg[slice(*test[0:3]), slice(*test[3:6])], *test) - cleft = cfunc(np.zeros_like(arg), arg[slice(*test[0:3]), slice(*test[3:6])], *test) - self.assertPreciseEqual(cleft, pyleft) - - def test_2d_slicing_broadcast(self, flags=enable_pyobj_flags): - """ - scalar to 2d slice assignment - """ - pyfunc = slicing_2d_usecase_set - arraytype = types.Array(types.int32, 2, 'C') - # Note heterogeneous types for the source scalar and the destination - # array (int16 -> int32[:]) - argtys = (arraytype, types.int16, types.int32, types.int32, types.int32, - types.int32, types.int32, types.int32) - cr = compile_isolated(pyfunc, argtys, flags=flags) - cfunc = cr.entry_point - - arg = np.arange(10*10, dtype='i4').reshape(10,10) - val = 42 - tests = [ - (0, 10, 1, 0, 10, 1), - (2, 3, 1, 2, 3, 1), - (10, 0, 1, 10, 0, 1), - (0, 10, -1, 0, 10, -1), - (0, 10, 2, 0, 10, 2), - ] - for test in tests: - pyleft = pyfunc(arg.copy(), val, 
*test) - cleft = cfunc(arg.copy(), val, *test) - self.assertPreciseEqual(cleft, pyleft) - - def test_2d_slicing_set_npm(self): - self.test_2d_slicing_set(flags=Noflags) - - def test_2d_slicing_broadcast_npm(self): - self.test_2d_slicing_broadcast(flags=Noflags) - - def test_setitem(self): - """ - scalar indexed assignment - """ - arr = np.arange(5) - setitem_usecase(arr, 1, 42) - self.assertEqual(arr.tolist(), [0, 42, 2, 3, 4]) - # Using a 0-d array as scalar index - setitem_usecase(arr, np.array(3).astype(np.uint16), 8) - self.assertEqual(arr.tolist(), [0, 42, 2, 8, 4]) - # Scalar Broadcasting - arr = np.arange(9).reshape(3, 3) - setitem_usecase(arr, 1, 42) - self.assertEqual(arr.tolist(), [[0, 1, 2], [42, 42, 42], [6, 7, 8]]) - - def test_setitem_broadcast(self): - """ - broadcasted array assignment - """ - # Scalar Broadcasting - dst = np.arange(5) - setitem_broadcast_usecase(dst, 42) - self.assertEqual(dst.tolist(), [42] * 5) - # 1D -> 2D Array Broadcasting - dst = np.arange(6).reshape(2, 3) - setitem_broadcast_usecase(dst, np.arange(1, 4)) - self.assertEqual(dst.tolist(), [[1, 2, 3], [1, 2, 3]]) - # 2D -> 2D Array Broadcasting - dst = np.arange(6).reshape(2, 3) - setitem_broadcast_usecase(dst, np.arange(1, 4).reshape(1, 3)) - self.assertEqual(dst.tolist(), [[1, 2, 3], [1, 2, 3]]) - # 2D -> 4D Array Broadcasting - dst = np.arange(12).reshape(2, 1, 2, 3) - setitem_broadcast_usecase(dst, np.arange(1, 4).reshape(1, 3)) - inner2 = [[1, 2, 3], [1, 2, 3]] - self.assertEqual(dst.tolist(), [[inner2]] * 2) - # 2D -> 1D Array Broadcasting - dst = np.arange(5) - setitem_broadcast_usecase(dst, np.arange(1, 6).reshape(1, 5)) - self.assertEqual(dst.tolist(), [1, 2, 3, 4, 5]) - # 4D -> 2D Array Broadcasting - dst = np.arange(6).reshape(2, 3) - setitem_broadcast_usecase(dst, np.arange(1, 1 + dst.size).reshape(1, 1, 2, 3)) - self.assertEqual(dst.tolist(), [[1, 2, 3], [4, 5, 6]]) - - def test_setitem_broadcast_error(self): - # higher dim assigned into lower dim - # 2D -> 1D - 
dst = np.arange(5) - src = np.arange(10).reshape(2, 5) - with self.assertRaises(ValueError) as raises: - setitem_broadcast_usecase(dst, src) - errmsg = str(raises.exception) - self.assertEqual('cannot broadcast source array for assignment', - errmsg) - # 3D -> 2D - dst = np.arange(5).reshape(1, 5) - src = np.arange(10).reshape(1, 2, 5) - with self.assertRaises(ValueError) as raises: - setitem_broadcast_usecase(dst, src) - errmsg = str(raises.exception) - self.assertEqual('cannot assign slice from input of different size', - errmsg) - # lower to higher - # 1D -> 2D - dst = np.arange(10).reshape(2, 5) - src = np.arange(4) - with self.assertRaises(ValueError) as raises: - setitem_broadcast_usecase(dst, src) - errmsg = str(raises.exception) - self.assertEqual('cannot assign slice from input of different size', - errmsg) - - def test_slicing_1d_broadcast(self): - # 1D -> 2D sliced (1) - dst = np.arange(6).reshape(3, 2) - src = np.arange(1, 3) - slicing_1d_usecase_set(dst, src, 0, 2, 1) - self.assertEqual(dst.tolist(), [[1, 2], [1, 2], [4, 5]]) - # 1D -> 2D sliced (2) - dst = np.arange(6).reshape(3, 2) - src = np.arange(1, 3) - slicing_1d_usecase_set(dst, src, 0, None, 2) - self.assertEqual(dst.tolist(), [[1, 2], [2, 3], [1, 2]]) - # 2D -> 2D sliced (3) - dst = np.arange(6).reshape(3, 2) - src = np.arange(1, 5).reshape(2, 2) - slicing_1d_usecase_set(dst, src, None, 2, 1) - self.assertEqual(dst.tolist(), [[1, 2], [3, 4], [4, 5]]) - - def test_setitem_readonly(self): - arr = np.arange(5) - arr.flags.writeable = False - with self.assertRaises((TypeError, errors.TypingError)) as raises: - setitem_usecase(arr, 1, 42) - self.assertIn("Cannot modify value of type readonly array", - str(raises.exception)) - - -class TestTyping(TestCase): - """ - Check typing of basic indexing operations - """ - - def test_layout(self): - """ - Check an appropriate layout is inferred for the result of array - indexing. 
- """ - from numba.typing import arraydecl - from numba.types import intp, ellipsis, slice2_type, slice3_type - - func = arraydecl.get_array_index_type - - cty = types.Array(types.float64, 3, 'C') - fty = types.Array(types.float64, 3, 'F') - aty = types.Array(types.float64, 3, 'A') - - indices = [ - # Tuples of (indexing arguments, keeps "C" layout, keeps "F" layout) - ((), True, True), - ((ellipsis,), True, True), - - # Indexing from the left => can sometimes keep "C" layout - ((intp,), True, False), - ((slice2_type,), True, False), - ((intp, slice2_type), True, False), - ((slice2_type, intp), False, False), - ((slice2_type, slice2_type), False, False), - # Strided slices = > "A" layout - ((intp, slice3_type), False, False), - ((slice3_type,), False, False), - - # Indexing from the right => can sometimes keep "F" layout - ((ellipsis, intp,), False, True), - ((ellipsis, slice2_type,), False, True), - ((ellipsis, intp, slice2_type,), False, False), - ((ellipsis, slice2_type, intp,), False, True), - ((ellipsis, slice2_type, slice2_type,), False, False), - # Strided slices = > "A" layout - ((ellipsis, slice3_type,), False, False), - ((ellipsis, slice3_type, intp,), False, False), - - # Indexing from both sides => only if all dimensions are indexed - ((intp, ellipsis, intp,), False, False), - ((slice2_type, ellipsis, slice2_type,), False, False), - ((intp, intp, slice2_type,), True, False), - ((intp, ellipsis, intp, slice2_type,), True, False), - ((slice2_type, intp, intp,), False, True), - ((slice2_type, intp, ellipsis, intp,), False, True), - ((intp, slice2_type, intp,), False, False), - # Strided slices = > "A" layout - ((slice3_type, intp, intp,), False, False), - ((intp, intp, slice3_type,), False, False), - ] - - for index_tuple, keep_c, _ in indices: - index = types.Tuple(index_tuple) - r = func(cty, index) - self.assertEqual(tuple(r.index), index_tuple) - self.assertEqual(r.result.layout, 'C' if keep_c else 'A', - index_tuple) - self.assertFalse(r.advanced) - - 
for index_tuple, _, keep_f in indices: - index = types.Tuple(index_tuple) - r = func(fty, index) - self.assertEqual(tuple(r.index), index_tuple) - self.assertEqual(r.result.layout, 'F' if keep_f else 'A', - index_tuple) - self.assertFalse(r.advanced) - - for index_tuple, _, _ in indices: - index = types.Tuple(index_tuple) - r = func(aty, index) - self.assertEqual(tuple(r.index), index_tuple) - self.assertEqual(r.result.layout, 'A') - self.assertFalse(r.advanced) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_inlining.py b/numba/numba/tests/test_inlining.py deleted file mode 100644 index 753af38af..000000000 --- a/numba/numba/tests/test_inlining.py +++ /dev/null @@ -1,78 +0,0 @@ -from __future__ import print_function, absolute_import - -import re - -from .support import TestCase, override_config, captured_stdout -from numba import unittest_support as unittest -from numba import jit, types - - -@jit((types.int32,), nopython=True) -def inner(a): - return a + 1 - -@jit((types.int32,), nopython=True) -def more(a): - return inner(inner(a)) - -def outer_simple(a): - return inner(a) * 2 - -def outer_multiple(a): - return inner(a) * more(a) - - -class TestInlining(TestCase): - """ - Check that jitted inner functions are inlined into outer functions, - in nopython mode. - Note that not all inner functions are guaranteed to be inlined. - We just trust LLVM's inlining heuristics. - """ - - def make_pattern(self, fullname): - """ - Make regexpr to match mangled name - """ - parts = fullname.split('.') - return r'_ZN?' 
+ r''.join([r'\d+{}'.format(p) for p in parts]) - - def assert_has_pattern(self, fullname, text): - pat = self.make_pattern(fullname) - self.assertIsNotNone(re.search(pat, text), - msg='expected {}'.format(pat)) - - def assert_not_has_pattern(self, fullname, text): - pat = self.make_pattern(fullname) - self.assertIsNone(re.search(pat, text), - msg='unexpected {}'.format(pat)) - - def test_inner_function(self): - with override_config('DUMP_ASSEMBLY', True): - with captured_stdout() as out: - cfunc = jit((types.int32,), nopython=True)(outer_simple) - self.assertPreciseEqual(cfunc(1), 4) - # Check the inner function was elided from the output (which also - # guarantees it was inlined into the outer function). - asm = out.getvalue() - prefix = __name__ - self.assert_has_pattern('%s.outer_simple' % prefix, asm) - self.assert_not_has_pattern('%s.inner' % prefix, asm) - - def test_multiple_inner_functions(self): - # Same with multiple inner functions, and multiple calls to - # the same inner function (inner()). This checks that linking in - # the same library/module twice doesn't produce linker errors. 
- with override_config('DUMP_ASSEMBLY', True): - with captured_stdout() as out: - cfunc = jit((types.int32,), nopython=True)(outer_multiple) - self.assertPreciseEqual(cfunc(1), 6) - asm = out.getvalue() - prefix = __name__ - self.assert_has_pattern('%s.outer_multiple' % prefix, asm) - self.assert_not_has_pattern('%s.more' % prefix, asm) - self.assert_not_has_pattern('%s.inner' % prefix, asm) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_interproc.py b/numba/numba/tests/test_interproc.py deleted file mode 100644 index f84db11a3..000000000 --- a/numba/numba/tests/test_interproc.py +++ /dev/null @@ -1,49 +0,0 @@ -from __future__ import print_function - -import gc - -from numba import jit, int32 -from numba import unittest_support as unittest - - -def foo(a, b): - return a + b - - -def bar(a, b): - return cfoo(a, b) + b - -@jit -def inner(x, y): - return x + y - -@jit(nopython=True) -def outer(x, y): - return inner(x, y) - - -class TestInterProc(unittest.TestCase): - - def test_bar_call_foo(self): - global cfoo - cfoo = jit((int32, int32), nopython=True)(foo) - cbar = jit((int32, int32), nopython=True)(bar) - self.assertEqual(cbar(1, 2), 1 + 2 + 2) - - def test_bar_call_foo_compiled_twice(self): - # When a function is compiled twice, then called from another - # compiled function, check that the right target is called. 
- # (otherwise, LLVM would assert out or crash) - global cfoo - for i in range(2): - cfoo = jit((int32, int32), nopython=True)(foo) - gc.collect() - cbar = jit((int32, int32), nopython=True)(bar) - self.assertEqual(cbar(1, 2), 1 + 2 + 2) - - def test_callsite_compilation(self): - self.assertEqual(outer(1, 2), 1 + 2) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_intwidth.py b/numba/numba/tests/test_intwidth.py deleted file mode 100644 index d8187e8d3..000000000 --- a/numba/numba/tests/test_intwidth.py +++ /dev/null @@ -1,93 +0,0 @@ -import numba.unittest_support as unittest - -import math -import sys - -from numba import jit, utils -from .support import TestCase, tag - - -max_uint64 = 18446744073709551615 - -def usecase_uint64_global(): - return max_uint64 - -def usecase_uint64_constant(): - return 18446744073709551615 - -def usecase_uint64_func(): - return max(18446744073709551614, 18446744073709551615) - -def usecase_int64_pos(): - return 9223372036854775807 - -def usecase_int64_neg(): - return -9223372036854775808 - -def usecase_int64_func(): - return (max(9223372036854775807, -9223372036854775808) - + min(9223372036854775807, -9223372036854775808)) - - -class IntWidthTest(TestCase): - - def check_nullary_func(self, pyfunc, **kwargs): - cfunc = jit(**kwargs)(pyfunc) - self.assertPreciseEqual(cfunc(), pyfunc()) - - def test_global_uint64(self, nopython=False): - pyfunc = usecase_uint64_global - self.check_nullary_func(pyfunc, nopython=nopython) - - def test_global_uint64_npm(self): - self.test_global_uint64(nopython=True) - - def test_constant_uint64(self, nopython=False): - pyfunc = usecase_uint64_constant - self.check_nullary_func(pyfunc, nopython=nopython) - - @tag('important') - def test_constant_uint64_npm(self): - self.test_constant_uint64(nopython=True) - - def test_constant_uint64_function_call(self, nopython=False): - pyfunc = usecase_uint64_func - self.check_nullary_func(pyfunc, nopython=nopython) - - def 
test_constant_uint64_function_call_npm(self): - self.test_constant_uint64_function_call(nopython=True) - - def test_bit_length(self): - f = utils.bit_length - self.assertEqual(f(0x7f), 7) - self.assertEqual(f(-0x7f), 7) - self.assertEqual(f(0x80), 8) - self.assertEqual(f(-0x80), 7) - self.assertEqual(f(0xff), 8) - self.assertEqual(f(-0xff), 8) - self.assertEqual(f(0x100), 9) - self.assertEqual(f(-0x100), 8) - self.assertEqual(f(-0x101), 9) - self.assertEqual(f(0x7fffffff), 31) - self.assertEqual(f(-0x7fffffff), 31) - self.assertEqual(f(-0x80000000), 31) - self.assertEqual(f(0x80000000), 32) - self.assertEqual(f(0xffffffff), 32) - self.assertEqual(f(0xffffffffffffffff), 64) - self.assertEqual(f(0x10000000000000000), 65) - if utils.PYVERSION < (3, 0): - self.assertEqual(f(long(0xffffffffffffffff)), 64) - - @tag('important') - def test_constant_int64(self, nopython=False): - self.check_nullary_func(usecase_int64_pos, nopython=nopython) - self.check_nullary_func(usecase_int64_neg, nopython=nopython) - self.check_nullary_func(usecase_int64_func, nopython=nopython) - - def test_constant_int64_npm(self): - self.test_constant_int64(nopython=True) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/tests/test_ir.py b/numba/numba/tests/test_ir.py deleted file mode 100644 index 09e6936e9..000000000 --- a/numba/numba/tests/test_ir.py +++ /dev/null @@ -1,183 +0,0 @@ -from __future__ import print_function - -import textwrap - -import numba.unittest_support as unittest -from numba import compiler, ir -from numba.utils import PYVERSION, StringIO - - -def var_swapping(a, b, c, d, e): - """ - label 0: - a = arg(0, name=a) ['a'] - b = arg(1, name=b) ['b'] - c = arg(2, name=c) ['c'] - d = arg(3, name=d) ['d'] - e = arg(4, name=e) ['e'] - a.1 = b ['a.1', 'b'] - del b [] - b.1 = a ['a', 'b.1'] - del a [] - c.1 = e ['c.1', 'e'] - del e [] - d.1 = c ['c', 'd.1'] - del c [] - e.1 = d ['d', 'e.1'] - del d [] - $0.8 = a.1 + b.1 ['$0.8', 'a.1', 'b.1'] - del b.1 [] - 
del a.1 [] - $0.10 = $0.8 + c.1 ['$0.10', '$0.8', 'c.1'] - del c.1 [] - del $0.8 [] - $0.12 = $0.10 + d.1 ['$0.10', '$0.12', 'd.1'] - del d.1 [] - del $0.10 [] - $0.14 = $0.12 + e.1 ['$0.12', '$0.14', 'e.1'] - del e.1 [] - del $0.12 [] - $0.15 = cast(value=$0.14) ['$0.14', '$0.15'] - del $0.14 [] - return $0.15 ['$0.15'] - """ - a, b = b, a - c, d, e = e, c, d - return a + b + c + d + e - -def var_propagate1_pre36(a, b): - """ - label 0: - a = arg(0, name=a) ['a'] - b = arg(1, name=b) ['b'] - $0.3 = a > b ['$0.3', 'a', 'b'] - branch $0.3, 12, 18 ['$0.3'] - label 12: - del b [] - del $0.3 [] - $phi21.2 = a ['$phi21.2', 'a'] - del a [] - jump 21 [] - label 18: - del a [] - del $0.3 [] - $phi21.2 = b ['$phi21.2', 'b'] - del b [] - jump 21 [] - label 21: - $const21.1 = const(int, 5) ['$const21.1'] - $21.3 = $phi21.2 + $const21.1 ['$21.3', '$const21.1', '$phi21.2'] - del $phi21.2 [] - del $const21.1 [] - c = $21.3 ['$21.3', 'c'] - del $21.3 [] - $21.5 = cast(value=c) ['$21.5', 'c'] - del c [] - return $21.5 ['$21.5'] - """ - c = (a if a > b else b) + 5 - return c - -def var_propagate1_post36(a, b): - """ - label 0: - a = arg(0, name=a) ['a'] - b = arg(1, name=b) ['b'] - $0.3 = a > b ['$0.3', 'a', 'b'] - branch $0.3, 8, 12 ['$0.3'] - label 8: - del b [] - del $0.3 [] - $phi14.2 = a ['$phi14.2', 'a'] - del a [] - jump 14 [] - label 12: - del a [] - del $0.3 [] - $phi14.2 = b ['$phi14.2', 'b'] - del b [] - jump 14 [] - label 14: - $const14.1 = const(int, 5) ['$const14.1'] - $14.3 = $phi14.2 + $const14.1 ['$14.3', '$const14.1', '$phi14.2'] - del $phi14.2 [] - del $const14.1 [] - c = $14.3 ['$14.3', 'c'] - del $14.3 [] - $14.5 = cast(value=c) ['$14.5', 'c'] - del c [] - return $14.5 ['$14.5'] - """ - c = (a if a > b else b) + 5 - return c - - -var_propagate1 = (var_propagate1_post36 - if PYVERSION >= (3, 6) - else var_propagate1_pre36) - - -class TestIR(unittest.TestCase): - - def test_IRScope(self): - filename = "" - top = ir.Scope(parent=None, loc=ir.Loc(filename=filename, 
line=1)) - local = ir.Scope(parent=top, loc=ir.Loc(filename=filename, line=2)) - - apple = local.define('apple', loc=ir.Loc(filename=filename, line=3)) - self.assertIs(local.get('apple'), apple) - self.assertEqual(len(local.localvars), 1) - - orange = top.define('orange', loc=ir.Loc(filename=filename, line=4)) - self.assertEqual(len(local.localvars), 1) - self.assertEqual(len(top.localvars), 1) - self.assertIs(top.get('orange'), orange) - self.assertIs(local.get('orange'), orange) - - more_orange = local.define('orange', loc=ir.Loc(filename=filename, - line=5)) - self.assertIs(top.get('orange'), orange) - self.assertIsNot(local.get('orange'), not orange) - self.assertIs(local.get('orange'), more_orange) - - try: - bad_orange = local.define('orange', loc=ir.Loc(filename=filename, - line=5)) - except ir.RedefinedError: - pass - else: - self.fail("Expecting an %s" % ir.RedefinedError) - - -class TestIRDump(unittest.TestCase): - """ - Exercise the IR dump of some constructs. These tests are fragile - (may need to be updated when details of IR generation change, may - need to be skipped for some Python versions) but help find out - regressions. - """ - - def get_ir(self, pyfunc): - return compiler.run_frontend(pyfunc) - - def check_ir_dump(self, pyfunc): - func_ir = self.get_ir(pyfunc) - out = StringIO() - func_ir.dump(file=out) - expected = textwrap.dedent(pyfunc.__doc__).strip().splitlines() - got = out.getvalue().strip().splitlines() - self.assertEqual(got, expected, - "dump might need to be refreshed; here is the " - "actual dump:\n%s\n" % (out.getvalue())) - - def test_var_swapping(self): - # This exercises removal of unused temporaries. - self.check_ir_dump(var_swapping) - - def test_var_propagate1(self): - # This exercises generation of phi nodes. 
- self.check_ir_dump(var_propagate1) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_itanium_mangler.py b/numba/numba/tests/test_itanium_mangler.py deleted file mode 100644 index 7a6e56b8a..000000000 --- a/numba/numba/tests/test_itanium_mangler.py +++ /dev/null @@ -1,84 +0,0 @@ -# -*- coding: utf8 -*- -from __future__ import print_function, absolute_import - -import re - -from numba import unittest_support as unittest -from numba import itanium_mangler -from numba import int32, int64, uint32, uint64, float32, float64 -from numba.types import range_iter32_type - - -class TestItaniumManager(unittest.TestCase): - def test_ident(self): - got = itanium_mangler.mangle_identifier("apple") - expect = "5apple" - self.assertEqual(expect, got) - - got = itanium_mangler.mangle_identifier("ap_ple") - expect = "6ap_ple" - self.assertEqual(expect, got) - - got = itanium_mangler.mangle_identifier("apple213") - expect = "8apple213" - self.assertEqual(expect, got) - - def test_types(self): - got = itanium_mangler.mangle_type(int32) - expect = "i" - self.assertEqual(expect, got) - - got = itanium_mangler.mangle_type(int64) - expect = "x" - self.assertEqual(expect, got) - - got = itanium_mangler.mangle_type(uint32) - expect = "j" - self.assertEqual(expect, got) - - got = itanium_mangler.mangle_type(uint64) - expect = "y" - self.assertEqual(expect, got) - - got = itanium_mangler.mangle_type(float32) - expect = "f" - self.assertEqual(expect, got) - - got = itanium_mangler.mangle_type(float64) - expect = "d" - self.assertEqual(expect, got) - - def test_function(self): - got = itanium_mangler.mangle("what", [int32, float32]) - expect = "_Z4whatif" - self.assertEqual(expect, got) - - got = itanium_mangler.mangle("a_little_brown_fox", [uint64, - uint32, - float64]) - expect = "_Z18a_little_brown_foxyjd" - self.assertEqual(expect, got) - - def test_custom_type(self): - got = itanium_mangler.mangle_type(range_iter32_type) - name = str(range_iter32_type) - 
expect = "{n}{name}".format(n=len(name), name=name) - self.assertEqual(expect, got) - - def test_mangle_literal(self): - # check int - got = itanium_mangler.mangle_value(123) - expect = "Li123E" - self.assertEqual(expect, got) - # check float (not handled using standard) - got = itanium_mangler.mangle_value(12.3) - self.assertRegexpMatches(got, r'^\d+_12\$[0-9a-z][0-9a-z]3$') - - def test_mangle_unicode(self): - name = u'f∂ƒ©z' - got = itanium_mangler.mangle_identifier(name) - self.assertRegexpMatches(got, r'^\d+f(\$[a-z0-9][a-z0-9])+z$') - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_iteration.py b/numba/numba/tests/test_iteration.py deleted file mode 100644 index 2ab1a4feb..000000000 --- a/numba/numba/tests/test_iteration.py +++ /dev/null @@ -1,198 +0,0 @@ -from __future__ import print_function - -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import numpy_support, types -from .support import TestCase, tag - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -force_pyobj_flags = Flags() -force_pyobj_flags.set("force_pyobject") - -no_pyobj_flags = Flags() - - -def int_tuple_iter_usecase(): - res = 0 - for i in (1, 2, 99, 3): - res += i - return res - -def float_tuple_iter_usecase(): - res = 0.0 - for i in (1.5, 2.0, 99.3, 3.4): - res += i - return res - -def tuple_tuple_iter_usecase(): - # Recursively homogeneous tuple type - res = 0.0 - for i in ((1.5, 2.0), (99.3, 3.4), (1.8, 2.5)): - for j in i: - res += j - res = res * 2 - return res - -def enumerate_nested_tuple_usecase(): - res = 0.0 - for i, j in enumerate(((1.5, 2.0), (99.3, 3.4), (1.8, 2.5))): - for l in j: - res += i * l - res = res * 2 - return res - -def nested_enumerate_usecase(): - res = 0.0 - for i, (j, k) in enumerate(enumerate(((1.5, 2.0), (99.3, 3.4), (1.8, 2.5)))): - for l in k: - res += i * j * l - res = res * 2 - return res - -def 
scalar_iter_usecase(iterable): - res = 0.0 - for x in iterable: - res += x - return res - -def record_iter_usecase(iterable): - res = 0.0 - for x in iterable: - res += x.a * x.b - return res - -def record_iter_mutate_usecase(iterable): - for x in iterable: - x.a = x.a + x.b - - -record_dtype = np.dtype([('a', np.float64), - ('b', np.int32), - ]) - - -class IterationTest(TestCase): - - def run_nullary_func(self, pyfunc, flags): - cr = compile_isolated(pyfunc, (), flags=flags) - cfunc = cr.entry_point - expected = pyfunc() - self.assertPreciseEqual(cfunc(), expected) - - def test_int_tuple_iter(self, flags=force_pyobj_flags): - self.run_nullary_func(int_tuple_iter_usecase, flags) - - @tag('important') - def test_int_tuple_iter_npm(self): - self.test_int_tuple_iter(flags=no_pyobj_flags) - - # Type inference on tuples used to be hardcoded for ints, check - # that it works for other types. - - def test_float_tuple_iter(self, flags=force_pyobj_flags): - self.run_nullary_func(float_tuple_iter_usecase, flags) - - def test_float_tuple_iter_npm(self): - self.test_float_tuple_iter(flags=no_pyobj_flags) - - def test_tuple_tuple_iter(self, flags=force_pyobj_flags): - self.run_nullary_func(tuple_tuple_iter_usecase, flags) - - @tag('important') - def test_tuple_tuple_iter_npm(self): - self.test_tuple_tuple_iter(flags=no_pyobj_flags) - - def test_enumerate_nested_tuple(self, flags=force_pyobj_flags): - self.run_nullary_func(enumerate_nested_tuple_usecase, flags) - - @tag('important') - def test_enumerate_nested_tuple_npm(self): - self.test_enumerate_nested_tuple(flags=no_pyobj_flags) - - def test_nested_enumerate(self, flags=force_pyobj_flags): - self.run_nullary_func(nested_enumerate_usecase, flags) - - @tag('important') - def test_nested_enumerate_npm(self): - self.test_nested_enumerate(flags=no_pyobj_flags) - - def run_array_1d(self, item_type, arg, flags): - # Iteration over a 1d numpy array - pyfunc = scalar_iter_usecase - cr = compile_isolated(pyfunc, (types.Array(item_type, 
1, 'A'),), - item_type, flags=flags) - cfunc = cr.entry_point - self.assertPreciseEqual(cfunc(arg), pyfunc(arg)) - - def test_array_1d_float(self, flags=force_pyobj_flags): - self.run_array_1d(types.float64, np.arange(5.0), flags) - - def test_array_1d_float_npm(self): - self.test_array_1d_float(no_pyobj_flags) - - def test_array_1d_complex(self, flags=force_pyobj_flags): - self.run_array_1d(types.complex128, np.arange(5.0) * 1.0j, flags) - - @tag('important') - def test_array_1d_complex_npm(self): - self.test_array_1d_complex(no_pyobj_flags) - - def test_array_1d_record(self, flags=force_pyobj_flags): - pyfunc = record_iter_usecase - item_type = numpy_support.from_dtype(record_dtype) - cr = compile_isolated(pyfunc, (types.Array(item_type, 1, 'A'),), - flags=flags) - cfunc = cr.entry_point - arr = np.recarray(3, dtype=record_dtype) - for i in range(3): - arr[i].a = float(i * 2) - arr[i].b = i + 2 - got = pyfunc(arr) - self.assertPreciseEqual(cfunc(arr), got) - - def test_array_1d_record_npm(self): - self.test_array_1d_record(no_pyobj_flags) - - def test_array_1d_record_mutate_npm(self, flags=no_pyobj_flags): - pyfunc = record_iter_mutate_usecase - item_type = numpy_support.from_dtype(record_dtype) - cr = compile_isolated(pyfunc, (types.Array(item_type, 1, 'A'),), - flags=flags) - cfunc = cr.entry_point - arr = np.recarray(3, dtype=record_dtype) - for i in range(3): - arr[i].a = float(i * 2) - arr[i].b = i + 2 - expected = arr.copy() - pyfunc(expected) - got = arr.copy() - cfunc(got) - self.assertPreciseEqual(expected, got) - - def test_array_1d_record_mutate(self): - self.test_array_1d_record_mutate_npm(flags=force_pyobj_flags) - - def test_tuple_iter_issue1504(self): - # The issue is due to `row` being typed as heterogeneous tuple. 
- def bar(x, y): - total = 0 - for row in zip(x, y): - total += row[0] + row[1] - - return total - - x = y = np.arange(3, dtype=np.int32) - aryty = types.Array(types.int32, 1, 'C') - cres = compile_isolated(bar, (aryty, aryty)) - - expect = bar(x, y) - got = cres.entry_point(x, y) - self.assertEqual(expect, got) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_jitclasses.py b/numba/numba/tests/test_jitclasses.py deleted file mode 100644 index eefd19d88..000000000 --- a/numba/numba/tests/test_jitclasses.py +++ /dev/null @@ -1,644 +0,0 @@ -from __future__ import absolute_import, print_function, division - -from collections import OrderedDict -import ctypes -import sys - -import numpy as np - -from numba import (float32, float64, int16, int32, boolean, deferred_type, - optional) -from numba import njit, typeof, errors -from numba import unittest_support as unittest -from numba import jitclass -from .support import TestCase, MemoryLeakMixin, tag -from numba.jitclass import _box -from numba.runtime.nrt import MemInfo -from numba.errors import LoweringError - - -def _get_meminfo(box): - ptr = _box.box_get_meminfoptr(box) - mi = MemInfo(ptr) - mi.acquire() - return mi - - -class TestJitClass(TestCase, MemoryLeakMixin): - - def _check_spec(self, spec): - @jitclass(spec) - class Test(object): - - def __init__(self): - pass - - clsty = Test.class_type.instance_type - names = list(clsty.struct.keys()) - values = list(clsty.struct.values()) - self.assertEqual(names[0], 'x') - self.assertEqual(names[1], 'y') - self.assertEqual(values[0], int32) - self.assertEqual(values[1], float32) - - def test_ordereddict_spec(self): - spec = OrderedDict() - spec['x'] = int32 - spec['y'] = float32 - self._check_spec(spec) - - def test_list_spec(self): - spec = [('x', int32), - ('y', float32)] - self._check_spec(spec) - - def test_spec_errors(self): - spec1 = [('x', int), ('y', float32[:])] - spec2 = [(1, int32), ('y', float32[:])] - - class Test(object): - 
- def __init__(self): - pass - - with self.assertRaises(TypeError) as raises: - jitclass(spec1)(Test) - self.assertIn("spec values should be Numba type instances", - str(raises.exception)) - with self.assertRaises(TypeError) as raises: - jitclass(spec2)(Test) - self.assertEqual(str(raises.exception), - "spec keys should be strings, got 1") - - def _make_Float2AndArray(self): - spec = OrderedDict() - spec['x'] = float32 - spec['y'] = float32 - spec['arr'] = float32[:] - - @jitclass(spec) - class Float2AndArray(object): - - def __init__(self, x, y, arr): - self.x = x - self.y = y - self.arr = arr - - def add(self, val): - self.x += val - self.y += val - return val - - return Float2AndArray - - def _make_Vector2(self): - spec = OrderedDict() - spec['x'] = int32 - spec['y'] = int32 - - @jitclass(spec) - class Vector2(object): - - def __init__(self, x, y): - self.x = x - self.y = y - - return Vector2 - - @tag('important') - def test_jit_class_1(self): - Float2AndArray = self._make_Float2AndArray() - Vector2 = self._make_Vector2() - - @njit - def bar(obj): - return obj.x + obj.y - - @njit - def foo(a): - obj = Float2AndArray(1, 2, a) - obj.add(123) - - vec = Vector2(3, 4) - return bar(obj), bar(vec), obj.arr - - inp = np.ones(10, dtype=np.float32) - a, b, c = foo(inp) - self.assertEqual(a, 123 + 1 + 123 + 2) - self.assertEqual(b, 3 + 4) - self.assertPreciseEqual(c, inp) - - @tag('important') - def test_jitclass_usage_from_python(self): - Float2AndArray = self._make_Float2AndArray() - - @njit - def identity(obj): - return obj - - @njit - def retrieve_attributes(obj): - return obj.x, obj.y, obj.arr - - arr = np.arange(10, dtype=np.float32) - obj = Float2AndArray(1, 2, arr) - obj_meminfo = _get_meminfo(obj) - self.assertEqual(obj_meminfo.refcount, 2) - self.assertEqual(obj_meminfo.data, _box.box_get_dataptr(obj)) - self.assertEqual(obj._numba_type_.class_type, - Float2AndArray.class_type) - # Use jit class instance in numba - other = identity(obj) - other_meminfo = 
_get_meminfo(other) # duplicates MemInfo object to obj - self.assertEqual(obj_meminfo.refcount, 4) - self.assertEqual(other_meminfo.refcount, 4) - self.assertEqual(other_meminfo.data, _box.box_get_dataptr(other)) - self.assertEqual(other_meminfo.data, obj_meminfo.data) - - # Check dtor - del other, other_meminfo - self.assertEqual(obj_meminfo.refcount, 2) - - # Check attributes - out_x, out_y, out_arr = retrieve_attributes(obj) - self.assertEqual(out_x, 1) - self.assertEqual(out_y, 2) - self.assertIs(out_arr, arr) - - # Access attributes from python - self.assertEqual(obj.x, 1) - self.assertEqual(obj.y, 2) - self.assertIs(obj.arr, arr) - - # Access methods from python - self.assertEqual(obj.add(123), 123) - self.assertEqual(obj.x, 1 + 123) - self.assertEqual(obj.y, 2 + 123) - - # Setter from python - obj.x = 333 - obj.y = 444 - obj.arr = newarr = np.arange(5, dtype=np.float32) - self.assertEqual(obj.x, 333) - self.assertEqual(obj.y, 444) - self.assertIs(obj.arr, newarr) - - def test_jitclass_datalayout(self): - spec = OrderedDict() - # Boolean has different layout as value vs data - spec['val'] = boolean - - @jitclass(spec) - class Foo(object): - - def __init__(self, val): - self.val = val - - self.assertTrue(Foo(True).val) - self.assertFalse(Foo(False).val) - - @tag('important') - def test_deferred_type(self): - node_type = deferred_type() - - spec = OrderedDict() - spec['data'] = float32 - spec['next'] = optional(node_type) - - @njit - def get_data(node): - return node.data - - @jitclass(spec) - class LinkedNode(object): - - def __init__(self, data, next): - self.data = data - self.next = next - - def get_next_data(self): - # use deferred type as argument - return get_data(self.next) - - def append_to_tail(self, other): - cur = self - while cur.next is not None: - cur = cur.next - cur.next = other - - - node_type.define(LinkedNode.class_type.instance_type) - - first = LinkedNode(123, None) - self.assertEqual(first.data, 123) - self.assertIsNone(first.next) - - 
second = LinkedNode(321, first) - - first_meminfo = _get_meminfo(first) - second_meminfo = _get_meminfo(second) - self.assertEqual(first_meminfo.refcount, 3) - self.assertEqual(second.next.data, first.data) - self.assertEqual(first_meminfo.refcount, 3) - self.assertEqual(second_meminfo.refcount, 2) - - # Test using deferred type as argument - first_val = second.get_next_data() - self.assertEqual(first_val, first.data) - - # Check setattr (issue #2606) - self.assertIsNone(first.next) - second.append_to_tail(LinkedNode(567, None)) - self.assertIsNotNone(first.next) - self.assertEqual(first.next.data, 567) - self.assertIsNone(first.next.next) - second.append_to_tail(LinkedNode(678, None)) - self.assertIsNotNone(first.next.next) - self.assertEqual(first.next.next.data, 678) - - # Check ownership - self.assertEqual(first_meminfo.refcount, 3) - del second, second_meminfo - self.assertEqual(first_meminfo.refcount, 2) - - def test_c_structure(self): - spec = OrderedDict() - spec['a'] = int32 - spec['b'] = int16 - spec['c'] = float64 - - @jitclass(spec) - class Struct(object): - - def __init__(self, a, b, c): - self.a = a - self.b = b - self.c = c - - st = Struct(0xabcd, 0xef, 3.1415) - - class CStruct(ctypes.Structure): - _fields_ = [ - ('a', ctypes.c_int32), - ('b', ctypes.c_int16), - ('c', ctypes.c_double), - ] - - ptr = ctypes.c_void_p(_box.box_get_dataptr(st)) - cstruct = ctypes.cast(ptr, ctypes.POINTER(CStruct))[0] - self.assertEqual(cstruct.a, st.a) - self.assertEqual(cstruct.b, st.b) - self.assertEqual(cstruct.c, st.c) - - def test_is(self): - Vector = self._make_Vector2() - vec_a = Vector(1, 2) - vec_b = Vector(1, 2) - - @njit - def do_is(a, b): - return a is b - - with self.assertRaises(LoweringError) as raises: - # trigger compilation - do_is(vec_a, vec_a) - self.assertIn('no default `is` implementation', str(raises.exception)) - - def test_isinstance(self): - Vector2 = self._make_Vector2() - vec = Vector2(1, 2) - self.assertIsInstance(vec, Vector2) - - def 
test_subclassing(self): - Vector2 = self._make_Vector2() - with self.assertRaises(TypeError) as raises: - class SubV(Vector2): - pass - self.assertEqual(str(raises.exception), - "cannot subclass from a jitclass") - - def test_base_class(self): - class Base(object): - - def what(self): - return self.attr - - @jitclass([('attr', int32)]) - class Test(Base): - - def __init__(self, attr): - self.attr = attr - - obj = Test(123) - self.assertEqual(obj.what(), 123) - - def test_globals(self): - - class Mine(object): - constant = 123 - - def __init__(self): - pass - - with self.assertRaises(TypeError) as raises: - jitclass(())(Mine) - - self.assertEqual(str(raises.exception), - "class members are not yet supported: constant") - - @tag('important') - def test_user_getter_setter(self): - @jitclass([('attr', int32)]) - class Foo(object): - - def __init__(self, attr): - self.attr = attr - - @property - def value(self): - return self.attr + 1 - - @value.setter - def value(self, val): - self.attr = val - 1 - - foo = Foo(123) - self.assertEqual(foo.attr, 123) - # Getter - self.assertEqual(foo.value, 123 + 1) - # Setter - foo.value = 789 - self.assertEqual(foo.attr, 789 - 1) - self.assertEqual(foo.value, 789) - - # Test nopython mode usage of getter and setter - @njit - def bar(foo, val): - a = foo.value - foo.value = val - b = foo.value - c = foo.attr - return a, b, c - - a, b, c = bar(foo, 567) - self.assertEqual(a, 789) - self.assertEqual(b, 567) - self.assertEqual(c, 567 - 1) - - def test_user_deleter_error(self): - class Foo(object): - - def __init__(self): - pass - - @property - def value(self): - return 1 - - @value.deleter - def value(self): - pass - - with self.assertRaises(TypeError) as raises: - jitclass([])(Foo) - self.assertEqual(str(raises.exception), - "deleter is not supported: value") - - def test_name_shadowing_error(self): - class Foo(object): - - def __init__(self): - pass - - @property - def my_property(self): - pass - - def my_method(self): - pass - - with 
self.assertRaises(NameError) as raises: - jitclass([('my_property', int32)])(Foo) - self.assertEqual(str(raises.exception), 'name shadowing: my_property') - - with self.assertRaises(NameError) as raises: - jitclass([('my_method', int32)])(Foo) - self.assertEqual(str(raises.exception), 'name shadowing: my_method') - - def test_distinct_classes(self): - # Different classes with the same names shouldn't confuse the compiler - @jitclass([('x', int32)]) - class Foo(object): - - def __init__(self, x): - self.x = x + 2 - - def run(self): - return self.x + 1 - - FirstFoo = Foo - - @jitclass([('x', int32)]) - class Foo(object): - - def __init__(self, x): - self.x = x - 2 - - def run(self): - return self.x - 1 - - SecondFoo = Foo - foo = FirstFoo(5) - self.assertEqual(foo.x, 7) - self.assertEqual(foo.run(), 8) - foo = SecondFoo(5) - self.assertEqual(foo.x, 3) - self.assertEqual(foo.run(), 2) - - def test_parameterized(self): - class MyClass(object): - - def __init__(self, value): - self.value = value - - def create_my_class(value): - cls = jitclass([('value', typeof(value))])(MyClass) - return cls(value) - - a = create_my_class(123) - self.assertEqual(a.value, 123) - - b = create_my_class(12.3) - self.assertEqual(b.value, 12.3) - - c = create_my_class(np.array([123])) - np.testing.assert_equal(c.value, [123]) - - d = create_my_class(np.array([12.3])) - np.testing.assert_equal(d.value, [12.3]) - - @tag('important') - def test_protected_attrs(self): - spec = { - 'value': int32, - '_value': float32, - '__value': int32, - '__value__': int32, - } - - @jitclass(spec) - class MyClass(object): - - def __init__(self, value): - self.value = value - self._value = value / 2 - self.__value = value * 2 - self.__value__ = value - 1 - - @property - def private_value(self): - return self.__value - - @property - def _inner_value(self): - return self._value - - @_inner_value.setter - def _inner_value(self, v): - self._value = v - - @property - def __private_value(self): - return self.__value - 
- @__private_value.setter - def __private_value(self, v): - self.__value = v - - def swap_private_value(self, new): - old = self.__private_value - self.__private_value = new - return old - - def _protected_method(self, factor): - return self._value * factor - - def __private_method(self, factor): - return self.__value * factor - - def check_private_method(self, factor): - return self.__private_method(factor) - - - value = 123 - inst = MyClass(value) - # test attributes - self.assertEqual(inst.value, value) - self.assertEqual(inst._value, value / 2) - self.assertEqual(inst.private_value, value * 2) - # test properties - self.assertEqual(inst._inner_value, inst._value) - freeze_inst_value = inst._value - inst._inner_value -= 1 - self.assertEqual(inst._inner_value, freeze_inst_value - 1) - - self.assertEqual(inst.swap_private_value(321), value * 2) - self.assertEqual(inst.swap_private_value(value * 2), 321) - # test methods - self.assertEqual(inst._protected_method(3), inst._value * 3) - self.assertEqual(inst.check_private_method(3), inst.private_value * 3) - # test special - self.assertEqual(inst.__value__, value - 1) - inst.__value__ -= 100 - self.assertEqual(inst.__value__, value - 101) - - # test errors - @njit - def access_dunder(inst): - return inst.__value - - with self.assertRaises(errors.TypingError) as raises: - access_dunder(inst) - # It will appear as "_TestJitClass__value" because the `access_dunder` - # is under the scope of 'TestJitClass'. - self.assertIn('_TestJitClass__value', str(raises.exception)) - - with self.assertRaises(AttributeError) as raises: - access_dunder.py_func(inst) - self.assertIn('_TestJitClass__value', str(raises.exception)) - - @unittest.skipIf(sys.version_info < (3,), "Python 3-specific test") - def test_annotations(self): - """ - Methods with annotations should compile fine (issue #1911). 
- """ - from .annotation_usecases import AnnotatedClass - - spec = {'x': int32} - cls = jitclass(spec)(AnnotatedClass) - - obj = cls(5) - self.assertEqual(obj.x, 5) - self.assertEqual(obj.add(2), 7) - - def test_docstring(self): - - @jitclass([]) - class Apple(object): - "Class docstring" - def __init__(self): - "init docstring" - - def foo(self): - "foo method docstring" - - @property - def aval(self): - "aval property docstring" - - self.assertEqual(Apple.__doc__, 'Class docstring') - self.assertEqual(Apple.__init__.__doc__, 'init docstring') - self.assertEqual(Apple.foo.__doc__, 'foo method docstring') - self.assertEqual(Apple.aval.__doc__, 'aval property docstring') - - def test_kwargs(self): - spec = [('a', int32), - ('b', float64)] - - @jitclass(spec) - class TestClass(object): - def __init__(self, x, y, z): - self.a = x * y - self.b = z - - x = 2 - y = 2 - z = 1.1 - kwargs = {'y': y, 'z': z} - tc = TestClass(x=2, **kwargs) - self.assertEqual(tc.a, x * y) - self.assertEqual(tc.b, z) - - def test_generator_method(self): - spec = [] - - @jitclass(spec) - class TestClass(object): - def __init__(self): - pass - - def gen(self, niter): - for i in range(niter): - yield np.arange(i) - - def expected_gen(niter): - for i in range(niter): - yield np.arange(i) - - for niter in range(10): - for expect, got in zip(expected_gen(niter), TestClass().gen(niter)): - self.assertPreciseEqual(expect, got) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_jitmethod.py b/numba/numba/tests/test_jitmethod.py deleted file mode 100644 index 571d27f45..000000000 --- a/numba/numba/tests/test_jitmethod.py +++ /dev/null @@ -1,71 +0,0 @@ -import numba.unittest_support as unittest - -import numpy as np - -from numba import config, jit, types -from numba.compiler import compile_isolated -from numba.tests.support import override_config - - -class TestJITMethod(unittest.TestCase): - def test_bound_jit_method_with_loop_lift(self): - class Something(object): - 
def __init__(self, x0): - self.x0 = x0 - - @jit - def method(self, x): - a = np.empty(shape=5, dtype=np.float32) - x0 = self.x0 - - for i in range(a.shape[0]): - a[i] = x0 * x - - return a - - something = Something(3) - np.testing.assert_array_equal(something.method(5), - np.array([15, 15, 15, 15, 15], dtype=np.float32)) - - # Check that loop lifting in nopython mode was successful - [cres] = something.method.overloads.values() - jitloop = cres.lifted[0] - [loopcres] = jitloop.overloads.values() - self.assertTrue(loopcres.fndesc.native) - - def test_unbound_jit_method(self): - class Something(object): - def __init__(self, x0): - self.x0 = x0 - - @jit - def method(self): - return self.x0 - - something = Something(3) - self.assertEquals(Something.method(something), 3) - - -class TestDisabledJIT(unittest.TestCase): - def test_decorated_function(self): - with override_config('DISABLE_JIT', True): - def method(x): - return x - jitted = jit(method) - - self.assertEqual(jitted, method) - self.assertEqual(10, method(10)) - self.assertEqual(10, jitted(10)) - - def test_decorated_function_with_kwargs(self): - with override_config('DISABLE_JIT', True): - def method(x): - return x - jitted = jit(nopython=True)(method) - - self.assertEqual(jitted, method) - self.assertEqual(10, method(10)) - self.assertEqual(10, jitted(10)) - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_linalg.py b/numba/numba/tests/test_linalg.py deleted file mode 100644 index 46bff63e8..000000000 --- a/numba/numba/tests/test_linalg.py +++ /dev/null @@ -1,2415 +0,0 @@ -from __future__ import division, print_function - -import contextlib -import gc -from itertools import product, cycle -import sys -import warnings -from numbers import Number, Integral - -import numpy as np - -from numba import unittest_support as unittest -from numba import jit, errors -from numba.numpy_support import version as numpy_version -from .support import TestCase, tag -from .matmul_usecase import 
matmul_usecase, needs_matmul, needs_blas - -try: - import scipy.linalg.cython_lapack - has_lapack = True -except ImportError: - has_lapack = False - -needs_lapack = unittest.skipUnless(has_lapack, - "LAPACK needs Scipy 0.16+") - - -def dot2(a, b): - return np.dot(a, b) - - -def dot3(a, b, out): - return np.dot(a, b, out=out) - - -def vdot(a, b): - return np.vdot(a, b) - - -class TestProduct(TestCase): - """ - Tests for dot products. - """ - - dtypes = (np.float64, np.float32, np.complex128, np.complex64) - - def setUp(self): - # Collect leftovers from previous test cases before checking for leaks - gc.collect() - - def sample_vector(self, n, dtype): - # Be careful to generate only exactly representable float values, - # to avoid rounding discrepancies between Numpy and Numba - base = np.arange(n) - if issubclass(dtype, np.complexfloating): - return (base * (1 - 0.5j) + 2j).astype(dtype) - else: - return (base * 0.5 - 1).astype(dtype) - - def sample_matrix(self, m, n, dtype): - return self.sample_vector(m * n, dtype).reshape((m, n)) - - @contextlib.contextmanager - def check_contiguity_warning(self, pyfunc): - """ - Check performance warning(s) for non-contiguity. 
- """ - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', errors.PerformanceWarning) - yield - self.assertGreaterEqual(len(w), 1) - self.assertIs(w[0].category, errors.PerformanceWarning) - self.assertIn("faster on contiguous arrays", str(w[0].message)) - self.assertEqual(w[0].filename, pyfunc.__code__.co_filename) - # This works because our functions are one-liners - self.assertEqual(w[0].lineno, pyfunc.__code__.co_firstlineno + 1) - - def check_func(self, pyfunc, cfunc, args): - with self.assertNoNRTLeak(): - expected = pyfunc(*args) - got = cfunc(*args) - self.assertPreciseEqual(got, expected, ignore_sign_on_zero=True) - del got, expected - - def check_func_out(self, pyfunc, cfunc, args, out): - with self.assertNoNRTLeak(): - expected = np.copy(out) - got = np.copy(out) - self.assertIs(pyfunc(*args, out=expected), expected) - self.assertIs(cfunc(*args, out=got), got) - self.assertPreciseEqual(got, expected, ignore_sign_on_zero=True) - del got, expected - - def assert_mismatching_sizes(self, cfunc, args, is_out=False): - with self.assertRaises(ValueError) as raises: - cfunc(*args) - msg = ("incompatible output array size" if is_out else - "incompatible array sizes") - self.assertIn(msg, str(raises.exception)) - - def assert_mismatching_dtypes(self, cfunc, args, func_name="np.dot()"): - with self.assertRaises(errors.TypingError) as raises: - cfunc(*args) - self.assertIn("%s arguments must all have the same dtype" - % (func_name,), - str(raises.exception)) - - @needs_blas - def check_dot_vv(self, pyfunc, func_name): - n = 3 - cfunc = jit(nopython=True)(pyfunc) - for dtype in self.dtypes: - a = self.sample_vector(n, dtype) - b = self.sample_vector(n, dtype) - self.check_func(pyfunc, cfunc, (a, b)) - # Non-contiguous - self.check_func(pyfunc, cfunc, (a[::-1], b[::-1])) - - # Mismatching sizes - a = self.sample_vector(n - 1, np.float64) - b = self.sample_vector(n, np.float64) - self.assert_mismatching_sizes(cfunc, (a, b)) - # 
Mismatching dtypes - a = self.sample_vector(n, np.float32) - b = self.sample_vector(n, np.float64) - self.assert_mismatching_dtypes(cfunc, (a, b), func_name=func_name) - - def test_dot_vv(self): - """ - Test vector * vector np.dot() - """ - self.check_dot_vv(dot2, "np.dot()") - - def test_vdot(self): - """ - Test np.vdot() - """ - self.check_dot_vv(vdot, "np.vdot()") - - @needs_blas - def check_dot_vm(self, pyfunc2, pyfunc3, func_name): - m, n = 2, 3 - - def samples(m, n): - for order in 'CF': - a = self.sample_matrix(m, n, np.float64).copy(order=order) - b = self.sample_vector(n, np.float64) - yield a, b - for dtype in self.dtypes: - a = self.sample_matrix(m, n, dtype) - b = self.sample_vector(n, dtype) - yield a, b - # Non-contiguous - yield a[::-1], b[::-1] - - cfunc2 = jit(nopython=True)(pyfunc2) - if pyfunc3 is not None: - cfunc3 = jit(nopython=True)(pyfunc3) - for a, b in samples(m, n): - self.check_func(pyfunc2, cfunc2, (a, b)) - self.check_func(pyfunc2, cfunc2, (b, a.T)) - if pyfunc3 is not None: - for a, b in samples(m, n): - out = np.empty(m, dtype=a.dtype) - self.check_func_out(pyfunc3, cfunc3, (a, b), out) - self.check_func_out(pyfunc3, cfunc3, (b, a.T), out) - - # Mismatching sizes - a = self.sample_matrix(m, n - 1, np.float64) - b = self.sample_vector(n, np.float64) - self.assert_mismatching_sizes(cfunc2, (a, b)) - self.assert_mismatching_sizes(cfunc2, (b, a.T)) - if pyfunc3 is not None: - out = np.empty(m, np.float64) - self.assert_mismatching_sizes(cfunc3, (a, b, out)) - self.assert_mismatching_sizes(cfunc3, (b, a.T, out)) - a = self.sample_matrix(m, m, np.float64) - b = self.sample_vector(m, np.float64) - out = np.empty(m - 1, np.float64) - self.assert_mismatching_sizes(cfunc3, (a, b, out), is_out=True) - self.assert_mismatching_sizes(cfunc3, (b, a.T, out), is_out=True) - # Mismatching dtypes - a = self.sample_matrix(m, n, np.float32) - b = self.sample_vector(n, np.float64) - self.assert_mismatching_dtypes(cfunc2, (a, b), func_name) - if pyfunc3 is 
not None: - a = self.sample_matrix(m, n, np.float64) - b = self.sample_vector(n, np.float64) - out = np.empty(m, np.float32) - self.assert_mismatching_dtypes(cfunc3, (a, b, out), func_name) - - def test_dot_vm(self): - """ - Test vector * matrix and matrix * vector np.dot() - """ - self.check_dot_vm(dot2, dot3, "np.dot()") - - @needs_blas - def check_dot_mm(self, pyfunc2, pyfunc3, func_name): - - def samples(m, n, k): - for order_a, order_b in product('CF', 'CF'): - a = self.sample_matrix(m, k, np.float64).copy(order=order_a) - b = self.sample_matrix(k, n, np.float64).copy(order=order_b) - yield a, b - for dtype in self.dtypes: - a = self.sample_matrix(m, k, dtype) - b = self.sample_matrix(k, n, dtype) - yield a, b - # Non-contiguous - yield a[::-1], b[::-1] - - cfunc2 = jit(nopython=True)(pyfunc2) - if pyfunc3 is not None: - cfunc3 = jit(nopython=True)(pyfunc3) - - # Test generic matrix * matrix as well as "degenerate" cases - # where one of the outer dimensions is 1 (i.e. really represents - # a vector, which may select a different implementation) - for m, n, k in [(2, 3, 4), # Generic matrix * matrix - (1, 3, 4), # 2d vector * matrix - (1, 1, 4), # 2d vector * 2d vector - ]: - for a, b in samples(m, n, k): - self.check_func(pyfunc2, cfunc2, (a, b)) - self.check_func(pyfunc2, cfunc2, (b.T, a.T)) - if pyfunc3 is not None: - for a, b in samples(m, n, k): - out = np.empty((m, n), dtype=a.dtype) - self.check_func_out(pyfunc3, cfunc3, (a, b), out) - out = np.empty((n, m), dtype=a.dtype) - self.check_func_out(pyfunc3, cfunc3, (b.T, a.T), out) - - # Mismatching sizes - m, n, k = 2, 3, 4 - a = self.sample_matrix(m, k - 1, np.float64) - b = self.sample_matrix(k, n, np.float64) - self.assert_mismatching_sizes(cfunc2, (a, b)) - if pyfunc3 is not None: - out = np.empty((m, n), np.float64) - self.assert_mismatching_sizes(cfunc3, (a, b, out)) - a = self.sample_matrix(m, k, np.float64) - b = self.sample_matrix(k, n, np.float64) - out = np.empty((m, n - 1), np.float64) - 
self.assert_mismatching_sizes(cfunc3, (a, b, out), is_out=True) - # Mismatching dtypes - a = self.sample_matrix(m, k, np.float32) - b = self.sample_matrix(k, n, np.float64) - self.assert_mismatching_dtypes(cfunc2, (a, b), func_name) - if pyfunc3 is not None: - a = self.sample_matrix(m, k, np.float64) - b = self.sample_matrix(k, n, np.float64) - out = np.empty((m, n), np.float32) - self.assert_mismatching_dtypes(cfunc3, (a, b, out), func_name) - - @tag('important') - def test_dot_mm(self): - """ - Test matrix * matrix np.dot() - """ - self.check_dot_mm(dot2, dot3, "np.dot()") - - @needs_matmul - def test_matmul_vv(self): - """ - Test vector @ vector - """ - self.check_dot_vv(matmul_usecase, "'@'") - - @needs_matmul - def test_matmul_vm(self): - """ - Test vector @ matrix and matrix @ vector - """ - self.check_dot_vm(matmul_usecase, None, "'@'") - - @needs_matmul - def test_matmul_mm(self): - """ - Test matrix @ matrix - """ - self.check_dot_mm(matmul_usecase, None, "'@'") - - @needs_blas - def test_contiguity_warnings(self): - m, k, n = 2, 3, 4 - dtype = np.float64 - a = self.sample_matrix(m, k, dtype)[::-1] - b = self.sample_matrix(k, n, dtype)[::-1] - out = np.empty((m, n), dtype) - - cfunc = jit(nopython=True)(dot2) - with self.check_contiguity_warning(cfunc.py_func): - cfunc(a, b) - cfunc = jit(nopython=True)(dot3) - with self.check_contiguity_warning(cfunc.py_func): - cfunc(a, b, out) - - a = self.sample_vector(n, dtype)[::-1] - b = self.sample_vector(n, dtype)[::-1] - - cfunc = jit(nopython=True)(vdot) - with self.check_contiguity_warning(cfunc.py_func): - cfunc(a, b) - - -# Implementation definitions for the purpose of jitting. 
- -def invert_matrix(a): - return np.linalg.inv(a) - - -def cholesky_matrix(a): - return np.linalg.cholesky(a) - - -def eig_matrix(a): - return np.linalg.eig(a) - - -def eigvals_matrix(a): - return np.linalg.eigvals(a) - - -def eigh_matrix(a): - return np.linalg.eigh(a) - - -def eigvalsh_matrix(a): - return np.linalg.eigvalsh(a) - - -def svd_matrix(a, full_matrices=1): - return np.linalg.svd(a, full_matrices) - - -def qr_matrix(a): - return np.linalg.qr(a) - - -def lstsq_system(A, B, rcond=-1): - return np.linalg.lstsq(A, B, rcond) - - -def solve_system(A, B): - return np.linalg.solve(A, B) - - -def pinv_matrix(A, rcond=1e-15): # 1e-15 from numpy impl - return np.linalg.pinv(A) - - -def slogdet_matrix(a): - return np.linalg.slogdet(a) - - -def det_matrix(a): - return np.linalg.det(a) - - -def norm_matrix(a, ord=None): - return np.linalg.norm(a, ord) - - -def cond_matrix(a, p=None): - return np.linalg.cond(a, p) - - -def matrix_rank_matrix(a, tol=None): - return np.linalg.matrix_rank(a, tol) - - -def matrix_power_matrix(a, n): - return np.linalg.matrix_power(a, n) - - -def trace_matrix(a, offset=0): - return np.trace(a, offset) - - -def trace_matrix_no_offset(a): - return np.trace(a) - - -if numpy_version >= (1, 9): - def outer_matrix(a, b, out=None): - return np.outer(a, b, out=out) -else: - def outer_matrix(a, b): - return np.outer(a, b) - - -def kron_matrix(a, b): - return np.kron(a, b) - - -class TestLinalgBase(TestCase): - """ - Provides setUp and common data/error modes for testing np.linalg functions. 
- """ - - # supported dtypes - dtypes = (np.float64, np.float32, np.complex128, np.complex64) - - def setUp(self): - # Collect leftovers from previous test cases before checking for leaks - gc.collect() - - def sample_vector(self, n, dtype): - # Be careful to generate only exactly representable float values, - # to avoid rounding discrepancies between Numpy and Numba - base = np.arange(n) - if issubclass(dtype, np.complexfloating): - return (base * (1 - 0.5j) + 2j).astype(dtype) - else: - return (base * 0.5 + 1).astype(dtype) - - def specific_sample_matrix( - self, size, dtype, order, rank=None, condition=None): - """ - Provides a sample matrix with an optionally specified rank or condition - number. - - size: (rows, columns), the dimensions of the returned matrix. - dtype: the dtype for the returned matrix. - order: the memory layout for the returned matrix, 'F' or 'C'. - rank: the rank of the matrix, an integer value, defaults to full rank. - condition: the condition number of the matrix (defaults to 1.) - - NOTE: Only one of rank or condition may be set. - """ - - # default condition - d_cond = 1. 
- - if len(size) != 2: - raise ValueError("size must be a length 2 tuple.") - - if order not in ['F', 'C']: - raise ValueError("order must be one of 'F' or 'C'.") - - if dtype not in [np.float32, np.float64, np.complex64, np.complex128]: - raise ValueError("dtype must be a numpy floating point type.") - - if rank is not None and condition is not None: - raise ValueError("Only one of rank or condition can be specified.") - - if condition is None: - condition = d_cond - - if condition < 1: - raise ValueError("Condition number must be >=1.") - - np.random.seed(0) # repeatable seed - m, n = size - - if m < 0 or n < 0: - raise ValueError("Negative dimensions given for matrix shape.") - - minmn = min(m, n) - if rank is None: - rv = minmn - else: - if rank <= 0: - raise ValueError("Rank must be greater than zero.") - if not isinstance(rank, Integral): - raise ValueError("Rank must an integer.") - rv = rank - if rank > minmn: - raise ValueError("Rank given greater than full rank.") - - if m == 1 or n == 1: - # vector, must be rank 1 (enforced above) - # condition of vector is also 1 - if condition != d_cond: - raise ValueError( - "Condition number was specified for a vector (always 1.).") - maxmn = max(m, n) - Q = self.sample_vector(maxmn, dtype).reshape(m, n) - else: - # Build a sample matrix via combining SVD like inputs. - - # Create matrices of left and right singular vectors. - # This could use Modified Gram-Schmidt and perhaps be quicker, - # at present it uses QR decompositions to obtain orthonormal - # matrices. - tmp = self.sample_vector(m * m, dtype).reshape(m, m) - U, _ = np.linalg.qr(tmp) - # flip the second array, else for m==n the identity matrix appears - tmp = self.sample_vector(n * n, dtype)[::-1].reshape(n, n) - V, _ = np.linalg.qr(tmp) - # create singular values. 
- sv = np.linspace(d_cond, condition, rv) - S = np.zeros((m, n)) - idx = np.nonzero(np.eye(m, n)) - S[idx[0][:rv], idx[1][:rv]] = sv - Q = np.dot(np.dot(U, S), V.T) # construct - Q = np.array(Q, dtype=dtype, order=order) # sort out order/type - - return Q - - def shape_with_0_input(self, *args): - """ - returns True if an input argument has a dimension that is zero - and Numpy version is < 1.13, else False. This is due to behaviour - changes in handling dimension zero arrays: - https://github.com/numpy/numpy/issues/10573 - """ - if numpy_version < (1, 13): - for x in args: - if isinstance(x, np.ndarray): - if 0 in x.shape: - return True - return False - - def assert_error(self, cfunc, args, msg, err=ValueError): - with self.assertRaises(err) as raises: - cfunc(*args) - self.assertIn(msg, str(raises.exception)) - - def assert_non_square(self, cfunc, args): - msg = "Last 2 dimensions of the array must be square." - self.assert_error(cfunc, args, msg, np.linalg.LinAlgError) - - def assert_wrong_dtype(self, name, cfunc, args): - msg = "np.linalg.%s() only supported on float and complex arrays" % name - self.assert_error(cfunc, args, msg, errors.TypingError) - - def assert_wrong_dimensions(self, name, cfunc, args, la_prefix=True): - prefix = "np.linalg" if la_prefix else "np" - msg = "%s.%s() only supported on 2-D arrays" % (prefix, name) - self.assert_error(cfunc, args, msg, errors.TypingError) - - def assert_no_nan_or_inf(self, cfunc, args): - msg = "Array must not contain infs or NaNs." - self.assert_error(cfunc, args, msg, np.linalg.LinAlgError) - - def assert_contig_sanity(self, got, expected_contig): - """ - This checks that in a computed result from numba (array, possibly tuple - of arrays) all the arrays are contiguous in memory and that they are - all at least one of "C_CONTIGUOUS" or "F_CONTIGUOUS". The computed - result of the contiguousness is then compared against a hardcoded - expected result. 
- - got: is the computed results from numba - expected_contig: is "C" or "F" and is the expected type of - contiguousness across all input values - (and therefore tests). - """ - - if isinstance(got, tuple): - # tuple present, check all results - for a in got: - self.assert_contig_sanity(a, expected_contig) - else: - if not isinstance(got, Number): - # else a single array is present - c_contig = got.flags.c_contiguous - f_contig = got.flags.f_contiguous - - # check that the result (possible set of) is at least one of - # C or F contiguous. - msg = "Results are not at least one of all C or F contiguous." - self.assertTrue(c_contig | f_contig, msg) - - msg = "Computed contiguousness does not match expected." - if expected_contig == "C": - self.assertTrue(c_contig, msg) - elif expected_contig == "F": - self.assertTrue(f_contig, msg) - else: - raise ValueError("Unknown contig") - - def assert_raise_on_singular(self, cfunc, args): - msg = "Matrix is singular to machine precision." - self.assert_error(cfunc, args, msg, err=np.linalg.LinAlgError) - - def assert_is_identity_matrix(self, got, rtol=None, atol=None): - """ - Checks if a matrix is equal to the identity matrix. - """ - # check it is square - self.assertEqual(got.shape[-1], got.shape[-2]) - # create identity matrix - eye = np.eye(got.shape[-1], dtype=got.dtype) - resolution = 5 * np.finfo(got.dtype).resolution - if rtol is None: - rtol = 10 * resolution - if atol is None: - atol = 100 * resolution # zeros tend to be fuzzy - # check it matches - np.testing.assert_allclose(got, eye, rtol, atol) - - def assert_invalid_norm_kind(self, cfunc, args): - """ - For use in norm() and cond() tests. - """ - msg = "Invalid norm order for matrices." 
- self.assert_error(cfunc, args, msg, ValueError) - - def assert_raise_on_empty(self, cfunc, args): - msg = 'Arrays cannot be empty' - self.assert_error(cfunc, args, msg, np.linalg.LinAlgError) - - -class TestTestLinalgBase(TestCase): - """ - The sample matrix code TestLinalgBase.specific_sample_matrix() - is a bit involved, this class tests it works as intended. - """ - - def test_specific_sample_matrix(self): - - # add a default test to the ctor, it never runs so doesn't matter - inst = TestLinalgBase('specific_sample_matrix') - - sizes = [(7, 1), (11, 5), (5, 11), (3, 3), (1, 7)] - - # test loop - for size, dtype, order in product(sizes, inst.dtypes, 'FC'): - - m, n = size - minmn = min(m, n) - - # test default full rank - A = inst.specific_sample_matrix(size, dtype, order) - self.assertEqual(A.shape, size) - self.assertEqual(np.linalg.matrix_rank(A), minmn) - - # test reduced rank if a reduction is possible - if minmn > 1: - rank = minmn - 1 - A = inst.specific_sample_matrix(size, dtype, order, rank=rank) - self.assertEqual(A.shape, size) - self.assertEqual(np.linalg.matrix_rank(A), rank) - - resolution = 5 * np.finfo(dtype).resolution - - # test default condition - A = inst.specific_sample_matrix(size, dtype, order) - self.assertEqual(A.shape, size) - np.testing.assert_allclose(np.linalg.cond(A), - 1., - rtol=resolution, - atol=resolution) - - # test specified condition if matrix is > 1D - if minmn > 1: - condition = 10. 
- A = inst.specific_sample_matrix( - size, dtype, order, condition=condition) - self.assertEqual(A.shape, size) - np.testing.assert_allclose(np.linalg.cond(A), - 10., - rtol=resolution, - atol=resolution) - - # check errors are raised appropriately - def check_error(args, msg, err=ValueError): - with self.assertRaises(err) as raises: - inst.specific_sample_matrix(*args) - self.assertIn(msg, str(raises.exception)) - - # check the checker runs ok - with self.assertRaises(AssertionError) as raises: - msg = "blank" - check_error(((2, 3), np.float64, 'F'), msg, err=ValueError) - - # check invalid inputs... - - # bad size - msg = "size must be a length 2 tuple." - check_error(((1,), np.float64, 'F'), msg, err=ValueError) - - # bad order - msg = "order must be one of 'F' or 'C'." - check_error(((2, 3), np.float64, 'z'), msg, err=ValueError) - - # bad type - msg = "dtype must be a numpy floating point type." - check_error(((2, 3), np.int32, 'F'), msg, err=ValueError) - - # specifying both rank and condition - msg = "Only one of rank or condition can be specified." - check_error(((2, 3), np.float64, 'F', 1, 1), msg, err=ValueError) - - # specifying negative condition - msg = "Condition number must be >=1." - check_error(((2, 3), np.float64, 'F', None, -1), msg, err=ValueError) - - # specifying negative matrix dimension - msg = "Negative dimensions given for matrix shape." - check_error(((2, -3), np.float64, 'F'), msg, err=ValueError) - - # specifying negative rank - msg = "Rank must be greater than zero." - check_error(((2, 3), np.float64, 'F', -1), msg, err=ValueError) - - # specifying a rank greater than maximum rank - msg = "Rank given greater than full rank." - check_error(((2, 3), np.float64, 'F', 4), msg, err=ValueError) - - # specifying a condition number for a vector - msg = "Condition number was specified for a vector (always 1.)." 
- check_error(((1, 3), np.float64, 'F', None, 10), msg, err=ValueError) - - # specifying a non integer rank - msg = "Rank must an integer." - check_error(((2, 3), np.float64, 'F', 1.5), msg, err=ValueError) - - -class TestLinalgInv(TestLinalgBase): - """ - Tests for np.linalg.inv. - """ - - @tag('important') - @needs_lapack - def test_linalg_inv(self): - """ - Test np.linalg.inv - """ - n = 10 - cfunc = jit(nopython=True)(invert_matrix) - - def check(a, **kwargs): - expected = invert_matrix(a) - got = cfunc(a) - self.assert_contig_sanity(got, "F") - - use_reconstruction = False - - # try strict - try: - np.testing.assert_array_almost_equal_nulp(got, expected, - nulp=10) - except AssertionError: - # fall back to reconstruction - use_reconstruction = True - - if use_reconstruction: - rec = np.dot(got, a) - self.assert_is_identity_matrix(rec) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a) - - for dtype, order in product(self.dtypes, 'CF'): - a = self.specific_sample_matrix((n, n), dtype, order) - check(a) - - # 0 dimensioned matrix - check(np.empty((0, 0))) - - # Non square matrix - self.assert_non_square(cfunc, (np.ones((2, 3)),)) - - # Wrong dtype - self.assert_wrong_dtype("inv", cfunc, - (np.ones((2, 2), dtype=np.int32),)) - - # Dimension issue - self.assert_wrong_dimensions("inv", cfunc, (np.ones(10),)) - - # Singular matrix - self.assert_raise_on_singular(cfunc, (np.zeros((2, 2)),)) - - -class TestLinalgCholesky(TestLinalgBase): - """ - Tests for np.linalg.cholesky. - """ - - def sample_matrix(self, m, dtype, order): - # pd. 
(positive definite) matrix has eigenvalues in Z+ - np.random.seed(0) # repeatable seed - A = np.random.rand(m, m) - # orthonormal q needed to form up q^{-1}*D*q - # no "orth()" in numpy - q, _ = np.linalg.qr(A) - L = np.arange(1, m + 1) # some positive eigenvalues - Q = np.dot(np.dot(q.T, np.diag(L)), q) # construct - Q = np.array(Q, dtype=dtype, order=order) # sort out order/type - return Q - - def assert_not_pd(self, cfunc, args): - msg = "Matrix is not positive definite." - self.assert_error(cfunc, args, msg, np.linalg.LinAlgError) - - @needs_lapack - def test_linalg_cholesky(self): - """ - Test np.linalg.cholesky - """ - n = 10 - cfunc = jit(nopython=True)(cholesky_matrix) - - def check(a): - if self.shape_with_0_input(a): - # has shape with 0 on input, numpy will fail, - # just make sure Numba runs without error - cfunc(a) - return - expected = cholesky_matrix(a) - got = cfunc(a) - use_reconstruction = False - # check that the computed results are contig and in the same way - self.assert_contig_sanity(got, "C") - - # try strict - try: - np.testing.assert_array_almost_equal_nulp(got, expected, - nulp=10) - except AssertionError: - # fall back to reconstruction - use_reconstruction = True - - # try via reconstruction - if use_reconstruction: - rec = np.dot(got, np.conj(got.T)) - resolution = 5 * np.finfo(a.dtype).resolution - np.testing.assert_allclose( - a, - rec, - rtol=resolution, - atol=resolution - ) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a) - - for dtype, order in product(self.dtypes, 'FC'): - a = self.sample_matrix(n, dtype, order) - check(a) - - # 0 dimensioned matrix - check(np.empty((0, 0))) - - rn = "cholesky" - # Non square matrices - self.assert_non_square(cfunc, (np.ones((2, 3), dtype=np.float64),)) - - # Wrong dtype - self.assert_wrong_dtype(rn, cfunc, - (np.ones((2, 2), dtype=np.int32),)) - - # Dimension issue - self.assert_wrong_dimensions(rn, cfunc, - (np.ones(10, dtype=np.float64),)) - - # not pd - 
self.assert_not_pd(cfunc, - (np.ones(4, dtype=np.float64).reshape(2, 2),)) - - -class TestLinalgEigenSystems(TestLinalgBase): - """ - Tests for np.linalg.eig/eigvals. - """ - - def sample_matrix(self, m, dtype, order): - # This is a tridiag with the same but skewed values on the diagonals - v = self.sample_vector(m, dtype) - Q = np.diag(v) - idx = np.nonzero(np.eye(Q.shape[0], Q.shape[1], 1)) - Q[idx] = v[1:] - idx = np.nonzero(np.eye(Q.shape[0], Q.shape[1], -1)) - Q[idx] = v[:-1] - Q = np.array(Q, dtype=dtype, order=order) - return Q - - def assert_no_domain_change(self, name, cfunc, args): - msg = name + "() argument must not cause a domain change." - self.assert_error(cfunc, args, msg) - - def checker_for_linalg_eig( - self, name, func, expected_res_len, check_for_domain_change=None): - """ - Test np.linalg.eig - """ - n = 10 - cfunc = jit(nopython=True)(func) - - def check(a): - if self.shape_with_0_input(a): - # has shape with 0 on input, numpy will fail, - # just make sure Numba runs without error - cfunc(a) - return - expected = func(a) - got = cfunc(a) - # check that the returned tuple is same length - self.assertEqual(len(expected), len(got)) - # and that dimension is correct - res_is_tuple = False - if isinstance(got, tuple): - res_is_tuple = True - self.assertEqual(len(got), expected_res_len) - else: # its an array - self.assertEqual(got.ndim, expected_res_len) - - # and that the computed results are contig and in the same way - self.assert_contig_sanity(got, "F") - - use_reconstruction = False - # try plain match of each array to np first - for k in range(len(expected)): - try: - np.testing.assert_array_almost_equal_nulp( - got[k], expected[k], nulp=10) - except AssertionError: - # plain match failed, test by reconstruction - use_reconstruction = True - - # If plain match fails then reconstruction is used. - # this checks that A*V ~== V*diag(W) - # i.e. 
eigensystem ties out - # this is required as numpy uses only double precision lapack - # routines and computation of eigenvectors is numerically - # sensitive, numba uses the type specific routines therefore - # sometimes comes out with a different (but entirely - # valid) answer (eigenvectors are not unique etc.). - # This is only applicable if eigenvectors are computed - # along with eigenvalues i.e. result is a tuple. - resolution = 5 * np.finfo(a.dtype).resolution - if use_reconstruction: - if res_is_tuple: - w, v = got - # modify 'a' if hermitian eigensystem functionality is - # being tested. 'L' for use lower part is default and - # the only thing used at present so we conjugate transpose - # the lower part into the upper for use in the - # reconstruction. By construction the sample matrix is - # tridiag so this is just a question of copying the lower - # diagonal into the upper and conjugating on the way. - if name[-1] == 'h': - idxl = np.nonzero(np.eye(a.shape[0], a.shape[1], -1)) - idxu = np.nonzero(np.eye(a.shape[0], a.shape[1], 1)) - cfunc(a) - # upper idx must match lower for default uplo="L" - # if complex, conjugate - a[idxu] = np.conj(a[idxl]) - # also, only the real part of the diagonals is - # considered in the calculation so the imag is zeroed - # out for the purposes of use in reconstruction. - a[np.diag_indices(a.shape[0])] = np.real(np.diag(a)) - - lhs = np.dot(a, v) - rhs = np.dot(v, np.diag(w)) - - np.testing.assert_allclose( - lhs.real, - rhs.real, - rtol=resolution, - atol=resolution - ) - if np.iscomplexobj(v): - np.testing.assert_allclose( - lhs.imag, - rhs.imag, - rtol=resolution, - atol=resolution - ) - else: - # This isn't technically reconstruction but is here to - # deal with that the order of the returned eigenvalues - # may differ in the case of routines just returning - # eigenvalues and there's no true reconstruction - # available with which to perform a check. 
- np.testing.assert_allclose( - np.sort(expected), - np.sort(got), - rtol=resolution, - atol=resolution - ) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a) - - # The main test loop - for dtype, order in product(self.dtypes, 'FC'): - a = self.sample_matrix(n, dtype, order) - check(a) - - # Test both a real and complex type as the impls are different - for ty in [np.float32, np.complex64]: - - # 0 dimensioned matrix - check(np.empty((0, 0), dtype=ty)) - - # Non square matrices - self.assert_non_square(cfunc, (np.ones((2, 3), dtype=ty),)) - - # Wrong dtype - self.assert_wrong_dtype(name, cfunc, - (np.ones((2, 2), dtype=np.int32),)) - - # Dimension issue - self.assert_wrong_dimensions(name, cfunc, (np.ones(10, dtype=ty),)) - - # no nans or infs - self.assert_no_nan_or_inf(cfunc, - (np.array([[1., 2., ], [np.inf, np.nan]], - dtype=ty),)) - - if check_for_domain_change: - # By design numba does not support dynamic return types, numpy does - # and uses this in the case of returning eigenvalues/vectors of - # a real matrix. The return type of np.linalg.eig(), when - # operating on a matrix in real space depends on the values present - # in the matrix itself (recalling that eigenvalues are the roots of the - # characteristic polynomial of the system matrix, which will by - # construction depend on the values present in the system matrix). - # This test asserts that if a domain change is required on the return - # type, i.e. complex eigenvalues from a real input, an error is raised. - # For complex types, regardless of the value of the imaginary part of - # the returned eigenvalues, a complex type will be returned, this - # follows numpy and fits in with numba. - - # First check that the computation is valid (i.e. 
in complex space) - A = np.array([[1, -2], [2, 1]]) - check(A.astype(np.complex128)) - # and that the imaginary part is nonzero - l, _ = func(A) - self.assertTrue(np.any(l.imag)) - - # Now check that the computation fails in real space - for ty in [np.float32, np.float64]: - self.assert_no_domain_change(name, cfunc, (A.astype(ty),)) - - @needs_lapack - def test_linalg_eig(self): - self.checker_for_linalg_eig("eig", eig_matrix, 2, True) - - @needs_lapack - def test_linalg_eigvals(self): - self.checker_for_linalg_eig("eigvals", eigvals_matrix, 1, True) - - @needs_lapack - def test_linalg_eigh(self): - self.checker_for_linalg_eig("eigh", eigh_matrix, 2, False) - - @needs_lapack - def test_linalg_eigvalsh(self): - self.checker_for_linalg_eig("eigvalsh", eigvalsh_matrix, 1, False) - - -class TestLinalgSvd(TestLinalgBase): - """ - Tests for np.linalg.svd. - """ - - @needs_lapack - def test_linalg_svd(self): - """ - Test np.linalg.svd - """ - cfunc = jit(nopython=True)(svd_matrix) - - def check(a, **kwargs): - expected = svd_matrix(a, **kwargs) - got = cfunc(a, **kwargs) - # check that the returned tuple is same length - self.assertEqual(len(expected), len(got)) - # and that length is 3 - self.assertEqual(len(got), 3) - # and that the computed results are contig and in the same way - self.assert_contig_sanity(got, "F") - - use_reconstruction = False - # try plain match of each array to np first - for k in range(len(expected)): - - try: - np.testing.assert_array_almost_equal_nulp( - got[k], expected[k], nulp=10) - except AssertionError: - # plain match failed, test by reconstruction - use_reconstruction = True - - # if plain match fails then reconstruction is used. - # this checks that A ~= U*S*V**H - # i.e. 
SV decomposition ties out - # this is required as numpy uses only double precision lapack - # routines and computation of svd is numerically - # sensitive, numba using the type specific routines therefore - # sometimes comes out with a different answer (orthonormal bases - # are not unique etc.). - if use_reconstruction: - u, sv, vt = got - - # check they are dimensionally correct - for k in range(len(expected)): - self.assertEqual(got[k].shape, expected[k].shape) - - # regardless of full_matrices cols in u and rows in vt - # dictates the working size of s - s = np.zeros((u.shape[1], vt.shape[0])) - np.fill_diagonal(s, sv) - - rec = np.dot(np.dot(u, s), vt) - resolution = np.finfo(a.dtype).resolution - np.testing.assert_allclose( - a, - rec, - rtol=10 * resolution, - atol=100 * resolution # zeros tend to be fuzzy - ) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a, **kwargs) - - # test: column vector, tall, wide, square, row vector - # prime sizes - sizes = [(7, 1), (7, 5), (5, 7), (3, 3), (1, 7)] - - # flip on reduced or full matrices - full_matrices = (True, False) - - # test loop - for size, dtype, fmat, order in \ - product(sizes, self.dtypes, full_matrices, 'FC'): - - a = self.specific_sample_matrix(size, dtype, order) - check(a, full_matrices=fmat) - - rn = "svd" - - # Wrong dtype - self.assert_wrong_dtype(rn, cfunc, - (np.ones((2, 2), dtype=np.int32),)) - - # Dimension issue - self.assert_wrong_dimensions(rn, cfunc, - (np.ones(10, dtype=np.float64),)) - - # no nans or infs - self.assert_no_nan_or_inf(cfunc, - (np.array([[1., 2., ], [np.inf, np.nan]], - dtype=np.float64),)) - # empty - for sz in [(0, 1), (1, 0), (0, 0)]: - args = (np.empty(sz), True) - self.assert_raise_on_empty(cfunc, args) - - -class TestLinalgQr(TestLinalgBase): - """ - Tests for np.linalg.qr. 
- """ - - @needs_lapack - def test_linalg_qr(self): - """ - Test np.linalg.qr - """ - cfunc = jit(nopython=True)(qr_matrix) - - def check(a, **kwargs): - expected = qr_matrix(a, **kwargs) - got = cfunc(a, **kwargs) - - # check that the returned tuple is same length - self.assertEqual(len(expected), len(got)) - # and that length is 2 - self.assertEqual(len(got), 2) - # and that the computed results are contig and in the same way - self.assert_contig_sanity(got, "F") - - use_reconstruction = False - # try plain match of each array to np first - for k in range(len(expected)): - try: - np.testing.assert_array_almost_equal_nulp( - got[k], expected[k], nulp=10) - except AssertionError: - # plain match failed, test by reconstruction - use_reconstruction = True - - # if plain match fails then reconstruction is used. - # this checks that A ~= Q*R and that (Q^H)*Q = I - # i.e. QR decomposition ties out - # this is required as numpy uses only double precision lapack - # routines and computation of qr is numerically - # sensitive, numba using the type specific routines therefore - # sometimes comes out with a different answer (orthonormal bases - # are not unique etc.). 
- if use_reconstruction: - q, r = got - - # check they are dimensionally correct - for k in range(len(expected)): - self.assertEqual(got[k].shape, expected[k].shape) - - # check A=q*r - rec = np.dot(q, r) - resolution = np.finfo(a.dtype).resolution - np.testing.assert_allclose( - a, - rec, - rtol=10 * resolution, - atol=100 * resolution # zeros tend to be fuzzy - ) - - # check q is orthonormal - self.assert_is_identity_matrix(np.dot(np.conjugate(q.T), q)) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a, **kwargs) - - # test: column vector, tall, wide, square, row vector - # prime sizes - sizes = [(7, 1), (11, 5), (5, 11), (3, 3), (1, 7)] - - # test loop - for size, dtype, order in \ - product(sizes, self.dtypes, 'FC'): - a = self.specific_sample_matrix(size, dtype, order) - check(a) - - rn = "qr" - - # Wrong dtype - self.assert_wrong_dtype(rn, cfunc, - (np.ones((2, 2), dtype=np.int32),)) - - # Dimension issue - self.assert_wrong_dimensions(rn, cfunc, - (np.ones(10, dtype=np.float64),)) - - # no nans or infs - self.assert_no_nan_or_inf(cfunc, - (np.array([[1., 2., ], [np.inf, np.nan]], - dtype=np.float64),)) - - # empty - for sz in [(0, 1), (1, 0), (0, 0)]: - self.assert_raise_on_empty(cfunc, (np.empty(sz),)) - - -class TestLinalgSystems(TestLinalgBase): - """ - Base class for testing "system" solvers from np.linalg. - Namely np.linalg.solve() and np.linalg.lstsq(). - """ - - # check for RHS with dimension > 2 raises - def assert_wrong_dimensions_1D(self, name, cfunc, args, la_prefix=True): - prefix = "np.linalg" if la_prefix else "np" - msg = "%s.%s() only supported on 1 and 2-D arrays" % (prefix, name) - self.assert_error(cfunc, args, msg, errors.TypingError) - - # check that a dimensionally invalid system raises - def assert_dimensionally_invalid(self, cfunc, args): - msg = "Incompatible array sizes, system is not dimensionally valid." 
- self.assert_error(cfunc, args, msg, np.linalg.LinAlgError) - - # check that args with differing dtypes raise - def assert_homogeneous_dtypes(self, name, cfunc, args): - msg = "np.linalg.%s() only supports inputs that have homogeneous dtypes." % name - self.assert_error(cfunc, args, msg, errors.TypingError) - - -class TestLinalgLstsq(TestLinalgSystems): - """ - Tests for np.linalg.lstsq. - """ - - # NOTE: The testing of this routine is hard as it has to handle numpy - # using double precision routines on single precision input, this has - # a knock on effect especially in rank deficient cases and cases where - # conditioning is generally poor. As a result computed ranks can differ - # and consequently the calculated residual can differ. - # The tests try and deal with this as best as they can through the use - # of reconstruction and measures like residual norms. - # Suggestions for improvements are welcomed! - - @needs_lapack - def test_linalg_lstsq(self): - """ - Test np.linalg.lstsq - """ - cfunc = jit(nopython=True)(lstsq_system) - - def check(A, B, **kwargs): - expected = lstsq_system(A, B, **kwargs) - got = cfunc(A, B, **kwargs) - - # check that the returned tuple is same length - self.assertEqual(len(expected), len(got)) - # and that length is 4 - self.assertEqual(len(got), 4) - # and that the computed results are contig and in the same way - self.assert_contig_sanity(got, "C") - - use_reconstruction = False - - # check the ranks are the same and continue to a standard - # match if that is the case (if ranks differ, then output - # in e.g. residual array is of different size!). 
- try: - self.assertEqual(got[2], expected[2]) - # try plain match of each array to np first - for k in range(len(expected)): - try: - np.testing.assert_array_almost_equal_nulp( - got[k], expected[k], nulp=10) - except AssertionError: - # plain match failed, test by reconstruction - use_reconstruction = True - except AssertionError: - use_reconstruction = True - - if use_reconstruction: - x, res, rank, s = got - - # indicies in the output which are ndarrays - out_array_idx = [0, 1, 3] - - try: - # check the ranks are the same - self.assertEqual(rank, expected[2]) - # check they are dimensionally correct, skip [2] = rank. - for k in out_array_idx: - if isinstance(expected[k], np.ndarray): - self.assertEqual(got[k].shape, expected[k].shape) - except AssertionError: - # check the rank differs by 1. (numerical fuzz) - self.assertTrue(abs(rank - expected[2]) < 2) - - # check if A*X = B - resolution = np.finfo(A.dtype).resolution - try: - # this will work so long as the conditioning is - # ok and the rank is full - rec = np.dot(A, x) - np.testing.assert_allclose( - B, - rec, - rtol=10 * resolution, - atol=10 * resolution - ) - except AssertionError: - # system is probably under/over determined and/or - # poorly conditioned. Check slackened equality - # and that the residual norm is the same. - for k in out_array_idx: - try: - np.testing.assert_allclose( - expected[k], - got[k], - rtol=100 * resolution, - atol=100 * resolution - ) - except AssertionError: - # check the fail is likely due to bad conditioning - c = np.linalg.cond(A) - self.assertGreater(10 * c, (1. / resolution)) - - # make sure the residual 2-norm is ok - # if this fails its probably due to numpy using double - # precision LAPACK routines for singles. - res_expected = np.linalg.norm( - B - np.dot(A, expected[0])) - res_got = np.linalg.norm(B - np.dot(A, x)) - # rtol = 10. as all the systems are products of orthonormals - # and on the small side (rows, cols) < 100. 
- np.testing.assert_allclose( - res_expected, res_got, rtol=10.) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(A, B, **kwargs) - - # test: column vector, tall, wide, square, row vector - # prime sizes, the A's - sizes = [(7, 1), (11, 5), (5, 11), (3, 3), (1, 7)] - # compatible B's for Ax=B must have same number of rows and 1 or more - # columns - - # This test takes ages! So combinations are trimmed via cycling - - # gets a dtype - cycle_dt = cycle(self.dtypes) - - orders = ['F', 'C'] - # gets a memory order flag - cycle_order = cycle(orders) - - # a specific condition number to use in the following tests - # there is nothing special about it other than it is not magic - specific_cond = 10. - - # inner test loop, extracted as there's additional logic etc required - # that'd end up with this being repeated a lot - def inner_test_loop_fn(A, dt, **kwargs): - # test solve Ax=B for (column, matrix) B, same dtype as A - b_sizes = (1, 13) - - for b_size in b_sizes: - - # check 2D B - b_order = next(cycle_order) - B = self.specific_sample_matrix( - (A.shape[0], b_size), dt, b_order) - check(A, B, **kwargs) - - # check 1D B - b_order = next(cycle_order) - tmp = B[:, 0].copy(order=b_order) - check(A, tmp, **kwargs) - - # test loop - for a_size in sizes: - - dt = next(cycle_dt) - a_order = next(cycle_order) - - # A full rank, well conditioned system - A = self.specific_sample_matrix(a_size, dt, a_order) - - # run the test loop - inner_test_loop_fn(A, dt) - - m, n = a_size - minmn = min(m, n) - - # operations that only make sense with a 2D matrix system - if m != 1 and n != 1: - - # Test a rank deficient system - r = minmn - 1 - A = self.specific_sample_matrix( - a_size, dt, a_order, rank=r) - # run the test loop - inner_test_loop_fn(A, dt) - - # Test a system with a given condition number for use in - # testing the rcond parameter. - # This works because the singular values in the - # specific_sample_matrix code are linspace (1, cond, [0... 
if - # rank deficient]) - A = self.specific_sample_matrix( - a_size, dt, a_order, condition=specific_cond) - # run the test loop - rcond = 1. / specific_cond - approx_half_rank_rcond = minmn * rcond - inner_test_loop_fn(A, dt, - rcond=approx_half_rank_rcond) - - # check empty arrays - empties = [ - [(0, 1), (1,)], # empty A, valid b - [(1, 0), (1,)], # empty A, valid b - [(1, 1), (0,)], # valid A, empty 1D b - [(1, 1), (1, 0)], # valid A, empty 2D b - ] - - for A, b in empties: - args = (np.empty(A), np.empty(b)) - self.assert_raise_on_empty(cfunc, args) - - # Test input validation - ok = np.array([[1., 2.], [3., 4.]], dtype=np.float64) - - # check ok input is ok - cfunc, (ok, ok) - - # check bad inputs - rn = "lstsq" - - # Wrong dtype - bad = np.array([[1, 2], [3, 4]], dtype=np.int32) - self.assert_wrong_dtype(rn, cfunc, (ok, bad)) - self.assert_wrong_dtype(rn, cfunc, (bad, ok)) - - # different dtypes - bad = np.array([[1, 2], [3, 4]], dtype=np.float32) - self.assert_homogeneous_dtypes(rn, cfunc, (ok, bad)) - self.assert_homogeneous_dtypes(rn, cfunc, (bad, ok)) - - # Dimension issue - bad = np.array([1, 2], dtype=np.float64) - self.assert_wrong_dimensions(rn, cfunc, (bad, ok)) - - # no nans or infs - bad = np.array([[1., 2., ], [np.inf, np.nan]], dtype=np.float64) - self.assert_no_nan_or_inf(cfunc, (ok, bad)) - self.assert_no_nan_or_inf(cfunc, (bad, ok)) - - # check 1D is accepted for B (2D is done previously) - # and then that anything of higher dimension raises - oneD = np.array([1., 2.], dtype=np.float64) - cfunc, (ok, oneD) - bad = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype=np.float64) - self.assert_wrong_dimensions_1D(rn, cfunc, (ok, bad)) - - # check a dimensionally invalid system raises (1D and 2D cases - # checked) - bad1D = np.array([1.], dtype=np.float64) - bad2D = np.array([[1.], [2.], [3.]], dtype=np.float64) - self.assert_dimensionally_invalid(cfunc, (ok, bad1D)) - self.assert_dimensionally_invalid(cfunc, (ok, bad2D)) - - -class 
TestLinalgSolve(TestLinalgSystems): - """ - Tests for np.linalg.solve. - """ - - @needs_lapack - def test_linalg_solve(self): - """ - Test np.linalg.solve - """ - cfunc = jit(nopython=True)(solve_system) - - def check(a, b, **kwargs): - expected = solve_system(a, b, **kwargs) - got = cfunc(a, b, **kwargs) - - # check that the computed results are contig and in the same way - self.assert_contig_sanity(got, "F") - - use_reconstruction = False - # try plain match of the result first - try: - np.testing.assert_array_almost_equal_nulp( - got, expected, nulp=10) - except AssertionError: - # plain match failed, test by reconstruction - use_reconstruction = True - - # If plain match fails then reconstruction is used, - # this checks that AX ~= B. - # Plain match can fail due to numerical fuzziness associated - # with system size and conditioning, or more simply from - # numpy using double precision routines for computation that - # could be done in single precision (which is what numba does). - # Therefore minor differences in results can appear due to - # e.g. numerical roundoff being different between two precisions. 
- if use_reconstruction: - # check they are dimensionally correct - self.assertEqual(got.shape, expected.shape) - - # check AX=B - rec = np.dot(a, got) - resolution = np.finfo(a.dtype).resolution - np.testing.assert_allclose( - b, - rec, - rtol=10 * resolution, - atol=100 * resolution # zeros tend to be fuzzy - ) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a, b, **kwargs) - - # test: prime size squares - sizes = [(1, 1), (3, 3), (7, 7)] - - # test loop - for size, dtype, order in \ - product(sizes, self.dtypes, 'FC'): - A = self.specific_sample_matrix(size, dtype, order) - - b_sizes = (1, 13) - - for b_size, b_order in product(b_sizes, 'FC'): - # check 2D B - B = self.specific_sample_matrix( - (A.shape[0], b_size), dtype, b_order) - check(A, B) - - # check 1D B - tmp = B[:, 0].copy(order=b_order) - check(A, tmp) - - # check empty - cfunc(np.empty((0, 0)), np.empty((0,))) - - # Test input validation - ok = np.array([[1., 0.], [0., 1.]], dtype=np.float64) - - # check ok input is ok - cfunc(ok, ok) - - # check bad inputs - rn = "solve" - - # Wrong dtype - bad = np.array([[1, 0], [0, 1]], dtype=np.int32) - self.assert_wrong_dtype(rn, cfunc, (ok, bad)) - self.assert_wrong_dtype(rn, cfunc, (bad, ok)) - - # different dtypes - bad = np.array([[1, 2], [3, 4]], dtype=np.float32) - self.assert_homogeneous_dtypes(rn, cfunc, (ok, bad)) - self.assert_homogeneous_dtypes(rn, cfunc, (bad, ok)) - - # Dimension issue - bad = np.array([1, 0], dtype=np.float64) - self.assert_wrong_dimensions(rn, cfunc, (bad, ok)) - - # no nans or infs - bad = np.array([[1., 0., ], [np.inf, np.nan]], dtype=np.float64) - self.assert_no_nan_or_inf(cfunc, (ok, bad)) - self.assert_no_nan_or_inf(cfunc, (bad, ok)) - - # check 1D is accepted for B (2D is done previously) - # and then that anything of higher dimension raises - ok_oneD = np.array([1., 2.], dtype=np.float64) - cfunc(ok, ok_oneD) - bad = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype=np.float64) - 
self.assert_wrong_dimensions_1D(rn, cfunc, (ok, bad)) - - # check an invalid system raises (1D and 2D cases checked) - bad1D = np.array([1.], dtype=np.float64) - bad2D = np.array([[1.], [2.], [3.]], dtype=np.float64) - self.assert_dimensionally_invalid(cfunc, (ok, bad1D)) - self.assert_dimensionally_invalid(cfunc, (ok, bad2D)) - - # check that a singular system raises - bad2D = self.specific_sample_matrix((2, 2), np.float64, 'C', rank=1) - self.assert_raise_on_singular(cfunc, (bad2D, ok)) - - -class TestLinalgPinv(TestLinalgBase): - """ - Tests for np.linalg.pinv. - """ - - @needs_lapack - def test_linalg_pinv(self): - """ - Test np.linalg.pinv - """ - cfunc = jit(nopython=True)(pinv_matrix) - - def check(a, **kwargs): - if self.shape_with_0_input(a): - # has shape with 0 on input, numpy will fail, - # just make sure Numba runs without error - cfunc(a, **kwargs) - return - expected = pinv_matrix(a, **kwargs) - got = cfunc(a, **kwargs) - - # check that the computed results are contig and in the same way - self.assert_contig_sanity(got, "F") - - use_reconstruction = False - # try plain match of each array to np first - - try: - np.testing.assert_array_almost_equal_nulp( - got, expected, nulp=10) - except AssertionError: - # plain match failed, test by reconstruction - use_reconstruction = True - - # If plain match fails then reconstruction is used. - # This can occur due to numpy using double precision - # LAPACK when single can be used, this creates round off - # problems. Also, if the matrix has machine precision level - # zeros in its singular values then the singular vectors are - # likely to vary depending on round off. - if use_reconstruction: - - # check they are dimensionally correct - self.assertEqual(got.shape, expected.shape) - - # check pinv(A)*A~=eye - # if the problem is numerical fuzz then this will probably - # work, if the problem is rank deficiency then it won't! 
- rec = np.dot(got, a) - try: - self.assert_is_identity_matrix(rec) - except AssertionError: - # check A=pinv(pinv(A)) - resolution = 5 * np.finfo(a.dtype).resolution - rec = cfunc(got) - np.testing.assert_allclose( - rec, - a, - rtol=10 * resolution, - atol=100 * resolution # zeros tend to be fuzzy - ) - if a.shape[0] >= a.shape[1]: - # if it is overdetermined or fully determined - # use numba lstsq function (which is type specific) to - # compute the inverse and check against that. - lstsq = jit(nopython=True)(lstsq_system) - lstsq_pinv = lstsq( - a, np.eye( - a.shape[0]).astype( - a.dtype), **kwargs)[0] - np.testing.assert_allclose( - got, - lstsq_pinv, - rtol=10 * resolution, - atol=100 * resolution # zeros tend to be fuzzy - ) - # check the 2 norm of the difference is small - self.assertLess(np.linalg.norm(got - expected), resolution) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a, **kwargs) - - # test: column vector, tall, wide, square, row vector - # prime sizes - sizes = [(7, 1), (11, 5), (5, 11), (3, 3), (1, 7)] - - # When required, a specified condition number - specific_cond = 10. - - # test loop - for size, dtype, order in \ - product(sizes, self.dtypes, 'FC'): - # check a full rank matrix - a = self.specific_sample_matrix(size, dtype, order) - check(a) - - m, n = size - if m != 1 and n != 1: - # check a rank deficient matrix - minmn = min(m, n) - a = self.specific_sample_matrix(size, dtype, order, - condition=specific_cond) - rcond = 1. 
/ specific_cond - approx_half_rank_rcond = minmn * rcond - check(a, rcond=approx_half_rank_rcond) - - # check empty - for sz in [(0, 1), (1, 0)]: - check(np.empty(sz)) - - rn = "pinv" - - # Wrong dtype - self.assert_wrong_dtype(rn, cfunc, - (np.ones((2, 2), dtype=np.int32),)) - - # Dimension issue - self.assert_wrong_dimensions(rn, cfunc, - (np.ones(10, dtype=np.float64),)) - - # no nans or infs - self.assert_no_nan_or_inf(cfunc, - (np.array([[1., 2., ], [np.inf, np.nan]], - dtype=np.float64),)) - - -class TestLinalgDetAndSlogdet(TestLinalgBase): - """ - Tests for np.linalg.det. and np.linalg.slogdet. - Exactly the same inputs are used for both tests as - det() is a trivial function of slogdet(), the tests - are therefore combined. - """ - - def check_det(self, cfunc, a, **kwargs): - if self.shape_with_0_input(a): - # has shape with 0 on input, numpy will fail, - # just make sure Numba runs without error - cfunc(a, **kwargs) - return - expected = det_matrix(a, **kwargs) - got = cfunc(a, **kwargs) - - resolution = 5 * np.finfo(a.dtype).resolution - - # check the determinants are the same - np.testing.assert_allclose(got, expected, rtol=resolution) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a, **kwargs) - - def check_slogdet(self, cfunc, a, **kwargs): - if self.shape_with_0_input(a): - # has shape with 0 on input, numpy will fail, - # just make sure Numba runs without error - cfunc(a, **kwargs) - return - expected = slogdet_matrix(a, **kwargs) - got = cfunc(a, **kwargs) - - # As numba returns python floats types and numpy returns - # numpy float types, some more adjustment and different - # types of comparison than those used with array based - # results is required. 
- - # check that the returned tuple is same length - self.assertEqual(len(expected), len(got)) - # and that length is 2 - self.assertEqual(len(got), 2) - - # check that the domain of the results match - for k in range(2): - self.assertEqual( - np.iscomplexobj(got[k]), - np.iscomplexobj(expected[k])) - - # turn got[0] into the same dtype as `a` - # this is so checking with nulp will work - got_conv = a.dtype.type(got[0]) - np.testing.assert_array_almost_equal_nulp( - got_conv, expected[0], nulp=10) - # compare log determinant magnitude with a more fuzzy value - # as numpy values come from higher precision lapack routines - resolution = 5 * np.finfo(a.dtype).resolution - np.testing.assert_allclose( - got[1], expected[1], rtol=resolution, atol=resolution) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a, **kwargs) - - def do_test(self, rn, check, cfunc): - - # test: 1x1 as it is unusual, 4x4 as it is even and 7x7 as is it odd! - sizes = [(1, 1), (4, 4), (7, 7)] - - # test loop - for size, dtype, order in \ - product(sizes, self.dtypes, 'FC'): - # check a full rank matrix - a = self.specific_sample_matrix(size, dtype, order) - check(cfunc, a) - - # use a matrix of zeros to trip xgetrf U(i,i)=0 singular test - for dtype, order in product(self.dtypes, 'FC'): - a = np.zeros((3, 3), dtype=dtype) - check(cfunc, a) - - # check empty - check(cfunc, np.empty((0, 0))) - - # Wrong dtype - self.assert_wrong_dtype(rn, cfunc, - (np.ones((2, 2), dtype=np.int32),)) - - # Dimension issue - self.assert_wrong_dimensions(rn, cfunc, - (np.ones(10, dtype=np.float64),)) - - # no nans or infs - self.assert_no_nan_or_inf(cfunc, - (np.array([[1., 2., ], [np.inf, np.nan]], - dtype=np.float64),)) - - @needs_lapack - def test_linalg_det(self): - cfunc = jit(nopython=True)(det_matrix) - self.do_test("det", self.check_det, cfunc) - - @needs_lapack - def test_linalg_slogdet(self): - cfunc = jit(nopython=True)(slogdet_matrix) - self.do_test("slogdet", 
self.check_slogdet, cfunc) - -# Use TestLinalgSystems as a base to get access to additional -# testing for 1 and 2D inputs. - - -class TestLinalgNorm(TestLinalgSystems): - """ - Tests for np.linalg.norm. - """ - - @needs_lapack - def test_linalg_norm(self): - """ - Test np.linalg.norm - """ - cfunc = jit(nopython=True)(norm_matrix) - - def check(a, **kwargs): - expected = norm_matrix(a, **kwargs) - got = cfunc(a, **kwargs) - - # All results should be in the real domain - self.assertTrue(not np.iscomplexobj(got)) - - resolution = 5 * np.finfo(a.dtype).resolution - - # check the norms are the same to the arg `a` precision - np.testing.assert_allclose(got, expected, rtol=resolution) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a, **kwargs) - - # Check 1D inputs - sizes = [1, 4, 7] - nrm_types = [None, np.inf, -np.inf, 0, 1, -1, 2, -2, 5, 6.7, -4.3] - - # standard 1D input - for size, dtype, nrm_type in \ - product(sizes, self.dtypes, nrm_types): - a = self.sample_vector(size, dtype) - check(a, ord=nrm_type) - - # sliced 1D input - for dtype, nrm_type in \ - product(self.dtypes, nrm_types): - a = self.sample_vector(10, dtype)[::3] - check(a, ord=nrm_type) - - # Check 2D inputs: - # test: column vector, tall, wide, square, row vector - # prime sizes - sizes = [(7, 1), (11, 5), (5, 11), (3, 3), (1, 7)] - nrm_types = [None, np.inf, -np.inf, 1, -1, 2, -2] - - # standard 2D input - for size, dtype, order, nrm_type in \ - product(sizes, self.dtypes, 'FC', nrm_types): - # check a full rank matrix - a = self.specific_sample_matrix(size, dtype, order) - check(a, ord=nrm_type) - - # check 2D slices work for the case where xnrm2 is called from - # BLAS (ord=None) to make sure it is working ok. 
- nrm_types = [None] - for dtype, nrm_type, order in \ - product(self.dtypes, nrm_types, 'FC'): - a = self.specific_sample_matrix((17, 13), dtype, order) - # contig for C order - check(a[:3], ord=nrm_type) - - # contig for Fortran order - check(a[:, 3:], ord=nrm_type) - - # contig for neither order - check(a[1, 4::3], ord=nrm_type) - - # check that numba returns zero for empty arrays. Numpy returns zero - # for most norm types and raises ValueError for +/-np.inf. - # there is not a great deal of consistency in Numpy's response so - # it is not being emulated in Numba - for dtype, nrm_type, order in \ - product(self.dtypes, nrm_types, 'FC'): - a = np.empty((0,), dtype=dtype, order=order) - self.assertEqual(cfunc(a, nrm_type), 0.0) - a = np.empty((0, 0), dtype=dtype, order=order) - self.assertEqual(cfunc(a, nrm_type), 0.0) - - rn = "norm" - - # Wrong dtype - self.assert_wrong_dtype(rn, cfunc, - (np.ones((2, 2), dtype=np.int32),)) - - # Dimension issue, reuse the test from the TestLinalgSystems class - self.assert_wrong_dimensions_1D( - rn, cfunc, (np.ones( - 12, dtype=np.float64).reshape( - 2, 2, 3),)) - - # no nans or infs for 2d case when SVD is used (e.g 2-norm) - self.assert_no_nan_or_inf(cfunc, - (np.array([[1., 2.], [np.inf, np.nan]], - dtype=np.float64), 2)) - - # assert 2D input raises for an invalid norm kind kwarg - self.assert_invalid_norm_kind(cfunc, (np.array([[1., 2.], [3., 4.]], - dtype=np.float64), 6)) - - -class TestLinalgCond(TestLinalgBase): - """ - Tests for np.linalg.cond. 
- """ - - @needs_lapack - def test_linalg_cond(self): - """ - Test np.linalg.cond - """ - - cfunc = jit(nopython=True)(cond_matrix) - - def check(a, **kwargs): - expected = cond_matrix(a, **kwargs) - got = cfunc(a, **kwargs) - - # All results should be in the real domain - self.assertTrue(not np.iscomplexobj(got)) - - resolution = 5 * np.finfo(a.dtype).resolution - - # check the cond is the same to the arg `a` precision - np.testing.assert_allclose(got, expected, rtol=resolution) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a, **kwargs) - - # valid p values (used to indicate norm type) - ps = [None, np.inf, -np.inf, 1, -1, 2, -2] - sizes = [(3, 3), (7, 7)] - - for size, dtype, order, p in \ - product(sizes, self.dtypes, 'FC', ps): - a = self.specific_sample_matrix(size, dtype, order) - check(a, p=p) - - # When p=None non-square matrices are accepted. - sizes = [(7, 1), (11, 5), (5, 11), (1, 7)] - for size, dtype, order in \ - product(sizes, self.dtypes, 'FC'): - a = self.specific_sample_matrix(size, dtype, order) - check(a) - - # empty - for sz in [(0, 1), (1, 0), (0, 0)]: - self.assert_raise_on_empty(cfunc, (np.empty(sz),)) - - # try an ill-conditioned system with 2-norm, make sure np raises an - # overflow warning as the result is `+inf` and that the result from - # numba matches. - with warnings.catch_warnings(): - a = np.array([[1.e308, 0], [0, 0.1]], dtype=np.float64) - warnings.simplefilter("error", RuntimeWarning) - self.assertRaisesRegexp(RuntimeWarning, - 'overflow encountered in.*', - check, a) - warnings.simplefilter("ignore", RuntimeWarning) - check(a) - - rn = "cond" - - # Wrong dtype - self.assert_wrong_dtype(rn, cfunc, - (np.ones((2, 2), dtype=np.int32),)) - - # Dimension issue - self.assert_wrong_dimensions(rn, cfunc, - (np.ones(10, dtype=np.float64),)) - - # no nans or infs when p="None" (default for kwarg). 
- self.assert_no_nan_or_inf(cfunc, - (np.array([[1., 2., ], [np.inf, np.nan]], - dtype=np.float64),)) - - # assert raises for an invalid norm kind kwarg - self.assert_invalid_norm_kind(cfunc, (np.array([[1., 2.], [3., 4.]], - dtype=np.float64), 6)) - - -class TestLinalgMatrixRank(TestLinalgSystems): - """ - Tests for np.linalg.matrix_rank. - """ - - @needs_lapack - def test_linalg_matrix_rank(self): - """ - Test np.linalg.matrix_rank - """ - - cfunc = jit(nopython=True)(matrix_rank_matrix) - - def check(a, **kwargs): - expected = matrix_rank_matrix(a, **kwargs) - got = cfunc(a, **kwargs) - - # Ranks are integral so comparison should be trivial. - # check the rank is the same - np.testing.assert_allclose(got, expected) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a, **kwargs) - - sizes = [(7, 1), (11, 5), (5, 11), (3, 3), (1, 7)] - - for size, dtype, order in \ - product(sizes, self.dtypes, 'FC'): - # check full rank system - a = self.specific_sample_matrix(size, dtype, order) - check(a) - - # If the system is a matrix, check rank deficiency is reported - # correctly. Check all ranks from 0 to (full rank - 1). - tol = 1e-13 - # first check 1 to (full rank - 1) - for k in range(1, min(size) - 1): - # check rank k - a = self.specific_sample_matrix(size, dtype, order, rank=k) - self.assertEqual(cfunc(a), k) - check(a) - # check provision of a tolerance works as expected - # create a (m x n) diagonal matrix with a singular value - # guaranteed below the tolerance 1e-13 - m, n = a.shape - a[:, :] = 0. # reuse `a`'s memory - idx = np.nonzero(np.eye(m, n)) - if np.iscomplexobj(a): - b = 1. + np.random.rand(k) + 1.j +\ - 1.j * np.random.rand(k) - # min singular value is sqrt(2)*1e-14 - b[0] = 1e-14 + 1e-14j - else: - b = 1. 
+ np.random.rand(k) - b[0] = 1e-14 # min singular value is 1e-14 - a[idx[0][:k], idx[1][:k]] = b.astype(dtype) - # rank should be k-1 (as tol is present) - self.assertEqual(cfunc(a, tol), k - 1) - check(a, tol=tol) - # then check zero rank - a[:, :] = 0. - self.assertEqual(cfunc(a), 0) - check(a) - # add in a singular value that is small - if np.iscomplexobj(a): - a[-1, -1] = 1e-14 + 1e-14j - else: - a[-1, -1] = 1e-14 - # check the system has zero rank to a given tolerance - self.assertEqual(cfunc(a, tol), 0) - check(a, tol=tol) - - # check the zero vector returns rank 0 and a nonzero vector - # returns rank 1. - for dt in self.dtypes: - a = np.zeros((5), dtype=dt) - self.assertEqual(cfunc(a), 0) - check(a) - # make it a nonzero vector - a[0] = 1. - self.assertEqual(cfunc(a), 1) - check(a) - - # empty - for sz in [(0, 1), (1, 0), (0, 0)]: - for tol in [None, 1e-13]: - self.assert_raise_on_empty(cfunc, (np.empty(sz), tol)) - - rn = "matrix_rank" - - # Wrong dtype - self.assert_wrong_dtype(rn, cfunc, - (np.ones((2, 2), dtype=np.int32),)) - - # Dimension issue - self.assert_wrong_dimensions_1D( - rn, cfunc, (np.ones( - 12, dtype=np.float64).reshape( - 2, 2, 3),)) - - # no nans or infs for 2D case - self.assert_no_nan_or_inf(cfunc, - (np.array([[1., 2., ], [np.inf, np.nan]], - dtype=np.float64),)) - - -class TestLinalgMatrixPower(TestLinalgBase): - """ - Tests for np.linalg.matrix_power. 
- """ - - def assert_int_exponenent(self, cfunc, args): - # validate first arg is ok - cfunc(args[0], 1) - # pass in both args and assert fail - with self.assertRaises(errors.TypingError): - cfunc(*args) - - @needs_lapack - def test_linalg_matrix_power(self): - cfunc = jit(nopython=True)(matrix_power_matrix) - - def check(a, pwr): - expected = matrix_power_matrix(a, pwr) - got = cfunc(a, pwr) - - # check that the computed results are contig and in the same way - self.assert_contig_sanity(got, "C") - - res = 5 * np.finfo(a.dtype).resolution - np.testing.assert_allclose(got, expected, rtol=res, atol=res) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a, pwr) - - sizes = [(1, 1), (5, 5), (7, 7)] - powers = [-33, -17] + list(range(-10, 10)) + [17, 33] - - for size, pwr, dtype, order in \ - product(sizes, powers, self.dtypes, 'FC'): - a = self.specific_sample_matrix(size, dtype, order) - check(a, pwr) - a = np.empty((0, 0), dtype=dtype, order=order) - check(a, pwr) - - rn = "matrix_power" - - # Wrong dtype - self.assert_wrong_dtype(rn, cfunc, - (np.ones((2, 2), dtype=np.int32), 1)) - - # not an int power - self.assert_wrong_dtype(rn, cfunc, - (np.ones((2, 2), dtype=np.int32), 1)) - - # non square system - args = (np.ones((3, 5)), 1) - msg = 'input must be a square array' - self.assert_error(cfunc, args, msg) - - # Dimension issue - self.assert_wrong_dimensions(rn, cfunc, - (np.ones(10, dtype=np.float64), 1)) - - # non-integer supplied as exponent - self.assert_int_exponenent(cfunc, (np.ones((2, 2)), 1.2)) - - # singular matrix is not invertible - self.assert_raise_on_singular(cfunc, (np.array([[0., 0], [1, 1]]), -1)) - - -class TestTrace(TestLinalgBase): - """ - Tests for np.trace. 
- """ - - def setUp(self): - super(TestTrace, self).setUp() - # compile two versions, one with and one without the offset kwarg - self.cfunc_w_offset = jit(nopython=True)(trace_matrix) - self.cfunc_no_offset = jit(nopython=True)(trace_matrix_no_offset) - - def assert_int_offset(self, cfunc, a, **kwargs): - # validate first arg is ok - cfunc(a) - # pass in kwarg and assert fail - with self.assertRaises(errors.TypingError): - cfunc(a, **kwargs) - - def test_trace(self): - - def check(a, **kwargs): - if 'offset' in kwargs: - expected = trace_matrix(a, **kwargs) - cfunc = self.cfunc_w_offset - else: - expected = trace_matrix_no_offset(a, **kwargs) - cfunc = self.cfunc_no_offset - - got = cfunc(a, **kwargs) - - res = 5 * np.finfo(a.dtype).resolution - np.testing.assert_allclose(got, expected, rtol=res, atol=res) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a, **kwargs) - - # test: column vector, tall, wide, square, row vector - # prime sizes - sizes = [(7, 1), (11, 5), (5, 11), (3, 3), (1, 7)] - - # offsets to cover the range of the matrix sizes above - offsets = [-13, -12, -11] + list(range(-10, 10)) + [11, 12, 13] - - for size, offset, dtype, order in \ - product(sizes, offsets, self.dtypes, 'FC'): - a = self.specific_sample_matrix(size, dtype, order) - check(a, offset=offset) - if offset == 0: - check(a) - a = np.empty((0, 0), dtype=dtype, order=order) - check(a, offset=offset) - if offset == 0: - check(a) - - rn = "trace" - - # Dimension issue - self.assert_wrong_dimensions(rn, self.cfunc_w_offset, - (np.ones(10, dtype=np.float64), 1), False) - self.assert_wrong_dimensions(rn, self.cfunc_no_offset, - (np.ones(10, dtype=np.float64),), False) - - # non-integer supplied as exponent - self.assert_int_offset( - self.cfunc_w_offset, np.ones( - (2, 2)), offset=1.2) - - def test_trace_w_optional_input(self): - "Issue 2314" - @jit("(optional(float64[:,:]),)", nopython=True) - def tested(a): - return np.trace(a) - - a = np.ones((5, 5), 
dtype=np.float64) - tested(a) - - with self.assertRaises(TypeError) as raises: - tested(None) - - errmsg = str(raises.exception) - self.assertEqual('expected array(float64, 2d, A), got None', errmsg) - - -class TestBasics(TestLinalgSystems): # TestLinalgSystems for 1d test - - order1 = cycle(['F', 'C', 'C', 'F']) - order2 = cycle(['C', 'F', 'C', 'F']) - - # test: column vector, matrix, row vector, 1d sizes - # (7, 1, 3) and two scalars - sizes = [(7, 1), (3, 3), (1, 7), (7,), (1,), (3,), 3., 5.] - - def _assert_wrong_dim(self, rn, cfunc): - # Dimension issue - self.assert_wrong_dimensions_1D( - rn, cfunc, (np.array([[[1]]], dtype=np.float64), np.ones(1)), False) - self.assert_wrong_dimensions_1D( - rn, cfunc, (np.ones(1), np.array([[[1]]], dtype=np.float64)), False) - - def _gen_input(self, size, dtype, order): - if not isinstance(size, tuple): - return size - else: - if len(size) == 1: - return self.sample_vector(size[0], dtype) - else: - return self.sample_vector( - size[0] * size[1], - dtype).reshape( - size, order=order) - - def _get_input(self, size1, size2, dtype): - a = self._gen_input(size1, dtype, next(self.order1)) - b = self._gen_input(size2, dtype, next(self.order2)) - # force domain consistency as underlying ufuncs require it - if np.iscomplexobj(a): - b = b + 1j - if np.iscomplexobj(b): - a = a + 1j - return (a, b) - - def test_outer(self): - cfunc = jit(nopython=True)(outer_matrix) - - def check(a, b, **kwargs): - - # check without kwargs - expected = outer_matrix(a, b) - got = cfunc(a, b) - - res = 5 * np.finfo(np.asarray(a).dtype).resolution - np.testing.assert_allclose(got, expected, rtol=res, atol=res) - - # if kwargs present check with them too - if 'out' in kwargs: - got = cfunc(a, b, **kwargs) - np.testing.assert_allclose(got, expected, rtol=res, - atol=res) - np.testing.assert_allclose(kwargs['out'], expected, - rtol=res, atol=res) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a, b, **kwargs) - - for size1, 
size2, dtype in \ - product(self.sizes, self.sizes, self.dtypes): - (a, b) = self._get_input(size1, size2, dtype) - check(a, b) - if numpy_version >= (1, 9): - c = np.empty((np.asarray(a).size, np.asarray(b).size), - dtype=np.asarray(a).dtype) - check(a, b, out=c) - - self._assert_wrong_dim("outer", cfunc) - - def test_kron(self): - cfunc = jit(nopython=True)(kron_matrix) - - def check(a, b, **kwargs): - - expected = kron_matrix(a, b) - got = cfunc(a, b) - - res = 5 * np.finfo(np.asarray(a).dtype).resolution - np.testing.assert_allclose(got, expected, rtol=res, atol=res) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a, b) - - for size1, size2, dtype in \ - product(self.sizes, self.sizes, self.dtypes): - (a, b) = self._get_input(size1, size2, dtype) - check(a, b) - - self._assert_wrong_dim("kron", cfunc) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_lists.py b/numba/numba/tests/test_lists.py deleted file mode 100644 index 1fc5481b9..000000000 --- a/numba/numba/tests/test_lists.py +++ /dev/null @@ -1,1429 +0,0 @@ -from __future__ import print_function - -from collections import namedtuple -import contextlib -import itertools -import math -import sys -import numpy as np - -from numba.compiler import compile_isolated, Flags -from numba import jit, types, utils, typeof, jitclass -import numba.unittest_support as unittest -from numba import testing, errors -from .support import TestCase, MemoryLeakMixin, tag - - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -force_pyobj_flags = Flags() -force_pyobj_flags.set("force_pyobject") - -Point = namedtuple('Point', ('a', 'b')) - - -def noop(x): - pass - -def unbox_usecase(x): - """ - Expect a list of numbers - """ - res = 0 - for v in x: - res += v - return res - -def unbox_usecase2(x): - """ - Expect a list of tuples - """ - res = 0 - for v in x: - res += len(v) - return res - -def unbox_usecase3(x): - """ - Expect a (number, 
list of numbers) tuple. - """ - a, b = x - res = a - for v in b: - res += v - return res - -def unbox_usecase4(x): - """ - Expect a (number, list of tuples) tuple. - """ - a, b = x - res = a - for v in b: - res += len(v) - return res - - -def create_list(x, y, z): - return [x, y, z] - -def create_nested_list(x, y, z, a, b, c): - return [[x, y, z], [a, b, c]] - -def list_comprehension1(): - return sum([x**2 for x in range(10)]) - -def list_comprehension2(): - return sum([x for x in range(10) if x % 2 == 0]) - -def list_comprehension3(): - return sum([math.pow(x, 2) for x in range(10)]) - -def list_comprehension4(): - return sum([x * y for x in range(10) for y in range(10)]) - -def list_comprehension5(): - return [x * 2 for x in range(10)] - -def list_comprehension6(): - return [[x for x in range(y)] for y in range(3)] - - -def list_constructor(n): - return list(range(n)) - -def list_constructor_empty(): - # cannot be typed, list is empty and no typing information is present to - # infer a type - return list() - -def list_constructor_empty_but_typeable(n): - # can be typed, list is empty but later append has typing info that allows - # for inference - y = list() - return y.append(n) - -def list_append(n): - l = [] - l.append(42) - for i in range(n): - l.append(i) - return l - -def list_append_heterogeneous(n): - l = [] - l.append(42.0) - for i in range(n): - l.append(i) - return l - -def list_extend(n): - l = [] - # A non-list iterable and a list - l.extend(range(n)) - l.extend(l[:-1]) - l.extend(range(n, 0, -1)) - return l - -def list_extend_heterogeneous(n): - l = [] - # Extend with various iterables, including lists, with different types - l.extend(range(n)) - l.extend(l[:-1]) - l.extend((5, 42)) - l.extend([123.0]) - return l - -def list_pop0(n): - l = list(range(n)) - res = 0 - while len(l) > 0: - res += len(l) * l.pop() - return res - -def list_pop1(n, i): - l = list(range(n)) - x = l.pop(i) - return x, l - -def list_len(n): - l = list(range(n)) - return len(l) 
- -def list_getitem(n): - l = list(range(n)) - res = 0 - # Positive indices - for i in range(len(l)): - res += i * l[i] - # Negative indices - for i in range(-len(l), 0): - res -= i * l[i] - return res - -def list_setitem(n): - l = list(range(n)) - res = 0 - # Positive indices - for i in range(len(l)): - l[i] = i * l[i] - # Negative indices - for i in range(-len(l), 0): - l[i] = i * l[i] - for i in range(len(l)): - res += l[i] - return res - -def list_getslice2(n, start, stop): - l = list(range(n)) - return l[start:stop] - -def list_getslice3(n, start, stop, step): - l = list(range(n)) - return l[start:stop:step] - -def list_setslice2(n, n_source, start, stop): - # Generic setslice with size change - l = list(range(n)) - v = list(range(100, 100 + n_source)) - l[start:stop] = v - return l - -def list_setslice3(n, start, stop, step): - l = list(range(n)) - v = l[start:stop:step] - for i in range(len(v)): - v[i] += 100 - l[start:stop:step] = v - return l - -def list_setslice3_arbitrary(n, n_src, start, stop, step): - l = list(range(n)) - l[start:stop:step] = list(range(100, 100 + n_src)) - return l - -def list_delslice0(n): - l = list(range(n)) - del l[:] - return l - -def list_delslice1(n, start, stop): - l = list(range(n)) - del l[start:] - del l[:stop] - return l - -def list_delslice2(n, start, stop): - l = list(range(n)) - del l[start:stop] - return l - -def list_clear(n): - l = list(range(n)) - l.clear() - return l - -def list_copy(n): - l = list(range(n)) - ll = l.copy() - l.append(42) - return l, ll - -def list_iteration(n): - l = list(range(n)) - res = 0 - for i, v in enumerate(l): - res += i * v - return res - -def list_contains(n): - l = list(range(n)) - return (0 in l, 1 in l, n - 1 in l, n in l, - 0 not in l, 1 not in l, n - 1 not in l, n not in l, - ) - -def list_index1(n, v): - l = list(range(n, 0, -1)) - return l.index(v) - -def list_index2(n, v, start): - l = list(range(n, 0, -1)) - return l.index(v, start) - -def list_index3(n, v, start, stop): - l = 
list(range(n, 0, -1)) - return l.index(v, start, stop) - -def list_remove(n, v): - l = list(range(n - 1, -1, -1)) - l.remove(v) - return l - -def list_insert(n, pos, v): - l = list(range(0, n)) - l.insert(pos, v) - return l - -def list_count(n, v): - l = [] - for x in range(n): - l.append(x & 3) - return l.count(v) - -def list_reverse(n): - l = list(range(n)) - l.reverse() - return l - -def list_add(m, n): - a = list(range(0, m)) - b = list(range(100, 100 + n)) - res = a + b - res.append(42) # check result is a copy - return a, b, res - -def list_add_heterogeneous(): - a = [1] - b = [2.0] - c = a + b - d = b + a - # check result is a copy - a.append(3) - b.append(4.0) - return a, b, c, d - -def list_add_inplace(m, n): - a = list(range(0, m)) - b = list(range(100, 100 + n)) - a += b - return a, b - -def list_add_inplace_heterogeneous(): - a = [1] - b = [2.0] - a += b - b += a - return a, b - -def list_mul(n, v): - a = list(range(n)) - return a * v - -def list_mul_inplace(n, v): - a = list(range(n)) - a *= v - return a - -def list_bool(n): - a = list(range(n)) - return bool(a), (True if a else False) - -def eq_usecase(a, b): - return list(a) == list(b) - -def ne_usecase(a, b): - return list(a) != list(b) - -def gt_usecase(a, b): - return list(a) > list(b) - -def ge_usecase(a, b): - return list(a) >= list(b) - -def lt_usecase(a, b): - return list(a) < list(b) - -def le_usecase(a, b): - return list(a) <= list(b) - -def identity_usecase(n): - a = list(range(n)) - b = a - c = a[:] - return (a is b), (a is not b), (a is c), (a is not c) - -def bool_list_usecase(): - # Exercise getitem, setitem, iteration with bool values (issue #1373) - l = [False] - l[0] = True - x = False - for v in l: - x = x ^ v - return l, x - -def reflect_simple(l, ll): - x = l.pop() - y = l.pop() - l[0] = 42. - l.extend(ll) - return l, x, y - -def reflect_conditional(l, ll): - # `l` may or may not actually reflect a Python list - if ll[0]: - l = [11., 22., 33., 44.] 
- x = l.pop() - y = l.pop() - l[0] = 42. - l.extend(ll) - return l, x, y - -def reflect_exception(l): - l.append(42) - raise ZeroDivisionError - -def reflect_dual(l, ll): - l.append(ll.pop()) - return l is ll - - -class TestLists(MemoryLeakMixin, TestCase): - - def test_create_list(self): - pyfunc = create_list - cr = compile_isolated(pyfunc, (types.int32, types.int32, types.int32)) - cfunc = cr.entry_point - self.assertEqual(cfunc(1, 2, 3), pyfunc(1, 2, 3)) - - def test_create_nested_list(self): - pyfunc = create_nested_list - cr = compile_isolated(pyfunc, (types.int32, types.int32, types.int32, - types.int32, types.int32, types.int32)) - cfunc = cr.entry_point - self.assertEqual(cfunc(1, 2, 3, 4, 5, 6), pyfunc(1, 2, 3, 4, 5, 6)) - - @testing.allow_interpreter_mode - def test_list_comprehension(self): - list_tests = [list_comprehension1, - list_comprehension2, - list_comprehension3, - list_comprehension4, - list_comprehension5, - list_comprehension6] - - for test in list_tests: - pyfunc = test - cr = compile_isolated(pyfunc, ()) - cfunc = cr.entry_point - self.assertEqual(cfunc(), pyfunc()) - - def check_unary_with_size(self, pyfunc, precise=True): - cfunc = jit(nopython=True)(pyfunc) - # Use various sizes, to stress the allocation algorithm - for n in [0, 3, 16, 70, 400]: - eq = self.assertPreciseEqual if precise else self.assertEqual - eq(cfunc(n), pyfunc(n)) - - def test_constructor(self): - self.check_unary_with_size(list_constructor) - - def test_constructor_empty(self): - self.disable_leak_check() - cfunc = jit(nopython=True)(list_constructor_empty) - with self.assertRaises(errors.TypingError) as raises: - cfunc() - self.assertIn("Can't infer type of variable", str(raises.exception)) - self.assertIn("list(undefined)", str(raises.exception)) - - def test_constructor_empty_but_typeable(self): - args = [np.int32(1), 10., 1 + 3j, [7], [17., 14.], np.array([10])] - pyfunc = list_constructor_empty_but_typeable - for arg in args: - cfunc = 
jit(nopython=True)(pyfunc) - expected = pyfunc(arg) - got = cfunc(arg) - self.assertPreciseEqual(got, expected) - - def test_append(self): - self.check_unary_with_size(list_append) - - @tag('important') - def test_append_heterogeneous(self): - self.check_unary_with_size(list_append_heterogeneous, precise=False) - - def test_extend(self): - self.check_unary_with_size(list_extend) - - @tag('important') - def test_extend_heterogeneous(self): - self.check_unary_with_size(list_extend_heterogeneous, precise=False) - - def test_pop0(self): - self.check_unary_with_size(list_pop0) - - @tag('important') - def test_pop1(self): - pyfunc = list_pop1 - cfunc = jit(nopython=True)(pyfunc) - for n in [5, 40]: - for i in [0, 1, n - 2, n - 1, -1, -2, -n + 3, -n + 1]: - expected = pyfunc(n, i) - self.assertPreciseEqual(cfunc(n, i), expected) - - def test_pop_errors(self): - # XXX References are leaked when an exception is raised - self.disable_leak_check() - cfunc = jit(nopython=True)(list_pop1) - with self.assertRaises(IndexError) as cm: - cfunc(0, 5) - self.assertEqual(str(cm.exception), "pop from empty list") - with self.assertRaises(IndexError) as cm: - cfunc(1, 5) - self.assertEqual(str(cm.exception), "pop index out of range") - - def test_insert(self): - pyfunc = list_insert - cfunc = jit(nopython=True)(pyfunc) - for n in [5, 40]: - indices = [0, 1, n - 2, n - 1, n + 1, -1, -2, -n + 3, -n - 1] - for i in indices: - expected = pyfunc(n, i, 42) - self.assertPreciseEqual(cfunc(n, i, 42), expected) - - def test_len(self): - self.check_unary_with_size(list_len) - - @tag('important') - def test_getitem(self): - self.check_unary_with_size(list_getitem) - - @tag('important') - def test_setitem(self): - self.check_unary_with_size(list_setitem) - - def check_slicing2(self, pyfunc): - cfunc = jit(nopython=True)(pyfunc) - sizes = [5, 40] - for n in sizes: - indices = [0, 1, n - 2, -1, -2, -n + 3, -n - 1, -n] - for start, stop in itertools.product(indices, indices): - expected = pyfunc(n, 
start, stop) - self.assertPreciseEqual(cfunc(n, start, stop), expected) - - def test_getslice2(self): - self.check_slicing2(list_getslice2) - - def test_setslice2(self): - pyfunc = list_setslice2 - cfunc = jit(nopython=True)(pyfunc) - sizes = [5, 40] - for n, n_src in itertools.product(sizes, sizes): - indices = [0, 1, n - 2, -1, -2, -n + 3, -n - 1, -n] - for start, stop in itertools.product(indices, indices): - expected = pyfunc(n, n_src, start, stop) - self.assertPreciseEqual(cfunc(n, n_src, start, stop), expected) - - @tag('important') - def test_getslice3(self): - pyfunc = list_getslice3 - cfunc = jit(nopython=True)(pyfunc) - for n in [10]: - indices = [0, 1, n - 2, -1, -2, -n + 3, -n - 1, -n] - steps = [4, 1, -1, 2, -3] - for start, stop, step in itertools.product(indices, indices, steps): - expected = pyfunc(n, start, stop, step) - self.assertPreciseEqual(cfunc(n, start, stop, step), expected) - - @tag('important') - def test_setslice3(self): - pyfunc = list_setslice3 - cfunc = jit(nopython=True)(pyfunc) - for n in [10]: - indices = [0, 1, n - 2, -1, -2, -n + 3, -n - 1, -n] - steps = [4, 1, -1, 2, -3] - for start, stop, step in itertools.product(indices, indices, steps): - expected = pyfunc(n, start, stop, step) - self.assertPreciseEqual(cfunc(n, start, stop, step), expected) - - def test_setslice3_resize(self): - # XXX References are leaked when an exception is raised - self.disable_leak_check() - pyfunc = list_setslice3_arbitrary - cfunc = jit(nopython=True)(pyfunc) - # step == 1 => can resize - cfunc(5, 10, 0, 2, 1) - # step != 1 => cannot resize - with self.assertRaises(ValueError) as cm: - cfunc(5, 100, 0, 3, 2) - self.assertIn("cannot resize", str(cm.exception)) - - def test_delslice0(self): - self.check_unary_with_size(list_delslice0) - - def test_delslice1(self): - self.check_slicing2(list_delslice1) - - @tag('important') - def test_delslice2(self): - self.check_slicing2(list_delslice2) - - def test_invalid_slice(self): - self.disable_leak_check() - 
pyfunc = list_getslice3 - cfunc = jit(nopython=True)(pyfunc) - with self.assertRaises(ValueError) as cm: - cfunc(10, 1, 2, 0) - self.assertEqual(str(cm.exception), "slice step cannot be zero") - - def test_iteration(self): - self.check_unary_with_size(list_iteration) - - @tag('important') - def test_reverse(self): - self.check_unary_with_size(list_reverse) - - def test_contains(self): - self.check_unary_with_size(list_contains) - - def check_index_result(self, pyfunc, cfunc, args): - try: - expected = pyfunc(*args) - except ValueError: - with self.assertRaises(ValueError): - cfunc(*args) - else: - self.assertPreciseEqual(cfunc(*args), expected) - - def test_index1(self): - self.disable_leak_check() - pyfunc = list_index1 - cfunc = jit(nopython=True)(pyfunc) - for v in (0, 1, 5, 10, 99999999): - self.check_index_result(pyfunc, cfunc, (16, v)) - - def test_index2(self): - self.disable_leak_check() - pyfunc = list_index2 - cfunc = jit(nopython=True)(pyfunc) - n = 16 - for v in (0, 1, 5, 10, 99999999): - indices = [0, 1, n - 2, n - 1, n + 1, -1, -2, -n + 3, -n - 1] - for start in indices: - self.check_index_result(pyfunc, cfunc, (16, v, start)) - - def test_index3(self): - self.disable_leak_check() - pyfunc = list_index3 - cfunc = jit(nopython=True)(pyfunc) - n = 16 - for v in (0, 1, 5, 10, 99999999): - indices = [0, 1, n - 2, n - 1, n + 1, -1, -2, -n + 3, -n - 1] - for start, stop in itertools.product(indices, indices): - self.check_index_result(pyfunc, cfunc, (16, v, start, stop)) - - def test_remove(self): - pyfunc = list_remove - cfunc = jit(nopython=True)(pyfunc) - n = 16 - for v in (0, 1, 5, 15): - expected = pyfunc(n, v) - self.assertPreciseEqual(cfunc(n, v), expected) - - def test_remove_error(self): - self.disable_leak_check() - pyfunc = list_remove - cfunc = jit(nopython=True)(pyfunc) - with self.assertRaises(ValueError) as cm: - cfunc(10, 42) - self.assertEqual(str(cm.exception), "list.remove(x): x not in list") - - def test_count(self): - pyfunc = 
list_count - cfunc = jit(nopython=True)(pyfunc) - for v in range(5): - self.assertPreciseEqual(cfunc(18, v), pyfunc(18, v)) - - @unittest.skipUnless(sys.version_info >= (3, 3), - "list.clear() needs Python 3.3+") - def test_clear(self): - self.check_unary_with_size(list_clear) - - @unittest.skipUnless(sys.version_info >= (3, 3), - "list.copy() needs Python 3.3+") - def test_copy(self): - self.check_unary_with_size(list_copy) - - def check_add(self, pyfunc): - cfunc = jit(nopython=True)(pyfunc) - sizes = [0, 3, 50, 300] - for m, n in itertools.product(sizes, sizes): - expected = pyfunc(m, n) - self.assertPreciseEqual(cfunc(m, n), expected) - - def test_add(self): - self.check_add(list_add) - - def test_add_heterogeneous(self): - pyfunc = list_add_heterogeneous - cfunc = jit(nopython=True)(pyfunc) - expected = pyfunc() - self.assertEqual(cfunc(), expected) - - def test_add_inplace(self): - self.check_add(list_add_inplace) - - def test_add_inplace_heterogeneous(self): - pyfunc = list_add_inplace_heterogeneous - cfunc = jit(nopython=True)(pyfunc) - expected = pyfunc() - self.assertEqual(cfunc(), expected) - - def check_mul(self, pyfunc): - cfunc = jit(nopython=True)(pyfunc) - for n in [0, 3, 50, 300]: - for v in [1, 2, 3, 0, -1, -42]: - expected = pyfunc(n, v) - self.assertPreciseEqual(cfunc(n, v), expected) - - def test_mul(self): - self.check_mul(list_mul) - - def test_mul_inplace(self): - self.check_mul(list_mul_inplace) - - @unittest.skipUnless(sys.maxsize >= 2**32, - "need a 64-bit system to test for MemoryError") - def test_mul_error(self): - self.disable_leak_check() - pyfunc = list_mul - cfunc = jit(nopython=True)(pyfunc) - # Fail in malloc() - with self.assertRaises(MemoryError): - cfunc(1, 2**58) - # Overflow size computation when multiplying by item size - with self.assertRaises(MemoryError): - cfunc(1, 2**62) - - def test_bool(self): - pyfunc = list_bool - cfunc = jit(nopython=True)(pyfunc) - for n in [0, 1, 3]: - expected = pyfunc(n) - 
self.assertPreciseEqual(cfunc(n), expected) - - def test_list_passing(self): - # Check one can pass a list from a Numba function to another - @jit(nopython=True) - def inner(lst): - return len(lst), lst[-1] - - @jit(nopython=True) - def outer(n): - l = list(range(n)) - return inner(l) - - self.assertPreciseEqual(outer(5), (5, 4)) - - def _test_compare(self, pyfunc): - def eq(args): - self.assertIs(cfunc(*args), pyfunc(*args), - "mismatch for arguments %s" % (args,)) - - cfunc = jit(nopython=True)(pyfunc) - eq(((1, 2), (1, 2))) - eq(((1, 2, 3), (1, 2))) - eq(((1, 2), (1, 2, 3))) - eq(((1, 2, 4), (1, 2, 3))) - eq(((1.0, 2.0, 3.0), (1, 2, 3))) - eq(((1.0, 2.0, 3.5), (1, 2, 3))) - - def test_eq(self): - self._test_compare(eq_usecase) - - def test_ne(self): - self._test_compare(ne_usecase) - - def test_le(self): - self._test_compare(le_usecase) - - def test_lt(self): - self._test_compare(lt_usecase) - - def test_ge(self): - self._test_compare(ge_usecase) - - def test_gt(self): - self._test_compare(gt_usecase) - - def test_identity(self): - pyfunc = identity_usecase - cfunc = jit(nopython=True)(pyfunc) - self.assertPreciseEqual(cfunc(3), pyfunc(3)) - - def test_bool_list(self): - # Check lists of bools compile and run successfully - pyfunc = bool_list_usecase - cfunc = jit(nopython=True)(pyfunc) - self.assertPreciseEqual(cfunc(), pyfunc()) - - -class TestUnboxing(MemoryLeakMixin, TestCase): - """ - Test unboxing of Python lists into native Numba lists. 
- """ - - @contextlib.contextmanager - def assert_type_error(self, msg): - with self.assertRaises(TypeError) as raises: - yield - if msg is not None: - self.assertRegexpMatches(str(raises.exception), msg) - - def check_unary(self, pyfunc): - cfunc = jit(nopython=True)(pyfunc) - def check(arg): - expected = pyfunc(arg) - got = cfunc(arg) - self.assertPreciseEqual(got, expected) - return check - - def test_numbers(self): - check = self.check_unary(unbox_usecase) - check([1, 2]) - check([1j, 2.5j]) - - def test_tuples(self): - check = self.check_unary(unbox_usecase2) - check([(1, 2), (3, 4)]) - check([(1, 2j), (3, 4j)]) - check([(), (), ()]) - - @tag('important') - def test_list_inside_tuple(self): - check = self.check_unary(unbox_usecase3) - check((1, [2, 3, 4])) - - def test_list_of_tuples_inside_tuple(self): - check = self.check_unary(unbox_usecase4) - check((1, [(2,), (3,)])) - - def test_errors(self): - # See #1545 and #1594: error checking should ensure the list is - # homogeneous - msg = "can't unbox heterogeneous list" - pyfunc = noop - cfunc = jit(nopython=True)(pyfunc) - lst = [1, 2.5] - with self.assert_type_error(msg): - cfunc(lst) - # The list hasn't been changed (bogus reflecting) - self.assertEqual(lst, [1, 2.5]) - with self.assert_type_error(msg): - cfunc([1, 2j]) - # Same when the list is nested in a tuple or namedtuple - with self.assert_type_error(msg): - cfunc((1, [1, 2j])) - with self.assert_type_error(msg): - cfunc(Point(1, [1, 2j])) - # Issue #1638: tuples of different size. - # Note the check is really on the tuple side. 
- lst = [(1,), (2, 3)] - with self.assertRaises(TypeError) as raises: - cfunc(lst) - if utils.IS_PY3: - msg = ("can't unbox heterogeneous list: " - "tuple({0} x 1) != tuple({0} x 2)") - self.assertEqual(str(raises.exception), msg.format(types.intp)) - else: - self.assertEqual( - str(raises.exception), - "can't unbox heterogeneous list", - ) - - -class TestListReflection(MemoryLeakMixin, TestCase): - """ - Test reflection of native Numba lists on Python list objects. - """ - - def check_reflection(self, pyfunc): - cfunc = jit(nopython=True)(pyfunc) - samples = [([1., 2., 3., 4.], [0.]), - ([1., 2., 3., 4.], [5., 6., 7., 8., 9.]), - ] - for dest, src in samples: - expected = list(dest) - got = list(dest) - pyres = pyfunc(expected, src) - with self.assertRefCount(got, src): - cres = cfunc(got, src) - self.assertPreciseEqual(cres, pyres) - self.assertPreciseEqual(expected, got) - self.assertEqual(pyres[0] is expected, cres[0] is got) - del pyres, cres - - def test_reflect_simple(self): - self.check_reflection(reflect_simple) - - def test_reflect_conditional(self): - self.check_reflection(reflect_conditional) - - def test_reflect_exception(self): - """ - When the function exits with an exception, lists should still be - reflected. - """ - pyfunc = reflect_exception - cfunc = jit(nopython=True)(pyfunc) - l = [1, 2, 3] - with self.assertRefCount(l): - with self.assertRaises(ZeroDivisionError): - cfunc(l) - self.assertPreciseEqual(l, [1, 2, 3, 42]) - - @tag('important') - def test_reflect_same_list(self): - """ - When the same list object is reflected twice, behaviour should - be consistent. 
- """ - pyfunc = reflect_dual - cfunc = jit(nopython=True)(pyfunc) - pylist = [1, 2, 3] - clist = pylist[:] - expected = pyfunc(pylist, pylist) - got = cfunc(clist, clist) - self.assertPreciseEqual(expected, got) - self.assertPreciseEqual(pylist, clist) - self.assertPreciseEqual(sys.getrefcount(pylist), sys.getrefcount(clist)) - - def test_reflect_clean(self): - """ - When the list wasn't mutated, no reflection should take place. - """ - cfunc = jit(nopython=True)(noop) - # Use a complex, as Python integers can be cached - l = [12.5j] - ids = [id(x) for x in l] - cfunc(l) - self.assertEqual([id(x) for x in l], ids) - - -class ManagedListTestCase(MemoryLeakMixin, TestCase): - - def assert_list_element_precise_equal(self, expect, got): - self.assertEqual(len(expect), len(got)) - for a, b in zip(expect, got): - self.assertPreciseEqual(a, b) - - -class TestListManagedElements(ManagedListTestCase): - "Test list containing objects that need refct" - - def _check_element_equal(self, pyfunc): - cfunc = jit(nopython=True)(pyfunc) - con = [np.arange(3).astype(np.intp), np.arange(5).astype(np.intp)] - expect = list(con) - pyfunc(expect) - got = list(con) - cfunc(got) - self.assert_list_element_precise_equal( - expect=expect, got=got - ) - - def test_reflect_passthru(self): - def pyfunc(con): - pass - self._check_element_equal(pyfunc) - - def test_reflect_appended(self): - def pyfunc(con): - con.append(np.arange(10).astype(np.intp)) - - self._check_element_equal(pyfunc) - - def test_reflect_setitem(self): - def pyfunc(con): - con[1] = np.arange(10) - - self._check_element_equal(pyfunc) - - def test_reflect_popped(self): - def pyfunc(con): - con.pop() - - self._check_element_equal(pyfunc) - - def test_append(self): - def pyfunc(): - con = [] - for i in range(300): - con.append(np.arange(i, ).astype(np.intp)) - return con - - cfunc = jit(nopython=True)(pyfunc) - expect = pyfunc() - got = cfunc() - - self.assert_list_element_precise_equal( - expect=expect, got=got - ) - - def 
test_append_noret(self): - # This test make sure local dtor works - def pyfunc(): - con = [] - for i in range(300): - con.append(np.arange(i)) - c = 0.0 - for arr in con: - c += arr.sum() / (1 + arr.size) - return c - - cfunc = jit(nopython=True)(pyfunc) - expect = pyfunc() - got = cfunc() - - self.assertEqual(expect, got) - - def test_reassign_refct(self): - def pyfunc(): - con = [] - for i in range(5): - con.append(np.arange(2)) - con[0] = np.arange(4) - return con - - cfunc = jit(nopython=True)(pyfunc) - expect = pyfunc() - got = cfunc() - - self.assert_list_element_precise_equal( - expect=expect, got=got - ) - - def test_get_slice(self): - def pyfunc(): - con = [] - for i in range(5): - con.append(np.arange(2)) - return con[2:4] - - cfunc = jit(nopython=True)(pyfunc) - expect = pyfunc() - got = cfunc() - - self.assert_list_element_precise_equal( - expect=expect, got=got - ) - - def test_set_slice(self): - def pyfunc(): - con = [] - for i in range(5): - con.append(np.arange(2)) - con[1:3] = con[2:4] - return con - - cfunc = jit(nopython=True)(pyfunc) - expect = pyfunc() - got = cfunc() - - self.assert_list_element_precise_equal( - expect=expect, got=got - ) - - def test_pop(self): - def pyfunc(): - con = [] - for i in range(20): - con.append(np.arange(i + 1)) - while len(con) > 2: - con.pop() - return con - - cfunc = jit(nopython=True)(pyfunc) - expect = pyfunc() - got = cfunc() - - self.assert_list_element_precise_equal( - expect=expect, got=got - ) - - def test_pop_loc(self): - def pyfunc(): - con = [] - for i in range(1000): - con.append(np.arange(i + 1)) - while len(con) > 2: - con.pop(1) - return con - - cfunc = jit(nopython=True)(pyfunc) - expect = pyfunc() - got = cfunc() - - self.assert_list_element_precise_equal( - expect=expect, got=got - ) - - def test_del_range(self): - def pyfunc(): - con = [] - for i in range(20): - con.append(np.arange(i + 1)) - del con[3:10] - return con - - cfunc = jit(nopython=True)(pyfunc) - expect = pyfunc() - got = cfunc() - 
- self.assert_list_element_precise_equal( - expect=expect, got=got - ) - - def test_list_of_list(self): - def pyfunc(): - con = [] - for i in range(10): - con.append([0] * i) - return con - - cfunc = jit(nopython=True)(pyfunc) - expect = pyfunc() - got = cfunc() - - self.assertEqual(expect, got) - - - -def expect_reflection_failure(fn): - def wrapped(self, *args, **kwargs): - self.disable_leak_check() - with self.assertRaises(TypeError) as raises: - fn(self, *args, **kwargs) - expect_msg = 'cannot reflect element of reflected container' - self.assertIn(expect_msg, str(raises.exception)) - - return wrapped - - -class TestListOfList(ManagedListTestCase): - - def compile_and_test(self, pyfunc, *args): - from copy import deepcopy - expect_args = deepcopy(args) - expect = pyfunc(*expect_args) - - njit_args = deepcopy(args) - cfunc = jit(nopython=True)(pyfunc) - got = cfunc(*njit_args) - - self.assert_list_element_precise_equal( - expect=expect, got=got - ) - # Check reflection - self.assert_list_element_precise_equal( - expect=expect_args, got=njit_args - ) - - def test_returning_list_of_list(self): - def pyfunc(): - a = [[np.arange(i)] for i in range(4)] - return a - - self.compile_and_test(pyfunc) - - @expect_reflection_failure - def test_heterogeneous_list_error(self): - def pyfunc(x): - return x[1] - - cfunc = jit(nopython=True)(pyfunc) - l2 = [[np.zeros(i) for i in range(5)], - [np.ones(i)+1j for i in range(5)]] - l3 = [[np.zeros(i) for i in range(5)], [(1,)]] - l4 = [[1], [{1}]] - l5 = [[1], [{'a': 1}]] - - # TODO: this triggers a reflection error. 
- # Remove this line when nested reflection is supported - cfunc(l2) - - # error_cases - with self.assertRaises(TypeError) as raises: - cfunc(l2) - - self.assertIn( - ("reflected list(array(float64, 1d, C)) != " - "reflected list(array(complex128, 1d, C))"), - str(raises.exception) - ) - - with self.assertRaises(TypeError) as raises: - cfunc(l3) - - self.assertIn( - ("reflected list(array(float64, 1d, C)) != " - "reflected list((int64 x 1))"), - str(raises.exception) - ) - - with self.assertRaises(TypeError) as raises: - cfunc(l4) - self.assertIn( - "reflected list(int64) != reflected list(reflected set(int64))", - str(raises.exception) - ) - - with self.assertRaises(ValueError) as raises: - cfunc(l5) - self.assertIn( - "Cannot type list element of ", - str(raises.exception) - ) - - @expect_reflection_failure - def test_list_of_list_reflected(self): - def pyfunc(l1, l2): - l1.append(l2) - l1[-1].append(123) - - cfunc = jit(nopython=True)(pyfunc) - l1 = [[0, 1], [2, 3]] - l2 = [4, 5] - expect = list(l1), list(l2) - got = list(l1), list(l2) - pyfunc(*expect) - cfunc(*got) - self.assertEqual(expect, got) - - @expect_reflection_failure - def test_heterogeneous_list(self): - def pyfunc(x): - return x[1] - - l1 = [[np.zeros(i) for i in range(5)], [np.ones(i) for i in range(5)]] - - cfunc = jit(nopython=True)(pyfunc) - l1_got = cfunc(l1) - self.assertPreciseEqual(pyfunc(l1), l1_got) - - @expect_reflection_failure - def test_c01(self): - def bar(x): - return x.pop() - - r = [[np.zeros(0)], [np.zeros(10)*1j]] - # TODO: this triggers a reflection error. 
- # Remove this line when nested reflection is supported - self.compile_and_test(bar, r) - - with self.assertRaises(TypeError) as raises: - self.compile_and_test(bar, r) - self.assertIn( - ("reflected list(array(float64, 1d, C)) != " - "reflected list(array(complex128, 1d, C))"), - str(raises.exception), - ) - - def test_c02(self): - def bar(x): - x.append(x) - return x - - r = [[np.zeros(0)]] - - with self.assertRaises(errors.TypingError) as raises: - self.compile_and_test(bar, r) - self.assertIn( - "Invalid use of BoundFunction(list.append", - str(raises.exception), - ) - - def test_c03(self): - def bar(x): - f = x - f[0] = 1 - return f - - r = [[np.arange(3)]] - - with self.assertRaises(errors.TypingError) as raises: - self.compile_and_test(bar, r) - self.assertIn( - "invalid setitem with value of {} to element of {}".format( - typeof(1), - typeof(r[0]), - ), - str(raises.exception), - ) - - def test_c04(self): - def bar(x): - f = x - f[0][0] = 10 - return f - - r = [[np.arange(3)]] - with self.assertRaises(errors.TypingError) as raises: - self.compile_and_test(bar, r) - self.assertIn( - "invalid setitem with value of {} to element of {}".format( - typeof(10), - typeof(r[0][0]), - ), - str(raises.exception), - ) - - @unittest.skipUnless(utils.IS_PY3, "Py3 only due to ordering of error") - @expect_reflection_failure - def test_c05(self): - def bar(x): - f = x - f[0][0] = np.array([x for x in np.arange(10).astype(np.intp)]) - return f - - r = [[np.arange(3).astype(np.intp)]] - self.compile_and_test(bar, r) - - @unittest.skipUnless(utils.IS_PY3, "Py3 only due to ordering of error") - def test_c06(self): - def bar(x): - f = x - f[0][0] = np.array([x + 1j for x in np.arange(10)]) - return f - - r = [[np.arange(3)]] - with self.assertRaises(errors.TypingError) as raises: - self.compile_and_test(bar, r) - self.assertIn("invalid setitem with value", str(raises.exception)) - - @expect_reflection_failure - def test_c07(self): - self.disable_leak_check() - - def bar(x): - 
return x[-7] - - r = [[np.arange(3)]] - cfunc = jit(nopython=True)(bar) - with self.assertRaises(IndexError) as raises: - cfunc(r) - self.assertIn("getitem out of range", str(raises.exception)) - - def test_c08(self): - self.disable_leak_check() - - def bar(x): - x[5] = 7 - return x - - r = [1, 2, 3] - cfunc = jit(nopython=True)(bar) - with self.assertRaises(IndexError) as raises: - cfunc(r) - self.assertIn("setitem out of range", str(raises.exception)) - - def test_c09(self): - def bar(x): - x[-2] = 7j - return x - - r = [1, 2, 3] - with self.assertRaises(errors.TypingError) as raises: - self.compile_and_test(bar, r) - self.assertIn("invalid setitem with value", str(raises.exception)) - - @expect_reflection_failure - def test_c10(self): - def bar(x): - x[0], x[1] = x[1], x[0] - return x - - r = [[1, 2, 3], [4, 5, 6]] - self.compile_and_test(bar, r) - - @expect_reflection_failure - def test_c11(self): - def bar(x): - x[:] = x[::-1] - return x - - r = [[1, 2, 3], [4, 5, 6]] - self.compile_and_test(bar, r) - - def test_c12(self): - def bar(x): - del x[-1] - return x - - r = [x for x in range(10)] - self.compile_and_test(bar, r) - - -class Item(object): - def __init__(self, many, scalar): - self.many = many - self.scalar = scalar - - -class Container(object): - def __init__(self, n): - self.data = [[np.arange(i).astype(np.float64)] for i in range(n)] - - def more(self, n): - for i in range(n): - self.data.append([np.arange(i).astype(np.float64)]) - - -class TestListAndJitClasses(ManagedListTestCase): - def make_jitclass_element(self): - spec = [ - ('many', types.float64[:]), - ('scalar', types.float64), - ] - JCItem = jitclass(spec)(Item) - return JCItem - - def make_jitclass_container(self): - spec = { - 'data': types.List(dtype=types.List(types.float64[::1])), - } - JCContainer = jitclass(spec)(Container) - return JCContainer - - def assert_list_element_with_tester(self, tester, expect, got): - for x, y in zip(expect, got): - tester(x, y) - - def 
test_jitclass_instance_elements(self): - JCItem = self.make_jitclass_element() - - def pyfunc(xs): - xs[1], xs[0] = xs[0], xs[1] - return xs - - def eq(x, y): - self.assertPreciseEqual(x.many, y.many) - self.assertPreciseEqual(x.scalar, y.scalar) - - cfunc = jit(nopython=True)(pyfunc) - - arg = [JCItem(many=np.random.random(n + 1), scalar=n * 1.2) - for n in range(5)] - - expect_arg = list(arg) - got_arg = list(arg) - - expect_res = pyfunc(expect_arg) - got_res = cfunc(got_arg) - - self.assert_list_element_with_tester(eq, expect_arg, got_arg) - self.assert_list_element_with_tester(eq, expect_res, got_res) - - def test_jitclass_containing_list(self): - JCContainer = self.make_jitclass_container() - - expect = Container(n=4) - got = JCContainer(n=4) - self.assert_list_element_precise_equal(got.data, expect.data) - expect.more(3) - got.more(3) - self.assert_list_element_precise_equal(got.data, expect.data) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_llvm_version_check.py b/numba/numba/tests/test_llvm_version_check.py deleted file mode 100644 index 8b1b85480..000000000 --- a/numba/numba/tests/test_llvm_version_check.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import imp -import sys - -from numba import unittest_support as unittest - - -class TestLlvmVersion(unittest.TestCase): - - def test_llvmlite_version(self): - # test the system it's running on - import llvmlite - import numba - self.assertTrue(numba.__version__) - - llvmlite_version = llvmlite.__version__ - def cleanup(): - llvmlite.__version__ = llvmlite_version - self.addCleanup(cleanup) - - # explicitly test all 3 cases of version string - ver = numba._min_llvmlite_version - version_pass = '%d.%d.%d' % ver - git_version_pass = '%d.%d.%d-10-g92584ed' % ver - rc_version_pass = '%d.%d.%drc1' % (ver[0], ver[1], ver[2] + 1) - version_fail = '%d.%d.0' % (ver[0], ver[1] - 1) - git_version_fail = '%d.%d.9-10-g92584ed' % 
(ver[0], ver[1] - 1) - - ver_pass = (version_pass, git_version_pass, rc_version_pass) - ver_fail = (version_fail, git_version_fail) - for v in ver_pass: - llvmlite.__version__ = v - imp.reload(numba) - self.assertTrue(numba.__version__) - - for v in ver_fail: - with self.assertRaises(ImportError): - llvmlite.__version__ = v - imp.reload(numba) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_locals.py b/numba/numba/tests/test_locals.py deleted file mode 100644 index 162826fdb..000000000 --- a/numba/numba/tests/test_locals.py +++ /dev/null @@ -1,19 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from numba import compiler, float32 -from numba import unittest_support as unittest - -def foo(): - x = 123 - return x - - -class TestLocals(unittest.TestCase): - - def test_seed_types(self): - cres = compiler.compile_isolated(foo, (), locals={'x': float32}) - self.assertEqual(cres.signature.return_type, float32) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_looplifting.py b/numba/numba/tests/test_looplifting.py deleted file mode 100644 index cb5d5ee29..000000000 --- a/numba/numba/tests/test_looplifting.py +++ /dev/null @@ -1,498 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import numpy as np - -from numba import types, utils -from numba import unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from .support import TestCase, tag, MemoryLeakMixin - - -looplift_flags = Flags() -looplift_flags.set("enable_pyobject") -looplift_flags.set("enable_looplift") - -pyobject_looplift_flags = looplift_flags.copy() -pyobject_looplift_flags.set("enable_pyobject_looplift") - - -def lift1(x): - # Outer needs object mode because of np.empty() - a = np.empty(3) - for i in range(a.size): - # Inner is nopython-compliant - a[i] = x - return a - - -def lift2(x): - # Outer needs object mode because of np.empty() - a = np.empty((3, 
4)) - for i in range(a.shape[0]): - for j in range(a.shape[1]): - # Inner is nopython-compliant - a[i, j] = x - return a - - -def lift3(x): - # Output variable from the loop - a = np.arange(5, dtype=np.int64) - c = 0 - for i in range(a.shape[0]): - c += a[i] * x - return c - -def lift4(x): - # Output two variables from the loop - a = np.arange(5, dtype=np.int64) - c = 0 - d = 0 - for i in range(a.shape[0]): - c += a[i] * x - d += c - return c + d - -def lift5(x): - a = np.arange(4) - for i in range(a.shape[0]): - # Inner has a break statement - if i > 2: - break - return a - -def lift_gen1(x): - # Outer needs object mode because of np.empty() - a = np.empty(3) - yield 0 - for i in range(a.size): - # Inner is nopython-compliant - a[i] = x - yield np.sum(a) - -def lift_issue2561(): - np.empty(1) # This forces objectmode because no nrt - for i in range(10): - for j in range(10): - return 1 - return 2 - -def reject1(x): - a = np.arange(4) - for i in range(a.shape[0]): - # Inner returns a variable from outer "scope" => cannot loop-lift - return a - return a - - -def reject_gen1(x): - a = np.arange(4) - for i in range(a.shape[0]): - # Inner is a generator => cannot loop-lift - yield a[i] - -def reject_gen2(x): - # Outer needs object mode because of np.empty() - a = np.arange(3) - for i in range(a.size): - # Middle has a yield => cannot loop-lift - res = a[i] + x - for j in range(i): - # Inner is nopython-compliant, but the current algorithm isn't - # able to separate it. - res = res ** 2 - yield res - -def reject_npm1(x): - a = np.empty(3, dtype=np.int32) - for i in range(a.size): - # Inner uses np.arange() => cannot loop-lift unless - # enable_pyobject_looplift is enabled. 
- a[i] = np.arange(i + 1)[i] - - return a - - -class TestLoopLifting(MemoryLeakMixin, TestCase): - - def try_lift(self, pyfunc, argtypes): - cres = compile_isolated(pyfunc, argtypes, - flags=looplift_flags) - # One lifted loop - self.assertEqual(len(cres.lifted), 1) - return cres - - def assert_lifted_native(self, cres): - # Check if we have lifted in nopython mode - jitloop = cres.lifted[0] - [loopcres] = jitloop.overloads.values() - self.assertTrue(loopcres.fndesc.native) # Lifted function is native - - def check_lift_ok(self, pyfunc, argtypes, args): - """ - Check that pyfunc can loop-lift even in nopython mode. - """ - cres = self.try_lift(pyfunc, argtypes) - expected = pyfunc(*args) - got = cres.entry_point(*args) - self.assert_lifted_native(cres) - # Check return values - self.assertPreciseEqual(expected, got) - - def check_lift_generator_ok(self, pyfunc, argtypes, args): - """ - Check that pyfunc (a generator function) can loop-lift even in - nopython mode. - """ - cres = self.try_lift(pyfunc, argtypes) - expected = list(pyfunc(*args)) - got = list(cres.entry_point(*args)) - self.assert_lifted_native(cres) - # Check return values - self.assertPreciseEqual(expected, got) - - def check_no_lift(self, pyfunc, argtypes, args): - """ - Check that pyfunc can't loop-lift. - """ - cres = compile_isolated(pyfunc, argtypes, - flags=looplift_flags) - self.assertFalse(cres.lifted) - expected = pyfunc(*args) - got = cres.entry_point(*args) - # Check return values - self.assertPreciseEqual(expected, got) - - def check_no_lift_generator(self, pyfunc, argtypes, args): - """ - Check that pyfunc (a generator function) can't loop-lift. 
- """ - cres = compile_isolated(pyfunc, argtypes, - flags=looplift_flags) - self.assertFalse(cres.lifted) - expected = list(pyfunc(*args)) - got = list(cres.entry_point(*args)) - self.assertPreciseEqual(expected, got) - - def check_no_lift_nopython(self, pyfunc, argtypes, args): - """ - Check that pyfunc will fail loop-lifting if pyobject mode - is disabled inside the loop, succeed otherwise. - """ - cres = compile_isolated(pyfunc, argtypes, - flags=looplift_flags) - self.assertTrue(cres.lifted) - with self.assertTypingError(): - cres.entry_point(*args) - cres = compile_isolated(pyfunc, argtypes, - flags=pyobject_looplift_flags) - self.assertTrue(cres.lifted) - expected = pyfunc(*args) - got = cres.entry_point(*args) - self.assertPreciseEqual(expected, got) - - def test_lift1(self): - self.check_lift_ok(lift1, (types.intp,), (123,)) - - def test_lift2(self): - self.check_lift_ok(lift2, (types.intp,), (123,)) - - def test_lift3(self): - self.check_lift_ok(lift3, (types.intp,), (123,)) - - @tag('important') - def test_lift4(self): - self.check_lift_ok(lift4, (types.intp,), (123,)) - - def test_lift5(self): - self.check_lift_ok(lift5, (types.intp,), (123,)) - - def test_lift_issue2561(self): - self.check_no_lift(lift_issue2561, (), ()) - - @tag('important') - def test_lift_gen1(self): - self.check_lift_generator_ok(lift_gen1, (types.intp,), (123,)) - - def test_reject1(self): - self.check_no_lift(reject1, (types.intp,), (123,)) - - def test_reject_gen1(self): - self.check_no_lift_generator(reject_gen1, (types.intp,), (123,)) - - def test_reject_gen2(self): - self.check_no_lift_generator(reject_gen2, (types.intp,), (123,)) - - def test_reject_npm1(self): - self.check_no_lift_nopython(reject_npm1, (types.intp,), (123,)) - - -class TestLoopLiftingAnnotate(TestCase): - def test_annotate_1(self): - """ - Verify that annotation works as expected with one lifted loop - """ - from numba import jit - - # dummy function to force objmode - def bar(): - pass - - def foo(x): - 
bar() # force obj - for i in range(x.size): - x[i] += 1 - - return x - - cfoo = jit(foo) - - x = np.arange(10) - xcopy = x.copy() - r = cfoo(x) - np.testing.assert_equal(r, xcopy + 1) - - buf = utils.StringIO() - cfoo.inspect_types(file=buf) - annotation = buf.getvalue() - buf.close() - - self.assertIn("The function contains lifted loops", annotation) - line = foo.__code__.co_firstlineno + 2 # 2 lines down from func head - self.assertIn("Loop at line {line}".format(line=line), annotation) - self.assertIn("Has 1 overloads", annotation) - - def test_annotate_2(self): - """ - Verify that annotation works as expected with two lifted loops - """ - from numba import jit - - # dummy function to force objmode - def bar(): - pass - - def foo(x): - bar() # force obj - # first lifted loop - for i in range(x.size): - x[i] += 1 - # second lifted loop - for j in range(x.size): - x[j] *= 2 - return x - - cfoo = jit(foo) - - x = np.arange(10) - xcopy = x.copy() - r = cfoo(x) - np.testing.assert_equal(r, (xcopy + 1) * 2) - - buf = utils.StringIO() - cfoo.inspect_types(file=buf) - annotation = buf.getvalue() - buf.close() - - self.assertIn("The function contains lifted loops", annotation) - line1 = foo.__code__.co_firstlineno + 3 # 3 lines down from func head - line2 = foo.__code__.co_firstlineno + 6 # 6 lines down from func head - self.assertIn("Loop at line {line}".format(line=line1), annotation) - self.assertIn("Loop at line {line}".format(line=line2), annotation) - - -class TestLoopLiftingInAction(MemoryLeakMixin, TestCase): - def test_issue_734(self): - from numba import jit, void, int32, double - - @jit(void(int32, double[:]), forceobj=True) - def forloop_with_if(u, a): - if u == 0: - for i in range(a.shape[0]): - a[i] = a[i] * 2.0 - else: - for i in range(a.shape[0]): - a[i] = a[i] + 1.0 - - for u in (0, 1): - nb_a = np.arange(10, dtype='int32') - np_a = np.arange(10, dtype='int32') - forloop_with_if(u, nb_a) - forloop_with_if.py_func(u, np_a) - self.assertPreciseEqual(nb_a, 
np_a) - - def test_issue_812(self): - from numba import jit - - @jit('f8[:](f8[:])', forceobj=True) - def test(x): - res = np.zeros(len(x)) - ind = 0 - for ii in range(len(x)): - ind += 1 - res[ind] = x[ind] - if x[ind] >= 10: - break - - # Invalid loopjitting will miss the usage of `ind` in the - # following loop. - for ii in range(ind + 1, len(x)): - res[ii] = 0 - return res - - x = np.array([1., 4, 2, -3, 5, 2, 10, 5, 2, 6]) - np.testing.assert_equal(test.py_func(x), test(x)) - - def test_issue_2368(self): - from numba import jit - - def lift_issue2368(a, b): - s = 0 - for e in a: - s += e - h = b.__hash__() - return s, h - - a = np.ones(10) - b = object() - jitted = jit(lift_issue2368) - - expected = lift_issue2368(a, b) - got = jitted(a, b) - - self.assertEqual(expected[0], got[0]) - self.assertEqual(expected[1], got[1]) - - jitloop = jitted.overloads[jitted.signatures[0]].lifted[0] - [loopcres] = jitloop.overloads.values() - # assert lifted function is native - self.assertTrue(loopcres.fndesc.native) - - def test_no_iteration(self): - from numba import jit - - @jit(forceobj=True) - def test(n): - res = 0 - for i in range(n): - res = i - return res - - # loop count = 0 - self.assertEqual(test.py_func(-1), test(-1)) - - # loop count = 1 - self.assertEqual(test.py_func(1), test(1)) - - def test_invalid_argument(self): - """Test a problem caused by invalid discovery of loop argument - when a variable is used afterwards but not before. 
- - Before the fix, this will result in:: - - numba.ir.NotDefinedError: 'i' is not defined - """ - from numba import jit - - @jit(forceobj=True) - def test(arg): - if type(arg) == np.ndarray: # force object mode - if arg.ndim == 1: - result = 0.0 - j = 0 - for i in range(arg.shape[0]): - pass - else: - raise Exception - else: - result = 0.0 - i, j = 0, 0 - return result - - arg = np.arange(10) - self.assertEqual(test.py_func(arg), test(arg)) - - - def test_conditionally_defined_in_loop(self): - from numba import jit - @jit(forceobj=True) - def test(): - x = 5 - y = 0 - for i in range(2): - if i > 0: - x = 6 - y += x - self.assertEqual(test.py_func(), test()) - - def test_stack_offset_error_when_has_no_return(self): - from numba import jit - import warnings - - def pyfunc(a): - if a: - for i in range(10): - pass - - with warnings.catch_warnings(): - warnings.simplefilter("error") - - cfunc = jit(forceobj=True)(pyfunc) - self.assertEqual(pyfunc(True), cfunc(True)) - - def test_variable_scope_bug(self): - """ - https://github.com/numba/numba/issues/2179 - - Looplifting transformation is using the wrong verion of variable `h`. - """ - from numba import jit - - def bar(x): - return x - - def foo(x): - h = 0. 
- for k in range(x): - h = h + k - h = h - bar(x) - return h - - cfoo = jit(foo) - self.assertEqual(foo(10), cfoo(10)) - - def test_recompilation_loop(self): - """ - https://github.com/numba/numba/issues/2481 - """ - from numba import jit - - def foo(x, y): - # slicing to make array `x` into different layout - # to cause a new compilation of the lifted loop - A = x[::y] - c = 1 - for k in range(A.size): - object() # to force objectmode and looplifting - c = c * A[::-1][k] # the slice that is failing in static_getitem - return c - - cfoo = jit(foo) - # First run just works - args = np.arange(10), 1 - self.assertEqual(foo(*args), cfoo(*args)) - # Exactly 1 lifted loop so far - self.assertEqual(len(cfoo.overloads[cfoo.signatures[0]].lifted), 1) - lifted = cfoo.overloads[cfoo.signatures[0]].lifted[0] - # The lifted loop has 1 signature - self.assertEqual(len(lifted.signatures), 1) - # Use different argument to trigger a new compilation of the lifted loop - args = np.arange(10), -1 - self.assertEqual(foo(*args), cfoo(*args)) - # Ensure that is really a new overload for the lifted loop - self.assertEqual(len(lifted.signatures), 2) - - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_mandelbrot.py b/numba/numba/tests/test_mandelbrot.py deleted file mode 100644 index c32511aeb..000000000 --- a/numba/numba/tests/test_mandelbrot.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import print_function - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import types, utils - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -force_pyobj_flags = Flags() -force_pyobj_flags.set("force_pyobject") - - -def is_in_mandelbrot(c): - i = 0 - z = 0.0j - for i in range(100): - z = z ** 2 + c - if (z.real * z.real + z.imag * z.imag) >= 4: - return False - return True - - -class TestMandelbrot(unittest.TestCase): - - def test_mandelbrot(self): - pyfunc = is_in_mandelbrot - cr 
= compile_isolated(pyfunc, (types.complex64,)) - cfunc = cr.entry_point - - points = [0+0j, 1+0j, 0+1j, 1+1j, 0.1+0.1j] - for p in points: - self.assertEqual(cfunc(p), pyfunc(p)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_mangling.py b/numba/numba/tests/test_mangling.py deleted file mode 100644 index 9ce3bdaf3..000000000 --- a/numba/numba/tests/test_mangling.py +++ /dev/null @@ -1,41 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Test function name mangling. -The mangling affects the ABI of numba compiled binaries. -""" - -from numba import types, utils -from numba.funcdesc import default_mangler -from .support import unittest, TestCase - - -class TestMangling(TestCase): - def test_one_args(self): - fname = 'foo' - argtypes = types.int32, - name = default_mangler(fname, argtypes) - self.assertEqual(name, '_Z3fooi') - - def test_two_args(self): - fname = 'foo' - argtypes = types.int32, types.float32 - name = default_mangler(fname, argtypes) - self.assertEqual(name, '_Z3fooif') - - def test_unicode_fname(self): - fname = u'foಠ' - argtypes = types.int32, types.float32 - name = default_mangler(fname, argtypes) - self.assertIsInstance(name, str) - # manually encode it - unichar = fname[2] - enc = ''.join('${:02x}'.format(utils.asbyteint(c)) - for c in unichar.encode('utf8')) - text = 'fo' + enc - expect = '_Z{}{}if'.format(len(text), text) - self.assertEqual(name, expect) - # ensure result chars are in the right charset - self.assertRegexpMatches(name, r'^_Z[a-zA-Z0-9_\$]+$') - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_mathlib.py b/numba/numba/tests/test_mathlib.py deleted file mode 100644 index 1468f76ec..000000000 --- a/numba/numba/tests/test_mathlib.py +++ /dev/null @@ -1,670 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import itertools -import math -import sys -import warnings - -import numpy as np - -from numba import unittest_support as unittest -from 
numba.compiler import compile_isolated, Flags, utils -from numba import types, numpy_support -from numba.config import PYVERSION, IS_WIN32, IS_32BITS -from .support import TestCase, CompilationCache, tag - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -no_pyobj_flags = Flags() - - -def sin(x): - return math.sin(x) - - -def cos(x): - return math.cos(x) - - -def tan(x): - return math.tan(x) - - -def sinh(x): - return math.sinh(x) - - -def cosh(x): - return math.cosh(x) - - -def tanh(x): - return math.tanh(x) - - -def asin(x): - return math.asin(x) - - -def acos(x): - return math.acos(x) - - -def atan(x): - return math.atan(x) - - -def atan2(y, x): - return math.atan2(y, x) - - -def asinh(x): - return math.asinh(x) - - -def acosh(x): - return math.acosh(x) - - -def atanh(x): - return math.atanh(x) - - -def sqrt(x): - return math.sqrt(x) - - -def npy_sqrt(x): - return np.sqrt(x) - - -def exp(x): - return math.exp(x) - - -def expm1(x): - return math.expm1(x) - - -def log(x): - return math.log(x) - - -def log1p(x): - return math.log1p(x) - - -def log10(x): - return math.log10(x) - - -def floor(x): - return math.floor(x) - - -def ceil(x): - return math.ceil(x) - - -def trunc(x): - return math.trunc(x) - - -def isnan(x): - return math.isnan(x) - - -def isinf(x): - return math.isinf(x) - - -def isfinite(x): - return math.isfinite(x) - - -def hypot(x, y): - return math.hypot(x, y) - - -def degrees(x): - return math.degrees(x) - - -def radians(x): - return math.radians(x) - - -def erf(x): - return math.erf(x) - - -def erfc(x): - return math.erfc(x) - - -def gamma(x): - return math.gamma(x) - - -def lgamma(x): - return math.lgamma(x) - - -def pow(x, y): - return math.pow(x, y) - - -def copysign(x, y): - return math.copysign(x, y) - - -def frexp(x): - return math.frexp(x) - - -def ldexp(x, e): - return math.ldexp(x, e) - - -def get_constants(): - return math.pi, math.e - - -class TestMathLib(TestCase): - - def setUp(self): - self.ccache = 
CompilationCache() - - def test_constants(self): - self.run_nullary_func(get_constants, no_pyobj_flags) - - def run_unary(self, pyfunc, x_types, x_values, flags=enable_pyobj_flags, - prec='exact', **kwargs): - for tx, vx in zip(x_types, x_values): - cr = self.ccache.compile(pyfunc, (tx,), flags=flags) - cfunc = cr.entry_point - got = cfunc(vx) - expected = pyfunc(vx) - actual_prec = 'single' if tx is types.float32 else prec - msg = 'for input %r' % (vx,) - self.assertPreciseEqual(got, expected, prec=actual_prec, msg=msg, - **kwargs) - - def run_binary(self, pyfunc, x_types, x_values, y_values, - flags=enable_pyobj_flags, prec='exact'): - for ty, x, y in zip(x_types, x_values, y_values): - cr = self.ccache.compile(pyfunc, (ty, ty), flags=flags) - cfunc = cr.entry_point - got = cfunc(x, y) - expected = pyfunc(x, y) - actual_prec = 'single' if ty is types.float32 else prec - msg = 'for inputs (%r, %r)' % (x, y) - self.assertPreciseEqual(got, expected, prec=actual_prec, msg=msg) - - def check_predicate_func(self, pyfunc, flags=enable_pyobj_flags): - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float32, types.float32, - types.float64, types.float64, types.float64] - x_values = [0, 0, 0, 0, 0, 0, - float('inf'), 0.0, float('nan'), - float('inf'), 0.0, float('nan')] - self.run_unary(pyfunc, x_types, x_values, flags) - - def test_sin(self, flags=enable_pyobj_flags): - pyfunc = sin - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [-2, -1, -2, 2, 1, 2, .1, .2] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_sin_npm(self): - self.test_sin(flags=no_pyobj_flags) - - @unittest.skipIf(sys.platform == 'win32', - "not exactly equal on win32 (issue #597)") - def test_cos(self, flags=enable_pyobj_flags): - pyfunc = cos - x_types = [types.int16, types.int32, types.int64, - types.uint16, 
types.uint32, types.uint64, - types.float32, types.float64] - x_values = [-2, -1, -2, 2, 1, 2, .1, .2] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_cos_npm(self): - self.test_cos(flags=no_pyobj_flags) - - def test_tan(self, flags=enable_pyobj_flags): - pyfunc = tan - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [-2, -1, -2, 2, 1, 2, .1, .2] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_tan_npm(self): - self.test_tan(flags=no_pyobj_flags) - - def test_sqrt(self, flags=enable_pyobj_flags): - pyfunc = sqrt - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [2, 1, 2, 2, 1, 2, .1, .2] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_sqrt_npm(self): - self.test_sqrt(flags=no_pyobj_flags) - - def test_npy_sqrt(self, flags=enable_pyobj_flags): - pyfunc = npy_sqrt - x_values = [2, 1, 2, 2, 1, 2, .1, .2] - # XXX poor precision for int16 inputs - x_types = [types.int16, types.uint16] - self.run_unary(pyfunc, x_types, x_values, flags, prec='single') - x_types = [types.int32, types.int64, - types.uint32, types.uint64, - types.float32, types.float64] - self.run_unary(pyfunc, x_types, x_values, flags) - - def test_npy_sqrt_npm(self): - self.test_npy_sqrt(flags=no_pyobj_flags) - - def test_exp(self, flags=enable_pyobj_flags): - pyfunc = exp - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [-2, -1, -2, 2, 1, 2, .1, .2] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_exp_npm(self): - self.test_exp(flags=no_pyobj_flags) - - def test_expm1(self, flags=enable_pyobj_flags): - pyfunc = expm1 - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, 
types.uint64, - types.float32, types.float64] - x_values = [-2, -1, -2, 2, 1, 2, .1, .2] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_expm1_npm(self): - self.test_expm1(flags=no_pyobj_flags) - - def test_log(self, flags=enable_pyobj_flags): - pyfunc = log - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [1, 10, 100, 1000, 100000, 1000000, 0.1, 1.1] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_log_npm(self): - self.test_log(flags=no_pyobj_flags) - - def test_log1p(self, flags=enable_pyobj_flags): - pyfunc = log1p - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [1, 10, 100, 1000, 100000, 1000000, 0.1, 1.1] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_log1p_npm(self): - self.test_log1p(flags=no_pyobj_flags) - - def test_log10(self, flags=enable_pyobj_flags): - pyfunc = log10 - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [1, 10, 100, 1000, 100000, 1000000, 0.1, 1.1] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_log10_npm(self): - self.test_log10(flags=no_pyobj_flags) - - def test_asin(self, flags=enable_pyobj_flags): - pyfunc = asin - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [1, 1, 1, 1, 1, 1, 1., 1.] 
- self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_asin_npm(self): - self.test_asin(flags=no_pyobj_flags) - - def test_acos(self, flags=enable_pyobj_flags): - pyfunc = acos - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [1, 1, 1, 1, 1, 1, 1., 1.] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_acos_npm(self): - self.test_acos(flags=no_pyobj_flags) - - def test_atan(self, flags=enable_pyobj_flags): - pyfunc = atan - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [-2, -1, -2, 2, 1, 2, .1, .2] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_atan_npm(self): - self.test_atan(flags=no_pyobj_flags) - - def test_atan2(self, flags=enable_pyobj_flags): - pyfunc = atan2 - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [-2, -1, -2, 2, 1, 2, .1, .2] - y_values = [x * 2 for x in x_values] - self.run_binary(pyfunc, x_types, x_values, y_values, flags) - - @tag('important') - def test_atan2_npm(self): - self.test_atan2(flags=no_pyobj_flags) - - def test_asinh(self, flags=enable_pyobj_flags): - pyfunc = asinh - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [1, 1, 1, 1, 1, 1, 1., 1.] - self.run_unary(pyfunc, x_types, x_values, flags, prec='double') - - @tag('important') - def test_asinh_npm(self): - self.test_asinh(flags=no_pyobj_flags) - - def test_acosh(self, flags=enable_pyobj_flags): - pyfunc = acosh - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [1, 1, 1, 1, 1, 1, 1., 1.] 
- self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_acosh_npm(self): - self.test_acosh(flags=no_pyobj_flags) - - def test_atanh(self, flags=enable_pyobj_flags): - pyfunc = atanh - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [0, 0, 0, 0, 0, 0, 0.1, 0.1] - self.run_unary(pyfunc, x_types, x_values, flags, prec='double') - - @tag('important') - def test_atanh_npm(self): - self.test_atanh(flags=no_pyobj_flags) - - def test_sinh(self, flags=enable_pyobj_flags): - pyfunc = sinh - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [1, 1, 1, 1, 1, 1, 1., 1.] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_sinh_npm(self): - self.test_sinh(flags=no_pyobj_flags) - - def test_cosh(self, flags=enable_pyobj_flags): - pyfunc = cosh - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [1, 1, 1, 1, 1, 1, 1., 1.] 
- self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_cosh_npm(self): - self.test_cosh(flags=no_pyobj_flags) - - def test_tanh(self, flags=enable_pyobj_flags): - pyfunc = tanh - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [0, 0, 0, 0, 0, 0, 0.1, 0.1] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_tanh_npm(self): - self.test_tanh(flags=no_pyobj_flags) - - def test_floor(self, flags=enable_pyobj_flags): - pyfunc = floor - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [0, 0, 0, 0, 0, 0, 0.1, 1.9] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_floor_npm(self): - self.test_floor(flags=no_pyobj_flags) - - def test_ceil(self, flags=enable_pyobj_flags): - pyfunc = ceil - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [0, 0, 0, 0, 0, 0, 0.1, 1.9] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_ceil_npm(self): - self.test_ceil(flags=no_pyobj_flags) - - def test_trunc(self, flags=enable_pyobj_flags): - pyfunc = trunc - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [0, 0, 0, 0, 0, 0, 0.1, 1.9] - self.run_unary(pyfunc, x_types, x_values, flags) - - @tag('important') - def test_trunc_npm(self): - self.test_trunc(flags=no_pyobj_flags) - - def test_isnan(self): - self.check_predicate_func(isnan, flags=enable_pyobj_flags) - - @tag('important') - def test_isnan_npm(self): - self.check_predicate_func(isnan, flags=no_pyobj_flags) - - def test_isinf(self): - self.check_predicate_func(isinf, flags=enable_pyobj_flags) - - @tag('important') - def test_isinf_npm(self): - 
self.check_predicate_func(isinf, flags=no_pyobj_flags) - - @unittest.skipIf(utils.PYVERSION < (3, 2), "needs Python 3.2+") - def test_isfinite(self): - self.check_predicate_func(isfinite, flags=enable_pyobj_flags) - - @unittest.skipIf(utils.PYVERSION < (3, 2), "needs Python 3.2+") - def test_isfinite_npm(self): - self.check_predicate_func(isfinite, flags=no_pyobj_flags) - - def test_hypot(self, flags=enable_pyobj_flags): - pyfunc = hypot - x_types = [types.int64, types.uint64, - types.float32, types.float64] - x_values = [1, 2, 3, 4, 5, 6, .21, .34] - y_values = [x + 2 for x in x_values] - # Issue #563: precision issues with math.hypot() under Windows. - prec = 'single' if sys.platform == 'win32' else 'exact' - self.run_binary(pyfunc, x_types, x_values, y_values, flags, prec) - # Check that values that overflow in naive implementations do not - # in the numba impl - - def naive_hypot(x, y): - return math.sqrt(x * x + y * y) - for fltty in (types.float32, types.float64): - cr = self.ccache.compile(pyfunc, (fltty, fltty), flags=flags) - cfunc = cr.entry_point - dt = numpy_support.as_dtype(fltty).type - val = dt(np.finfo(dt).max / 30.) - nb_ans = cfunc(val, val) - self.assertPreciseEqual(nb_ans, pyfunc(val, val), prec='single') - self.assertTrue(np.isfinite(nb_ans)) - - with warnings.catch_warnings(): - warnings.simplefilter("error", RuntimeWarning) - self.assertRaisesRegexp(RuntimeWarning, - 'overflow encountered in .*_scalars', - naive_hypot, val, val) - - @tag('important') - def test_hypot_npm(self): - self.test_hypot(flags=no_pyobj_flags) - - def test_degrees(self, flags=enable_pyobj_flags): - pyfunc = degrees - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [1, 1, 1, 1, 1, 1, 1., 1.] 
- self.run_unary(pyfunc, x_types, x_values, flags) - - def test_degrees_npm(self): - self.test_degrees(flags=no_pyobj_flags) - - def test_radians(self, flags=enable_pyobj_flags): - pyfunc = radians - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [1, 1, 1, 1, 1, 1, 1., 1.] - self.run_unary(pyfunc, x_types, x_values, flags) - - def test_radians_npm(self): - self.test_radians(flags=no_pyobj_flags) - - def test_erf(self, flags=enable_pyobj_flags): - pyfunc = erf - x_values = [1., 1., -1., -0.0, 0.0, 0.5, 5, float('inf')] - x_types = [types.float32, types.float64] * (len(x_values) // 2) - self.run_unary(pyfunc, x_types, x_values, flags, - prec='double', ulps=2) - - def test_erf_npm(self): - self.test_erf(flags=no_pyobj_flags) - - def test_erfc(self, flags=enable_pyobj_flags): - pyfunc = erfc - x_values = [1., 1., -1., -0.0, 0.0, 0.5, 5, float('inf')] - x_types = [types.float32, types.float64] * (len(x_values) // 2) - self.run_unary(pyfunc, x_types, x_values, flags, - prec='double', ulps=4) - - def test_erfc_npm(self): - self.test_erfc(flags=no_pyobj_flags) - - @unittest.skipIf(PYVERSION == (2, 7) and IS_WIN32 and not IS_32BITS, - 'unknown error with tgamma') - def test_gamma(self, flags=enable_pyobj_flags): - pyfunc = gamma - x_values = [1., -0.9, -0.5, 0.5] - x_types = [types.float32, types.float64] * (len(x_values) // 2) - self.run_unary(pyfunc, x_types, x_values, flags, prec='double', ulps=3) - x_values = [-0.1, 0.1, 2.5, 10.1, 50., float('inf')] - x_types = [types.float64] * len(x_values) - self.run_unary(pyfunc, x_types, x_values, flags, - prec='double', ulps=8) - - def test_gamma_npm(self): - self.test_gamma(flags=no_pyobj_flags) - - def test_lgamma(self, flags=enable_pyobj_flags): - pyfunc = lgamma - x_values = [1., -0.9, -0.1, 0.1, 200., 1e10, 1e30, float('inf')] - x_types = [types.float32, types.float64] * (len(x_values) // 2) - self.run_unary(pyfunc, x_types, x_values, 
flags, prec='double') - - def test_lgamma_npm(self): - self.test_lgamma(flags=no_pyobj_flags) - - def test_pow(self, flags=enable_pyobj_flags): - pyfunc = pow - x_types = [types.int16, types.int32, types.int64, - types.uint16, types.uint32, types.uint64, - types.float32, types.float64] - x_values = [-2, -1, -2, 2, 1, 2, .1, .2] - y_values = [x * 2 for x in x_values] - self.run_binary(pyfunc, x_types, x_values, y_values, flags) - - @tag('important') - def test_pow_npm(self): - self.test_pow(flags=no_pyobj_flags) - - def test_copysign(self, flags=enable_pyobj_flags): - pyfunc = copysign - value_types = [types.float32, types.float64] - values = [-2, -1, -0.0, 0.0, 1, 2, float('-inf'), float('inf'), - float('nan')] - x_types, x_values, y_values = list(zip( - *itertools.product(value_types, values, values))) - self.run_binary(pyfunc, x_types, x_values, y_values, flags) - - @tag('important') - def test_copysign_npm(self): - self.test_copysign(flags=no_pyobj_flags) - - def test_frexp(self, flags=enable_pyobj_flags): - pyfunc = frexp - x_types = [types.float32, types.float64] - x_values = [-2.5, -0.0, 0.0, 3.5, - float('-inf'), float('inf'), float('nan')] - self.run_unary(pyfunc, x_types, x_values, flags, prec='exact') - - def test_frexp_npm(self): - self.test_frexp(flags=no_pyobj_flags) - - def test_ldexp(self, flags=enable_pyobj_flags): - pyfunc = ldexp - for fltty in (types.float32, types.float64): - cr = self.ccache.compile(pyfunc, (fltty, types.int32), flags=flags) - cfunc = cr.entry_point - for args in [(2.5, -2), (2.5, 1), (0.0, 0), (0.0, 1), - (-0.0, 0), (-0.0, 1), - (float('inf'), 0), (float('-inf'), 0), - (float('nan'), 0)]: - msg = 'for input %r' % (args,) - self.assertPreciseEqual(cfunc(*args), pyfunc(*args)) - - def test_ldexp_npm(self): - self.test_ldexp(flags=no_pyobj_flags) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_maxmin.py b/numba/numba/tests/test_maxmin.py deleted file mode 100644 index 12972e046..000000000 --- 
a/numba/numba/tests/test_maxmin.py +++ /dev/null @@ -1,42 +0,0 @@ -from __future__ import print_function, absolute_import, division -from numba import unittest_support as unittest -from numba.compiler import compile_isolated -from numba import types - - -def domax3(a, b, c): - return max(a, b, c) - - -def domin3(a, b, c): - return min(a, b, c) - - -class TestMaxMin(unittest.TestCase): - def test_max3(self): - pyfunc = domax3 - argtys = (types.int32, types.float32, types.double) - cres = compile_isolated(pyfunc, argtys) - cfunc = cres.entry_point - - a = 1 - b = 2 - c = 3 - - self.assertEqual(pyfunc(a, b, c), cfunc(a, b, c)) - - def test_min3(self): - pyfunc = domin3 - argtys = (types.int32, types.float32, types.double) - cres = compile_isolated(pyfunc, argtys) - cfunc = cres.entry_point - - a = 1 - b = 2 - c = 3 - - self.assertEqual(pyfunc(a, b, c), cfunc(a, b, c)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_multi3.py b/numba/numba/tests/test_multi3.py deleted file mode 100644 index 73b2789f8..000000000 --- a/numba/numba/tests/test_multi3.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import random - -import numpy as np - -from numba import njit, types -from numba import unittest_support as unittest - -class TestMulti3(unittest.TestCase): - """ - This test is only relevant for 32-bit architectures. - - Test __multi3 implementation in _helperlib.c. - The symbol defines a i128 multiplication. - It is necessary for working around an issue in LLVM (see issue #969). - The symbol does not exist in 32-bit platform, and should not be used by - LLVM. However, optimization passes will create i65 multiplication that - is then lowered to __multi3. 
- """ - def test_multi3(self): - @njit("(int64,)") - def func(x): - res = 0 - for i in range(x): - res += i - return res - - x_cases = [-1, 0, 1, 3, 4, 8, - 0xffffffff - 1, 0xffffffff, 0xffffffff + 1, - 0x123456789abcdef, -0x123456789abcdef] - for _ in range(500): - x_cases.append(random.randint(0, 0xffffffff)) - - def expected(x): - if x <= 0: return 0 - return ((x * (x - 1)) // 2) & (2**64 - 1) - - for x in x_cases: - self.assertEqual(expected(x), func(x)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_nan.py b/numba/numba/tests/test_nan.py deleted file mode 100644 index 951ed6ea2..000000000 --- a/numba/numba/tests/test_nan.py +++ /dev/null @@ -1,41 +0,0 @@ -from __future__ import print_function -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import types - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -no_pyobj_flags = Flags() - - -def isnan(x): - return x != x - -def isequal(x): - return x == x - - -class TestNaN(unittest.TestCase): - - def test_nans(self, flags=enable_pyobj_flags): - pyfunc = isnan - cr = compile_isolated(pyfunc, (types.float64,), flags=flags) - cfunc = cr.entry_point - - self.assertTrue(cfunc(float('nan'))) - self.assertFalse(cfunc(1.0)) - - pyfunc = isequal - cr = compile_isolated(pyfunc, (types.float64,), flags=flags) - cfunc = cr.entry_point - - self.assertFalse(cfunc(float('nan'))) - self.assertTrue(cfunc(1.0)) - - def test_nans_npm(self): - self.test_nans(flags=no_pyobj_flags) - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/tests/test_nested_calls.py b/numba/numba/tests/test_nested_calls.py deleted file mode 100644 index 8647a5f36..000000000 --- a/numba/numba/tests/test_nested_calls.py +++ /dev/null @@ -1,151 +0,0 @@ -""" -Test problems in nested calls. -Usually due to invalid type conversion between function boundaries. 
-""" - -from __future__ import print_function, division, absolute_import - -from numba import int32, int64 -from numba import jit, generated_jit, types -from numba import unittest_support as unittest -from .support import TestCase, tag - - -@jit(nopython=True) -def f_inner(a, b, c): - return a, b, c - -def f(x, y, z): - return f_inner(x, c=y, b=z) - -@jit(nopython=True) -def g_inner(a, b=2, c=3): - return a, b, c - -def g(x, y, z): - return g_inner(x, b=y), g_inner(a=z, c=x) - -@jit(nopython=True) -def star_inner(a=5, *b): - return a, b - -def star(x, y, z): - return star_inner(a=x), star_inner(x, y, z) - -def star_call(x, y, z): - return star_inner(x, *y), star_inner(*z) - -@jit(nopython=True) -def argcast_inner(a, b): - if b: - # Here `a` is unified to int64 (from int32 originally) - a = int64(0) - return a - -def argcast(a, b): - return argcast_inner(int32(a), b) - -@generated_jit(nopython=True) -def generated_inner(x, y=5, z=6): - if isinstance(x, types.Complex): - def impl(x, y, z): - return x + y, z - else: - def impl(x, y, z): - return x - y, z - return impl - -def call_generated(a, b): - return generated_inner(a, z=b) - - -class TestNestedCall(TestCase): - - def compile_func(self, pyfunc, objmode=False): - def check(*args, **kwargs): - expected = pyfunc(*args, **kwargs) - result = f(*args, **kwargs) - self.assertPreciseEqual(result, expected) - flags = dict(forceobj=True) if objmode else dict(nopython=True) - f = jit(**flags)(pyfunc) - return f, check - - def test_boolean_return(self): - @jit(nopython=True) - def inner(x): - return not x - - @jit(nopython=True) - def outer(x): - if inner(x): - return True - else: - return False - - self.assertFalse(outer(True)) - self.assertTrue(outer(False)) - - @tag('important') - def test_named_args(self, objmode=False): - """ - Test a nested function call with named (keyword) arguments. 
- """ - cfunc, check = self.compile_func(f, objmode) - check(1, 2, 3) - check(1, y=2, z=3) - - def test_named_args_objmode(self): - self.test_named_args(objmode=True) - - @tag('important') - def test_default_args(self, objmode=False): - """ - Test a nested function call using default argument values. - """ - cfunc, check = self.compile_func(g, objmode) - check(1, 2, 3) - check(1, y=2, z=3) - - def test_default_args_objmode(self): - self.test_default_args(objmode=True) - - @tag('important') - def test_star_args(self): - """ - Test a nested function call to a function with *args in its signature. - """ - cfunc, check = self.compile_func(star) - check(1, 2, 3) - - @tag('important') - def test_star_call(self, objmode=False): - """ - Test a function call with a *args. - """ - cfunc, check = self.compile_func(star_call, objmode) - check(1, (2,), (3,)) - - def test_star_call_objmode(self): - self.test_star_call(objmode=True) - - def test_argcast(self): - """ - Issue #1488: implicitly casting an argument variable should not - break nested calls. - """ - cfunc, check = self.compile_func(argcast) - check(1, 0) - check(1, 1) - - @tag('important') - def test_call_generated(self): - """ - Test a nested function call to a generated jit function. 
- """ - cfunc = jit(nopython=True)(call_generated) - self.assertPreciseEqual(cfunc(1, 2), (-4, 2)) - self.assertPreciseEqual(cfunc(1j, 2), (1j + 5, 2)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_np_functions.py b/numba/numba/tests/test_np_functions.py deleted file mode 100644 index d34604342..000000000 --- a/numba/numba/tests/test_np_functions.py +++ /dev/null @@ -1,595 +0,0 @@ -# Tests numpy methods of -from __future__ import print_function, absolute_import, division - -import itertools -import math -import sys - -import numpy as np - -from numba import unittest_support as unittest -from numba.compiler import compile_isolated, Flags, utils -from numba import jit, typeof, types -from numba.numpy_support import version as np_version -from numba.errors import TypingError -from .support import TestCase, CompilationCache - -no_pyobj_flags = Flags() -no_pyobj_flags.set("nrt") - - -def sinc(x): - return np.sinc(x) - -def angle1(x): - return np.angle(x) - -def angle2(x, deg): - return np.angle(x, deg) - -def diff1(a): - return np.diff(a) - -def diff2(a, n): - return np.diff(a, n) - -def bincount1(a): - return np.bincount(a) - -def bincount2(a, w): - return np.bincount(a, weights=w) - -def searchsorted(a, v): - return np.searchsorted(a, v) - -def searchsorted_left(a, v): - return np.searchsorted(a, v, side='left') - -def searchsorted_right(a, v): - return np.searchsorted(a, v, side='right') - -def digitize(*args): - return np.digitize(*args) - -def histogram(*args): - return np.histogram(*args) - -def machar(*args): - return np.MachAr() - -def iinfo(*args): - return np.iinfo(*args) - -def finfo(*args): - return np.finfo(*args) - -def finfo_machar(*args): - return np.finfo(*args).machar - -def correlate(a, v): - return np.correlate(a, v) - -def convolve(a, v): - return np.convolve(a, v) - - -class TestNPFunctions(TestCase): - """ - Tests for various Numpy functions. 
- """ - - def setUp(self): - self.ccache = CompilationCache() - self.rnd = np.random.RandomState(42) - - def run_unary(self, pyfunc, x_types, x_values, flags=no_pyobj_flags, - func_extra_types=None, func_extra_args=None, - ignore_sign_on_zero=False, abs_tol=None, **kwargs): - """ - Runs tests for a unary function operating in the numerical real space. - - Parameters - ---------- - pyfunc : a python function definition holding that calls the numpy - functions to be tested. - x_types: the types of the values being tested, see numba.types - x_values: the numerical values of the values to be tested - flags: flags to pass to the CompilationCache::ccache::compile function - func_extra_types: the types of additional arguments to the numpy - function - func_extra_args: additional arguments to the numpy function - ignore_sign_on_zero: boolean as to whether to allow zero values - with incorrect signs to be considered equal - prec: the required precision match, see assertPreciseEqual - - Notes: - ------ - x_types and x_values must have the same length - - """ - for tx, vx in zip(x_types, x_values): - if func_extra_args is None: - func_extra_types = func_extra_args = [()] - for xtypes, xargs in zip(func_extra_types, func_extra_args): - cr = self.ccache.compile(pyfunc, (tx,) + xtypes, - flags=flags) - cfunc = cr.entry_point - got = cfunc(vx, *xargs) - expected = pyfunc(vx, *xargs) - try: - scalty = tx.dtype - except AttributeError: - scalty = tx - prec = ('single' - if scalty in (types.float32, types.complex64) - else 'double') - msg = 'for input %r with prec %r' % (vx, prec) - self.assertPreciseEqual(got, expected, - prec=prec, - msg=msg, - ignore_sign_on_zero= - ignore_sign_on_zero, - abs_tol=abs_tol, **kwargs) - - def test_sinc(self): - """ - Tests the sinc() function. - This test is purely to assert numerical computations are correct. 
- """ - - # Ignore sign of zeros, this will need masking depending on numpy - # version once the fix to numpy complex division is in upstream - # See: https://github.com/numpy/numpy/pull/6699 - isoz = True - - # Testing sinc(1.) leads to sin(pi)/pi, which is below machine - # precision in practice on most machines. Small floating point - # differences in sin() etc. may lead to large differences in the result - # that are at a range that is inaccessible using standard width - # floating point representations. - # e.g. Assume float64 type. - # sin(pi) ~= 1e-16, but should be zero - # sin(pi)/pi ~= 1e-17, should be zero, error carried from above - # float64 has log10(2^53)~=15.9 digits of precision and the magnitude - # change in the alg is > 16 digits (1.0...0 -> 0.0...0), - # so comparison via ULP is invalid. - # We therefore opt to assume that values under machine precision are - # equal in this case. - tol = "eps" - - pyfunc = sinc - - def check(x_types, x_values, **kwargs): - self.run_unary(pyfunc, x_types, x_values, - ignore_sign_on_zero=isoz, abs_tol=tol, - **kwargs) - - # real domain scalar context - x_values = [1., -1., 0.0, -0.0, 0.5, -0.5, 5, -5, 5e-21, -5e-21] - x_types = [types.float32, types.float64] * (len(x_values) // 2) - check(x_types, x_values) - - # real domain vector context - x_values = [np.array(x_values, dtype=np.float64)] - x_types = [typeof(v) for v in x_values] - check(x_types, x_values) - - # complex domain scalar context - x_values = [1.+0j, -1+0j, 0.0+0.0j, -0.0+0.0j, 0+1j, 0-1j, 0.5+0.0j, - -0.5+0.0j, 0.5+0.5j, -0.5-0.5j, 5+5j, -5-5j, - # the following are to test sin(x)/x for small x - 5e-21+0j, -5e-21+0j, 5e-21j, +(0-5e-21j) - ] - x_types = [types.complex64, types.complex128] * (len(x_values) // 2) - check(x_types, x_values, ulps=2) - - # complex domain vector context - x_values = [np.array(x_values, dtype=np.complex128)] - x_types = [typeof(v) for v in x_values] - check(x_types, x_values, ulps=2) - - - def test_angle(self, 
flags=no_pyobj_flags): - """ - Tests the angle() function. - This test is purely to assert numerical computations are correct. - """ - pyfunc1 = angle1 - pyfunc2 = angle2 - - def check(x_types, x_values): - # angle(x) - self.run_unary(pyfunc1, x_types, x_values) - # angle(x, deg) - xtra_values = [(True,), (False,)] - xtra_types = [(types.bool_,)] * len(xtra_values) - self.run_unary(pyfunc2, x_types, x_values, - func_extra_types=xtra_types, - func_extra_args=xtra_values,) - - # real domain scalar context - x_values = [1., -1., 0.0, -0.0, 0.5, -0.5, 5, -5] - x_types = [types.float32, types.float64] * (len(x_values) // 2 + 1) - check(x_types, x_values) - - # real domain vector context - x_values = [np.array(x_values, dtype=np.float64)] - x_types = [typeof(v) for v in x_values] - check(x_types, x_values) - - # complex domain scalar context - x_values = [1.+0j, -1+0j, 0.0+0.0j, -0.0+0.0j, 1j, -1j, 0.5+0.0j, - -0.5+0.0j, 0.5+0.5j, -0.5-0.5j, 5+5j, -5-5j] - x_types = [types.complex64, types.complex128] * (len(x_values) // 2 + 1) - check(x_types, x_values) - - # complex domain vector context - x_values = np.array(x_values) - x_types = [types.complex64, types.complex128] - check(x_types, x_values) - - - def diff_arrays(self): - """ - Some test arrays for np.diff() - """ - a = np.arange(12) ** 3 - yield a - b = a.reshape((3, 4)) - yield b - c = np.arange(24).reshape((3, 2, 4)) ** 3 - yield c - - def test_diff1(self): - pyfunc = diff1 - cfunc = jit(nopython=True)(pyfunc) - for arr in self.diff_arrays(): - expected = pyfunc(arr) - got = cfunc(arr) - self.assertPreciseEqual(expected, got) - - # 0-dim array - a = np.array(42) - with self.assertTypingError(): - cfunc(a) - - def test_diff2(self): - pyfunc = diff2 - cfunc = jit(nopython=True)(pyfunc) - for arr in self.diff_arrays(): - size = arr.shape[-1] - for n in (0, 1, 2, 3, size - 1, size, size + 1, 421): - expected = pyfunc(arr, n) - got = cfunc(arr, n) - self.assertPreciseEqual(expected, got) - - # 0-dim array - arr = 
np.array(42) - with self.assertTypingError(): - cfunc(arr, 1) - # Invalid `n` - arr = np.arange(10) - for n in (-1, -2, -42): - with self.assertRaises(ValueError) as raises: - cfunc(arr, n) - self.assertIn("order must be non-negative", str(raises.exception)) - - def bincount_sequences(self): - """ - Some test sequences for np.bincount() - """ - a = [1, 2, 5, 2, 3, 20] - b = np.array([5, 8, 42, 5]) - c = self.rnd.randint(0, 100, size=300).astype(np.int8) - return (a, b, c) - - def test_bincount1(self): - pyfunc = bincount1 - cfunc = jit(nopython=True)(pyfunc) - for seq in self.bincount_sequences(): - expected = pyfunc(seq) - got = cfunc(seq) - self.assertPreciseEqual(expected, got) - - # Negative input - with self.assertRaises(ValueError) as raises: - cfunc([2, -1]) - self.assertIn("first argument must be non-negative", - str(raises.exception)) - - def test_bincount2(self): - pyfunc = bincount2 - cfunc = jit(nopython=True)(pyfunc) - for seq in self.bincount_sequences(): - w = [math.sqrt(x) - 2 for x in seq] - # weights as list, then array - for weights in (w, np.array(w)): - expected = pyfunc(seq, weights) - got = cfunc(seq, weights) - self.assertPreciseEqual(expected, got) - - # Negative input - with self.assertRaises(ValueError) as raises: - cfunc([2, -1], [0, 0]) - self.assertIn("first argument must be non-negative", - str(raises.exception)) - - # Mismatching input sizes - with self.assertRaises(ValueError) as raises: - cfunc([2, -1], [0]) - self.assertIn("weights and list don't have the same length", - str(raises.exception)) - - def test_searchsorted(self): - pyfunc = searchsorted - cfunc = jit(nopython=True)(pyfunc) - - pyfunc_left = searchsorted_left - cfunc_left = jit(nopython=True)(pyfunc_left) - - pyfunc_right = searchsorted_right - cfunc_right = jit(nopython=True)(pyfunc_right) - - def check(a, v): - expected = pyfunc(a, v) - got = cfunc(a, v) - self.assertPreciseEqual(expected, got) - - expected = pyfunc_left(a, v) - got = cfunc_left(a, v) - 
self.assertPreciseEqual(expected, got) - - expected = pyfunc_right(a, v) - got = cfunc_right(a, v) - self.assertPreciseEqual(expected, got) - - # First with integer values (no NaNs) - bins = np.arange(5) ** 2 - values = np.arange(20) - 1 - - for a in (bins, list(bins)): - # Scalar values - for v in values: - check(a, v) - # Array values - for v in (values, values.reshape((4, 5))): - check(a, v) - # Sequence values - check(a, list(values)) - - # Second with float values (including NaNs) - bins = np.float64(list(bins) + [float('nan')] * 7) / 2.0 - values = np.arange(20) - 0.5 - - for a in (bins, list(bins)): - # Scalar values - for v in values: - check(a, v) - # Array values - for v in (values, values.reshape((4, 5))): - check(a, v) - # Sequence values - check(a, list(values)) - - # nonsense value for 'side' raises TypingError - def bad_side(a, v): - return np.searchsorted(a, v, side='nonsense') - cfunc = jit(nopython=True)(bad_side) - with self.assertTypingError(): - cfunc([1,2], 1) - - # non-constant value for 'side' raises TypingError - def nonconst_side(a, v, side='left'): - return np.searchsorted(a, v, side=side) - cfunc = jit(nopython=True)(nonconst_side) - with self.assertTypingError(): - cfunc([1,2], 1, side='right') - - def test_digitize(self): - pyfunc = digitize - cfunc = jit(nopython=True)(pyfunc) - - def check(*args): - expected = pyfunc(*args) - got = cfunc(*args) - self.assertPreciseEqual(expected, got) - - values = np.float64((0, 0.99, 1, 4.4, 4.5, 7, 8, 9, 9.5, - float('inf'), float('-inf'), float('nan'))) - assert len(values) == 12 - self.rnd.shuffle(values) - - bins1 = np.float64([1, 3, 4.5, 8]) - bins2 = np.float64([1, 3, 4.5, 8, float('inf'), float('-inf')]) - bins3 = np.float64([1, 3, 4.5, 8, float('inf'), float('-inf')] - + [float('nan')] * 10) - if np_version >= (1, 10): - all_bins = [bins1, bins2, bins3] - xs = [values, values.reshape((3, 4))] - else: - # Numpy < 1.10 had trouble with NaNs and N-d arrays - all_bins = [bins1, bins2] - xs = 
[values] - - - # 2-ary digitize() - for bins in all_bins: - bins.sort() - for x in xs: - check(x, bins) - check(x, bins[::-1]) - - # 3-ary digitize() - for bins in all_bins: - bins.sort() - for right in (True, False): - check(values, bins, right) - check(values, bins[::-1], right) - - # Sequence input - check(list(values), bins1) - - def test_histogram(self): - pyfunc = histogram - cfunc = jit(nopython=True)(pyfunc) - - def check(*args): - pyhist, pybins = pyfunc(*args) - chist, cbins = cfunc(*args) - self.assertPreciseEqual(pyhist, chist) - # There can be a slight discrepancy in the linspace() result - # when `bins` is an integer... - self.assertPreciseEqual(pybins, cbins, prec='double', ulps=2) - - def check_values(values): - # Explicit bins array - # (note Numpy seems to not support NaN bins) - bins = np.float64([1, 3, 4.5, 8]) - check(values, bins) - check(values.reshape((3, 4)), bins) - - # Explicit number of bins - check(values, 7) - - # Explicit number of bins and bins range - check(values, 7, (1.0, 13.5)) - - # Implicit bins=10 - check(values) - - values = np.float64((0, 0.99, 1, 4.4, 4.5, 7, 8, - 9, 9.5, 42.5, -1.0, -0.0)) - assert len(values) == 12 - self.rnd.shuffle(values) - - check_values(values) - - def _test_correlate_convolve(self, pyfunc): - cfunc = jit(nopython=True)(pyfunc) - # only 1d arrays are accepted, test varying lengths - # and varying dtype - lengths = (1, 2, 3, 7) - dts = [np.int8, np.int32, np.int64, np.float32, np.float64, - np.complex64, np.complex128] - - for dt1, dt2, n, m in itertools.product(dts, dts, lengths, lengths): - a = np.arange(n, dtype=dt1) - v = np.arange(m, dtype=dt2) - - if np.issubdtype(dt1, np.complexfloating): - a = (a + 1j * a).astype(dt1) - if np.issubdtype(dt2, np.complexfloating): - v = (v + 1j * v).astype(dt2) - - expected = pyfunc(a, v) - got = cfunc(a, v) - self.assertPreciseEqual(expected, got) - - _a = np.arange(12).reshape(4, 3) - _b = np.arange(12) - for x, y in [(_a, _b), (_b, _a)]: - with 
self.assertRaises(TypingError) as raises: - cfunc(x, y) - msg = 'only supported on 1D arrays' - self.assertIn(msg, str(raises.exception)) - - def test_correlate(self): - self._test_correlate_convolve(correlate) - # correlate supports 0 dimension arrays - _a = np.ones(shape=(0,)) - _b = np.arange(5) - cfunc = jit(nopython=True)(correlate) - for x, y in [(_a, _b), (_b, _a), (_a, _a)]: - expected = correlate(x, y) - got = cfunc(x, y) - self.assertPreciseEqual(expected, got) - - def test_convolve(self): - self._test_correlate_convolve(convolve) - # convolve raises if either array has a 0 dimension - _a = np.ones(shape=(0,)) - _b = np.arange(5) - cfunc = jit(nopython=True)(convolve) - for x, y in [(_a, _b), (_b, _a)]: - with self.assertRaises(ValueError) as raises: - cfunc(x, y) - if len(x) == 0: - self.assertIn("'a' cannot be empty", str(raises.exception)) - else: - self.assertIn("'v' cannot be empty", str(raises.exception)) - - -class TestNPMachineParameters(TestCase): - # tests np.finfo, np.iinfo, np.MachAr - - template = ''' -def foo(): - ty = np.%s - return np.%s(ty) -''' - - bits = ('bits',) if np_version >= (1, 12) else () - - def check(self, func, attrs, *args): - pyfunc = func - cfunc = jit(nopython=True)(pyfunc) - - expected = pyfunc(*args) - got = cfunc(*args) - - # check result - for attr in attrs: - self.assertPreciseEqual(getattr(expected, attr), - getattr(got, attr)) - - def create_harcoded_variant(self, basefunc, ty): - #create an instance of using the function with a hardcoded type - #and eval it into existence, return the function for use - tystr = ty.__name__ - basestr = basefunc.__name__ - funcstr = self.template % (tystr, basestr) - eval(compile(funcstr, '', 'exec')) - return locals()['foo'] - - def test_MachAr(self): - attrs = ('ibeta', 'it', 'machep', 'eps', 'negep', 'epsneg', 'iexp', - 'minexp', 'xmin', 'maxexp', 'xmax', 'irnd', 'ngrd', - 'epsilon', 'tiny', 'huge', 'precision', 'resolution',) - self.check(machar, attrs) - - def test_finfo(self): 
- types = [np.float32, np.float64, np.complex64, np.complex128] - attrs = self.bits + ('eps', 'epsneg', 'iexp', 'machep', 'max', - 'maxexp', 'negep', 'nexp', 'nmant', 'precision', - 'resolution', 'tiny',) - for ty in types: - self.check(finfo, attrs, ty(1)) - hc_func = self.create_harcoded_variant(np.finfo, ty) - self.check(hc_func, attrs) - - # check unsupported attr raises - with self.assertRaises(TypingError) as raises: - cfunc = jit(nopython=True)(finfo_machar) - cfunc(7.) - msg = "Unknown attribute 'machar' of type finfo" - self.assertIn(msg, str(raises.exception)) - - # check invalid type raises - with self.assertTypingError(): - cfunc = jit(nopython=True)(finfo) - cfunc(np.int32(7)) - - def test_iinfo(self): - # check types and instances of types - types = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, - np.uint32, np.uint64] - attrs = ('min', 'max') + self.bits - for ty in types: - self.check(iinfo, attrs, ty(1)) - hc_func = self.create_harcoded_variant(np.iinfo, ty) - self.check(hc_func, attrs) - - # check invalid type raises - with self.assertTypingError(): - cfunc = jit(nopython=True)(iinfo) - cfunc(np.float64(7)) diff --git a/numba/numba/tests/test_npdatetime.py b/numba/numba/tests/test_npdatetime.py deleted file mode 100644 index eef41b77e..000000000 --- a/numba/numba/tests/test_npdatetime.py +++ /dev/null @@ -1,762 +0,0 @@ -""" -Test np.datetime64 and np.timedelta64 support. -""" - -# NOTE: datetime64 and timedelta64 ufuncs are tested in test_ufuncs. 
- -from __future__ import print_function - -import contextlib -import itertools -import warnings - -import numpy as np - -import numba.unittest_support as unittest -from numba import config, jit, npdatetime, types, vectorize, numpy_support -from numba.errors import TypingError -from .support import TestCase, tag - - -def value_unit(val): - ty = numpy_support.from_dtype(val.dtype) - return ty.unit - - -date_units = ('Y', 'M') -time_units = ('W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs', 'as') -# All except generic ("") -all_units = date_units + time_units - - -def add_usecase(x, y): - return x + y - -def sub_usecase(x, y): - return x - y - -def mul_usecase(x, y): - return x * y - -def div_usecase(x, y): - return x / y - -def floordiv_usecase(x, y): - return x // y - -def eq_usecase(x, y): - return x == y - -def ne_usecase(x, y): - return x != y - -def lt_usecase(x, y): - return x < y - -def le_usecase(x, y): - return x <= y - -def gt_usecase(x, y): - return x > y - -def ge_usecase(x, y): - return x >= y - -def pos_usecase(x): - return +x - -def neg_usecase(x): - return -x - -def abs_usecase(x): - return abs(x) - - -def make_add_constant(const): - def add_constant(x): - return x + const - return add_constant - - -class TestModuleHelpers(TestCase): - """ - Test the various helpers in numba.npdatetime. 
- """ - - def test_can_cast_timedelta(self): - f = npdatetime.can_cast_timedelta_units - for a, b in itertools.product(date_units, time_units): - self.assertFalse(f(a, b), (a, b)) - self.assertFalse(f(b, a), (a, b)) - for unit in all_units: - self.assertFalse(f(unit, '')) - self.assertTrue(f('', unit)) - for unit in all_units + ('',): - self.assertTrue(f(unit, unit)) - - def check_units_group(group): - for i, a in enumerate(group): - for b in group[:i]: - # large into smaller is ok - self.assertTrue(f(b, a)) - # small into larger is not - self.assertFalse(f(a, b)) - - check_units_group(date_units) - check_units_group(time_units) - - def test_timedelta_conversion(self): - f = npdatetime.get_timedelta_conversion_factor - for unit in all_units + ('',): - self.assertEqual(f(unit, unit), 1) - for unit in all_units: - self.assertEqual(f('', unit), 1) - for a, b in itertools.product(time_units, date_units): - self.assertIs(f(a, b), None) - self.assertIs(f(b, a), None) - - def check_units_group(group): - for i, a in enumerate(group): - for b in group[:i]: - self.assertGreater(f(b, a), 1, (b, a)) - self.assertIs(f(a, b), None) - - check_units_group(date_units) - check_units_group(time_units) - - # Check some hand-picked values - self.assertEqual(f('Y', 'M'), 12) - self.assertEqual(f('W', 'h'), 24 * 7) - self.assertEqual(f('W', 'm'), 24 * 7 * 60) - self.assertEqual(f('W', 'us'), 24 * 7 * 3600 * 1000 * 1000) - - def test_datetime_timedelta_scaling(self): - f = npdatetime.get_datetime_timedelta_conversion - def check_error(dt_unit, td_unit): - with self.assertRaises(RuntimeError): - f(dt_unit, td_unit) - # Cannot combine a Y or M timedelta64 with a finer-grained datetime64 - for dt_unit, td_unit in itertools.product(time_units, date_units): - check_error(dt_unit, td_unit) - # Sanity check that all other unit pairs can be converted, we'll - # check individual results below - for dt_unit, td_unit in itertools.product(time_units, time_units): - f(dt_unit, td_unit) - for dt_unit, 
td_unit in itertools.product(date_units, time_units): - f(dt_unit, td_unit) - for dt_unit, td_unit in itertools.product(date_units, date_units): - f(dt_unit, td_unit) - # No-op conversions - for unit in all_units: - self.assertEqual(f(unit, unit), (unit, 1, 1)) - self.assertEqual(f(unit, ''), (unit, 1, 1)) - self.assertEqual(f('', unit), ('', 1, 1)) - self.assertEqual(f('', ''), ('', 1, 1)) - # "Regular" values - self.assertEqual(f('Y', 'M'), ('M', 12, 1)) - self.assertEqual(f('M', 'Y'), ('M', 1, 12)) - self.assertEqual(f('W', 'D'), ('D', 7, 1)) - self.assertEqual(f('D', 'W'), ('D', 1, 7)) - self.assertEqual(f('W', 's'), ('s', 7 * 24 * 3600, 1)) - self.assertEqual(f('s', 'W'), ('s', 1, 7 * 24 * 3600)) - self.assertEqual(f('s', 'as'), ('as', 1000 ** 6, 1)) - self.assertEqual(f('as', 's'), ('as', 1, 1000 ** 6)) - # "Interesting" values - self.assertEqual(f('Y', 'D'), ('D', 97 + 400 * 365, 400)) - self.assertEqual(f('Y', 'W'), ('W', 97 + 400 * 365, 400 * 7)) - self.assertEqual(f('M', 'D'), ('D', 97 + 400 * 365, 400 * 12)) - self.assertEqual(f('M', 'W'), ('W', 97 + 400 * 365, 400 * 12 * 7)) - self.assertEqual(f('Y', 's'), ('s', (97 + 400 * 365) * 24 * 3600, 400)) - self.assertEqual(f('M', 's'), ('s', (97 + 400 * 365) * 24 * 3600, 400 * 12)) - - def test_combine_datetime_timedelta_units(self): - f = npdatetime.combine_datetime_timedelta_units - for unit in all_units: - self.assertEqual(f(unit, unit), unit) - self.assertEqual(f('', unit), unit) - self.assertEqual(f(unit, ''), unit) - self.assertEqual(f('', ''), '') - for dt_unit, td_unit in itertools.product(time_units, date_units): - self.assertIs(f(dt_unit, td_unit), None) - for dt_unit, td_unit in itertools.product(date_units, time_units): - self.assertEqual(f(dt_unit, td_unit), td_unit) - - def test_same_kind(self): - f = npdatetime.same_kind - for u in all_units: - self.assertTrue(f(u, u)) - A = ('Y', 'M', 'W', 'D') - B = ('h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs', 'as') - for a, b in itertools.product(A, A): - 
self.assertTrue(f(a, b)) - for a, b in itertools.product(B, B): - self.assertTrue(f(a, b)) - for a, b in itertools.product(A, B): - self.assertFalse(f(a, b)) - self.assertFalse(f(b, a)) - - -TD = np.timedelta64 -DT = np.datetime64 - - -class TestMiscCompiling(TestCase): - - def test_jit_explicit_signature(self): - def _check_explicit_signature(sig): - f = jit(sig, nopython=True)(add_usecase) - # Just a sanity check - args = DT(1, 'ms'), TD(2, 'us') - expected = add_usecase(*args) - self.assertPreciseEqual(f(*args), expected) - - # Test passing the signature in object form - sig = types.NPDatetime('us')(types.NPDatetime('ms'), types.NPTimedelta('us')) - _check_explicit_signature(sig) - # Same with the signature in string form - sig = "NPDatetime('us')(NPDatetime('ms'), NPTimedelta('us'))" - _check_explicit_signature(sig) - - def test_vectorize_explicit_signature(self): - def _check_explicit_signature(sig): - f = vectorize([sig], nopython=True)(mul_usecase) - # This isn't really right but we can't do better than this, - # since Numpy's ufuncs don't store the metadata of return types. 
- # Related to https://github.com/numpy/numpy/issues/5429 - self.assertPreciseEqual(f(TD(2), 3), TD(6)) - - # Test passing the signature in object form (issue #917) - sig = types.NPTimedelta('s')(types.NPTimedelta('s'), types.int64) - _check_explicit_signature(sig) - # Same with the signature in string form - sig = "NPTimedelta('s')(NPTimedelta('s'), int64)" - _check_explicit_signature(sig) - - def test_constant_datetime(self): - def check(const): - pyfunc = make_add_constant(const) - f = jit(nopython=True)(pyfunc) - x = TD(4, 'D') - expected = pyfunc(x) - self.assertPreciseEqual(f(x), expected) - check(DT('2001-01-01')) - check(DT('NaT', 'D')) - - def test_constant_timedelta(self): - def check(const): - pyfunc = make_add_constant(const) - f = jit(nopython=True)(pyfunc) - x = TD(4, 'D') - expected = pyfunc(x) - self.assertPreciseEqual(f(x), expected) - check(TD(4, 'D')) - check(TD(-4, 'D')) - check(TD('NaT', 'D')) - - -class TestTimedeltaArithmetic(TestCase): - - jitargs = dict(forceobj=True) - - def jit(self, pyfunc): - return jit(**self.jitargs)(pyfunc) - - @tag('important') - def test_add(self): - f = self.jit(add_usecase) - def check(a, b, expected): - self.assertPreciseEqual(f(a, b), expected) - self.assertPreciseEqual(f(b, a), expected) - - check(TD(1), TD(2), TD(3)) - check(TD(1, 's'), TD(2, 's'), TD(3, 's')) - # Implicit unit promotion - if not numpy_support.strict_ufunc_typing: - check(TD(1), TD(2, 's'), TD(3, 's')) - check(TD(1), TD(2, 'ms'), TD(3, 'ms')) - check(TD(1, 's'), TD(2, 'us'), TD(1000002, 'us')) - check(TD(1, 'W'), TD(2, 'D'), TD(9, 'D')) - # NaTs - check(TD('NaT'), TD(1), TD('NaT')) - check(TD('NaT', 's'), TD(1, 'D'), TD('NaT', 's')) - check(TD('NaT', 's'), TD(1, 'ms'), TD('NaT', 'ms')) - # Cannot add days and months - with self.assertRaises((TypeError, TypingError)): - f(TD(1, 'M'), TD(1, 'D')) - - @tag('important') - def test_sub(self): - f = self.jit(sub_usecase) - def check(a, b, expected): - self.assertPreciseEqual(f(a, b), expected) - 
self.assertPreciseEqual(f(b, a), -expected) - - check(TD(3), TD(2), TD(1)) - check(TD(3, 's'), TD(2, 's'), TD(1, 's')) - # Implicit unit promotion - if not numpy_support.strict_ufunc_typing: - check(TD(3), TD(2, 's'), TD(1, 's')) - check(TD(3), TD(2, 'ms'), TD(1, 'ms')) - check(TD(3, 's'), TD(2, 'us'), TD(2999998, 'us')) - check(TD(1, 'W'), TD(2, 'D'), TD(5, 'D')) - # NaTs - check(TD('NaT'), TD(1), TD('NaT')) - check(TD('NaT', 's'), TD(1, 'D'), TD('NaT', 's')) - check(TD('NaT', 's'), TD(1, 'ms'), TD('NaT', 'ms')) - # Cannot sub days to months - with self.assertRaises((TypeError, TypingError)): - f(TD(1, 'M'), TD(1, 'D')) - - def test_mul(self): - f = self.jit(mul_usecase) - def check(a, b, expected): - self.assertPreciseEqual(f(a, b), expected) - self.assertPreciseEqual(f(b, a), expected) - - # non-int64 int * timedelta64 - check(TD(3), np.uint32(2), TD(6)) - # int * timedelta64 - check(TD(3), 2, TD(6)) - check(TD(3, 'ps'), 2, TD(6, 'ps')) - check(TD('NaT', 'ps'), 2, TD('NaT', 'ps')) - # float * timedelta64 - check(TD(7), 1.5, TD(10)) - check(TD(-7), 1.5, TD(-10)) - check(TD(7, 'ps'), -1.5, TD(-10, 'ps')) - check(TD(-7), -1.5, TD(10)) - check(TD('NaT', 'ps'), -1.5, TD('NaT', 'ps')) - check(TD(7, 'ps'), float('nan'), TD('NaT', 'ps')) - # wraparound on overflow - check(TD(2**62, 'ps'), 16, TD(0, 'ps')) - - def test_div(self): - div = self.jit(div_usecase) - floordiv = self.jit(floordiv_usecase) - def check(a, b, expected): - self.assertPreciseEqual(div(a, b), expected) - self.assertPreciseEqual(floordiv(a, b), expected) - - # timedelta64 / non-int64 int - check(TD(-3, 'ps'), np.uint32(2), TD(-1, 'ps')) - # timedelta64 / int - check(TD(3), 2, TD(1)) - check(TD(-3, 'ps'), 2, TD(-1, 'ps')) - check(TD('NaT', 'ps'), 2, TD('NaT', 'ps')) - check(TD(3, 'ps'), 0, TD('NaT', 'ps')) - check(TD('NaT', 'ps'), 0, TD('NaT', 'ps')) - # timedelta64 / float - check(TD(7), 0.5, TD(14)) - check(TD(-7, 'ps'), 1.5, TD(-4, 'ps')) - check(TD('NaT', 'ps'), 2.5, TD('NaT', 'ps')) - check(TD(3, 
'ps'), 0.0, TD('NaT', 'ps')) - check(TD('NaT', 'ps'), 0.0, TD('NaT', 'ps')) - check(TD(3, 'ps'), float('nan'), TD('NaT', 'ps')) - check(TD('NaT', 'ps'), float('nan'), TD('NaT', 'ps')) - - def test_homogeneous_div(self): - div = self.jit(div_usecase) - def check(a, b, expected): - self.assertPreciseEqual(div(a, b), expected) - - # timedelta64 / timedelta64 - check(TD(7), TD(3), 7. / 3.) - if not numpy_support.strict_ufunc_typing: - check(TD(7), TD(3, 'ms'), 7. / 3.) - check(TD(7, 'us'), TD(3, 'ms'), 7. / 3000.) - check(TD(7, 'ms'), TD(3, 'us'), 7000. / 3.) - check(TD(7), TD(0), float('+inf')) - check(TD(-7), TD(0), float('-inf')) - check(TD(0), TD(0), float('nan')) - # NaTs - check(TD('nat'), TD(3), float('nan')) - check(TD(3), TD('nat'), float('nan')) - check(TD('nat'), TD(0), float('nan')) - # Cannot div months with days - with self.assertRaises((TypeError, TypingError)): - div(TD(1, 'M'), TD(1, 'D')) - - @tag('important') - def test_eq_ne(self): - eq = self.jit(eq_usecase) - ne = self.jit(ne_usecase) - def check(a, b, expected): - self.assertPreciseEqual(eq(a, b), expected) - self.assertPreciseEqual(eq(b, a), expected) - self.assertPreciseEqual(ne(a, b), not expected) - self.assertPreciseEqual(ne(b, a), not expected) - - check(TD(1), TD(2), False) - check(TD(1), TD(1), True) - check(TD(1, 's'), TD(2, 's'), False) - check(TD(1, 's'), TD(1, 's'), True) - check(TD(2000, 's'), TD(2, 's'), False) - check(TD(2000, 'ms'), TD(2, 's'), True) - check(TD(1, 'Y'), TD(12, 'M'), True) - # NaTs - check(TD('Nat'), TD('Nat'), True) - check(TD('Nat', 'ms'), TD('Nat', 's'), True) - check(TD('Nat'), TD(1), False) - # Incompatible units => timedeltas compare unequal - check(TD(1, 'Y'), TD(365, 'D'), False) - check(TD(1, 'Y'), TD(366, 'D'), False) - # ... except when both are NaT! 
- check(TD('NaT', 'W'), TD('NaT', 'D'), True) - - def test_lt_ge(self): - lt = self.jit(lt_usecase) - ge = self.jit(ge_usecase) - def check(a, b, expected): - self.assertPreciseEqual(lt(a, b), expected) - self.assertPreciseEqual(ge(a, b), not expected) - - check(TD(1), TD(2), True) - check(TD(1), TD(1), False) - check(TD(2), TD(1), False) - check(TD(1, 's'), TD(2, 's'), True) - check(TD(1, 's'), TD(1, 's'), False) - check(TD(2, 's'), TD(1, 's'), False) - check(TD(1, 'm'), TD(61, 's'), True) - check(TD(1, 'm'), TD(60, 's'), False) - # NaTs - check(TD('Nat'), TD('Nat'), False) - check(TD('Nat', 'ms'), TD('Nat', 's'), False) - check(TD('Nat'), TD(-(2**63)+1), True) - # Incompatible units => exception raised - with self.assertRaises((TypeError, TypingError)): - lt(TD(1, 'Y'), TD(365, 'D')) - with self.assertRaises((TypeError, TypingError)): - ge(TD(1, 'Y'), TD(365, 'D')) - # ... even when both are NaT - with self.assertRaises((TypeError, TypingError)): - lt(TD('NaT', 'Y'), TD('NaT', 'D')) - with self.assertRaises((TypeError, TypingError)): - ge(TD('NaT', 'Y'), TD('NaT', 'D')) - - def test_le_gt(self): - le = self.jit(le_usecase) - gt = self.jit(gt_usecase) - def check(a, b, expected): - self.assertPreciseEqual(le(a, b), expected) - self.assertPreciseEqual(gt(a, b), not expected) - - check(TD(1), TD(2), True) - check(TD(1), TD(1), True) - check(TD(2), TD(1), False) - check(TD(1, 's'), TD(2, 's'), True) - check(TD(1, 's'), TD(1, 's'), True) - check(TD(2, 's'), TD(1, 's'), False) - check(TD(1, 'm'), TD(61, 's'), True) - check(TD(1, 'm'), TD(60, 's'), True) - check(TD(1, 'm'), TD(59, 's'), False) - # NaTs - check(TD('Nat'), TD('Nat'), True) - check(TD('Nat', 'ms'), TD('Nat', 's'), True) - check(TD('Nat'), TD(-(2**63)+1), True) - # Incompatible units => exception raised - with self.assertRaises((TypeError, TypingError)): - le(TD(1, 'Y'), TD(365, 'D')) - with self.assertRaises((TypeError, TypingError)): - gt(TD(1, 'Y'), TD(365, 'D')) - # ... 
even when both are NaT - with self.assertRaises((TypeError, TypingError)): - le(TD('NaT', 'Y'), TD('NaT', 'D')) - with self.assertRaises((TypeError, TypingError)): - gt(TD('NaT', 'Y'), TD('NaT', 'D')) - - def test_pos(self): - pos = self.jit(pos_usecase) - def check(a): - self.assertPreciseEqual(pos(a), +a) - - check(TD(3)) - check(TD(-4)) - check(TD(3, 'ms')) - check(TD(-4, 'ms')) - check(TD('NaT')) - check(TD('NaT', 'ms')) - - def test_neg(self): - neg = self.jit(neg_usecase) - def check(a): - self.assertPreciseEqual(neg(a), -a) - - check(TD(3)) - check(TD(-4)) - check(TD(3, 'ms')) - check(TD(-4, 'ms')) - check(TD('NaT')) - check(TD('NaT', 'ms')) - - def test_abs(self): - f = self.jit(abs_usecase) - def check(a): - self.assertPreciseEqual(f(a), abs(a)) - - check(TD(3)) - check(TD(-4)) - check(TD(3, 'ms')) - check(TD(-4, 'ms')) - check(TD('NaT')) - check(TD('NaT', 'ms')) - - -class TestTimedeltaArithmeticNoPython(TestTimedeltaArithmetic): - - jitargs = dict(nopython=True) - - -class TestDatetimeArithmetic(TestCase): - - jitargs = dict(forceobj=True) - - def jit(self, pyfunc): - return jit(**self.jitargs)(pyfunc) - - @contextlib.contextmanager - def silence_numpy_warnings(self): - # Numpy can raise warnings when combining e.g. a generic timedelta64 - # with a non-generic datetime64. - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', - message='Implicitly casting between incompatible kinds', - category=DeprecationWarning) - yield - - @tag('important') - def test_add_sub_timedelta(self): - """ - Test `datetime64 + timedelta64` and `datetime64 - timedelta64`. - """ - add = self.jit(add_usecase) - sub = self.jit(sub_usecase) - def check(a, b, expected): - with self.silence_numpy_warnings(): - self.assertPreciseEqual(add(a, b), expected, (a, b)) - self.assertPreciseEqual(add(b, a), expected, (a, b)) - self.assertPreciseEqual(sub(a, -b), expected, (a, b)) - # Did we get it right? - self.assertPreciseEqual(a + b, expected) - - # Y + ... 
- if not numpy_support.strict_ufunc_typing: - check(DT('2014'), TD(2), DT('2016')) - check(DT('2014'), TD(2, 'Y'), DT('2016')) - check(DT('2014'), TD(2, 'M'), DT('2014-03')) - check(DT('2014'), TD(3, 'W'), DT('2014-01-16', 'W')) - check(DT('2014'), TD(4, 'D'), DT('2014-01-05')) - check(DT('2000'), TD(365, 'D'), DT('2000-12-31')) - if not numpy_support.strict_ufunc_typing: - check(DT('2001'), TD(61, 'm'), DT('2001-01-01T01:01Z')) - check(DT('2001'), TD(61, 's'), DT('2001-01-01T00:01:01Z')) - # M + ... - if not numpy_support.strict_ufunc_typing: - check(DT('2014-02'), TD(2), DT('2014-04')) - check(DT('2014-02'), TD(2, 'Y'), DT('2016-02')) - check(DT('2014-02'), TD(2, 'M'), DT('2014-04')) - check(DT('2014-02'), TD(2, 'D'), DT('2014-02-03')) - if not numpy_support.strict_ufunc_typing: - check(DT('2014-02'), TD(61, 's'), DT('2014-02-01T00:01:01Z')) - # W + ... - check(DT('2014-01-07', 'W'), TD(2, 'W'), DT('2014-01-16', 'W')) - # D + ... - check(DT('2014-02-02'), TD(27, 'D'), DT('2014-03-01')) - check(DT('2012-02-02'), TD(27, 'D'), DT('2012-02-29')) - check(DT('2012-02-02'), TD(2, 'W'), DT('2012-02-16')) - if not numpy_support.strict_ufunc_typing: - check(DT('2014-02-02'), TD(73, 'h'), DT('2014-02-05T01Z')) - # s + ... 
- check(DT('2000-01-01T01:02:03Z'), TD(2, 'h'), DT('2000-01-01T03:02:03Z')) - check(DT('2000-01-01T01:02:03Z'), TD(2, 'ms'), DT('2000-01-01T01:02:03.002Z')) - # More thorough checking with leap years and faraway years - for dt_str in ('600', '601', '604', '801', - '1900', '1904', '2200', '2300', '2304', - '2400', '6001'): - for dt_suffix in ('', '-01', '-12'): - dt = DT(dt_str + dt_suffix) - for td in [TD(2, 'D'), TD(2, 'W'), - TD(100, 'D'), TD(10000, 'D'), - TD(-100, 'D'), TD(-10000, 'D'), - TD(100, 'W'), TD(10000, 'W'), - TD(-100, 'W'), TD(-10000, 'W'), - TD(100, 'M'), TD(10000, 'M'), - TD(-100, 'M'), TD(-10000, 'M')]: - self.assertEqual(add(dt, td), dt + td, (dt, td)) - self.assertEqual(add(td, dt), dt + td, (dt, td)) - self.assertEqual(sub(dt, -td), dt + td, (dt, td)) - - # NaTs - check(DT('NaT'), TD(2), DT('NaT')) - if not numpy_support.strict_ufunc_typing: - check(DT('NaT', 's'), TD(2), DT('NaT', 's')) - check(DT('NaT', 's'), TD(2, 'h'), DT('NaT', 's')) - check(DT('NaT', 's'), TD(2, 'ms'), DT('NaT', 'ms')) - if not numpy_support.strict_ufunc_typing: - check(DT('2014'), TD('NaT'), DT('NaT', 'Y')) - check(DT('2014'), TD('NaT', 'W'), DT('NaT', 'W')) - check(DT('2014-01-01'), TD('NaT', 'W'), DT('NaT', 'D')) - if not numpy_support.strict_ufunc_typing: - check(DT('NaT', 's'), TD('NaT'), DT('NaT', 's')) - check(DT('NaT', 's'), TD('NaT', 'ms'), DT('NaT', 'ms')) - - # Cannot add datetime days and timedelta months or years - for f in (add, sub): - with self.assertRaises((TypeError, TypingError)): - f(DT(1, '2014-01-01'), TD(1, 'Y')) - with self.assertRaises((TypeError, TypingError)): - f(DT(1, '2014-01-01'), TD(1, 'M')) - - def datetime_samples(self): - dt_years = ['600', '601', '604', '1968', '1969', '1973', - '2000', '2004', '2005', '2100', '2400', '2401'] - dt_suffixes = ['', '-01', '-12', '-02-28', '-12-31', - '-01-05T12:30:56Z', '-01-05T12:30:56.008Z'] - dts = [DT(a + b) for (a, b) in itertools.product(dt_years, dt_suffixes)] - dts += [DT(s, 'W') for s in 
dt_years] - return dts - - def test_datetime_difference(self): - """ - Test `datetime64 - datetime64`. - """ - sub = self.jit(sub_usecase) - def check(a, b, expected=None): - with self.silence_numpy_warnings(): - self.assertPreciseEqual(sub(a, b), a - b, (a, b)) - self.assertPreciseEqual(sub(b, a), b - a, (a, b)) - # Did we get it right? - self.assertPreciseEqual(a - b, expected) - - check(DT('2014'), DT('2017'), TD(-3, 'Y')) - check(DT('2014-02'), DT('2017-01'), TD(-35, 'M')) - check(DT('2014-02-28'), DT('2015-03-01'), TD(-366, 'D')) - # NaTs - if not numpy_support.strict_ufunc_typing: - check(DT('NaT'), DT('2000'), TD('NaT', 'Y')) - check(DT('NaT', 'M'), DT('2000'), TD('NaT', 'M')) - check(DT('NaT', 'M'), DT('2000-01-01'), TD('NaT', 'D')) - check(DT('NaT'), DT('NaT'), TD('NaT')) - # Test many more values - with self.silence_numpy_warnings(): - dts = self.datetime_samples() - for a, b in itertools.product(dts, dts): - if (numpy_support.strict_ufunc_typing - and not npdatetime.same_kind(value_unit(a), - value_unit(b))): - continue - self.assertPreciseEqual(sub(a, b), a - b, (a, b)) - - @tag('important') - def test_comparisons(self): - # Test all datetime comparisons all at once - eq = self.jit(eq_usecase) - ne = self.jit(ne_usecase) - lt = self.jit(lt_usecase) - le = self.jit(le_usecase) - gt = self.jit(gt_usecase) - ge = self.jit(ge_usecase) - - def check_eq(a, b, expected): - with self.silence_numpy_warnings(): - self.assertPreciseEqual(eq(a, b), expected, (a, b, expected)) - self.assertPreciseEqual(eq(b, a), expected, (a, b, expected)) - self.assertPreciseEqual(ne(a, b), not expected, (a, b, expected)) - self.assertPreciseEqual(ne(b, a), not expected, (a, b, expected)) - if expected: - # If equal, then equal-ordered comparisons are true - self.assertTrue(le(a, b), (a, b)) - self.assertTrue(ge(a, b), (a, b)) - self.assertTrue(le(b, a), (a, b)) - self.assertTrue(ge(b, a), (a, b)) - # and strictly ordered comparisons are false - self.assertFalse(lt(a, b), (a, b)) - 
self.assertFalse(gt(a, b), (a, b)) - self.assertFalse(lt(b, a), (a, b)) - self.assertFalse(gt(b, a), (a, b)) - # Did we get it right? - self.assertPreciseEqual(a == b, expected) - - def check_lt(a, b, expected): - with self.silence_numpy_warnings(): - self.assertPreciseEqual(lt(a, b), expected, (a, b, expected)) - self.assertPreciseEqual(gt(b, a), expected, (a, b, expected)) - self.assertPreciseEqual(ge(a, b), not expected, (a, b, expected)) - self.assertPreciseEqual(le(b, a), not expected, (a, b, expected)) - if expected: - # If true, then values are not equal - check_eq(a, b, False) - # Did we get it right? - self.assertPreciseEqual(a < b, expected) - - check_eq(DT('2014'), DT('2017'), False) - check_eq(DT('2014'), DT('2014-01'), True) - check_eq(DT('2014'), DT('2014-01-01'), True) - check_eq(DT('2014'), DT('2014-01-01', 'W'), True) - check_eq(DT('2014-01'), DT('2014-01-01', 'W'), True) - # Yes, it's not transitive - check_eq(DT('2014-01-01'), DT('2014-01-01', 'W'), False) - check_eq(DT('2014-01-02'), DT('2014-01-06', 'W'), True) - # with times - check_eq(DT('2014-01-01T00:01:00Z', 's'), - DT('2014-01-01T00:01Z', 'm'), True) - check_eq(DT('2014-01-01T00:01:01Z', 's'), - DT('2014-01-01T00:01Z', 'm'), False) - # NaTs - if not numpy_support.strict_ufunc_typing: - check_lt(DT('NaT'), DT('2017'), True) - check_lt(DT('NaT', 'Y'), DT('2017'), True) - if not numpy_support.strict_ufunc_typing: - check_lt(DT('NaT', 'ms'), DT('2017'), True) - check_eq(DT('NaT'), DT('NaT'), True) - if not numpy_support.strict_ufunc_typing: - check_eq(DT('NaT', 'Y'), DT('NaT'), True) - check_eq(DT('NaT', 'ms'), DT('NaT', 'M'), True) - - # Check comparison between various units - dts = self.datetime_samples() - for a in dts: - # Take a number of smaller units - a_unit = a.dtype.str.split('[')[1][:-1] - i = all_units.index(a_unit) - units = all_units[i:i+6] - for unit in units: - # Force conversion - b = a.astype('M8[%s]' % unit) - if (numpy_support.strict_ufunc_typing - and not 
npdatetime.same_kind(value_unit(a), - value_unit(b))): - continue - check_eq(a, b, True) - check_lt(a, b + np.timedelta64(1, unit), True) - check_lt(b - np.timedelta64(1, unit), a, True) - - -class TestDatetimeArithmeticNoPython(TestDatetimeArithmetic): - - jitargs = dict(nopython=True) - - -class TestMetadataScalingFactor(TestCase): - """ - Tests than non-1 scaling factors are not supported in datetime64 - and timedelta64 dtypes. - """ - - def test_datetime(self, **jitargs): - eq = jit(**jitargs)(eq_usecase) - self.assertTrue(eq(DT('2014', '10Y'), DT('2010'))) - - def test_datetime_npm(self): - with self.assertTypingError(): - self.test_datetime(nopython=True) - - def test_timedelta(self, **jitargs): - eq = jit(**jitargs)(eq_usecase) - self.assertTrue(eq(TD(2, '10Y'), TD(20, 'Y'))) - - def test_timedelta_npm(self): - with self.assertTypingError(): - self.test_timedelta(nopython=True) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_nrt.py b/numba/numba/tests/test_nrt.py deleted file mode 100644 index 39c4cab5d..000000000 --- a/numba/numba/tests/test_nrt.py +++ /dev/null @@ -1,508 +0,0 @@ -from __future__ import absolute_import, division, print_function - -import math -import os -import sys -import re - -import numpy as np - -from numba import unittest_support as unittest -from numba import njit, targets, typing -from numba.compiler import compile_isolated, Flags, types -from numba.runtime import rtsys -from numba.runtime import nrtopt -from .support import MemoryLeakMixin, TestCase - -enable_nrt_flags = Flags() -enable_nrt_flags.set("nrt") - - -class Dummy(object): - alive = 0 - - def __init__(self): - type(self).alive += 1 - - def __del__(self): - type(self).alive -= 1 - - -class TestNrtMemInfoNotInitialized(unittest.TestCase): - """ - Unit test for checking the use of the NRT fails if the - initialization sequence has not been run. 
- """ - _numba_parallel_test_ = False - - def test_init_fail(self): - methods = {'library': (), - 'meminfo_new': ((), ()), - 'meminfo_alloc': ((),), - } - - for meth, args in methods.items(): - try: - with self.assertRaises(RuntimeError) as raises: - rtsys._init = False - fn = getattr(rtsys, meth) - fn(*args) - - msg = "Runtime must be initialized before use." - self.assertIn(msg, str(raises.exception)) - finally: - rtsys._init = True - - -class TestNrtMemInfo(unittest.TestCase): - """ - Unit test for core MemInfo functionality - """ - - def setUp(self): - # Reset the Dummy class - Dummy.alive = 0 - # initialize the NRT (in case the tests are run in isolation) - targets.cpu.CPUContext(typing.Context()) - - def test_meminfo_refct_1(self): - d = Dummy() - self.assertEqual(Dummy.alive, 1) - addr = 0xdeadcafe # some made up location - - mi = rtsys.meminfo_new(addr, d) - self.assertEqual(mi.refcount, 1) - del d - self.assertEqual(Dummy.alive, 1) - mi.acquire() - self.assertEqual(mi.refcount, 2) - self.assertEqual(Dummy.alive, 1) - mi.release() - self.assertEqual(mi.refcount, 1) - del mi - self.assertEqual(Dummy.alive, 0) - - def test_meminfo_refct_2(self): - d = Dummy() - self.assertEqual(Dummy.alive, 1) - addr = 0xdeadcafe # some made up location - - mi = rtsys.meminfo_new(addr, d) - self.assertEqual(mi.refcount, 1) - del d - self.assertEqual(Dummy.alive, 1) - for ct in range(100): - mi.acquire() - self.assertEqual(mi.refcount, 1 + 100) - self.assertEqual(Dummy.alive, 1) - for _ in range(100): - mi.release() - self.assertEqual(mi.refcount, 1) - del mi - self.assertEqual(Dummy.alive, 0) - - @unittest.skipIf(sys.version_info < (3,), "memoryview not supported") - def test_fake_memoryview(self): - d = Dummy() - self.assertEqual(Dummy.alive, 1) - addr = 0xdeadcafe # some made up location - - mi = rtsys.meminfo_new(addr, d) - self.assertEqual(mi.refcount, 1) - mview = memoryview(mi) - self.assertEqual(mi.refcount, 1) - self.assertEqual(addr, mi.data) - 
self.assertFalse(mview.readonly) - self.assertIs(mi, mview.obj) - self.assertTrue(mview.c_contiguous) - self.assertEqual(mview.itemsize, 1) - self.assertEqual(mview.ndim, 1) - del d - del mi - - self.assertEqual(Dummy.alive, 1) - del mview - self.assertEqual(Dummy.alive, 0) - - @unittest.skipIf(sys.version_info < (3,), "memoryview not supported") - def test_memoryview(self): - from ctypes import c_uint32, c_void_p, POINTER, cast - - dtype = np.dtype(np.uint32) - bytesize = dtype.itemsize * 10 - mi = rtsys.meminfo_alloc(bytesize, safe=True) - addr = mi.data - c_arr = cast(c_void_p(mi.data), POINTER(c_uint32 * 10)) - # Check 0xCB-filling - for i in range(10): - self.assertEqual(c_arr.contents[i], 0xcbcbcbcb) - - # Init array with ctypes - for i in range(10): - c_arr.contents[i] = i + 1 - mview = memoryview(mi) - self.assertEqual(mview.nbytes, bytesize) - self.assertFalse(mview.readonly) - self.assertIs(mi, mview.obj) - self.assertTrue(mview.c_contiguous) - self.assertEqual(mview.itemsize, 1) - self.assertEqual(mview.ndim, 1) - del mi - arr = np.ndarray(dtype=dtype, shape=mview.nbytes // dtype.itemsize, - buffer=mview) - del mview - # Modify array with NumPy - np.testing.assert_equal(np.arange(arr.size) + 1, arr) - - arr += 1 - - # Check value reflected in ctypes - for i in range(10): - self.assertEqual(c_arr.contents[i], i + 2) - - self.assertEqual(arr.ctypes.data, addr) - del arr - # At this point the memory is zero filled - # We can't check this deterministically because the memory could be - # consumed by another thread. 
- - def test_buffer(self): - from ctypes import c_uint32, c_void_p, POINTER, cast - - dtype = np.dtype(np.uint32) - bytesize = dtype.itemsize * 10 - mi = rtsys.meminfo_alloc(bytesize, safe=True) - self.assertEqual(mi.refcount, 1) - addr = mi.data - c_arr = cast(c_void_p(addr), POINTER(c_uint32 * 10)) - # Check 0xCB-filling - for i in range(10): - self.assertEqual(c_arr.contents[i], 0xcbcbcbcb) - - # Init array with ctypes - for i in range(10): - c_arr.contents[i] = i + 1 - - arr = np.ndarray(dtype=dtype, shape=bytesize // dtype.itemsize, - buffer=mi) - self.assertEqual(mi.refcount, 1) - del mi - # Modify array with NumPy - np.testing.assert_equal(np.arange(arr.size) + 1, arr) - - arr += 1 - - # Check value reflected in ctypes - for i in range(10): - self.assertEqual(c_arr.contents[i], i + 2) - - self.assertEqual(arr.ctypes.data, addr) - del arr - # At this point the memory is zero filled - # We can't check this deterministically because the memory could be - # consumed by another thread. - - -@unittest.skipUnless(sys.version_info >= (3, 4), - "need Python 3.4+ for the tracemalloc module") -class TestTracemalloc(unittest.TestCase): - """ - Test NRT-allocated memory can be tracked by tracemalloc. - """ - - def measure_memory_diff(self, func): - import tracemalloc - tracemalloc.start() - try: - before = tracemalloc.take_snapshot() - # Keep the result and only delete it after taking a snapshot - res = func() - after = tracemalloc.take_snapshot() - del res - return after.compare_to(before, 'lineno') - finally: - tracemalloc.stop() - - def test_snapshot(self): - N = 1000000 - dtype = np.int8 - - @njit - def alloc_nrt_memory(): - """ - Allocate and return a large array. 
- """ - return np.empty(N, dtype) - - def keep_memory(): - return alloc_nrt_memory() - - def release_memory(): - alloc_nrt_memory() - - alloc_lineno = keep_memory.__code__.co_firstlineno + 1 - - # Warmup JIT - alloc_nrt_memory() - - # The large NRT-allocated array should appear topmost in the diff - diff = self.measure_memory_diff(keep_memory) - stat = diff[0] - # There is a slight overhead, so the allocated size won't exactly be N - self.assertGreaterEqual(stat.size, N) - self.assertLess(stat.size, N * 1.015, - msg=("Unexpected allocation overhead encountered. " - "May be due to difference in CPython " - "builds or running under coverage")) - frame = stat.traceback[0] - self.assertEqual(os.path.basename(frame.filename), "test_nrt.py") - self.assertEqual(frame.lineno, alloc_lineno) - - # If NRT memory is released before taking a snapshot, it shouldn't - # appear. - diff = self.measure_memory_diff(release_memory) - stat = diff[0] - # Something else appears, but nothing the magnitude of N - self.assertLess(stat.size, N * 0.01) - - -class TestNRTIssue(MemoryLeakMixin, TestCase): - def test_issue_with_refct_op_pruning(self): - """ - GitHub Issue #1244 https://github.com/numba/numba/issues/1244 - """ - @njit - def calculate_2D_vector_mag(vector): - x, y = vector - - return math.sqrt(x ** 2 + y ** 2) - - @njit - def normalize_2D_vector(vector): - normalized_vector = np.empty(2, dtype=np.float64) - - mag = calculate_2D_vector_mag(vector) - x, y = vector - - normalized_vector[0] = x / mag - normalized_vector[1] = y / mag - - return normalized_vector - - @njit - def normalize_vectors(num_vectors, vectors): - normalized_vectors = np.empty((num_vectors, 2), dtype=np.float64) - - for i in range(num_vectors): - vector = vectors[i] - - normalized_vector = normalize_2D_vector(vector) - - normalized_vectors[i, 0] = normalized_vector[0] - normalized_vectors[i, 1] = normalized_vector[1] - - return normalized_vectors - - num_vectors = 10 - test_vectors = 
np.random.random((num_vectors, 2)) - got = normalize_vectors(num_vectors, test_vectors) - expected = normalize_vectors.py_func(num_vectors, test_vectors) - - np.testing.assert_almost_equal(expected, got) - - def test_incref_after_cast(self): - # Issue #1427: when casting a value before returning it, the - # cast result should be incref'ed, not the original value. - def f(): - return 0.0, np.zeros(1, dtype=np.int32) - - # Note the return type isn't the same as the tuple type above: - # the first element is a complex rather than a float. - cres = compile_isolated(f, (), - types.Tuple((types.complex128, - types.Array(types.int32, 1, 'C') - )) - ) - z, arr = cres.entry_point() - self.assertPreciseEqual(z, 0j) - self.assertPreciseEqual(arr, np.zeros(1, dtype=np.int32)) - - def test_refct_pruning_issue_1511(self): - @njit - def f(): - a = np.ones(10, dtype=np.float64) - b = np.ones(10, dtype=np.float64) - return a, b[:] - - a, b = f() - np.testing.assert_equal(a, b) - np.testing.assert_equal(a, np.ones(10, dtype=np.float64)) - - def test_refct_pruning_issue_1526(self): - @njit - def udt(image, x, y): - next_loc = np.where(image == 1) - - if len(next_loc[0]) == 0: - y_offset = 1 - x_offset = 1 - else: - y_offset = next_loc[0][0] - x_offset = next_loc[1][0] - - next_loc_x = (x - 1) + x_offset - next_loc_y = (y - 1) + y_offset - - return next_loc_x, next_loc_y - - a = np.array([[1, 0, 1, 0, 1, 0, 0, 1, 0, 0]]) - expect = udt.py_func(a, 1, 6) - got = udt(a, 1, 6) - - self.assertEqual(expect, got) - - -class TestRefCtPruning(unittest.TestCase): - - sample_llvm_ir = ''' -define i32 @"MyFunction"(i8** noalias nocapture %retptr, { i8*, i32 }** noalias nocapture %excinfo, i8* noalias nocapture readnone %env, double %arg.vt.0, double %arg.vt.1, double %arg.vt.2, double %arg.vt.3, double %arg.bounds.0, double %arg.bounds.1, double %arg.bounds.2, double %arg.bounds.3, i8* %arg.xs.0, i8* nocapture readnone %arg.xs.1, i64 %arg.xs.2, i64 %arg.xs.3, double* nocapture readonly %arg.xs.4, 
i64 %arg.xs.5.0, i64 %arg.xs.6.0, i8* %arg.ys.0, i8* nocapture readnone %arg.ys.1, i64 %arg.ys.2, i64 %arg.ys.3, double* nocapture readonly %arg.ys.4, i64 %arg.ys.5.0, i64 %arg.ys.6.0, i8* %arg.aggs_and_cols.0.0, i8* nocapture readnone %arg.aggs_and_cols.0.1, i64 %arg.aggs_and_cols.0.2, i64 %arg.aggs_and_cols.0.3, i32* nocapture %arg.aggs_and_cols.0.4, i64 %arg.aggs_and_cols.0.5.0, i64 %arg.aggs_and_cols.0.5.1, i64 %arg.aggs_and_cols.0.6.0, i64 %arg.aggs_and_cols.0.6.1) local_unnamed_addr { -entry: -tail call void @NRT_incref(i8* %arg.xs.0) -tail call void @NRT_incref(i8* %arg.ys.0) -tail call void @NRT_incref(i8* %arg.aggs_and_cols.0.0) -%.251 = icmp sgt i64 %arg.xs.5.0, 0 -br i1 %.251, label %B42.preheader, label %B160 - -B42.preheader: ; preds = %entry -%0 = add i64 %arg.xs.5.0, 1 -br label %B42 - -B42: ; preds = %B40.backedge, %B42.preheader -%lsr.iv3 = phi i64 [ %lsr.iv.next, %B40.backedge ], [ %0, %B42.preheader ] -%lsr.iv1 = phi double* [ %scevgep2, %B40.backedge ], [ %arg.xs.4, %B42.preheader ] -%lsr.iv = phi double* [ %scevgep, %B40.backedge ], [ %arg.ys.4, %B42.preheader ] -%.381 = load double, double* %lsr.iv1, align 8 -%.420 = load double, double* %lsr.iv, align 8 -%.458 = fcmp ole double %.381, %arg.bounds.1 -%not..432 = fcmp oge double %.381, %arg.bounds.0 -%"$phi82.1.1" = and i1 %.458, %not..432 -br i1 %"$phi82.1.1", label %B84, label %B40.backedge - -B84: ; preds = %B42 -%.513 = fcmp ole double %.420, %arg.bounds.3 -%not..487 = fcmp oge double %.420, %arg.bounds.2 -%"$phi106.1.1" = and i1 %.513, %not..487 -br i1 %"$phi106.1.1", label %B108.endif.endif.endif, label %B40.backedge - -B160: ; preds = %B40.backedge, %entry -tail call void @NRT_decref(i8* %arg.ys.0) -tail call void @NRT_decref(i8* %arg.xs.0) -tail call void @NRT_decref(i8* %arg.aggs_and_cols.0.0) -store i8* null, i8** %retptr, align 8 -ret i32 0 - -B108.endif.endif.endif: ; preds = %B84 -%.575 = fmul double %.381, %arg.vt.0 -%.583 = fadd double %.575, %arg.vt.1 -%.590 = fptosi double 
%.583 to i64 -%.630 = fmul double %.420, %arg.vt.2 -%.638 = fadd double %.630, %arg.vt.3 -%.645 = fptosi double %.638 to i64 -tail call void @NRT_incref(i8* %arg.aggs_and_cols.0.0) ; GONE 1 -tail call void @NRT_decref(i8* null) ; GONE 2 -tail call void @NRT_incref(i8* %arg.aggs_and_cols.0.0), !noalias !0 ; GONE 3 -%.62.i.i = icmp slt i64 %.645, 0 -%.63.i.i = select i1 %.62.i.i, i64 %arg.aggs_and_cols.0.5.0, i64 0 -%.64.i.i = add i64 %.63.i.i, %.645 -%.65.i.i = icmp slt i64 %.590, 0 -%.66.i.i = select i1 %.65.i.i, i64 %arg.aggs_and_cols.0.5.1, i64 0 -%.67.i.i = add i64 %.66.i.i, %.590 -%.84.i.i = mul i64 %.64.i.i, %arg.aggs_and_cols.0.5.1 -%.87.i.i = add i64 %.67.i.i, %.84.i.i -%.88.i.i = getelementptr i32, i32* %arg.aggs_and_cols.0.4, i64 %.87.i.i -%.89.i.i = load i32, i32* %.88.i.i, align 4, !noalias !3 -%.99.i.i = add i32 %.89.i.i, 1 -store i32 %.99.i.i, i32* %.88.i.i, align 4, !noalias !3 -tail call void @NRT_decref(i8* %arg.aggs_and_cols.0.0), !noalias !0 ; GONE 4 -tail call void @NRT_decref(i8* %arg.aggs_and_cols.0.0) ; GONE 5 -br label %B40.backedge - -B40.backedge: ; preds = %B108.endif.endif.endif, %B84, %B42 -%scevgep = getelementptr double, double* %lsr.iv, i64 1 -%scevgep2 = getelementptr double, double* %lsr.iv1, i64 1 -%lsr.iv.next = add i64 %lsr.iv3, -1 -%.294 = icmp sgt i64 %lsr.iv.next, 1 -br i1 %.294, label %B42, label %B160 -} - ''' - - def test_refct_pruning_op_recognize(self): - input_ir = self.sample_llvm_ir - input_lines = list(input_ir.splitlines()) - before_increfs = [ln for ln in input_lines if 'NRT_incref' in ln] - before_decrefs = [ln for ln in input_lines if 'NRT_decref' in ln] - - # prune - output_ir = nrtopt._remove_redundant_nrt_refct(input_ir) - output_lines = list(output_ir.splitlines()) - after_increfs = [ln for ln in output_lines if 'NRT_incref' in ln] - after_decrefs = [ln for ln in output_lines if 'NRT_decref' in ln] - - # check - self.assertNotEqual(before_increfs, after_increfs) - self.assertNotEqual(before_decrefs, 
after_decrefs) - - pruned_increfs = set(before_increfs) - set(after_increfs) - pruned_decrefs = set(before_decrefs) - set(after_decrefs) - - # the symm difference == or-combined - combined = pruned_increfs | pruned_decrefs - self.assertEqual(combined, pruned_increfs ^ pruned_decrefs) - pruned_lines = '\n'.join(combined) - - # all GONE lines are pruned - for i in [1, 2, 3, 4, 5]: - gone = '; GONE {}'.format(i) - self.assertIn(gone, pruned_lines) - # no other lines - self.assertEqual(len(list(pruned_lines.splitlines())), len(combined)) - - def test_refct_pruning_with_branches(self): - '''testcase from #2350''' - @njit - def _append_non_na(x, y, agg, field): - if not np.isnan(field): - agg[y, x] += 1 - - @njit - def _append(x, y, agg, field): - if not np.isnan(field): - if np.isnan(agg[y, x]): - agg[y, x] = field - else: - agg[y, x] += field - - @njit - def append(x, y, agg, field): - _append_non_na(x, y, agg, field) - _append(x, y, agg, field) - - # Disable python wrapper to avoid detecting necessary - # refcount inside it - @njit(no_cpython_wrapper=True) - def extend(arr, field): - for i in range(arr.shape[0]): - for j in range(arr.shape[1]): - append(j, i, arr, field) - - # Compile - extend.compile("(f4[:,::1], f4)") - - # Test there are no reference count operations - llvmir = str(extend.inspect_llvm(extend.signatures[0])) - refops = list(re.finditer(r'(NRT_incref|NRT_decref)\([^\)]+\)', llvmir)) - self.assertEqual(len(refops), 0) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_nrt_refct.py b/numba/numba/tests/test_nrt_refct.py deleted file mode 100644 index 95ab5b344..000000000 --- a/numba/numba/tests/test_nrt_refct.py +++ /dev/null @@ -1,115 +0,0 @@ -""" -Tests issues or edge cases for producing invalid NRT refct -""" - -from __future__ import division, absolute_import, print_function - -import gc - -import numpy as np - -import numba.unittest_support as unittest -from numba import njit -from numba.runtime import rtsys -from 
.support import TestCase - - -class TestNrtRefCt(TestCase): - - def setUp(self): - # Clean up any NRT-backed objects hanging in a dead reference cycle - gc.collect() - - def test_no_return(self): - """ - Test issue #1291 - """ - - @njit - def foo(n): - for i in range(n): - temp = np.zeros(2) - return 0 - - n = 10 - init_stats = rtsys.get_allocation_stats() - foo(n) - cur_stats = rtsys.get_allocation_stats() - self.assertEqual(cur_stats.alloc - init_stats.alloc, n) - self.assertEqual(cur_stats.free - init_stats.free, n) - - def test_escaping_var_init_in_loop(self): - """ - Test issue #1297 - """ - - @njit - def g(n): - - x = np.zeros((n, 2)) - - for i in range(n): - y = x[i] - - for i in range(n): - y = x[i] - - return 0 - - init_stats = rtsys.get_allocation_stats() - g(10) - cur_stats = rtsys.get_allocation_stats() - self.assertEqual(cur_stats.alloc - init_stats.alloc, 1) - self.assertEqual(cur_stats.free - init_stats.free, 1) - - def test_invalid_computation_of_lifetime(self): - """ - Test issue #1573 - """ - @njit - def if_with_allocation_and_initialization(arr1, test1): - tmp_arr = np.zeros_like(arr1) - - for i in range(tmp_arr.shape[0]): - pass - - if test1: - np.zeros_like(arr1) - - return tmp_arr - - arr = np.random.random((5, 5)) # the values are not consumed - - init_stats = rtsys.get_allocation_stats() - if_with_allocation_and_initialization(arr, False) - cur_stats = rtsys.get_allocation_stats() - self.assertEqual(cur_stats.alloc - init_stats.alloc, - cur_stats.free - init_stats.free) - - def test_del_at_beginning_of_loop(self): - """ - Test issue #1734 - """ - @njit - def f(arr): - res = 0 - - for i in (0, 1): - # `del t` is issued here before defining t. It must be - # correctly handled by the lowering phase. 
- t = arr[i] - if t[i] > 1: - res += t[i] - - return res - - arr = np.ones((2, 2)) - init_stats = rtsys.get_allocation_stats() - f(arr) - cur_stats = rtsys.get_allocation_stats() - self.assertEqual(cur_stats.alloc - init_stats.alloc, - cur_stats.free - init_stats.free) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_numberctor.py b/numba/numba/tests/test_numberctor.py deleted file mode 100644 index dc8eb69f0..000000000 --- a/numba/numba/tests/test_numberctor.py +++ /dev/null @@ -1,256 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import numpy as np - -from numba import unittest_support as unittest -from numba.compiler import compile_isolated -from numba import jit, types - -from .support import TestCase, tag - - -def dobool(a): - return bool(a) - - -def doint(a): - return int(a) - - -def dofloat(a): - return float(a) - - -def docomplex(a): - return complex(a) - - -def docomplex2(a, b): - return complex(a, b) - - -def complex_calc(a): - z = complex(a) - return z.real ** 2 + z.imag ** 2 - - -def complex_calc2(a, b): - z = complex(a, b) - return z.real ** 2 + z.imag ** 2 - - -def converter(tp): - def f(a): - return tp(a) - return f - - -def real_np_types(): - for tp_name in ('int8', 'int16', 'int32', 'int64', - 'uint8', 'uint16', 'uint32', 'uint64', - 'intc', 'uintc', 'intp', 'uintp', - 'float32', 'float64', 'bool_'): - yield tp_name - -def complex_np_types(): - for tp_name in ('complex64', 'complex128'): - yield tp_name - - -class TestScalarNumberCtor(TestCase): - """ - Test (some scalar) - """ - - def check_int_constructor(self, pyfunc): - x_types = [ - types.boolean, types.int32, types.int64, types.float32, types.float64 - ] - x_values = [1, 0, 1000, 12.2, 23.4] - - for ty, x in zip(x_types, x_values): - cres = compile_isolated(pyfunc, [ty]) - cfunc = cres.entry_point - self.assertPreciseEqual(pyfunc(x), cfunc(x)) - - @tag('important') - def test_bool(self): - self.check_int_constructor(dobool) - 
- @tag('important') - def test_int(self): - self.check_int_constructor(doint) - - @tag('important') - def test_float(self): - pyfunc = dofloat - - x_types = [ - types.int32, types.int64, types.float32, types.float64 - ] - x_values = [1, 1000, 12.2, 23.4] - - for ty, x in zip(x_types, x_values): - cres = compile_isolated(pyfunc, [ty]) - cfunc = cres.entry_point - self.assertPreciseEqual(pyfunc(x), cfunc(x), - prec='single' if ty is types.float32 else 'exact') - - @tag('important') - def test_complex(self): - pyfunc = docomplex - - x_types = [ - types.int32, types.int64, types.float32, types.float64, - types.complex64, types.complex128, - ] - x_values = [1, 1000, 12.2, 23.4, 1.5-5j, 1-4.75j] - - for ty, x in zip(x_types, x_values): - cres = compile_isolated(pyfunc, [ty]) - cfunc = cres.entry_point - got = cfunc(x) - expected = pyfunc(x) - self.assertPreciseEqual(pyfunc(x), cfunc(x), - prec='single' if ty is types.float32 else 'exact') - - # Check that complex(float32) really creates a complex64, - # by checking the accuracy of computations. - pyfunc = complex_calc - x = 1.0 + 2**-50 - cres = compile_isolated(pyfunc, [types.float32]) - cfunc = cres.entry_point - self.assertPreciseEqual(cfunc(x), 1.0) - # Control (complex128) - cres = compile_isolated(pyfunc, [types.float64]) - cfunc = cres.entry_point - self.assertGreater(cfunc(x), 1.0) - - @tag('important') - def test_complex2(self): - pyfunc = docomplex2 - - x_types = [ - types.int32, types.int64, types.float32, types.float64 - ] - x_values = [1, 1000, 12.2, 23.4] - y_values = [x - 3 for x in x_values] - - for ty, x, y in zip(x_types, x_values, y_values): - cres = compile_isolated(pyfunc, [ty, ty]) - cfunc = cres.entry_point - self.assertPreciseEqual(pyfunc(x, y), cfunc(x, y), - prec='single' if ty is types.float32 else 'exact') - - # Check that complex(float32, float32) really creates a complex64, - # by checking the accuracy of computations. 
- pyfunc = complex_calc2 - x = 1.0 + 2**-50 - cres = compile_isolated(pyfunc, [types.float32, types.float32]) - cfunc = cres.entry_point - self.assertPreciseEqual(cfunc(x, x), 2.0) - # Control (complex128) - cres = compile_isolated(pyfunc, [types.float64, types.float32]) - cfunc = cres.entry_point - self.assertGreater(cfunc(x, x), 2.0) - - def check_type_converter(self, tp, np_type, values): - pyfunc = converter(tp) - cfunc = jit(nopython=True)(pyfunc) - if issubclass(np_type, np.integer): - # Converting from a Python int to a small Numpy int on 32-bit - # builds can raise "OverflowError: Python int too large to - # convert to C long". Work around by going through a large - # Numpy int first. - np_converter = lambda x: np_type(np.int64(x)) - else: - np_converter = np_type - dtype = np.dtype(np_type) - for val in values: - if dtype.kind == 'u' and isinstance(val, float) and val < 0.0: - # Converting negative float to unsigned int yields undefined - # behaviour (and concretely different on ARM vs. x86) - continue - expected = np_converter(val) - got = cfunc(val) - self.assertPreciseEqual(got, expected, - msg="for type %s with arg %s" % (np_type, val)) - - def check_number_types(self, tp_factory): - values = [0, 1, -1, 100003, 10000000000007, -100003, -10000000000007, - 1.5, -3.5] - for tp_name in real_np_types(): - np_type = getattr(np, tp_name) - tp = tp_factory(tp_name) - self.check_type_converter(tp, np_type, values) - values.append(1.5+3j) - for tp_name in complex_np_types(): - np_type = getattr(np, tp_name) - tp = tp_factory(tp_name) - self.check_type_converter(tp, np_type, values) - - def test_numba_types(self): - """ - Test explicit casting to Numba number types. - """ - def tp_factory(tp_name): - return getattr(types, tp_name) - self.check_number_types(tp_factory) - - def test_numpy_types(self): - """ - Test explicit casting to Numpy number types. 
- """ - def tp_factory(tp_name): - return getattr(np, tp_name) - self.check_number_types(tp_factory) - - -class TestArrayNumberCtor(TestCase): - """ - Test (some sequence) - """ - - def check_type_constructor(self, np_type, values): - pyfunc = converter(np_type) - cfunc = jit(nopython=True)(pyfunc) - for val in values: - expected = np_type(val) - got = cfunc(val) - self.assertPreciseEqual(got, expected) - - def test_1d(self): - values = [ - (1.0, 2.5), - (1, 2.5), - [1.0, 2.5], - (), - ] - for tp_name in real_np_types(): - np_type = getattr(np, tp_name) - self.check_type_constructor(np_type, values) - values = [ - (1j, 2.5), - [1.0, 2.5], - ] - for tp_name in complex_np_types(): - np_type = getattr(np, tp_name) - self.check_type_constructor(np_type, values) - - def test_2d(self): - values = [ - ((1.0, 2.5), (3.5, 4)), - [(1.0, 2.5), (3.5, 4.0)], - ([1.0, 2.5], [3.5, 4.0]), - [(), ()], - ] - for tp_name in real_np_types(): - np_type = getattr(np, tp_name) - self.check_type_constructor(np_type, values) - for tp_name in complex_np_types(): - np_type = getattr(np, tp_name) - self.check_type_constructor(np_type, values) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_numconv.py b/numba/numba/tests/test_numconv.py deleted file mode 100644 index d799bfb71..000000000 --- a/numba/numba/tests/test_numconv.py +++ /dev/null @@ -1,39 +0,0 @@ -from __future__ import print_function -import itertools -import numba.unittest_support as unittest -from numba.compiler import compile_isolated -from numba import types - - -def template(fromty, toty): - def closure(self): - def cast(x): - y = x - return y - - cres = compile_isolated(cast, args=[fromty], return_type=toty) - self.assertAlmostEqual(cres.entry_point(1), 1) - - return closure - - -class TestNumberConversion(unittest.TestCase): - """ - Test all int/float numeric conversion to ensure we have all the external - dependencies to perform these conversions. 
- """ - # NOTE: more implicit tests are in test_numberctor - - @classmethod - def automatic_populate(cls): - tys = types.integer_domain | types.real_domain - for fromty, toty in itertools.permutations(tys, r=2): - test_name = "test_{fromty}_to_{toty}".format(fromty=fromty, - toty=toty) - setattr(cls, test_name, template(fromty, toty)) - - -TestNumberConversion.automatic_populate() - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_numpy_support.py b/numba/numba/tests/test_numpy_support.py deleted file mode 100644 index 989e7b6c7..000000000 --- a/numba/numba/tests/test_numpy_support.py +++ /dev/null @@ -1,431 +0,0 @@ -""" -Test helper functions from numba.numpy_support. -""" - -from __future__ import print_function - -import sys -from itertools import product - -import numpy as np - -import numba.unittest_support as unittest -from numba import config, numpy_support, types -from .support import TestCase, tag -from .enum_usecases import Shake, RequestError - - -class TestFromDtype(TestCase): - - @tag('important') - def test_number_types(self): - """ - Test from_dtype() and as_dtype() with the various scalar number types. 
- """ - f = numpy_support.from_dtype - - def check(typechar, numba_type): - # Only native ordering and alignment is supported - dtype = np.dtype(typechar) - self.assertIs(f(dtype), numba_type) - self.assertIs(f(np.dtype('=' + typechar)), numba_type) - self.assertEqual(dtype, numpy_support.as_dtype(numba_type)) - - check('?', types.bool_) - check('f', types.float32) - check('f4', types.float32) - check('d', types.float64) - check('f8', types.float64) - - check('F', types.complex64) - check('c8', types.complex64) - check('D', types.complex128) - check('c16', types.complex128) - - check('b', types.int8) - check('i1', types.int8) - check('B', types.uint8) - check('u1', types.uint8) - - check('h', types.int16) - check('i2', types.int16) - check('H', types.uint16) - check('u2', types.uint16) - - check('i', types.int32) - check('i4', types.int32) - check('I', types.uint32) - check('u4', types.uint32) - - check('q', types.int64) - check('Q', types.uint64) - for name in ('int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32', - 'int64', 'uint64', 'intp', 'uintp'): - self.assertIs(f(np.dtype(name)), getattr(types, name)) - - # Non-native alignments are unsupported (except for 1-byte types) - foreign_align = '>' if sys.byteorder == 'little' else '<' - for letter in 'hHiIlLqQfdFD': - self.assertRaises(NotImplementedError, f, - np.dtype(foreign_align + letter)) - - def test_string_types(self): - """ - Test from_dtype() and as_dtype() with the character string types. 
- """ - def check(typestring, numba_type): - # Only native ordering and alignment is supported - dtype = np.dtype(typestring) - self.assertEqual(numpy_support.from_dtype(dtype), numba_type) - self.assertEqual(dtype, numpy_support.as_dtype(numba_type)) - - check('S10', types.CharSeq(10)) - check('a11', types.CharSeq(11)) - check('U12', types.UnicodeCharSeq(12)) - - def check_datetime_types(self, letter, nb_class): - def check(dtype, numba_type, code): - tp = numpy_support.from_dtype(dtype) - self.assertEqual(tp, numba_type) - self.assertEqual(tp.unit_code, code) - self.assertEqual(numpy_support.as_dtype(numba_type), dtype) - self.assertEqual(numpy_support.as_dtype(tp), dtype) - - # Unit-less ("generic") type - check(np.dtype(letter), nb_class(''), 14) - - @tag('important') - def test_datetime_types(self): - """ - Test from_dtype() and as_dtype() with the datetime types. - """ - self.check_datetime_types('M', types.NPDatetime) - - @tag('important') - def test_timedelta_types(self): - """ - Test from_dtype() and as_dtype() with the timedelta types. 
- """ - self.check_datetime_types('m', types.NPTimedelta) - - @tag('important') - def test_struct_types(self): - def check(dtype, fields, size, aligned): - tp = numpy_support.from_dtype(dtype) - self.assertIsInstance(tp, types.Record) - # Only check for dtype equality, as the Numba type may be interned - self.assertEqual(tp.dtype, dtype) - self.assertEqual(tp.fields, fields) - self.assertEqual(tp.size, size) - self.assertEqual(tp.aligned, aligned) - - dtype = np.dtype([('a', np.int16), ('b', np.int32)]) - check(dtype, - fields={'a': (types.int16, 0), - 'b': (types.int32, 2)}, - size=6, aligned=False) - - dtype = np.dtype([('a', np.int16), ('b', np.int32)], align=True) - check(dtype, - fields={'a': (types.int16, 0), - 'b': (types.int32, 4)}, - size=8, aligned=True) - - dtype = np.dtype([('m', np.int32), ('n', 'S5')]) - check(dtype, - fields={'m': (types.int32, 0), - 'n': (types.CharSeq(5), 4)}, - size=9, aligned=False) - - @tag('important') - def test_enum_type(self): - - def check(base_inst, enum_def, type_class): - np_dt = np.dtype(base_inst) - nb_ty = numpy_support.from_dtype(np_dt) - inst = type_class(enum_def, nb_ty) - recovered = numpy_support.as_dtype(inst) - self.assertEqual(np_dt, recovered) - - dts = [np.float64, np.int32, np.complex128, np.bool] - enums = [Shake, RequestError] - - for dt, enum in product(dts, enums): - check(dt, enum, types.EnumMember) - - for dt, enum in product(dts, enums): - check(dt, enum, types.IntEnumMember) - - -class ValueTypingTestBase(object): - """ - Common tests for the typing of values. Also used by test_special. - """ - - def check_number_values(self, func): - """ - Test *func*() with scalar numeric values. 
- """ - f = func - # Standard Python types get inferred by numpy - self.assertIn(f(1), (types.int32, types.int64)) - self.assertIn(f(2**31 - 1), (types.int32, types.int64)) - self.assertIn(f(-2**31), (types.int32, types.int64)) - self.assertIs(f(1.0), types.float64) - self.assertIs(f(1.0j), types.complex128) - self.assertIs(f(True), types.bool_) - self.assertIs(f(False), types.bool_) - # Numpy scalar types get converted by from_dtype() - for name in ('int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32', - 'int64', 'uint64', 'intc', 'uintc', 'intp', 'uintp', - 'float32', 'float64', 'complex64', 'complex128', - 'bool_'): - val = getattr(np, name)() - self.assertIs(f(val), getattr(types, name)) - - def _base_check_datetime_values(self, func, np_type, nb_type): - f = func - for unit in [ - '', 'Y', 'M', 'D', 'h', 'm', 's', - 'ms', 'us', 'ns', 'ps', 'fs', 'as']: - if unit: - t = np_type(3, unit) - else: - # "generic" datetime / timedelta - t = np_type('Nat') - tp = f(t) - # This ensures the unit hasn't been lost - self.assertEqual(tp, nb_type(unit)) - - def check_datetime_values(self, func): - """ - Test *func*() with np.datetime64 values. - """ - self._base_check_datetime_values(func, np.datetime64, types.NPDatetime) - - def check_timedelta_values(self, func): - """ - Test *func*() with np.timedelta64 values. - """ - self._base_check_datetime_values(func, np.timedelta64, types.NPTimedelta) - - -class TestArrayScalars(ValueTypingTestBase, TestCase): - - @tag('important') - def test_number_values(self): - """ - Test map_arrayscalar_type() with scalar number values. - """ - self.check_number_values(numpy_support.map_arrayscalar_type) - - @tag('important') - def test_datetime_values(self): - """ - Test map_arrayscalar_type() with np.datetime64 values. 
- """ - f = numpy_support.map_arrayscalar_type - self.check_datetime_values(f) - # datetime64s with a non-one factor shouldn't be supported - t = np.datetime64('2014', '10Y') - with self.assertRaises(NotImplementedError): - f(t) - - @tag('important') - def test_timedelta_values(self): - """ - Test map_arrayscalar_type() with np.timedelta64 values. - """ - f = numpy_support.map_arrayscalar_type - self.check_timedelta_values(f) - # timedelta64s with a non-one factor shouldn't be supported - t = np.timedelta64(10, '10Y') - with self.assertRaises(NotImplementedError): - f(t) - - -class FakeUFunc(object): - __slots__ = ('nin', 'nout', 'types', 'ntypes') - - def __init__(self, types): - self.types = types - in_, out = self.types[0].split('->') - self.nin = len(in_) - self.nout = len(out) - self.ntypes = len(types) - for tp in types: - in_, out = self.types[0].split('->') - assert len(in_) == self.nin - assert len(out) == self.nout - -# Typical types for np.add, np.multiply, np.isnan -_add_types = ['??->?', 'bb->b', 'BB->B', 'hh->h', 'HH->H', 'ii->i', 'II->I', - 'll->l', 'LL->L', 'qq->q', 'QQ->Q', 'ee->e', 'ff->f', 'dd->d', - 'gg->g', 'FF->F', 'DD->D', 'GG->G', 'Mm->M', 'mm->m', 'mM->M', - 'OO->O'] - -_mul_types = ['??->?', 'bb->b', 'BB->B', 'hh->h', 'HH->H', 'ii->i', 'II->I', - 'll->l', 'LL->L', 'qq->q', 'QQ->Q', 'ee->e', 'ff->f', 'dd->d', - 'gg->g', 'FF->F', 'DD->D', 'GG->G', 'mq->m', 'qm->m', 'md->m', - 'dm->m', 'OO->O'] - -# Those ones only have floating-point loops -_isnan_types = ['e->?', 'f->?', 'd->?', 'g->?', 'F->?', 'D->?', 'G->?'] -_sqrt_types = ['e->e', 'f->f', 'd->d', 'g->g', 'F->F', 'D->D', 'G->G', 'O->O'] - - -class TestUFuncs(TestCase): - """ - Test ufunc helpers. 
- """ - - def test_ufunc_find_matching_loop(self): - f = numpy_support.ufunc_find_matching_loop - np_add = FakeUFunc(_add_types) - np_mul = FakeUFunc(_mul_types) - np_isnan = FakeUFunc(_isnan_types) - np_sqrt = FakeUFunc(_sqrt_types) - - def check(ufunc, input_types, sigs, output_types=()): - """ - Check that ufunc_find_matching_loop() finds one of the given - *sigs* for *ufunc*, *input_types* and optional *output_types*. - """ - loop = f(ufunc, input_types + output_types) - self.assertTrue(loop) - if isinstance(sigs, str): - sigs = (sigs,) - self.assertIn(loop.ufunc_sig, sigs, - "inputs=%s and outputs=%s should have selected " - "one of %s, got %s" - % (input_types, output_types, sigs, loop.ufunc_sig)) - self.assertEqual(len(loop.numpy_inputs), len(loop.inputs)) - self.assertEqual(len(loop.numpy_outputs), len(loop.outputs)) - if not output_types: - # Add explicit outputs and check the result is the same - loop_explicit = f(ufunc, list(input_types) + loop.outputs) - self.assertEqual(loop_explicit, loop) - else: - self.assertEqual(loop.outputs, list(output_types)) - # Round-tripping inputs and outputs - loop_rt = f(ufunc, loop.inputs + loop.outputs) - self.assertEqual(loop_rt, loop) - return loop - - def check_exact(ufunc, input_types, sigs, output_types=()): - """ - Like check(), but also ensure no casting of inputs occurred. 
- """ - loop = check(ufunc, input_types, sigs, output_types) - self.assertEqual(loop.inputs, list(input_types)) - - def check_no_match(ufunc, input_types): - loop = f(ufunc, input_types) - self.assertIs(loop, None) - - # Exact matching for number types - check_exact(np_add, (types.bool_, types.bool_), '??->?') - check_exact(np_add, (types.int8, types.int8), 'bb->b') - check_exact(np_add, (types.uint8, types.uint8), 'BB->B') - check_exact(np_add, (types.int64, types.int64), ('ll->l', 'qq->q')) - check_exact(np_add, (types.uint64, types.uint64), ('LL->L', 'QQ->Q')) - check_exact(np_add, (types.float32, types.float32), 'ff->f') - check_exact(np_add, (types.float64, types.float64), 'dd->d') - check_exact(np_add, (types.complex64, types.complex64), 'FF->F') - check_exact(np_add, (types.complex128, types.complex128), 'DD->D') - - # Exact matching for datetime64 and timedelta64 types - check_exact(np_add, (types.NPTimedelta('s'), types.NPTimedelta('s')), - 'mm->m', output_types=(types.NPTimedelta('s'),)) - check_exact(np_add, (types.NPTimedelta('ms'), types.NPDatetime('s')), - 'mM->M', output_types=(types.NPDatetime('ms'),)) - check_exact(np_add, (types.NPDatetime('s'), types.NPTimedelta('s')), - 'Mm->M', output_types=(types.NPDatetime('s'),)) - - check_exact(np_mul, (types.NPTimedelta('s'), types.int64), - 'mq->m', output_types=(types.NPTimedelta('s'),)) - check_exact(np_mul, (types.float64, types.NPTimedelta('s')), - 'dm->m', output_types=(types.NPTimedelta('s'),)) - - # Mix and match number types, with casting - check(np_add, (types.bool_, types.int8), 'bb->b') - check(np_add, (types.uint8, types.bool_), 'BB->B') - check(np_add, (types.int16, types.uint16), 'ii->i') - check(np_add, (types.complex64, types.float64), 'DD->D') - check(np_add, (types.float64, types.complex64), 'DD->D') - # Integers, when used together with floating-point numbers, - # should cast to any real or complex (see #2006) - int_types = [types.int32, types.uint32, types.int64, types.uint64] - for 
intty in int_types: - check(np_add, (types.float32, intty), 'ff->f') - check(np_add, (types.float64, intty), 'dd->d') - check(np_add, (types.complex64, intty), 'FF->F') - check(np_add, (types.complex128, intty), 'DD->D') - # However, when used alone, they should cast only to - # floating-point types of sufficient precision - # (typical use case: np.sqrt(2) should give an accurate enough value) - for intty in int_types: - check(np_sqrt, (intty,), 'd->d') - check(np_isnan, (intty,), 'd->?') - - # With some timedelta64 arguments as well - check(np_mul, (types.NPTimedelta('s'), types.int32), - 'mq->m', output_types=(types.NPTimedelta('s'),)) - check(np_mul, (types.NPTimedelta('s'), types.uint32), - 'mq->m', output_types=(types.NPTimedelta('s'),)) - check(np_mul, (types.NPTimedelta('s'), types.float32), - 'md->m', output_types=(types.NPTimedelta('s'),)) - check(np_mul, (types.float32, types.NPTimedelta('s')), - 'dm->m', output_types=(types.NPTimedelta('s'),)) - - # No match - check_no_match(np_add, (types.NPDatetime('s'), types.NPDatetime('s'))) - # No implicit casting from int64 to timedelta64 (Numpy would allow - # this). 
- check_no_match(np_add, (types.NPTimedelta('s'), types.int64)) - - def test_layout_checker(self): - def check_arr(arr): - dims = arr.shape - strides = arr.strides - itemsize = arr.dtype.itemsize - is_c = numpy_support.is_contiguous(dims, strides, itemsize) - is_f = numpy_support.is_fortran(dims, strides, itemsize) - expect_c = arr.flags['C_CONTIGUOUS'] - expect_f = arr.flags['F_CONTIGUOUS'] - self.assertEqual(is_c, expect_c) - self.assertEqual(is_f, expect_f) - - arr = np.arange(24) - # 1D - check_arr(arr) - # 2D - check_arr(arr.reshape((3, 8))) - check_arr(arr.reshape((3, 8)).T) - check_arr(arr.reshape((3, 8))[::2]) - # 3D - check_arr(arr.reshape((2, 3, 4))) - check_arr(arr.reshape((2, 3, 4)).T) - # middle axis is shape 1 - check_arr(arr.reshape((2, 3, 4))[:, ::3]) - check_arr(arr.reshape((2, 3, 4)).T[:, ::3]) - if numpy_support.version > (1, 11): - # leading axis is shape 1 - check_arr(arr.reshape((2, 3, 4))[::2]) - check_arr(arr.reshape((2, 3, 4)).T[:, :, ::2]) - # 2 leading axis are shape 1 - check_arr(arr.reshape((2, 3, 4))[::2, ::3]) - check_arr(arr.reshape((2, 3, 4)).T[:, ::3, ::2]) - # single item slices for all axis - check_arr(arr.reshape((2, 3, 4))[::2, ::3, ::4]) - check_arr(arr.reshape((2, 3, 4)).T[::4, ::3, ::2]) - # 4D - check_arr(arr.reshape((2, 2, 3, 2))[::2, ::2, ::3]) - check_arr(arr.reshape((2, 2, 3, 2)).T[:, ::3, ::2, ::2]) - # outer zero dims - check_arr(arr.reshape((2, 2, 3, 2))[::5, ::2, ::3]) - check_arr(arr.reshape((2, 2, 3, 2)).T[:, ::3, ::2, ::5]) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_numpyadapt.py b/numba/numba/tests/test_numpyadapt.py deleted file mode 100644 index cc001c295..000000000 --- a/numba/numba/tests/test_numpyadapt.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import print_function - -import numpy as np - -import numba.unittest_support as unittest -from numba.ctypes_support import * -from numba import _helperlib - - -class ArrayStruct3D(Structure): - # Mimick the structure 
defined in numba.targets.arrayobj's make_array() - _fields_ = [ - ("meminfo", c_void_p), - ("parent", c_void_p), - ("nitems", c_ssize_t), - ("itemsize", c_ssize_t), - ("data", c_void_p), - ("shape", (c_ssize_t * 3)), - ("strides", (c_ssize_t * 3)), - ] - - -class TestArrayAdaptor(unittest.TestCase): - def test_array_adaptor(self): - arystruct = ArrayStruct3D() - - adaptorptr = _helperlib.c_helpers['adapt_ndarray'] - adaptor = PYFUNCTYPE(c_int, py_object, c_void_p)(adaptorptr) - - ary = np.arange(60).reshape(2, 3, 10) - status = adaptor(ary, byref(arystruct)) - self.assertEqual(status, 0) - self.assertEqual(arystruct.data, ary.ctypes.data) - self.assertNotEqual(arystruct.meminfo, 0) - self.assertEqual(arystruct.parent, id(ary)) - self.assertEqual(arystruct.nitems, 60) - self.assertEqual(arystruct.itemsize, ary.itemsize) - for i in range(3): - self.assertEqual(arystruct.shape[i], ary.ctypes.shape[i]) - self.assertEqual(arystruct.strides[i], ary.ctypes.strides[i]) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_obj_lifetime.py b/numba/numba/tests/test_obj_lifetime.py deleted file mode 100644 index 8d687457e..000000000 --- a/numba/numba/tests/test_obj_lifetime.py +++ /dev/null @@ -1,387 +0,0 @@ -from __future__ import print_function - -import collections -import sys -import weakref - -import numba.unittest_support as unittest -from numba.controlflow import CFGraph, Loop -from numba.compiler import compile_extra, compile_isolated, Flags -from numba import types -from .support import TestCase - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -forceobj_flags = Flags() -forceobj_flags.set("force_pyobject") - -no_pyobj_flags = Flags() - - -class _Dummy(object): - - def __init__(self, recorder, name): - self.recorder = recorder - self.name = name - recorder._add_dummy(self) - - def __add__(self, other): - assert isinstance(other, _Dummy) - return _Dummy(self.recorder, "%s + %s" % (self.name, other.name)) - - 
def __iter__(self): - return _DummyIterator(self.recorder, "iter(%s)" % self.name) - - -class _DummyIterator(_Dummy): - - count = 0 - - def __next__(self): - if self.count >= 3: - raise StopIteration - self.count += 1 - return _Dummy(self.recorder, "%s#%s" % (self.name, self.count)) - - next = __next__ - - -class RefRecorder(object): - """ - An object which records events when instances created through it - are deleted. Custom events can also be recorded to aid in - diagnosis. - """ - - def __init__(self): - self._counts = collections.defaultdict(int) - self._events = [] - self._wrs = {} - - def make_dummy(self, name): - """ - Make an object whose deletion will be recorded as *name*. - """ - return _Dummy(self, name) - - def _add_dummy(self, dummy): - wr = weakref.ref(dummy, self._on_disposal) - self._wrs[wr] = dummy.name - - __call__ = make_dummy - - def mark(self, event): - """ - Manually append *event* to the recorded events. - *event* can be formatted using format(). - """ - count = self._counts[event] + 1 - self._counts[event] = count - self._events.append(event.format(count=count)) - - def _on_disposal(self, wr): - name = self._wrs.pop(wr) - self._events.append(name) - - @property - def alive(self): - """ - A list of objects which haven't been deleted yet. - """ - return [wr() for wr in self._wrs] - - @property - def recorded(self): - """ - A list of recorded events. 
- """ - return self._events - - -def simple_usecase1(rec): - a = rec('a') - b = rec('b') - c = rec('c') - a = b + c - rec.mark('--1--') - d = a + a # b + c + b + c - rec.mark('--2--') - return d - -def simple_usecase2(rec): - a = rec('a') - b = rec('b') - rec.mark('--1--') - x = a - y = x - a = None - return y - -def looping_usecase1(rec): - a = rec('a') - b = rec('b') - c = rec('c') - x = b - for y in a: - x = x + y - rec.mark('--loop bottom--') - rec.mark('--loop exit--') - x = x + c - return x - -def looping_usecase2(rec): - a = rec('a') - b = rec('b') - cum = rec('cum') - for x in a: - rec.mark('--outer loop top--') - cum = cum + x - z = x + x - rec.mark('--inner loop entry #{count}--') - for y in b: - rec.mark('--inner loop top #{count}--') - cum = cum + y - rec.mark('--inner loop bottom #{count}--') - rec.mark('--inner loop exit #{count}--') - if cum: - cum = y + z - else: - # Never gets here, but let the Numba compiler see a `break` opcode - break - rec.mark('--outer loop bottom #{count}--') - else: - rec.mark('--outer loop else--') - rec.mark('--outer loop exit--') - return cum - -def generator_usecase1(rec): - a = rec('a') - b = rec('b') - yield a - yield b - -def generator_usecase2(rec): - a = rec('a') - b = rec('b') - for x in a: - yield x - yield b - - -class MyError(RuntimeError): - pass - -def do_raise(x): - raise MyError(x) - -def raising_usecase1(rec): - a = rec('a') - b = rec('b') - d = rec('d') - if a: - do_raise("foo") - c = rec('c') - c + a - c + b - -def raising_usecase2(rec): - a = rec('a') - b = rec('b') - if a: - c = rec('c') - do_raise(b) - a + c - -def raising_usecase3(rec): - a = rec('a') - b = rec('b') - if a: - raise MyError(b) - - -def del_before_definition(rec): - """ - This test reveal a bug that there is a del on uninitialized variable - """ - n = 5 - for i in range(n): - rec.mark(str(i)) - n = 0 - for j in range(n): - return 0 - else: - if i < 2: - continue - elif i == 2: - for j in range(i): - return i - rec.mark('FAILED') - 
rec.mark('FAILED') - rec.mark('FAILED') - rec.mark('OK') - return -1 - - -def inf_loop_multiple_back_edge(rec): - """ - test to reveal bug of invalid liveness when infinite loop has multiple - backedge. - """ - while True: - rec.mark("yield") - yield - p = rec('p') - if p: - rec.mark('bra') - pass - - -class TestObjLifetime(TestCase): - """ - Test lifetime of Python objects inside jit-compiled functions. - """ - - def compile(self, pyfunc): - cr = compile_isolated(pyfunc, (types.pyobject,), flags=forceobj_flags) - return cr.entry_point - - def compile_and_record(self, pyfunc, raises=None): - rec = RefRecorder() - cfunc = self.compile(pyfunc) - if raises is not None: - with self.assertRaises(raises): - cfunc(rec) - else: - cfunc(rec) - return rec - - def assertRecordOrder(self, rec, expected): - """ - Check that the *expected* markers occur in that order in *rec*'s - recorded events. - """ - actual = [] - recorded = rec.recorded - remaining = list(expected) - # Find out in which order, if any, the expected events were recorded - for d in recorded: - if d in remaining: - actual.append(d) - # User may or may not expect duplicates, handle them properly - remaining.remove(d) - self.assertEqual(actual, expected, - "the full list of recorded events is: %r" % (recorded,)) - - def test_simple1(self): - rec = self.compile_and_record(simple_usecase1) - self.assertFalse(rec.alive) - self.assertRecordOrder(rec, ['a', 'b', '--1--']) - self.assertRecordOrder(rec, ['a', 'c', '--1--']) - self.assertRecordOrder(rec, ['--1--', 'b + c', '--2--']) - - def test_simple2(self): - rec = self.compile_and_record(simple_usecase2) - self.assertFalse(rec.alive) - self.assertRecordOrder(rec, ['b', '--1--', 'a']) - - def test_looping1(self): - rec = self.compile_and_record(looping_usecase1) - self.assertFalse(rec.alive) - # a and b are unneeded after the loop, check they were disposed of - self.assertRecordOrder(rec, ['a', 'b', '--loop exit--', 'c']) - # check disposal order of iterator items and 
iterator - self.assertRecordOrder(rec, ['iter(a)#1', '--loop bottom--', - 'iter(a)#2', '--loop bottom--', - 'iter(a)#3', '--loop bottom--', - 'iter(a)', '--loop exit--', - ]) - - def test_looping2(self): - rec = self.compile_and_record(looping_usecase2) - self.assertFalse(rec.alive) - # `a` is disposed of after its iterator is taken - self.assertRecordOrder(rec, ['a', '--outer loop top--']) - # Check disposal of iterators - self.assertRecordOrder(rec, ['iter(a)', '--outer loop else--', - '--outer loop exit--']) - self.assertRecordOrder(rec, ['iter(b)', '--inner loop exit #1--', - 'iter(b)', '--inner loop exit #2--', - 'iter(b)', '--inner loop exit #3--', - ]) - # Disposal of in-loop variable `x` - self.assertRecordOrder(rec, ['iter(a)#1', '--inner loop entry #1--', - 'iter(a)#2', '--inner loop entry #2--', - 'iter(a)#3', '--inner loop entry #3--', - ]) - # Disposal of in-loop variable `z` - self.assertRecordOrder(rec, ['iter(a)#1 + iter(a)#1', - '--outer loop bottom #1--', - ]) - - def exercise_generator(self, genfunc): - cfunc = self.compile(genfunc) - # Exhaust the generator - rec = RefRecorder() - with self.assertRefCount(rec): - gen = cfunc(rec) - next(gen) - self.assertTrue(rec.alive) - list(gen) - self.assertFalse(rec.alive) - # Instantiate the generator but never iterate - rec = RefRecorder() - with self.assertRefCount(rec): - gen = cfunc(rec) - del gen - self.assertFalse(rec.alive) - # Stop iterating before exhaustion - rec = RefRecorder() - with self.assertRefCount(rec): - gen = cfunc(rec) - next(gen) - self.assertTrue(rec.alive) - del gen - self.assertFalse(rec.alive) - - def test_generator1(self): - self.exercise_generator(generator_usecase1) - - def test_generator2(self): - self.exercise_generator(generator_usecase2) - - def test_del_before_definition(self): - rec = self.compile_and_record(del_before_definition) - self.assertEqual(rec.recorded, ['0', '1', '2']) - - def test_raising1(self): - with self.assertRefCount(do_raise): - rec = 
self.compile_and_record(raising_usecase1, raises=MyError) - self.assertFalse(rec.alive) - - def test_raising2(self): - with self.assertRefCount(do_raise): - rec = self.compile_and_record(raising_usecase2, raises=MyError) - self.assertFalse(rec.alive) - - def test_raising3(self): - with self.assertRefCount(MyError): - rec = self.compile_and_record(raising_usecase3, raises=MyError) - self.assertFalse(rec.alive) - - def test_inf_loop_multiple_back_edge(self): - cfunc = self.compile(inf_loop_multiple_back_edge) - rec = RefRecorder() - iterator = iter(cfunc(rec)) - next(iterator) - self.assertEqual(rec.alive, []) - next(iterator) - self.assertEqual(rec.alive, []) - next(iterator) - self.assertEqual(rec.alive, []) - self.assertEqual(rec.recorded, - ['yield', 'p', 'bra', 'yield', 'p', 'bra', 'yield']) - - -if __name__ == "__main__": - unittest.main() diff --git a/numba/numba/tests/test_object_mode.py b/numba/numba/tests/test_object_mode.py deleted file mode 100644 index 54fc87b13..000000000 --- a/numba/numba/tests/test_object_mode.py +++ /dev/null @@ -1,163 +0,0 @@ -""" -Testing object mode specifics. 
- -""" -from __future__ import print_function - -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import utils, jit -from .support import TestCase - - -def complex_constant(n): - tmp = n + 4 - return tmp + 3j - -def long_constant(n): - return n + 100000000000000000000000000000000000000000000000 - -def delitem_usecase(x): - del x[:] - - -forceobj = Flags() -forceobj.set("force_pyobject") - - -def loop_nest_3(x, y): - n = 0 - for i in range(x): - for j in range(y): - for k in range(x+y): - n += i * j - - return n - - -def array_of_object(x): - return x - - -class TestObjectMode(TestCase): - - def test_complex_constant(self): - pyfunc = complex_constant - cres = compile_isolated(pyfunc, (), flags=forceobj) - cfunc = cres.entry_point - self.assertPreciseEqual(pyfunc(12), cfunc(12)) - - def test_long_constant(self): - pyfunc = long_constant - cres = compile_isolated(pyfunc, (), flags=forceobj) - cfunc = cres.entry_point - self.assertPreciseEqual(pyfunc(12), cfunc(12)) - - def test_loop_nest(self): - """ - Test bug that decref the iterator early. 
- If the bug occurs, a segfault should occur - """ - pyfunc = loop_nest_3 - cres = compile_isolated(pyfunc, (), flags=forceobj) - cfunc = cres.entry_point - self.assertEqual(pyfunc(5, 5), cfunc(5, 5)) - - def bm_pyfunc(): - pyfunc(5, 5) - - def bm_cfunc(): - cfunc(5, 5) - - print(utils.benchmark(bm_pyfunc)) - print(utils.benchmark(bm_cfunc)) - - def test_array_of_object(self): - cfunc = jit(array_of_object) - objarr = np.array([object()] * 10) - self.assertIs(cfunc(objarr), objarr) - - def test_sequence_contains(self): - """ - Test handling of the `in` comparison - """ - @jit(forceobj=True) - def foo(x, y): - return x in y - - self.assertTrue(foo(1, [0, 1])) - self.assertTrue(foo(0, [0, 1])) - self.assertFalse(foo(2, [0, 1])) - - with self.assertRaises(TypeError) as raises: - foo(None, None) - - self.assertIn("is not iterable", str(raises.exception)) - - def test_delitem(self): - pyfunc = delitem_usecase - cres = compile_isolated(pyfunc, (), flags=forceobj) - cfunc = cres.entry_point - - l = [3, 4, 5] - cfunc(l) - self.assertPreciseEqual(l, []) - with self.assertRaises(TypeError): - cfunc(42) - - -class TestObjectModeInvalidRewrite(TestCase): - """ - Tests to ensure that rewrite passes didn't affect objmode lowering. 
- """ - - def _ensure_objmode(self, disp): - self.assertTrue(disp.signatures) - self.assertFalse(disp.nopython_signatures) - return disp - - def test_static_raise_in_objmode_fallback(self): - """ - Test code based on user submitted issue at - https://github.com/numba/numba/issues/2159 - """ - def test0(n): - return n - - def test1(n): - if n == 0: - # static raise will fail in objmode if the IR is modified by - # rewrite pass - raise ValueError() - return test0(n) # trigger objmode fallback - - compiled = jit(test1) - self.assertEqual(test1(10), compiled(10)) - self._ensure_objmode(compiled) - - def test_static_setitem_in_objmode_fallback(self): - """ - Test code based on user submitted issue at - https://github.com/numba/numba/issues/2169 - """ - - def test0(n): - return n - - def test(a1, a2): - a1 = np.asarray(a1) - # static setitem here will fail in objmode if the IR is modified by - # rewrite pass - a2[0] = 1 - return test0(a1.sum() + a2.sum()) # trigger objmode fallback - - compiled = jit(test) - args = np.array([3]), np.array([4]) - self.assertEqual(test(*args), compiled(*args)) - self._ensure_objmode(compiled) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_objects.py b/numba/numba/tests/test_objects.py deleted file mode 100644 index 865d19dea..000000000 --- a/numba/numba/tests/test_objects.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -Test generic manipulation of objects. 
-""" - -from __future__ import print_function - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba.utils import PYVERSION -from numba import types -from .support import TestCase - - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -force_pyobj_flags = Flags() -force_pyobj_flags.set("force_pyobject") - -no_pyobj_flags = Flags() - - -class C(object): - pass - - -def setattr_usecase(o, v): - o.x = v - -def delattr_usecase(o): - del o.x - - -class TestAttributes(TestCase): - - def test_setattr(self, flags=enable_pyobj_flags): - pyfunc = setattr_usecase - cr = compile_isolated(pyfunc, (object, types.int32), flags=flags) - cfunc = cr.entry_point - c = C() - cfunc(c, 123) - self.assertEqual(c.x, 123) - - def test_setattr_attribute_error(self, flags=enable_pyobj_flags): - pyfunc = setattr_usecase - cr = compile_isolated(pyfunc, (object, types.int32), flags=flags) - cfunc = cr.entry_point - # Can't set undeclared slot - with self.assertRaises(AttributeError): - cfunc(object(), 123) - - def test_delattr(self, flags=enable_pyobj_flags): - pyfunc = delattr_usecase - cr = compile_isolated(pyfunc, (object,), flags=flags) - cfunc = cr.entry_point - c = C() - c.x = 123 - cfunc(c) - with self.assertRaises(AttributeError): - c.x - - def test_delattr_attribute_error(self, flags=enable_pyobj_flags): - pyfunc = delattr_usecase - cr = compile_isolated(pyfunc, (object,), flags=flags) - cfunc = cr.entry_point - # Can't delete non-existing attribute - with self.assertRaises(AttributeError): - cfunc(C()) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_operators.py b/numba/numba/tests/test_operators.py deleted file mode 100644 index eed8bd16e..000000000 --- a/numba/numba/tests/test_operators.py +++ /dev/null @@ -1,1558 +0,0 @@ -from __future__ import print_function - -import copy -import itertools -import operator -import sys -import warnings - -import numpy as np - -import 
numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import jit, types, typeinfer, utils, errors -from numba.config import PYVERSION -from .support import TestCase, tag -from .true_div_usecase import truediv_usecase, itruediv_usecase -from .matmul_usecase import (matmul_usecase, imatmul_usecase, DumbMatrix, - needs_matmul, needs_blas) - -Noflags = Flags() - -force_pyobj_flags = Flags() -force_pyobj_flags.set("force_pyobject") - - -def make_static_power(exp): - def pow_usecase(x): - return x ** exp - return pow_usecase - - -class LiteralOperatorImpl(object): - - @staticmethod - def add_usecase(x, y): - return x + y - - @staticmethod - def iadd_usecase(x, y): - x += y - return x - - @staticmethod - def sub_usecase(x, y): - return x - y - - @staticmethod - def isub_usecase(x, y): - x -= y - return x - - @staticmethod - def mul_usecase(x, y): - return x * y - - @staticmethod - def imul_usecase(x, y): - x *= y - return x - - @staticmethod - def div_usecase(x, y): - return x / y - - @staticmethod - def idiv_usecase(x, y): - x /= y - return x - - @staticmethod - def floordiv_usecase(x, y): - return x // y - - @staticmethod - def ifloordiv_usecase(x, y): - x //= y - return x - - truediv_usecase = staticmethod(truediv_usecase) - itruediv_usecase = staticmethod(itruediv_usecase) - if matmul_usecase: - matmul_usecase = staticmethod(matmul_usecase) - imatmul_usecase = staticmethod(imatmul_usecase) - - @staticmethod - def mod_usecase(x, y): - return x % y - - @staticmethod - def imod_usecase(x, y): - x %= y - return x - - @staticmethod - def pow_usecase(x, y): - return x ** y - - @staticmethod - def ipow_usecase(x, y): - x **= y - return x - - @staticmethod - def bitshift_left_usecase(x, y): - return x << y - - @staticmethod - def bitshift_ileft_usecase(x, y): - x <<= y - return x - - @staticmethod - def bitshift_right_usecase(x, y): - return x >> y - - @staticmethod - def bitshift_iright_usecase(x, y): - x >>= y - return x - - 
@staticmethod - def bitwise_and_usecase(x, y): - return x & y - - @staticmethod - def bitwise_iand_usecase(x, y): - x &= y - return x - - @staticmethod - def bitwise_or_usecase(x, y): - return x | y - - @staticmethod - def bitwise_ior_usecase(x, y): - x |= y - return x - - @staticmethod - def bitwise_xor_usecase(x, y): - return x ^ y - - @staticmethod - def bitwise_ixor_usecase(x, y): - x ^= y - return x - - @staticmethod - def bitwise_not_usecase_binary(x, _unused): - return ~x - - @staticmethod - def bitwise_not_usecase(x): - return ~x - - @staticmethod - def not_usecase(x): - return not(x) - - @staticmethod - def negate_usecase(x): - return -x - - @staticmethod - def unary_positive_usecase(x): - return +x - - @staticmethod - def lt_usecase(x, y): - return x < y - - @staticmethod - def le_usecase(x, y): - return x <= y - - @staticmethod - def gt_usecase(x, y): - return x > y - - @staticmethod - def ge_usecase(x, y): - return x >= y - - @staticmethod - def eq_usecase(x, y): - return x == y - - @staticmethod - def ne_usecase(x, y): - return x != y - - @staticmethod - def in_usecase(x, y): - return x in y - - @staticmethod - def not_in_usecase(x, y): - return x not in y - - -class FunctionalOperatorImpl(object): - - @staticmethod - def add_usecase(x, y): - return operator.add(x, y) - - @staticmethod - def iadd_usecase(x, y): - return operator.iadd(x, y) - - @staticmethod - def sub_usecase(x, y): - return operator.sub(x, y) - - @staticmethod - def isub_usecase(x, y): - return operator.isub(x, y) - - @staticmethod - def mul_usecase(x, y): - return operator.mul(x, y) - - @staticmethod - def imul_usecase(x, y): - return operator.imul(x, y) - - if PYVERSION >= (3, 0): - div_usecase = NotImplemented - idiv_usecase = NotImplemented - else: - @staticmethod - def div_usecase(x, y): - return operator.div(x, y) - - @staticmethod - def idiv_usecase(x, y): - return operator.idiv(x, y) - - @staticmethod - def floordiv_usecase(x, y): - return operator.floordiv(x, y) - - 
@staticmethod - def ifloordiv_usecase(x, y): - return operator.ifloordiv(x, y) - - @staticmethod - def truediv_usecase(x, y): - return operator.truediv(x, y) - - @staticmethod - def itruediv_usecase(x, y): - return operator.itruediv(x, y) - - @staticmethod - def mod_usecase(x, y): - return operator.mod(x, y) - - @staticmethod - def imod_usecase(x, y): - return operator.imod(x, y) - - @staticmethod - def pow_usecase(x, y): - return operator.pow(x, y) - - @staticmethod - def ipow_usecase(x, y): - return operator.ipow(x, y) - - @staticmethod - def matmul_usecase(x, y): - return operator.matmul(x, y) - - @staticmethod - def imatmul_usecase(x, y): - return operator.imatmul(x, y) - - @staticmethod - def bitshift_left_usecase(x, y): - return operator.lshift(x, y) - - @staticmethod - def bitshift_ileft_usecase(x, y): - return operator.ilshift(x, y) - - @staticmethod - def bitshift_right_usecase(x, y): - return operator.rshift(x, y) - - @staticmethod - def bitshift_iright_usecase(x, y): - return operator.irshift(x, y) - - @staticmethod - def bitwise_and_usecase(x, y): - return operator.and_(x, y) - - @staticmethod - def bitwise_iand_usecase(x, y): - return operator.iand(x, y) - - @staticmethod - def bitwise_or_usecase(x, y): - return operator.or_(x, y) - - @staticmethod - def bitwise_ior_usecase(x, y): - return operator.ior(x, y) - - @staticmethod - def bitwise_xor_usecase(x, y): - return operator.xor(x, y) - - @staticmethod - def bitwise_ixor_usecase(x, y): - return operator.ixor(x, y) - - @staticmethod - def bitwise_not_usecase_binary(x, _unused): - return operator.invert(x) - - @staticmethod - def bitwise_not_usecase(x): - return operator.invert(x) - - @staticmethod - def not_usecase(x): - return operator.not_(x) - - @staticmethod - def negate_usecase(x): - return operator.neg(x) - - @staticmethod - def unary_positive_usecase(x): - return operator.pos(x) - - @staticmethod - def lt_usecase(x, y): - return operator.lt(x, y) - - @staticmethod - def le_usecase(x, y): - 
return operator.le(x, y) - - @staticmethod - def gt_usecase(x, y): - return operator.gt(x, y) - - @staticmethod - def ge_usecase(x, y): - return operator.ge(x, y) - - @staticmethod - def eq_usecase(x, y): - return operator.eq(x, y) - - @staticmethod - def ne_usecase(x, y): - return operator.ne(x, y) - - @staticmethod - def in_usecase(x, y): - return operator.contains(y, x) - - @staticmethod - def not_in_usecase(x, y): - return not operator.contains(y, x) - - -class TestOperators(TestCase): - """ - Test standard Python operators on scalars. - - NOTE: operators on array are generally tested in test_ufuncs. - """ - - op = LiteralOperatorImpl - - _bitwise_opnames = { - 'bitshift_left_usecase': '<<', - 'bitshift_ileft_usecase': '<<', - 'bitshift_right_usecase': '>>', - 'bitshift_iright_usecase': '>>', - 'bitwise_and_usecase': '&', - 'bitwise_iand_usecase': '&', - 'bitwise_or_usecase': '|', - 'bitwise_ior_usecase': '|', - 'bitwise_xor_usecase': '^', - 'bitwise_ixor_usecase': '^', - 'bitwise_not_usecase_binary': '~', - } - - def run_test_ints(self, pyfunc, x_operands, y_operands, types_list, - flags=force_pyobj_flags): - if pyfunc is NotImplemented: - self.skipTest("test irrelevant on this version of Python") - for arg_types in types_list: - cr = compile_isolated(pyfunc, arg_types, flags=flags) - cfunc = cr.entry_point - for x, y in itertools.product(x_operands, y_operands): - # For inplace ops, we check that the first operand - # was correctly mutated. 
- x_got = copy.copy(x) - x_expected = copy.copy(x) - got = cfunc(x_got, y) - expected = pyfunc(x_expected, y) - self.assertPreciseEqual( - got, expected, - msg="mismatch for (%r, %r) with types %s: %r != %r" - % (x, y, arg_types, got, expected)) - self.assertPreciseEqual( - x_got, x_expected, - msg="mismatch for (%r, %r) with types %s: %r != %r" - % (x, y, arg_types, x_got, x_expected)) - - def run_test_floats(self, pyfunc, x_operands, y_operands, types_list, - flags=force_pyobj_flags): - if pyfunc is NotImplemented: - self.skipTest("test irrelevant on this version of Python") - for arg_types in types_list: - cr = compile_isolated(pyfunc, arg_types, flags=flags) - cfunc = cr.entry_point - for x, y in itertools.product(x_operands, y_operands): - # For inplace ops, we check that the first operand - # was correctly mutated. - x_got = copy.copy(x) - x_expected = copy.copy(x) - got = cfunc(x_got, y) - expected = pyfunc(x_expected, y) - np.testing.assert_allclose(got, expected, rtol=1e-5) - np.testing.assert_allclose(x_got, x_expected, rtol=1e-5) - - def coerce_operand(self, op, numba_type): - if hasattr(op, "dtype"): - return numba_type.cast_python_value(op) - elif numba_type in types.unsigned_domain: - return abs(int(op.real)) - elif numba_type in types.integer_domain: - return int(op.real) - elif numba_type in types.real_domain: - return float(op.real) - else: - return op - - def run_test_scalar_compare(self, pyfunc, flags=force_pyobj_flags, - ordered=True): - ops = self.compare_scalar_operands - types_list = self.compare_types - if not ordered: - types_list = types_list + self.compare_unordered_types - for typ in types_list: - cr = compile_isolated(pyfunc, (typ, typ), flags=flags) - cfunc = cr.entry_point - for x, y in itertools.product(ops, ops): - x = self.coerce_operand(x, typ) - y = self.coerce_operand(y, typ) - expected = pyfunc(x, y) - got = cfunc(x, y) - # Scalar ops => scalar result - self.assertIs(type(got), type(expected)) - self.assertEqual(got, expected, 
- "mismatch with %r (%r, %r)" - % (typ, x, y)) - - - # - # Comparison operators - # - - compare_scalar_operands = [-0.5, -1.0 + 1j, -1.0 + 2j, -0.5 + 1j, 1.5] - compare_types = [types.int32, types.int64, - types.uint32, types.uint64, - types.float32, types.float64] - compare_unordered_types = [types.complex64, types.complex128] - - def test_lt_scalar(self, flags=force_pyobj_flags): - self.run_test_scalar_compare(self.op.lt_usecase, flags) - - @tag('important') - def test_lt_scalar_npm(self): - self.test_lt_scalar(flags=Noflags) - - def test_le_scalar(self, flags=force_pyobj_flags): - self.run_test_scalar_compare(self.op.le_usecase, flags) - - @tag('important') - def test_le_scalar_npm(self): - self.test_le_scalar(flags=Noflags) - - def test_gt_scalar(self, flags=force_pyobj_flags): - self.run_test_scalar_compare(self.op.gt_usecase, flags) - - @tag('important') - def test_gt_scalar_npm(self): - self.test_gt_scalar(flags=Noflags) - - def test_ge_scalar(self, flags=force_pyobj_flags): - self.run_test_scalar_compare(self.op.ge_usecase, flags) - - @tag('important') - def test_ge_scalar_npm(self): - self.test_ge_scalar(flags=Noflags) - - def test_eq_scalar(self, flags=force_pyobj_flags): - self.run_test_scalar_compare(self.op.eq_usecase, flags, ordered=False) - - @tag('important') - def test_eq_scalar_npm(self): - self.test_eq_scalar(flags=Noflags) - - def test_ne_scalar(self, flags=force_pyobj_flags): - self.run_test_scalar_compare(self.op.ne_usecase, flags, ordered=False) - - @tag('important') - def test_ne_scalar_npm(self): - self.test_ne_scalar(flags=Noflags) - - - # - # Arithmetic operators - # - - def run_binop_bools(self, pyfunc, flags=force_pyobj_flags): - x_operands = [False, False, True, True] - y_operands = [False, True, False, True] - - types_list = [(types.boolean, types.boolean)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - def run_binop_ints(self, pyfunc, flags=force_pyobj_flags): - x_operands = [-5, 0, 1, 2] - 
y_operands = [-3, -1, 1, 3] - - types_list = [(types.int32, types.int32), - (types.int64, types.int64)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = [2, 3] - y_operands = [1, 2] - - types_list = [(types.byte, types.byte), - (types.uint32, types.uint32), - (types.uint64, types.uint64)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - def run_binop_floats(self, pyfunc, flags=force_pyobj_flags): - x_operands = [-1.1, 0.0, 1.1] - y_operands = [-1.5, 0.8, 2.1] - - types_list = [(types.float32, types.float32), - (types.float64, types.float64)] - - self.run_test_floats(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - def run_binop_floats_floordiv(self, pyfunc, flags=force_pyobj_flags): - self.run_binop_floats(pyfunc, flags=flags) - - def run_binop_complex(self, pyfunc, flags=force_pyobj_flags): - x_operands = [-1.1 + 0.3j, 0.0 + 0.0j, 1.1j] - y_operands = [-1.5 - 0.7j, 0.8j, 2.1 - 2.0j] - - types_list = [(types.complex64, types.complex64), - (types.complex128, types.complex128)] - - self.run_test_floats(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - def generate_binop_tests(ns, usecases, tp_runners, npm_array=False): - for usecase in usecases: - for tp_name, runner_name in tp_runners.items(): - for nopython in (False, True): - test_name = "test_%s_%s" % (usecase, tp_name) - if nopython: - test_name += "_npm" - flags = Noflags if nopython else force_pyobj_flags - usecase_name = "%s_usecase" % usecase - - def inner(self, runner_name=runner_name, - usecase_name=usecase_name, flags=flags): - runner = getattr(self, runner_name) - op_usecase = getattr(self.op, usecase_name) - runner(op_usecase, flags) - - if nopython and 'array' in tp_name and not npm_array: - def test_meth(self): - with self.assertTypingError(): - inner() - else: - test_meth = inner - - test_meth.__name__ = test_name - - if nopython: - test_meth = tag('important')(test_meth) - - 
ns[test_name] = test_meth - - - generate_binop_tests(locals(), - ('add', 'iadd', 'sub', 'isub', 'mul', 'imul'), - {'ints': 'run_binop_ints', - 'floats': 'run_binop_floats', - 'complex': 'run_binop_complex', - }) - - generate_binop_tests(locals(), - ('div', 'idiv', 'truediv', 'itruediv'), - {'ints': 'run_binop_ints', - 'floats': 'run_binop_floats', - 'complex': 'run_binop_complex', - }) - - # NOTE: floordiv and mod unsupported for complex numbers - generate_binop_tests(locals(), - ('floordiv', 'ifloordiv', 'mod', 'imod'), - {'ints': 'run_binop_ints', - 'floats': 'run_binop_floats_floordiv', - }) - - def check_div_errors(self, usecase_name, msg, flags=force_pyobj_flags, - allow_complex=False): - pyfunc = getattr(self.op, usecase_name) - if pyfunc is NotImplemented: - self.skipTest("%r not implemented" % (usecase_name,)) - # Signed and unsigned division can take different code paths, - # test them both. - arg_types = [types.int32, types.uint32, types.float64] - if allow_complex: - arg_types.append(types.complex128) - for tp in arg_types: - cr = compile_isolated(pyfunc, (tp, tp), flags=flags) - cfunc = cr.entry_point - with self.assertRaises(ZeroDivisionError) as cm: - cfunc(1, 0) - # Test exception message if not in object mode - if flags is not force_pyobj_flags: - self.assertIn(msg, str(cm.exception)) - - def test_truediv_errors(self, flags=force_pyobj_flags): - self.check_div_errors("truediv_usecase", "division by zero", flags=flags, - allow_complex=True) - - def test_truediv_errors_npm(self): - self.test_truediv_errors(flags=Noflags) - - def test_floordiv_errors(self, flags=force_pyobj_flags): - self.check_div_errors("floordiv_usecase", "division by zero", flags=flags) - - def test_floordiv_errors_npm(self): - self.test_floordiv_errors(flags=Noflags) - - def test_div_errors(self, flags=force_pyobj_flags): - self.check_div_errors("div_usecase", "division by zero", flags=flags) - - def test_div_errors_npm(self): - self.test_div_errors(flags=Noflags) - - def 
test_mod_errors(self, flags=force_pyobj_flags): - self.check_div_errors("mod_usecase", "modulo by zero", flags=flags) - - def test_mod_errors_npm(self): - self.test_mod_errors(flags=Noflags) - - def run_pow_ints(self, pyfunc, flags=force_pyobj_flags): - x_operands = [-2, -1, 0, 1, 2] - y_operands = [0, 1, 2] - - types_list = [(types.int32, types.int32), - (types.int64, types.int64)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = [0, 1, 2] - y_operands = [0, 1, 2] - - types_list = [(types.byte, types.byte), - (types.uint32, types.uint32), - (types.uint64, types.uint64)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - def run_pow_floats(self, pyfunc, flags=force_pyobj_flags): - x_operands = [-222.222, -111.111, 111.111, 222.222] - y_operands = [-2, -1, 0, 1, 2] - - types_list = [(types.float32, types.float32), - (types.float64, types.float64)] - - self.run_test_floats(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = [0.0] - y_operands = [0, 1, 2] # TODO native handling of 0 ** negative power - - types_list = [(types.float32, types.float32), - (types.float64, types.float64)] - - self.run_test_floats(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - # XXX power operator is unsupported on complex numbers (see issue #488) - generate_binop_tests(locals(), - ('pow', 'ipow'), - {'ints': 'run_pow_ints', - 'floats': 'run_pow_floats', - }) - - def test_add_complex(self, flags=force_pyobj_flags): - pyfunc = self.op.add_usecase - - x_operands = [1+0j, 1j, -1-1j] - y_operands = x_operands - - types_list = [(types.complex64, types.complex64), - (types.complex128, types.complex128),] - - self.run_test_floats(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - @tag('important') - def test_add_complex_npm(self): - self.test_add_complex(flags=Noflags) - - def test_sub_complex(self, flags=force_pyobj_flags): - pyfunc = self.op.sub_usecase - - 
x_operands = [1+0j, 1j, -1-1j] - y_operands = [1, 2, 3] - - types_list = [(types.complex64, types.complex64), - (types.complex128, types.complex128),] - - self.run_test_floats(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - @tag('important') - def test_sub_complex_npm(self): - self.test_sub_complex(flags=Noflags) - - def test_mul_complex(self, flags=force_pyobj_flags): - pyfunc = self.op.mul_usecase - - x_operands = [1+0j, 1j, -1-1j] - y_operands = [1, 2, 3] - - types_list = [(types.complex64, types.complex64), - (types.complex128, types.complex128),] - - self.run_test_floats(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - @tag('important') - def test_mul_complex_npm(self): - self.test_mul_complex(flags=Noflags) - - def test_div_complex(self, flags=force_pyobj_flags): - pyfunc = self.op.div_usecase - - x_operands = [1+0j, 1j, -1-1j] - y_operands = [1, 2, 3] - - types_list = [(types.complex64, types.complex64), - (types.complex128, types.complex128),] - - self.run_test_floats(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - @tag('important') - def test_div_complex_npm(self): - self.test_div_complex(flags=Noflags) - - def test_truediv_complex(self, flags=force_pyobj_flags): - pyfunc = self.op.truediv_usecase - - x_operands = [1+0j, 1j, -1-1j] - y_operands = [1, 2, 3] - - types_list = [(types.complex64, types.complex64), - (types.complex128, types.complex128),] - - self.run_test_floats(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - @tag('important') - def test_truediv_complex_npm(self): - self.test_truediv_complex(flags=Noflags) - - def test_mod_complex(self, flags=force_pyobj_flags): - pyfunc = self.op.mod_usecase - with self.assertTypingError(): - cres = compile_isolated(pyfunc, (types.complex64, types.complex64)) - - @tag('important') - def test_mod_complex_npm(self): - self.test_mod_complex(flags=Noflags) - - # - # Matrix multiplication - # (just check with simple values; computational tests are in 
test_linalg) - # - - @needs_matmul - def check_matmul_objmode(self, pyfunc, inplace): - # Use dummy objects, to work with any Numpy / Scipy version - # (and because Numpy 1.10 doesn't implement "@=") - cres = compile_isolated(pyfunc, (), flags=force_pyobj_flags) - cfunc = cres.entry_point - a = DumbMatrix(3) - b = DumbMatrix(4) - got = cfunc(a, b) - self.assertEqual(got.value, 12) - if inplace: - self.assertIs(got, a) - else: - self.assertIsNot(got, a) - self.assertIsNot(got, b) - - @needs_matmul - def test_matmul(self): - self.check_matmul_objmode(self.op.matmul_usecase, inplace=False) - - @needs_matmul - def test_imatmul(self): - self.check_matmul_objmode(self.op.imatmul_usecase, inplace=True) - - @needs_blas - @needs_matmul - def check_matmul_npm(self, pyfunc): - arrty = types.Array(types.float32, 1, 'C') - cres = compile_isolated(pyfunc, (arrty, arrty), flags=Noflags) - cfunc = cres.entry_point - a = np.float32([1, 2]) - b = np.float32([3, 4]) - got = cfunc(a, b) - self.assertPreciseEqual(got, np.dot(a, b)) - # Never inplace - self.assertIsNot(got, a) - self.assertIsNot(got, b) - - @tag('important') - @needs_matmul - def test_matmul_npm(self): - self.check_matmul_npm(self.op.matmul_usecase) - - @tag('important') - @needs_matmul - def test_imatmul_npm(self): - with self.assertTypingError() as raises: - self.check_matmul_npm(self.op.imatmul_usecase) - - # - # Bitwise operators - # - - def run_bitshift_left(self, pyfunc, flags=force_pyobj_flags): - x_operands = [0, 1] - y_operands = [0, 1, 2, 4, 8, 16, 31] - - types_list = [(types.uint32, types.uint32)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = [0, 1] - y_operands = [0, 1, 2, 4, 8, 16, 32, 63] - - types_list = [(types.uint64, types.uint64)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = [0, -1] - y_operands = [0, 1, 2, 4, 8, 16, 31] - - types_list = [(types.int32, types.int32)] - - 
self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = [0, -1] - y_operands = [0, 1, 2, 4, 8, 16, 32, 63] - - types_list = [(types.int64, types.int64)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - generate_binop_tests(locals(), - ('bitshift_left', 'bitshift_ileft'), - {'ints': 'run_bitshift_left', - }) - - def run_bitshift_right(self, pyfunc, flags=force_pyobj_flags): - x_operands = [0, 1, 2**32 - 1] - y_operands = [0, 1, 2, 4, 8, 16, 31] - - types_list = [(types.uint32, types.uint32)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = [0, 1, 2**64 - 1] - y_operands = [0, 1, 2, 4, 8, 16, 32, 63] - - types_list = [(types.uint64, types.uint64)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = [0, 1, -(2**31)] - y_operands = [0, 1, 2, 4, 8, 16, 31] - - types_list = [(types.int32, types.int32)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = [0, -1, -(2**31)] - y_operands = [0, 1, 2, 4, 8, 16, 32, 63] - - types_list = [(types.int64, types.int64)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - generate_binop_tests(locals(), - ('bitshift_right', 'bitshift_iright'), - {'ints': 'run_bitshift_right', - }) - - def run_logical(self, pyfunc, flags=force_pyobj_flags): - x_operands = list(range(0, 8)) + [2**32 - 1] - y_operands = list(range(0, 8)) + [2**32 - 1] - - types_list = [(types.uint32, types.uint32)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = list(range(0, 8)) + [2**64 - 1] - y_operands = list(range(0, 8)) + [2**64 - 1] - - types_list = [(types.uint64, types.uint64)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = list(range(-4, 4)) + [-(2**31), 2**31 - 1] - y_operands = list(range(-4, 4)) + [-(2**31), 
2**31 - 1] - - types_list = [(types.int32, types.int32)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = list(range(-4, 4)) + [-(2**63), 2**63 - 1] - y_operands = list(range(-4, 4)) + [-(2**63), 2**63 - 1] - - types_list = [(types.int64, types.int64)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - generate_binop_tests(locals(), - ('bitwise_and', 'bitwise_iand', - 'bitwise_or', 'bitwise_ior', - 'bitwise_xor', 'bitwise_ixor'), - {'ints': 'run_logical', - 'bools': 'run_binop_bools', - }) - - # - # Unary operators - # - - def test_bitwise_not(self, flags=force_pyobj_flags): - pyfunc = self.op.bitwise_not_usecase_binary - - x_operands = list(range(0, 8)) + [2**32 - 1] - x_operands = [np.uint32(x) for x in x_operands] - y_operands = [0] - - types_list = [(types.uint32, types.uint32)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = list(range(-4, 4)) + [-(2**31), 2**31 - 1] - y_operands = [0] - - types_list = [(types.int32, types.int32)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = list(range(0, 8)) + [2**64 - 1] - x_operands = [np.uint64(x) for x in x_operands] - y_operands = [0] - - types_list = [(types.uint64, types.uint64)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - x_operands = list(range(-4, 4)) + [-(2**63), 2**63 - 1] - y_operands = [0] - - types_list = [(types.int64, types.int64)] - - self.run_test_ints(pyfunc, x_operands, y_operands, types_list, - flags=flags) - - # For booleans, we follow Numpy semantics (i.e. 
~True == False, - # not ~True == -2) - values = [False, False, True, True] - values = list(map(np.bool_, values)) - - pyfunc = self.op.bitwise_not_usecase - cres = compile_isolated(pyfunc, (types.boolean,), flags=flags) - cfunc = cres.entry_point - for val in values: - self.assertPreciseEqual(pyfunc(val), cfunc(val)) - - @tag('important') - def test_bitwise_not_npm(self): - self.test_bitwise_not(flags=Noflags) - - def test_bitwise_float(self): - """ - Make sure that bitwise float operations are not allowed - """ - def assert_reject_compile(pyfunc, argtypes, opname): - msg = 'expecting TypingError when compiling {}'.format(pyfunc) - with self.assertRaises(errors.TypingError, msg=msg) as raises: - compile_isolated(pyfunc, argtypes) - # check error message - fmt = 'Invalid use of {}' - expecting = fmt.format(opname - if isinstance(opname, str) - else 'Function({})'.format(opname)) - self.assertIn(expecting, str(raises.exception)) - - methods = [ - 'bitshift_left_usecase', - 'bitshift_ileft_usecase', - 'bitshift_right_usecase', - 'bitshift_iright_usecase', - 'bitwise_and_usecase', - 'bitwise_iand_usecase', - 'bitwise_or_usecase', - 'bitwise_ior_usecase', - 'bitwise_xor_usecase', - 'bitwise_ixor_usecase', - 'bitwise_not_usecase_binary', - ] - - for name in methods: - pyfunc = getattr(self.op, name) - assert_reject_compile(pyfunc, (types.float32, types.float32), - opname=self._bitwise_opnames[name]) - - def test_not(self): - pyfunc = self.op.not_usecase - - values = [ - 1, - 2, - 3, - 1.2, - 3.4j, - ] - - cres = compile_isolated(pyfunc, (), flags=force_pyobj_flags) - cfunc = cres.entry_point - for val in values: - self.assertEqual(pyfunc(val), cfunc(val)) - - @tag('important') - def test_not_npm(self): - pyfunc = self.op.not_usecase - # test native mode - argtys = [ - types.int8, - types.int32, - types.int64, - types.float32, - types.complex128, - ] - values = [ - 1, - 2, - 3, - 1.2, - 3.4j, - ] - for ty, val in zip(argtys, values): - cres = compile_isolated(pyfunc, 
[ty]) - self.assertEqual(cres.signature.return_type, types.boolean) - cfunc = cres.entry_point - self.assertEqual(pyfunc(val), cfunc(val)) - - # XXX test_negate should check for negative and positive zeros and infinities - - @tag('important') - def test_negate_npm(self): - pyfunc = self.op.negate_usecase - # test native mode - argtys = [ - types.int8, - types.int32, - types.int64, - types.float32, - types.float64, - types.complex128, - types.boolean, - types.boolean, - ] - values = [ - 1, - 2, - 3, - 1.2, - 2.4, - 3.4j, - True, - False, - ] - for ty, val in zip(argtys, values): - cres = compile_isolated(pyfunc, [ty]) - cfunc = cres.entry_point - self.assertAlmostEqual(pyfunc(val), cfunc(val)) - - - def test_negate(self): - pyfunc = self.op.negate_usecase - values = [ - 1, - 2, - 3, - 1.2, - 3.4j, - True, - False, - ] - cres = compile_isolated(pyfunc, (), flags=force_pyobj_flags) - cfunc = cres.entry_point - for val in values: - self.assertEqual(pyfunc(val), cfunc(val)) - - def test_unary_positive_npm(self): - pyfunc = self.op.unary_positive_usecase - # test native mode - argtys = [ - types.int8, - types.int32, - types.int64, - types.float32, - types.float64, - types.complex128, - types.boolean, - types.boolean, - ] - values = [ - 1, - 2, - 3, - 1.2, - 2.4, - 3.4j, - True, - False - ] - for ty, val in zip(argtys, values): - cres = compile_isolated(pyfunc, [ty]) - cfunc = cres.entry_point - self.assertAlmostEqual(pyfunc(val), cfunc(val)) - - def test_unary_positive(self): - pyfunc = self.op.unary_positive_usecase - values = [ - 1, - 2, - 3, - 1.2, - 3.4j, - True, - False, - ] - cres = compile_isolated(pyfunc, (), flags=force_pyobj_flags) - cfunc = cres.entry_point - for val in values: - self.assertEqual(pyfunc(val), cfunc(val)) - - def _check_in(self, pyfunc, flags): - dtype = types.int64 - cres = compile_isolated(pyfunc, (dtype, types.UniTuple(dtype, 3)), - flags=flags) - cfunc = cres.entry_point - for i in (3, 4, 5, 6, 42): - tup = (3, 42, 5) - 
self.assertPreciseEqual(pyfunc(i, tup), cfunc(i, tup)) - - def test_in(self, flags=force_pyobj_flags): - self._check_in(self.op.in_usecase, flags) - - def test_in_npm(self): - self.test_in(flags=Noflags) - - def test_not_in(self, flags=force_pyobj_flags): - self._check_in(self.op.not_in_usecase, flags) - - def test_not_in_npm(self): - self.test_not_in(flags=Noflags) - - -class TestOperatorModule(TestOperators): - - op = FunctionalOperatorImpl - - _bitwise_opnames = { - 'bitshift_left_usecase': operator.lshift, - 'bitshift_ileft_usecase': operator.ilshift, - 'bitshift_right_usecase': operator.rshift, - 'bitshift_iright_usecase': operator.irshift, - 'bitwise_and_usecase': operator.and_, - 'bitwise_iand_usecase': operator.iand, - 'bitwise_or_usecase': operator.or_, - 'bitwise_ior_usecase': operator.ior, - 'bitwise_xor_usecase': operator.xor, - 'bitwise_ixor_usecase': operator.ixor, - 'bitwise_not_usecase_binary': operator.invert, - } - - -class TestMixedInts(TestCase): - """ - Tests for operator calls with mixed integer types. 
- """ - - op = LiteralOperatorImpl - - int_samples = [0, 1, 3, 10, 42, 127, 10000, -1, -3, -10, -42, -127, -10000] - - int_types = [types.int8, types.uint8, types.int64, types.uint64] - signed_types = [tp for tp in int_types if tp.signed] - unsigned_types = [tp for tp in int_types if not tp.signed] - type_pairs = list(itertools.product(int_types, int_types)) - signed_pairs = [(u, v) for u, v in type_pairs - if u.signed or v.signed] - unsigned_pairs = [(u, v) for u, v in type_pairs - if not (u.signed or v.signed)] - - def get_numpy_signed_upcast(self, *vals): - bitwidth = max(v.dtype.itemsize * 8 for v in vals) - bitwidth = max(bitwidth, types.intp.bitwidth) - return getattr(np, "int%d" % bitwidth) - - def get_numpy_unsigned_upcast(self, *vals): - bitwidth = max(v.dtype.itemsize * 8 for v in vals) - bitwidth = max(bitwidth, types.intp.bitwidth) - return getattr(np, "uint%d" % bitwidth) - - def get_typed_int(self, typ, val): - return getattr(np, typ.name)(val) - - def get_control_signed(self, opname): - op = getattr(operator, opname) - def control_signed(a, b): - tp = self.get_numpy_signed_upcast(a, b) - return op(tp(a), tp(b)) - return control_signed - - def get_control_unsigned(self, opname): - op = getattr(operator, opname) - def control_unsigned(a, b): - tp = self.get_numpy_unsigned_upcast(a, b) - return op(tp(a), tp(b)) - return control_unsigned - - def run_binary(self, pyfunc, control_func, operands, types, - expected_type=utils.INT_TYPES, **assertPreciseEqualArgs): - if pyfunc is NotImplemented: - self.skipTest("test irrelevant on this version of Python") - - for xt, yt in types: - cr = compile_isolated(pyfunc, (xt, yt), flags=Noflags) - cfunc = cr.entry_point - for x, y in itertools.product(operands, operands): - # Get Numpy typed scalars for the given types and values - x = self.get_typed_int(xt, x) - y = self.get_typed_int(yt, y) - expected = control_func(x, y) - got = cfunc(x, y) - self.assertIsInstance(got, expected_type) - msg = ("mismatch for (%r, %r) 
with types %s" - % (x, y, (xt, yt))) - self.assertPreciseEqual(got, expected, msg=msg, - **assertPreciseEqualArgs) - - def run_unary(self, pyfunc, control_func, operands, types, - expected_type=utils.INT_TYPES): - if pyfunc is NotImplemented: - self.skipTest("test irrelevant on this version of Python") - - for xt in types: - cr = compile_isolated(pyfunc, (xt,), flags=Noflags) - cfunc = cr.entry_point - for x in operands: - x = self.get_typed_int(xt, x) - expected = control_func(x) - got = cfunc(x) - self.assertIsInstance(got, expected_type) - self.assertPreciseEqual( - got, expected, - msg="mismatch for %r with type %s: %r != %r" - % (x, xt, got, expected)) - - def run_arith_binop(self, pyfunc, opname, samples, - expected_type=utils.INT_TYPES): - self.run_binary(pyfunc, self.get_control_signed(opname), - samples, self.signed_pairs, expected_type) - self.run_binary(pyfunc, self.get_control_unsigned(opname), - samples, self.unsigned_pairs, expected_type) - - @tag('important') - def test_add(self): - self.run_arith_binop(self.op.add_usecase, 'add', self.int_samples) - - @tag('important') - def test_sub(self): - self.run_arith_binop(self.op.sub_usecase, 'sub', self.int_samples) - - @tag('important') - def test_mul(self): - self.run_arith_binop(self.op.mul_usecase, 'mul', self.int_samples) - - def test_floordiv(self): - samples = [x for x in self.int_samples if x != 0] - self.run_arith_binop(self.op.floordiv_usecase, 'floordiv', samples) - - def test_mod(self): - samples = [x for x in self.int_samples if x != 0] - self.run_arith_binop(self.op.mod_usecase, 'mod', samples) - - def test_pow(self): - pyfunc = self.op.pow_usecase - # Only test with positive values, as otherwise trying to write the - # control function in terms of Python or Numpy power turns out insane. 
- samples = [x for x in self.int_samples if x >= 0] - self.run_arith_binop(pyfunc, 'pow', samples) - - # Now test all non-zero values, but only with signed types - def control_signed(a, b): - tp = self.get_numpy_signed_upcast(a, b) - if b >= 0: - return tp(a) ** tp(b) - else: - inv = tp(a) ** tp(-b) - if inv == 0: - # Overflow - return 0 - return np.intp(1.0 / inv) - samples = [x for x in self.int_samples if x != 0] - signed_pairs = [(u, v) for u, v in self.type_pairs - if u.signed and v.signed] - self.run_binary(pyfunc, control_signed, - samples, signed_pairs) - - def test_truediv(self): - def control(a, b): - return truediv_usecase(float(a), float(b)) - samples = [x for x in self.int_samples if x != 0] - pyfunc = self.op.truediv_usecase - - # Note: there can be precision issues on x87 - # e.g. for `1 / 18446744073709541616` - # -> 0x1.0000000000002p-64 vs. 0x1.0000000000003p-64. - self.run_binary(pyfunc, control, samples, self.signed_pairs, - expected_type=float, prec='double') - self.run_binary(pyfunc, control, samples, self.unsigned_pairs, - expected_type=float, prec='double') - - def test_and(self): - self.run_arith_binop(self.op.bitwise_and_usecase, 'and_', self.int_samples) - - def test_or(self): - self.run_arith_binop(self.op.bitwise_or_usecase, 'or_', self.int_samples) - - def test_xor(self): - self.run_arith_binop(self.op.bitwise_xor_usecase, 'xor', self.int_samples) - - def run_shift_binop(self, pyfunc, opname): - opfunc = getattr(operator, opname) - def control_signed(a, b): - tp = self.get_numpy_signed_upcast(a, b) - return opfunc(tp(a), tp(b)) - def control_unsigned(a, b): - tp = self.get_numpy_unsigned_upcast(a, b) - return opfunc(tp(a), tp(b)) - - samples = self.int_samples - - def check(xt, yt, control_func): - cr = compile_isolated(pyfunc, (xt, yt), flags=Noflags) - cfunc = cr.entry_point - for x in samples: - # Avoid shifting by more than the shiftand's bitwidth, as - # we would hit undefined behaviour. 
- maxshift = xt.bitwidth - 1 - for y in (0, 1, 3, 5, maxshift - 1, maxshift): - # Get Numpy typed scalars for the given types and values - x = self.get_typed_int(xt, x) - y = self.get_typed_int(yt, y) - expected = control_func(x, y) - got = cfunc(x, y) - msg = ("mismatch for (%r, %r) with types %s" - % (x, y, (xt, yt))) - self.assertPreciseEqual(got, expected, msg=msg) - - # For bitshifts, only the first operand's signedness matters - # to choose the operation's signedness. - signed_pairs = [(u, v) for u, v in self.type_pairs - if u.signed] - unsigned_pairs = [(u, v) for u, v in self.type_pairs - if not u.signed] - - for xt, yt in signed_pairs: - check(xt, yt, control_signed) - for xt, yt in unsigned_pairs: - check(xt, yt, control_unsigned) - - def test_lshift(self): - self.run_shift_binop(self.op.bitshift_left_usecase, 'lshift') - - def test_rshift(self): - self.run_shift_binop(self.op.bitshift_right_usecase, 'rshift') - - def test_unary_positive(self): - def control(a): - return a - samples = self.int_samples - pyfunc = self.op.unary_positive_usecase - - self.run_unary(pyfunc, control, samples, self.int_types) - - def test_unary_negative(self): - def control_signed(a): - tp = self.get_numpy_signed_upcast(a) - return tp(-a) - def control_unsigned(a): - tp = self.get_numpy_unsigned_upcast(a) - return tp(-a) - samples = self.int_samples - pyfunc = self.op.negate_usecase - - self.run_unary(pyfunc, control_signed, samples, self.signed_types) - self.run_unary(pyfunc, control_unsigned, samples, self.unsigned_types) - - def test_invert(self): - def control_signed(a): - tp = self.get_numpy_signed_upcast(a) - return tp(~a) - def control_unsigned(a): - tp = self.get_numpy_unsigned_upcast(a) - return tp(~a) - samples = self.int_samples - pyfunc = self.op.bitwise_not_usecase - - self.run_unary(pyfunc, control_signed, samples, self.signed_types) - self.run_unary(pyfunc, control_unsigned, samples, self.unsigned_types) - - -class TestMixedIntsOperatorModule(TestMixedInts): - - 
op = FunctionalOperatorImpl - - -class TestStaticPower(TestCase): - """ - Test the ** operator with a static exponent, to exercise a - dedicated optimization. - """ - - def _check_pow(self, exponents, values): - for exp in exponents: - # test against non-static version of the @jit-ed function - regular_func = LiteralOperatorImpl.pow_usecase - static_func = make_static_power(exp) - - static_cfunc = jit(nopython=True)(static_func) - regular_cfunc = jit(nopython=True)(regular_func) - for v in values: - try: - expected = regular_cfunc(v, exp) - except ZeroDivisionError: - with self.assertRaises(ZeroDivisionError): - static_cfunc(v) - else: - got = static_cfunc(v) - self.assertPreciseEqual(expected, got, prec='double') - - def test_int_values(self): - exponents = [1, 2, 3, 5, 17, 0, -1, -2, -3] - vals = [0, 1, 3, -1, -4, np.int8(-3), np.uint16(4)] - - self._check_pow(exponents, vals) - - def test_real_values(self): - exponents = [1, 2, 3, 5, 17, 0, -1, -2, -3, 0x111111, -0x111112] - vals = [1.5, 3.25, -1.25, np.float32(-2.0), float('inf'), float('nan')] - - self._check_pow(exponents, vals) - -class TestStringConstComparison(TestCase): - """ - Test comparison of string constants - """ - def test_eq(self): - def test_impl1(): - s = 'test' - return s == 'test' - - def test_impl2(): - s = 'test1' - return s == 'test' - - cfunc1 = jit(nopython=True)(test_impl1) - cfunc2 = jit(nopython=True)(test_impl2) - self.assertEqual(test_impl1(), cfunc1()) - self.assertEqual(test_impl2(), cfunc2()) - - def test_neq(self): - def test_impl1(): - s = 'test' - return s != 'test' - - def test_impl2(): - s = 'test1' - return s != 'test' - - cfunc1 = jit(nopython=True)(test_impl1) - cfunc2 = jit(nopython=True)(test_impl2) - self.assertEqual(test_impl1(), cfunc1()) - self.assertEqual(test_impl2(), cfunc2()) - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_optional.py b/numba/numba/tests/test_optional.py deleted file mode 100644 index 344c2fdb1..000000000 --- 
a/numba/numba/tests/test_optional.py +++ /dev/null @@ -1,245 +0,0 @@ -from __future__ import print_function, absolute_import - -import itertools - -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import types, typeof, njit -from numba import lowering -from .support import TestCase - - -def return_double_or_none(x): - if x: - ret = None - else: - ret = 1.2 - return ret - - -def return_different_statement(x): - if x: - return None - else: - return 1.2 - - -def return_bool_optional_or_none(x, y): - if y: - z = False - else: - z = None - if x == 2: - # A boolean - return True - elif x == 1: - # A runtime optional - return z - else: - # None - return None - - -def is_this_a_none(x): - if x: - val_or_none = None - else: - val_or_none = x - - if val_or_none is None: - return x - 1 - - if val_or_none is not None: - return x + 1 - - -def a_is_b(a, b): - """ - Note in nopython mode, this operation does not make much sense. - Because we don't have objects anymore. 
- `a is b` is always False if not operating on None and Optional type - """ - return a is b - - -def a_is_not_b(a, b): - """ - This is `not (a is b)` - """ - return a is not b - - -class TestOptional(TestCase): - - _numba_parallel_test_ = False - - def test_return_double_or_none(self): - pyfunc = return_double_or_none - cres = compile_isolated(pyfunc, [types.boolean]) - cfunc = cres.entry_point - - for v in [True, False]: - self.assertPreciseEqual(pyfunc(v), cfunc(v)) - - def test_return_different_statement(self): - pyfunc = return_different_statement - cres = compile_isolated(pyfunc, [types.boolean]) - cfunc = cres.entry_point - - for v in [True, False]: - self.assertPreciseEqual(pyfunc(v), cfunc(v)) - - def test_return_bool_optional_or_none(self): - pyfunc = return_bool_optional_or_none - cres = compile_isolated(pyfunc, [types.int32, types.int32]) - cfunc = cres.entry_point - - for x, y in itertools.product((0, 1, 2), (0, 1)): - self.assertPreciseEqual(pyfunc(x, y), cfunc(x, y)) - - def test_is_this_a_none(self): - pyfunc = is_this_a_none - cres = compile_isolated(pyfunc, [types.intp]) - cfunc = cres.entry_point - - for v in [-1, 0, 1, 2]: - self.assertPreciseEqual(pyfunc(v), cfunc(v)) - - def test_is_this_a_none_objmode(self): - pyfunc = is_this_a_none - flags = Flags() - flags.set('force_pyobject') - cres = compile_isolated(pyfunc, [types.intp], flags=flags) - cfunc = cres.entry_point - self.assertTrue(cres.objectmode) - for v in [-1, 0, 1, 2]: - self.assertPreciseEqual(pyfunc(v), cfunc(v)) - - def test_a_is_b_intp(self): - pyfunc = a_is_b - cres = compile_isolated(pyfunc, [types.intp, types.intp]) - cfunc = cres.entry_point - # integer identity relies on `==` - self.assertTrue(cfunc(1, 1)) - self.assertFalse(cfunc(1, 2)) - - def test_a_is_not_b_intp(self): - pyfunc = a_is_not_b - cres = compile_isolated(pyfunc, [types.intp, types.intp]) - cfunc = cres.entry_point - # integer identity relies on `==` - self.assertFalse(cfunc(1, 1)) - self.assertTrue(cfunc(1, 2)) 
- - def test_optional_float(self): - def pyfunc(x, y): - if y is None: - return x - else: - return x + y - - cfunc = njit("(float64, optional(float64))")(pyfunc) - self.assertAlmostEqual(pyfunc(1., 12.3), cfunc(1., 12.3)) - self.assertAlmostEqual(pyfunc(1., None), cfunc(1., None)) - - def test_optional_array(self): - def pyfunc(x, y): - if y is None: - return x - else: - y[0] += x - return y[0] - - cfunc = njit("(float32, optional(float32[:]))")(pyfunc) - cy = np.array([12.3], dtype=np.float32) - py = cy.copy() - self.assertAlmostEqual(pyfunc(1., py), cfunc(1., cy)) - np.testing.assert_almost_equal(py, cy) - self.assertAlmostEqual(pyfunc(1., None), cfunc(1., None)) - - def test_optional_array_error(self): - def pyfunc(y): - return y[0] - - cfunc = njit("(optional(int32[:]),)")(pyfunc) - with self.assertRaises(TypeError) as raised: - cfunc(None) - self.assertIn('expected array(int32, 1d, A), got None', - str(raised.exception)) - - y = np.array([0xabcd], dtype=np.int32) - self.assertEqual(cfunc(y), pyfunc(y)) - - def test_optional_array_attribute(self): - """ - Check that we can access attribute of an optional - """ - def pyfunc(arr, do_it): - opt = None - if do_it: # forces `opt` to be an optional of arr - opt = arr - return opt.shape[0] - - cfunc = njit(pyfunc) - arr = np.arange(5) - self.assertEqual(pyfunc(arr, True), cfunc(arr, True)) - - def test_assign_to_optional(self): - """ - Check that we can assign to a variable of optional type - """ - @njit - def make_optional(val, get_none): - if get_none: - ret = None - else: - ret = val - return ret - - @njit - def foo(val, run_second): - a = make_optional(val, True) - if run_second: - a = make_optional(val, False) - return a - - self.assertIsNone(foo(123, False)) - self.assertEqual(foo(231, True), 231) - - def test_optional_thru_omitted_arg(self): - """ - Issue 1868 - """ - - def pyfunc(x=None): - if x is None: - x = 1 - return x - - cfunc = njit(pyfunc) - self.assertEqual(pyfunc(), cfunc()) - 
self.assertEqual(pyfunc(3), cfunc(3)) - - def test_optional_unpack(self): - """ - Issue 2171 - """ - def pyfunc(x): - if x is None: - return - else: - a, b = x - return a, b - - tup = types.Tuple([types.intp] * 2) - opt_tup = types.Optional(tup) - sig = (opt_tup,) - cfunc = njit(sig)(pyfunc) - self.assertEqual(pyfunc(None), cfunc(None)) - self.assertEqual(pyfunc((1, 2)), cfunc((1, 2))) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_overlap.py b/numba/numba/tests/test_overlap.py deleted file mode 100644 index 5fb1c1bc2..000000000 --- a/numba/numba/tests/test_overlap.py +++ /dev/null @@ -1,135 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import numpy as np - -from numba import unittest_support as unittest -from numba import jit, types -from .support import TestCase, tag - - -# Array overlaps involving a displacement - -def array_overlap1(src, dest, k=1): - assert src.shape == dest.shape - dest[k:] = src[:-k] - -def array_overlap2(src, dest, k=1): - assert src.shape == dest.shape - dest[:-k] = src[k:] - -def array_overlap3(src, dest, k=1): - assert src.shape == dest.shape - dest[:,:-k] = src[:,k:] - -def array_overlap4(src, dest, k=1): - assert src.shape == dest.shape - dest[:,k:] = src[:,:-k] - -def array_overlap5(src, dest, k=1): - assert src.shape == dest.shape - dest[...,:-k] = src[...,k:] - -def array_overlap6(src, dest, k=1): - assert src.shape == dest.shape - dest[...,k:] = src[...,:-k] - -# Array overlaps involving an in-place reversal - -def array_overlap11(src, dest): - assert src.shape == dest.shape - dest[::-1] = src - -def array_overlap12(src, dest): - assert src.shape == dest.shape - dest[:] = src[::-1] - -def array_overlap13(src, dest): - assert src.shape == dest.shape - dest[:,::-1] = src - -def array_overlap14(src, dest): - assert src.shape == dest.shape - dest[:] = src[:,::-1] - -def array_overlap15(src, dest): - assert src.shape == dest.shape - dest[...,::-1] = src - -def 
array_overlap16(src, dest): - assert src.shape == dest.shape - dest[:] = src[...,::-1] - - -class TestArrayOverlap(TestCase): - - def check_overlap(self, pyfunc, min_ndim, have_k_argument=False): - N = 4 - - def vary_layouts(orig): - yield orig.copy(order='C') - yield orig.copy(order='F') - a = orig[::-1].copy()[::-1] - assert not a.flags.c_contiguous and not a.flags.f_contiguous - yield a - - def check(pyfunc, cfunc, pydest, cdest, kwargs): - pyfunc(pydest, pydest, **kwargs) - cfunc(cdest, cdest, **kwargs) - self.assertPreciseEqual(pydest, cdest) - - cfunc = jit(nopython=True)(pyfunc) - # Check for up to 3d arrays - for ndim in range(min_ndim, 4): - shape = (N,) * ndim - orig = np.arange(0, N**ndim).reshape(shape) - # Note we cannot copy a 'A' layout array exactly (bitwise), - # so instead we call vary_layouts() twice - for pydest, cdest in zip(vary_layouts(orig), vary_layouts(orig)): - if have_k_argument: - for k in range(1, N): - check(pyfunc, cfunc, pydest, cdest, dict(k=k)) - else: - check(pyfunc, cfunc, pydest, cdest, {}) - - def check_overlap_with_k(self, pyfunc, min_ndim): - self.check_overlap(pyfunc, min_ndim=min_ndim, have_k_argument=True) - - def test_overlap1(self): - self.check_overlap_with_k(array_overlap1, min_ndim=1) - - def test_overlap2(self): - self.check_overlap_with_k(array_overlap2, min_ndim=1) - - def test_overlap3(self): - self.check_overlap_with_k(array_overlap3, min_ndim=2) - - def test_overlap4(self): - self.check_overlap_with_k(array_overlap4, min_ndim=2) - - def test_overlap5(self): - self.check_overlap_with_k(array_overlap5, min_ndim=1) - - def test_overlap6(self): - self.check_overlap_with_k(array_overlap6, min_ndim=1) - - def test_overlap11(self): - self.check_overlap(array_overlap11, min_ndim=1) - - def test_overlap12(self): - self.check_overlap(array_overlap12, min_ndim=1) - - def test_overlap13(self): - self.check_overlap(array_overlap13, min_ndim=2) - - def test_overlap14(self): - self.check_overlap(array_overlap14, min_ndim=2) - 
- def test_overlap15(self): - self.check_overlap(array_overlap15, min_ndim=1) - - def test_overlap16(self): - self.check_overlap(array_overlap16, min_ndim=1) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_parfors.py b/numba/numba/tests/test_parfors.py deleted file mode 100644 index d766fdf3f..000000000 --- a/numba/numba/tests/test_parfors.py +++ /dev/null @@ -1,2342 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from __future__ import print_function, division, absolute_import - -from math import sqrt -import numbers -import re -import sys -import platform -import types as pytypes -import warnings -from functools import reduce -import numpy as np -from numpy.random import randn - -import numba -from numba import unittest_support as unittest -from .support import TestCase -from numba import njit, prange, stencil, inline_closurecall -from numba import compiler, typing -from numba.targets import cpu -from numba import types -from numba.targets.registry import cpu_target -from numba import config -from numba.annotations import type_annotations -from numba.ir_utils import (find_callname, guard, build_definitions, - get_definition, is_getitem, is_setitem, - index_var_of_get_setitem) -from numba import ir -from numba.unsafe.ndarray import empty_inferred as unsafe_empty -from numba.compiler import compile_isolated, Flags -from numba.bytecode import ByteCodeIter -from .support import tag, override_env_config -from .matmul_usecase import needs_blas -from .test_linalg import needs_lapack - -# for decorating tests, marking that Windows with Python 2.7 is not supported -_windows_py27 = (sys.platform.startswith('win32') and - sys.version_info[:2] == (2, 7)) -_32bit = sys.maxsize <= 2 ** 32 -_reason = 'parfors not supported' -skip_unsupported = unittest.skipIf(_32bit or _windows_py27, _reason) -test_disabled = unittest.skipIf(True, 'Test disabled') -_lnx_reason = 'linux only test' -linux_only 
= unittest.skipIf(not sys.platform.startswith('linux'), _lnx_reason) -x86_only = unittest.skipIf(platform.machine() not in ('i386', 'x86_64'), 'x86 only test') - -class TestParforsBase(TestCase): - """ - Base class for testing parfors. - Provides functions for compilation and three way comparison between - python functions, njit'd functions and parfor njit'd functions. - """ - - def __init__(self, *args): - # flags for njit() - self.cflags = Flags() - self.cflags.set('nrt') - - # flags for njit(parallel=True) - self.pflags = Flags() - self.pflags.set('auto_parallel', cpu.ParallelOptions(True)) - self.pflags.set('nrt') - - # flags for njit(parallel=True, fastmath=True) - self.fast_pflags = Flags() - self.fast_pflags.set('auto_parallel', cpu.ParallelOptions(True)) - self.fast_pflags.set('nrt') - self.fast_pflags.set('fastmath') - super(TestParforsBase, self).__init__(*args) - - def _compile_this(self, func, sig, flags): - return compile_isolated(func, sig, flags=flags) - - def compile_parallel(self, func, sig): - return self._compile_this(func, sig, flags=self.pflags) - - def compile_parallel_fastmath(self, func, sig): - return self._compile_this(func, sig, flags=self.fast_pflags) - - def compile_njit(self, func, sig): - return self._compile_this(func, sig, flags=self.cflags) - - def compile_all(self, pyfunc, *args, **kwargs): - sig = tuple([numba.typeof(x) for x in args]) - - # compile the prange injected function - cpfunc = self.compile_parallel(pyfunc, sig) - - # compile a standard njit of the original function - cfunc = self.compile_njit(pyfunc, sig) - - return cfunc, cpfunc - - def check_parfors_vs_others(self, pyfunc, cfunc, cpfunc, *args, **kwargs): - """ - Checks python, njit and parfor impls produce the same result. 
- - Arguments: - pyfunc - the python function to test - cfunc - CompilerResult from njit of pyfunc - cpfunc - CompilerResult from njit(parallel=True) of pyfunc - args - arguments for the function being tested - Keyword Arguments: - scheduler_type - 'signed', 'unsigned' or None, default is None. - Supply in cases where the presence of a specific - scheduler is to be asserted. - fastmath_pcres - a fastmath parallel compile result, if supplied - will be run to make sure the result is correct - Remaining kwargs are passed to np.testing.assert_almost_equal - """ - scheduler_type = kwargs.pop('scheduler_type', None) - check_fastmath = kwargs.pop('check_fastmath', None) - fastmath_pcres = kwargs.pop('fastmath_pcres', None) - - def copy_args(*args): - if not args: - return tuple() - new_args = [] - for x in args: - if isinstance(x, np.ndarray): - new_args.append(x.copy('k')) - elif isinstance(x, np.number): - new_args.append(x.copy()) - elif isinstance(x, numbers.Number): - new_args.append(x) - else: - raise ValueError('Unsupported argument type encountered') - return tuple(new_args) - - # python result - py_expected = pyfunc(*copy_args(*args)) - - # njit result - njit_output = cfunc.entry_point(*copy_args(*args)) - - # parfor result - parfor_output = cpfunc.entry_point(*copy_args(*args)) - - np.testing.assert_almost_equal(njit_output, py_expected, **kwargs) - np.testing.assert_almost_equal(parfor_output, py_expected, **kwargs) - - self.check_scheduling(cpfunc, scheduler_type) - - # if requested check fastmath variant - if fastmath_pcres is not None: - parfor_fastmath_output = fastmath_pcres.entry_point(*copy_args(*args)) - np.testing.assert_almost_equal(parfor_fastmath_output, py_expected, - **kwargs) - - - def check_scheduling(self, cres, scheduler_type): - # make sure parfor set up scheduling - scheduler_str = '@do_scheduling' - if scheduler_type is not None: - if scheduler_type in ['signed', 'unsigned']: - scheduler_str += '_' + scheduler_type - else: - msg = "Unknown 
scheduler_type specified: %s" - raise ValueError(msg % scheduler_type) - - self.assertIn(scheduler_str, cres.library.get_llvm_str()) - - def _filter_mod(self, mod, magicstr, checkstr=None): - """ helper function to filter out modules by name""" - filt = [x for x in mod if magicstr in x.name] - if checkstr is not None: - for x in filt: - assert checkstr in str(x) - return filt - - def _get_gufunc_modules(self, cres, magicstr, checkstr=None): - """ gets the gufunc LLVM Modules""" - _modules = [x for x in cres.library._codegen._engine._ee._modules] - return self._filter_mod(_modules, magicstr, checkstr=checkstr) - - def _get_gufunc_info(self, cres, fn): - """ helper for gufunc IR/asm generation""" - # get the gufunc modules - magicstr = '__numba_parfor_gufunc' - gufunc_mods = self._get_gufunc_modules(cres, magicstr) - x = dict() - for mod in gufunc_mods: - x[mod.name] = fn(mod) - return x - - def _get_gufunc_ir(self, cres): - """ - Returns the IR of the gufuncs used as parfor kernels - as a dict mapping the gufunc name to its IR. - - Arguments: - cres - a CompileResult from `njit(parallel=True, ...)` - """ - return self._get_gufunc_info(cres, str) - - def _get_gufunc_asm(self, cres): - """ - Returns the assembly of the gufuncs used as parfor kernels - as a dict mapping the gufunc name to its assembly. - - Arguments: - cres - a CompileResult from `njit(parallel=True, ...)` - """ - tm = cres.library._codegen._tm - def emit_asm(mod): - return str(tm.emit_assembly(mod)) - return self._get_gufunc_info(cres, emit_asm) - - def assert_fastmath(self, pyfunc, sig): - """ - Asserts that the fastmath flag has some effect in that suitable - instructions are now labelled as `fast`. Whether LLVM can actually do - anything to optimise better now the derestrictions are supplied is - another matter! 
- - Arguments: - pyfunc - a function that contains operations with parallel semantics - sig - the type signature of pyfunc - """ - - cres = self.compile_parallel_fastmath(pyfunc, sig) - _ir = self._get_gufunc_ir(cres) - - def _get_fast_instructions(ir): - splitted = ir.splitlines() - fast_inst = [] - for x in splitted: - if 'fast' in x: - fast_inst.append(x) - return fast_inst - - def _assert_fast(instrs): - ops = ('fadd', 'fsub', 'fmul', 'fdiv', 'frem', 'fcmp') - for inst in instrs: - count = 0 - for op in ops: - match = op + ' fast' - if match in inst: - count += 1 - self.assertTrue(count > 0) - - for name, guir in _ir.items(): - inst = _get_fast_instructions(guir) - _assert_fast(inst) - - -def blackscholes_impl(sptprice, strike, rate, volatility, timev): - # blackscholes example - logterm = np.log(sptprice / strike) - powterm = 0.5 * volatility * volatility - den = volatility * np.sqrt(timev) - d1 = (((rate + powterm) * timev) + logterm) / den - d2 = d1 - den - NofXd1 = 0.5 + 0.5 * 2.0 * d1 - NofXd2 = 0.5 + 0.5 * 2.0 * d2 - futureValue = strike * np.exp(- rate * timev) - c1 = futureValue * NofXd2 - call = sptprice * NofXd1 - c1 - put = call - futureValue + sptprice - return put - - -def lr_impl(Y, X, w, iterations): - # logistic regression example - for i in range(iterations): - w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X) - return w - -def test_kmeans_example(A, numCenter, numIter, init_centroids): - centroids = init_centroids - N, D = A.shape - - for l in range(numIter): - dist = np.array([[sqrt(np.sum((A[i,:]-centroids[j,:])**2)) - for j in range(numCenter)] for i in range(N)]) - labels = np.array([dist[i,:].argmin() for i in range(N)]) - - centroids = np.array([[np.sum(A[labels==i, j])/np.sum(labels==i) - for j in range(D)] for i in range(numCenter)]) - - return centroids - -def get_optimized_numba_ir(test_func, args, **kws): - typingctx = typing.Context() - targetctx = cpu.CPUContext(typingctx) - test_ir = 
compiler.run_frontend(test_func) - if kws: - options = cpu.ParallelOptions(kws) - else: - options = cpu.ParallelOptions(True) - - tp = TestPipeline(typingctx, targetctx, args, test_ir) - - with cpu_target.nested_context(typingctx, targetctx): - typingctx.refresh() - targetctx.refresh() - - inline_pass = inline_closurecall.InlineClosureCallPass(tp.func_ir, options) - inline_pass.run() - - numba.rewrites.rewrite_registry.apply( - 'before-inference', tp, tp.func_ir) - - tp.typemap, tp.return_type, tp.calltypes = compiler.type_inference_stage( - tp.typingctx, tp.func_ir, tp.args, None) - - type_annotations.TypeAnnotation( - func_ir=tp.func_ir, - typemap=tp.typemap, - calltypes=tp.calltypes, - lifted=(), - lifted_from=None, - args=tp.args, - return_type=tp.return_type, - html_output=config.HTML) - - preparfor_pass = numba.parfor.PreParforPass( - tp.func_ir, tp.typemap, tp.calltypes, tp.typingctx, options) - preparfor_pass.run() - - numba.rewrites.rewrite_registry.apply( - 'after-inference', tp, tp.func_ir) - - flags = compiler.Flags() - parfor_pass = numba.parfor.ParforPass( - tp.func_ir, tp.typemap, tp.calltypes, tp.return_type, - tp.typingctx, options, flags) - parfor_pass.run() - test_ir._definitions = build_definitions(test_ir.blocks) - - return test_ir, tp - -def countParfors(test_func, args, **kws): - test_ir, tp = get_optimized_numba_ir(test_func, args, **kws) - ret_count = 0 - - for label, block in test_ir.blocks.items(): - for i, inst in enumerate(block.body): - if isinstance(inst, numba.parfor.Parfor): - ret_count += 1 - - return ret_count - - -def countArrays(test_func, args, **kws): - test_ir, tp = get_optimized_numba_ir(test_func, args, **kws) - return _count_arrays_inner(test_ir.blocks, tp.typemap) - -def _count_arrays_inner(blocks, typemap): - ret_count = 0 - arr_set = set() - - for label, block in blocks.items(): - for i, inst in enumerate(block.body): - if isinstance(inst, numba.parfor.Parfor): - parfor_blocks = inst.loop_body.copy() - parfor_blocks[0] 
= inst.init_block - ret_count += _count_arrays_inner(parfor_blocks, typemap) - if (isinstance(inst, ir.Assign) - and isinstance(typemap[inst.target.name], - types.ArrayCompatible)): - arr_set.add(inst.target.name) - - ret_count += len(arr_set) - return ret_count - -def countArrayAllocs(test_func, args, **kws): - test_ir, tp = get_optimized_numba_ir(test_func, args, **kws) - ret_count = 0 - - for block in test_ir.blocks.values(): - ret_count += _count_array_allocs_inner(test_ir, block) - - return ret_count - -def _count_array_allocs_inner(func_ir, block): - ret_count = 0 - for inst in block.body: - if isinstance(inst, numba.parfor.Parfor): - ret_count += _count_array_allocs_inner(func_ir, inst.init_block) - for b in inst.loop_body.values(): - ret_count += _count_array_allocs_inner(func_ir, b) - - if (isinstance(inst, ir.Assign) and isinstance(inst.value, ir.Expr) - and inst.value.op == 'call' - and (guard(find_callname, func_ir, inst.value) == ('empty', 'numpy') - or guard(find_callname, func_ir, inst.value) - == ('empty_inferred', 'numba.unsafe.ndarray'))): - ret_count += 1 - - return ret_count - -def countNonParforArrayAccesses(test_func, args, **kws): - test_ir, tp = get_optimized_numba_ir(test_func, args, **kws) - return _count_non_parfor_array_accesses_inner(test_ir, test_ir.blocks, tp.typemap) - -def _count_non_parfor_array_accesses_inner(f_ir, blocks, typemap, parfor_indices=None): - ret_count = 0 - if parfor_indices is None: - parfor_indices = set() - - for label, block in blocks.items(): - for stmt in block.body: - if isinstance(stmt, numba.parfor.Parfor): - parfor_indices.add(stmt.index_var.name) - parfor_blocks = stmt.loop_body.copy() - parfor_blocks[0] = stmt.init_block - ret_count += _count_non_parfor_array_accesses_inner( - f_ir, parfor_blocks, typemap, parfor_indices) - - # getitem - if (is_getitem(stmt) and isinstance(typemap[stmt.value.value.name], - types.ArrayCompatible) and not _uses_indices( - f_ir, index_var_of_get_setitem(stmt), 
parfor_indices)): - ret_count += 1 - - # setitem - if (is_setitem(stmt) and isinstance(typemap[stmt.target.name], - types.ArrayCompatible) and not _uses_indices( - f_ir, index_var_of_get_setitem(stmt), parfor_indices)): - ret_count += 1 - - return ret_count - -def _uses_indices(f_ir, index, index_set): - if index.name in index_set: - return True - - ind_def = guard(get_definition, f_ir, index) - if isinstance(ind_def, ir.Expr) and ind_def.op == 'build_tuple': - varnames = set(v.name for v in ind_def.items) - return len(varnames & index_set) != 0 - - return False - - -class TestPipeline(object): - def __init__(self, typingctx, targetctx, args, test_ir): - self.typingctx = typingctx - self.targetctx = targetctx - self.args = args - self.func_ir = test_ir - self.typemap = None - self.return_type = None - self.calltypes = None - - -class TestParfors(TestParforsBase): - - def __init__(self, *args): - TestParforsBase.__init__(self, *args) - # these are used in the mass of simple tests - m = np.reshape(np.arange(12.), (3, 4)) - self.simple_args = [np.arange(3.), np.arange(4.), m, m.T] - - def check(self, pyfunc, *args, **kwargs): - cfunc, cpfunc = self.compile_all(pyfunc, *args) - self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs) - - @skip_unsupported - @tag('important') - def test_arraymap(self): - def test_impl(a, x, y): - return a * x + y - - A = np.linspace(0, 1, 10) - X = np.linspace(2, 1, 10) - Y = np.linspace(1, 2, 10) - - self.check(test_impl, A, X, Y) - - @skip_unsupported - @needs_blas - @tag('important') - def test_mvdot(self): - def test_impl(a, v): - return np.dot(a, v) - - A = np.linspace(0, 1, 20).reshape(2, 10) - v = np.linspace(2, 1, 10) - - self.check(test_impl, A, v) - - @skip_unsupported - @tag('important') - def test_0d_broadcast(self): - def test_impl(): - X = np.array(1) - Y = np.ones((10, 12)) - return np.sum(X + Y) - self.check(test_impl) - self.assertTrue(countParfors(test_impl, ()) == 1) - - @skip_unsupported - 
@tag('important') - def test_2d_parfor(self): - def test_impl(): - X = np.ones((10, 12)) - Y = np.zeros((10, 12)) - return np.sum(X + Y) - self.check(test_impl) - self.assertTrue(countParfors(test_impl, ()) == 1) - - @skip_unsupported - @tag('important') - def test_pi(self): - def test_impl(n): - x = 2 * np.random.ranf(n) - 1 - y = 2 * np.random.ranf(n) - 1 - return 4 * np.sum(x**2 + y**2 < 1) / n - - self.check(test_impl, 100000, decimal=1) - self.assertTrue(countParfors(test_impl, (types.int64, )) == 1) - self.assertTrue(countArrays(test_impl, (types.intp,)) == 0) - - @skip_unsupported - @tag('important') - def test_fuse_argmin(self): - def test_impl(n): - A = np.ones(n) - C = A.argmin() - B = A.sum() - return B+C - - self.check(test_impl, 256) - self.assertTrue(countParfors(test_impl, (types.int64, )) == 1) - self.assertTrue(countArrays(test_impl, (types.intp,)) == 0) - - @skip_unsupported - @tag('important') - def test_blackscholes(self): - # blackscholes takes 5 1D float array args - args = (numba.float64[:], ) * 5 - self.assertTrue(countParfors(blackscholes_impl, args) == 1) - - @skip_unsupported - @needs_blas - @tag('important') - def test_logistic_regression(self): - args = (numba.float64[:], numba.float64[:,:], numba.float64[:], - numba.int64) - self.assertTrue(countParfors(lr_impl, args) == 1) - self.assertTrue(countArrayAllocs(lr_impl, args) == 1) - - @skip_unsupported - @tag('important') - def test_kmeans(self): - np.random.seed(0) - N = 1024 - D = 10 - centers = 3 - A = np.random.ranf((N, D)) - init_centroids = np.random.ranf((centers, D)) - self.check(test_kmeans_example, A, centers, 3, init_centroids, - decimal=1) - # TODO: count parfors after k-means fusion is working - # requires recursive parfor counting - arg_typs = (types.Array(types.float64, 2, 'C'), types.intp, types.intp, - types.Array(types.float64, 2, 'C')) - self.assertTrue( - countNonParforArrayAccesses(test_kmeans_example, arg_typs) == 0) - - @unittest.skipIf(not (_windows_py27 or 
_32bit), - "Only impacts Windows with Python 2.7 / 32 bit hardware") - @needs_blas - def test_unsupported_combination_raises(self): - """ - This test is in place until issues with the 'parallel' - target on Windows with Python 2.7 / 32 bit hardware are fixed. - """ - - with self.assertRaises(RuntimeError) as raised: - @njit(parallel=True) - def ddot(a, v): - return np.dot(a, v) - - A = np.linspace(0, 1, 20).reshape(2, 10) - v = np.linspace(2, 1, 10) - ddot(A, v) - - msg = ("The 'parallel' target is not currently supported on " - "Windows operating systems when using Python 2.7, " - "or on 32 bit hardware") - self.assertIn(msg, str(raised.exception)) - - @skip_unsupported - def test_simple01(self): - def test_impl(): - return np.ones(()) - with self.assertRaises(AssertionError) as raises: - self.check(test_impl) - self.assertIn("\'@do_scheduling\' not found", str(raises.exception)) - - @skip_unsupported - def test_simple02(self): - def test_impl(): - return np.ones((1,)) - self.check(test_impl) - - @skip_unsupported - def test_simple03(self): - def test_impl(): - return np.ones((1, 2)) - self.check(test_impl) - - @skip_unsupported - def test_simple04(self): - def test_impl(): - return np.ones(1) - self.check(test_impl) - - @skip_unsupported - def test_simple07(self): - def test_impl(): - return np.ones((1, 2), dtype=np.complex128) - self.check(test_impl) - - @skip_unsupported - def test_simple08(self): - def test_impl(): - return np.ones((1, 2)) + np.ones((1, 2)) - self.check(test_impl) - - @skip_unsupported - def test_simple09(self): - def test_impl(): - return np.ones((1, 1)) - self.check(test_impl) - - @skip_unsupported - def test_simple10(self): - def test_impl(): - return np.ones((0, 0)) - self.check(test_impl) - - @skip_unsupported - def test_simple11(self): - def test_impl(): - return np.ones((10, 10)) + 1. - self.check(test_impl) - - @skip_unsupported - def test_simple12(self): - def test_impl(): - return np.ones((10, 10)) + np.complex128(1.) 
- self.check(test_impl) - - @skip_unsupported - def test_simple13(self): - def test_impl(): - return np.complex128(1.) - with self.assertRaises(AssertionError) as raises: - self.check(test_impl) - self.assertIn("\'@do_scheduling\' not found", str(raises.exception)) - - @skip_unsupported - def test_simple14(self): - def test_impl(): - return np.ones((10, 10))[0::20] - self.check(test_impl) - - @skip_unsupported - def test_simple15(self): - def test_impl(v1, v2, m1, m2): - return v1 + v1 - self.check(test_impl, *self.simple_args) - - @skip_unsupported - def test_simple16(self): - def test_impl(v1, v2, m1, m2): - return m1 + m1 - self.check(test_impl, *self.simple_args) - - @skip_unsupported - def test_simple17(self): - def test_impl(v1, v2, m1, m2): - return m2 + v1 - self.check(test_impl, *self.simple_args) - - @skip_unsupported - @needs_lapack - def test_simple18(self): - def test_impl(v1, v2, m1, m2): - return m1.T + np.linalg.svd(m2)[1] - self.check(test_impl, *self.simple_args) - - @skip_unsupported - @needs_blas - def test_simple19(self): - def test_impl(v1, v2, m1, m2): - return np.dot(m1, v2) - self.check(test_impl, *self.simple_args) - - @skip_unsupported - @needs_blas - def test_simple20(self): - def test_impl(v1, v2, m1, m2): - return np.dot(m1, m2) - # gemm is left to BLAS - with self.assertRaises(AssertionError) as raises: - self.check(test_impl, *self.simple_args) - self.assertIn("\'@do_scheduling\' not found", str(raises.exception)) - - @skip_unsupported - @needs_blas - def test_simple21(self): - def test_impl(v1, v2, m1, m2): - return np.dot(v1, v1) - self.check(test_impl, *self.simple_args) - - @skip_unsupported - def test_simple22(self): - def test_impl(v1, v2, m1, m2): - return np.sum(v1 + v1) - self.check(test_impl, *self.simple_args) - - @skip_unsupported - def test_simple23(self): - def test_impl(v1, v2, m1, m2): - x = 2 * v1 - y = 2 * v1 - return 4 * np.sum(x**2 + y**2 < 1) / 10 - self.check(test_impl, *self.simple_args) - - @skip_unsupported - 
def test_simple24(self): - def test_impl(): - n = 20 - A = np.ones((n, n)) - b = np.arange(n) - return np.sum(A[:, b]) - self.check(test_impl) - - @skip_unsupported - def test_np_func_direct_import(self): - from numpy import ones # import here becomes FreeVar - def test_impl(n): - A = ones(n) - return A[0] - n = 111 - self.check(test_impl, n) - - @skip_unsupported - def test_np_random_func_direct_import(self): - def test_impl(n): - A = randn(n) - return A[0] - self.assertTrue(countParfors(test_impl, (types.int64, )) == 1) - - @skip_unsupported - def test_arange(self): - # test with stop only - def test_impl1(n): - return np.arange(n) - # start and stop - def test_impl2(s, n): - return np.arange(n) - # start, step, stop - def test_impl3(s, n, t): - return np.arange(s, n, t) - - for arg in [11, 128, 30.0, complex(4,5), complex(5,4)]: - self.check(test_impl1, arg) - self.check(test_impl2, 2, arg) - self.check(test_impl3, 2, arg, 2) - - @skip_unsupported - def test_linspace(self): - # without num - def test_impl1(start, stop): - return np.linspace(start, stop) - # with num - def test_impl2(start, stop, num): - return np.linspace(start, stop, num) - - for arg in [11, 128, 30.0, complex(4,5), complex(5,4)]: - self.check(test_impl1, 2, arg) - self.check(test_impl2, 2, arg, 30) - - @skip_unsupported - def test_size_assertion(self): - def test_impl(m, n): - A = np.ones(m) - B = np.ones(n) - return np.sum(A + B) - - self.check(test_impl, 10, 10) - with self.assertRaises(AssertionError) as raises: - cfunc = njit(parallel=True)(test_impl) - cfunc(10, 9) - msg = "Sizes of A, B do not match" - self.assertIn(msg, str(raises.exception)) - - @skip_unsupported - def test_mean(self): - def test_impl(A): - return A.mean() - N = 100 - A = np.random.ranf(N) - B = np.random.randint(10, size=(N, 3)) - self.check(test_impl, A) - self.check(test_impl, B) - self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 1, 'C'), )) == 1) - self.assertTrue(countParfors(test_impl, 
(types.Array(types.float64, 2, 'C'), )) == 1) - - @skip_unsupported - def test_var(self): - def test_impl(A): - return A.var() - N = 100 - A = np.random.ranf(N) - B = np.random.randint(10, size=(N, 3)) - C = A + 1j * A - self.check(test_impl, A) - self.check(test_impl, B) - self.check(test_impl, C) - self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 1, 'C'), )) == 2) - self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 2, 'C'), )) == 2) - - @skip_unsupported - def test_std(self): - def test_impl(A): - return A.std() - N = 100 - A = np.random.ranf(N) - B = np.random.randint(10, size=(N, 3)) - C = A + 1j * A - self.check(test_impl, A) - self.check(test_impl, B) - self.check(test_impl, C) - self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 1, 'C'), )) == 2) - self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 2, 'C'), )) == 2) - - @skip_unsupported - def test_random_parfor(self): - """ - Test function with only a random call to make sure a random function - like ranf is actually translated to a parfor. 
- """ - def test_impl(n): - A = np.random.ranf((n, n)) - return A - self.assertTrue(countParfors(test_impl, (types.int64, )) == 1) - - @skip_unsupported - def test_randoms(self): - def test_impl(n): - A = np.random.standard_normal(size=(n, n)) - B = np.random.randn(n, n) - C = np.random.normal(0.0, 1.0, (n, n)) - D = np.random.chisquare(1.0, (n, n)) - E = np.random.randint(1, high=3, size=(n, n)) - F = np.random.triangular(1, 2, 3, (n, n)) - return np.sum(A+B+C+D+E+F) - - n = 128 - cpfunc = self.compile_parallel(test_impl, (numba.typeof(n),)) - parfor_output = cpfunc.entry_point(n) - py_output = test_impl(n) - # check results within 5% since random numbers generated in parallel - np.testing.assert_allclose(parfor_output, py_output, rtol=0.05) - self.assertTrue(countParfors(test_impl, (types.int64, )) == 1) - - @skip_unsupported - def test_dead_randoms(self): - def test_impl(n): - A = np.random.standard_normal(size=(n, n)) - B = np.random.randn(n, n) - C = np.random.normal(0.0, 1.0, (n, n)) - D = np.random.chisquare(1.0, (n, n)) - E = np.random.randint(1, high=3, size=(n, n)) - F = np.random.triangular(1, 2, 3, (n, n)) - return 3 - - n = 128 - cpfunc = self.compile_parallel(test_impl, (numba.typeof(n),)) - parfor_output = cpfunc.entry_point(n) - py_output = test_impl(n) - self.assertEqual(parfor_output, py_output) - self.assertTrue(countParfors(test_impl, (types.int64, )) == 0) - - @skip_unsupported - def test_cfg(self): - # from issue #2477 - def test_impl(x, is_positive, N): - for i in numba.prange(2): - for j in range( i*N//2, (i+1)*N//2 ): - is_positive[j] = 0 - if x[j] > 0: - is_positive[j] = 1 - - return is_positive - - N = 100 - x = np.random.rand(N) - is_positive = np.zeros(N) - self.check(test_impl, x, is_positive, N) - - @skip_unsupported - def test_reduce(self): - def test_impl(A): - init_val = 10 - return reduce(lambda a,b: min(a, b), A, init_val) - - n = 211 - A = np.random.ranf(n) - self.check(test_impl, A) - A = np.random.randint(10, 
size=n).astype(np.int32) - self.check(test_impl, A) - - # test checking the number of arguments for the reduce function - def test_impl(): - g = lambda x: x ** 2 - return reduce(g, np.array([1, 2, 3, 4, 5]), 2) - with self.assertTypingError(): - self.check(test_impl) - - # test checking reduction over bitarray masked arrays - n = 160 - A = np.random.randint(10, size=n).astype(np.int32) - def test_impl(A): - return np.sum(A[A>=3]) - self.check(test_impl, A) - # TODO: this should fuse - # self.assertTrue(countParfors(test_impl, (numba.float64[:],)) == 1) - - def test_impl(A): - B = A[:,0] - return np.sum(A[B>=3,1]) - self.check(test_impl, A.reshape((16,10))) - # TODO: this should also fuse - #self.assertTrue(countParfors(test_impl, (numba.float64[:,:],)) == 1) - - def test_impl(A): - B = A[:,0] - return np.sum(A[B>=3,1:2]) - self.check(test_impl, A.reshape((16,10))) - # this doesn't fuse due to mixed indices - self.assertTrue(countParfors(test_impl, (numba.float64[:,:],)) == 2) - - @skip_unsupported - def test_min(self): - def test_impl1(A): - return A.min() - - def test_impl2(A): - return np.min(A) - - n = 211 - A = np.random.ranf(n) - B = np.random.randint(10, size=n).astype(np.int32) - C = np.random.ranf((n, n)) # test multi-dimensional array - self.check(test_impl1, A) - self.check(test_impl1, B) - self.check(test_impl1, C) - self.check(test_impl2, A) - self.check(test_impl2, B) - self.check(test_impl2, C) - - @skip_unsupported - def test_max(self): - def test_impl1(A): - return A.max() - - def test_impl2(A): - return np.max(A) - - n = 211 - A = np.random.ranf(n) - B = np.random.randint(10, size=n).astype(np.int32) - C = np.random.ranf((n, n)) # test multi-dimensional array - self.check(test_impl1, A) - self.check(test_impl1, B) - self.check(test_impl1, C) - self.check(test_impl2, A) - self.check(test_impl2, B) - self.check(test_impl2, C) - - @skip_unsupported - def test_argmin(self): - def test_impl1(A): - return A.argmin() - - def test_impl2(A): - return 
np.argmin(A) - - n = 211 - A = np.array([1., 0., 2., 0., 3.]) - B = np.random.randint(10, size=n).astype(np.int32) - C = np.random.ranf((n, n)) # test multi-dimensional array - self.check(test_impl1, A) - self.check(test_impl1, B) - self.check(test_impl1, C) - self.check(test_impl2, A) - self.check(test_impl2, B) - self.check(test_impl2, C) - - @skip_unsupported - def test_argmax(self): - def test_impl1(A): - return A.argmax() - - def test_impl2(A): - return np.argmax(A) - - n = 211 - A = np.array([1., 0., 3., 2., 3.]) - B = np.random.randint(10, size=n).astype(np.int32) - C = np.random.ranf((n, n)) # test multi-dimensional array - self.check(test_impl1, A) - self.check(test_impl1, B) - self.check(test_impl1, C) - self.check(test_impl2, A) - self.check(test_impl2, B) - self.check(test_impl2, C) - - - @skip_unsupported - def test_parfor_array_access1(self): - # signed index of the prange generated by sum() should be replaced - # resulting in array A to be eliminated (see issue #2846) - def test_impl(n): - A = np.ones(n) - return A.sum() - - n = 211 - self.check(test_impl, n) - self.assertEqual(countArrays(test_impl, (types.intp,)), 0) - - @skip_unsupported - def test_parfor_array_access2(self): - # in this test, the prange index has the same name (i) in two loops - # thus, i has multiple definitions and is harder to replace - def test_impl(n): - A = np.ones(n) - m = 0 - n = 0 - for i in numba.prange(len(A)): - m += A[i] - - for i in numba.prange(len(A)): - if m == n: # access in another block - n += A[i] - - return m + n - - n = 211 - self.check(test_impl, n) - self.assertEqual(countNonParforArrayAccesses(test_impl, (types.intp,)), 0) - - @skip_unsupported - def test_parfor_array_access3(self): - def test_impl(n): - A = np.ones(n, np.int64) - m = 0 - for i in numba.prange(len(A)): - m += A[i] - if m==2: - i = m - - n = 211 - with self.assertRaises(ValueError) as raises: - self.check(test_impl, n) - self.assertIn("Overwrite of parallel loop index", 
str(raises.exception)) - - @skip_unsupported - @needs_blas - def test_parfor_array_access4(self): - # in this test, one index of a multi-dim access should be replaced - # np.dot parallel implementation produces this case - def test_impl(A, b): - return np.dot(A, b) - - n = 211 - d = 4 - A = np.random.ranf((n, d)) - b = np.random.ranf(d) - self.check(test_impl, A, b) - # make sure the parfor index is replaced in build_tuple of access to A - test_ir, tp = get_optimized_numba_ir( - test_impl, (types.Array(types.float64, 2, 'C'), - types.Array(types.float64, 1, 'C'))) - # this code should have one basic block after optimization - self.assertTrue(len(test_ir.blocks) == 1 and 0 in test_ir.blocks) - block = test_ir.blocks[0] - parfor_found = False - parfor = None - for stmt in block.body: - if isinstance(stmt, numba.parfor.Parfor): - parfor_found = True - parfor = stmt - - self.assertTrue(parfor_found) - build_tuple_found = False - # there should be only one build_tuple - for bl in parfor.loop_body.values(): - for stmt in bl.body: - if (isinstance(stmt, ir.Assign) - and isinstance(stmt.value, ir.Expr) - and stmt.value.op == 'build_tuple'): - build_tuple_found = True - self.assertTrue(parfor.index_var in stmt.value.items) - - self.assertTrue(build_tuple_found) - - @skip_unsupported - def test_parfor_array_access5(self): - # one dim is slice in multi-dim access - def test_impl(n): - X = np.ones((n, 3)) - y = 0 - for i in numba.prange(n): - y += X[i,:].sum() - return y - - n = 211 - self.check(test_impl, n) - self.assertEqual(countNonParforArrayAccesses(test_impl, (types.intp,)), 0) - - @skip_unsupported - def test_parfor_hoist_setitem(self): - # Make sure that read of out is not hoisted. 
- def test_impl(out): - for i in prange(10): - out[0] = 2 * out[0] - return out[0] - - out = np.ones(1) - self.check(test_impl, out) - - @skip_unsupported - @needs_blas - def test_parfor_generate_fuse(self): - # issue #2857 - def test_impl(N, D): - w = np.ones(D) - X = np.ones((N, D)) - Y = np.ones(N) - for i in range(3): - B = (-Y * np.dot(X, w)) - - return B - - n = 211 - d = 3 - self.check(test_impl, n, d) - self.assertEqual(countArrayAllocs(test_impl, (types.intp, types.intp)), 4) - self.assertEqual(countParfors(test_impl, (types.intp, types.intp)), 4) - - @skip_unsupported - def test_ufunc_expr(self): - # issue #2885 - def test_impl(A, B): - return np.bitwise_and(A, B) - - A = np.ones(3, np.uint8) - B = np.ones(3, np.uint8) - B[1] = 0 - self.check(test_impl, A, B) - - @skip_unsupported - def test_find_callname_intrinsic(self): - def test_impl(n): - A = unsafe_empty((n,)) - for i in range(n): - A[i] = i + 2.0 - return A - - # the unsafe allocation should be found even though it is imported - # as a different name - self.assertEqual(countArrayAllocs(test_impl, (types.intp,)), 1) - - -class TestPrangeBase(TestParforsBase): - - def __init__(self, *args): - TestParforsBase.__init__(self, *args) - - def generate_prange_func(self, pyfunc, patch_instance): - """ - This function does the actual code augmentation to enable the explicit - testing of `prange` calls in place of `range`. - """ - pyfunc_code = pyfunc.__code__ - - prange_names = list(pyfunc_code.co_names) - - if patch_instance is None: - # patch all instances, cheat by just switching - # range for prange - assert 'range' in pyfunc_code.co_names - prange_names = tuple([x if x != 'range' else 'prange' - for x in pyfunc_code.co_names]) - new_code = bytes(pyfunc_code.co_code) - else: - # patch specified instances... 
- # find where 'range' is in co_names - range_idx = pyfunc_code.co_names.index('range') - range_locations = [] - # look for LOAD_GLOBALs that point to 'range' - for _, instr in ByteCodeIter(pyfunc_code): - if instr.opname == 'LOAD_GLOBAL': - if instr.arg == range_idx: - range_locations.append(instr.offset + 1) - # add in 'prange' ref - prange_names.append('prange') - prange_names = tuple(prange_names) - prange_idx = len(prange_names) - 1 - new_code = bytearray(pyfunc_code.co_code) - assert len(patch_instance) <= len(range_locations) - # patch up the new byte code - for i in patch_instance: - idx = range_locations[i] - new_code[idx] = prange_idx - new_code = bytes(new_code) - - # create new code parts - co_args = [pyfunc_code.co_argcount] - if sys.version_info > (3, 0): - co_args.append(pyfunc_code.co_kwonlyargcount) - co_args.extend([pyfunc_code.co_nlocals, - pyfunc_code.co_stacksize, - pyfunc_code.co_flags, - new_code, - pyfunc_code.co_consts, - prange_names, - pyfunc_code.co_varnames, - pyfunc_code.co_filename, - pyfunc_code.co_name, - pyfunc_code.co_firstlineno, - pyfunc_code.co_lnotab, - pyfunc_code.co_freevars, - pyfunc_code.co_cellvars - ]) - - # create code object with prange mutation - prange_code = pytypes.CodeType(*co_args) - - # get function - pfunc = pytypes.FunctionType(prange_code, globals()) - - return pfunc - - def prange_tester(self, pyfunc, *args, **kwargs): - """ - The `prange` tester - This is a hack. It basically switches out range calls for prange. - It does this by copying the live code object of a function - containing 'range' then copying the .co_names and mutating it so - that 'range' is replaced with 'prange'. It then creates a new code - object containing the mutation and instantiates a function to contain - it. At this point three results are created: - 1. The result of calling the original python function. - 2. The result of calling a njit compiled version of the original - python function. - 3. 
The result of calling a njit(parallel=True) version of the mutated - function containing `prange`. - The three results are then compared and the `prange` based function's - llvm_ir is inspected to ensure the scheduler code is present. - - Arguments: - pyfunc - the python function to test - args - data arguments to pass to the pyfunc under test - - Keyword Arguments: - patch_instance - iterable containing which instances of `range` to - replace. If not present all instance of `range` are - replaced. - scheduler_type - 'signed', 'unsigned' or None, default is None. - Supply in cases where the presence of a specific - scheduler is to be asserted. - check_fastmath - if True then a check will be performed to ensure the - IR contains instructions labelled with 'fast' - check_fastmath_result - if True then a check will be performed to - ensure the result of running with fastmath - on matches that of the pyfunc - Remaining kwargs are passed to np.testing.assert_almost_equal - - - Example: - def foo(): - acc = 0 - for x in range(5): - for y in range(10): - acc +=1 - return acc - - # calling as - prange_tester(foo) - # will test code equivalent to - # def foo(): - # acc = 0 - # for x in prange(5): # <- changed - # for y in prange(10): # <- changed - # acc +=1 - # return acc - - # calling as - prange_tester(foo, patch_instance=[1]) - # will test code equivalent to - # def foo(): - # acc = 0 - # for x in range(5): # <- outer loop (0) unchanged - # for y in prange(10): # <- inner loop (1) changed - # acc +=1 - # return acc - - """ - patch_instance = kwargs.pop('patch_instance', None) - check_fastmath = kwargs.pop('check_fastmath', False) - check_fastmath_result = kwargs.pop('check_fastmath_result', False) - - pfunc = self.generate_prange_func(pyfunc, patch_instance) - - # Compile functions - # compile a standard njit of the original function - sig = tuple([numba.typeof(x) for x in args]) - cfunc = self.compile_njit(pyfunc, sig) - - # compile the prange injected function - 
cpfunc = self.compile_parallel(pfunc, sig) - - # if check_fastmath is True then check fast instructions - if check_fastmath: - self.assert_fastmath(pfunc, sig) - - # if check_fastmath_result is True then compile a function - # so that the parfors checker can assert the result is ok. - if check_fastmath_result: - fastcpfunc = self.compile_parallel_fastmath(pfunc, sig) - kwargs = dict({'fastmath_pcres': fastcpfunc}, **kwargs) - - self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs) - - -class TestPrange(TestPrangeBase): - """ Tests Prange """ - - @skip_unsupported - def test_prange01(self): - def test_impl(): - n = 4 - A = np.zeros(n) - for i in range(n): - A[i] = 2.0 * i - return A - self.prange_tester(test_impl, scheduler_type='unsigned', - check_fastmath=True) - - @skip_unsupported - def test_prange02(self): - def test_impl(): - n = 4 - A = np.zeros(n - 1) - for i in range(1, n): - A[i - 1] = 2.0 * i - return A - self.prange_tester(test_impl, scheduler_type='unsigned', - check_fastmath=True) - - @skip_unsupported - def test_prange03(self): - def test_impl(): - s = 0 - for i in range(10): - s += 2 - return s - self.prange_tester(test_impl, scheduler_type='unsigned', - check_fastmath=True) - - @skip_unsupported - def test_prange04(self): - def test_impl(): - a = 2 - b = 3 - A = np.empty(4) - for i in range(4): - if i == a: - A[i] = b - else: - A[i] = 0 - return A - self.prange_tester(test_impl, scheduler_type='unsigned', - check_fastmath=True) - - @skip_unsupported - def test_prange05(self): - def test_impl(): - n = 4 - A = np.ones((n), dtype=np.float64) - s = 0 - for i in range(1, n - 1, 1): - s += A[i] - return s - self.prange_tester(test_impl, scheduler_type='unsigned', - check_fastmath=True) - - @skip_unsupported - def test_prange06(self): - def test_impl(): - n = 4 - A = np.ones((n), dtype=np.float64) - s = 0 - for i in range(1, 1, 1): - s += A[i] - return s - self.prange_tester(test_impl, scheduler_type='unsigned', - check_fastmath=True) - - 
@skip_unsupported - def test_prange07(self): - def test_impl(): - n = 4 - A = np.ones((n), dtype=np.float64) - s = 0 - for i in range(n, 1): - s += A[i] - return s - self.prange_tester(test_impl, scheduler_type='unsigned', - check_fastmath=True) - - @skip_unsupported - def test_prange08(self): - def test_impl(): - n = 4 - A = np.ones((n)) - acc = 0 - for i in range(len(A)): - for j in range(len(A)): - acc += A[i] - return acc - self.prange_tester(test_impl, scheduler_type='unsigned', - check_fastmath=True) - - @skip_unsupported - def test_prange08_1(self): - def test_impl(): - n = 4 - A = np.ones((n)) - acc = 0 - for i in range(4): - for j in range(4): - acc += A[i] - return acc - self.prange_tester(test_impl, scheduler_type='unsigned', - check_fastmath=True) - - @skip_unsupported - def test_prange09(self): - def test_impl(): - n = 4 - acc = 0 - for i in range(n): - for j in range(n): - acc += 1 - return acc - # patch inner loop to 'prange' - self.prange_tester(test_impl, patch_instance=[1], - scheduler_type='unsigned', - check_fastmath=True) - - @skip_unsupported - def test_prange10(self): - def test_impl(): - n = 4 - acc2 = 0 - for j in range(n): - acc1 = 0 - for i in range(n): - acc1 += 1 - acc2 += acc1 - return acc2 - # patch outer loop to 'prange' - self.prange_tester(test_impl, patch_instance=[0], - scheduler_type='unsigned', - check_fastmath=True) - - @skip_unsupported - @unittest.skip("list append is not thread-safe yet (#2391, #2408)") - def test_prange11(self): - def test_impl(): - n = 4 - return [np.sin(j) for j in range(n)] - self.prange_tester(test_impl, scheduler_type='unsigned', - check_fastmath=True) - - @skip_unsupported - def test_prange12(self): - def test_impl(): - acc = 0 - n = 4 - X = np.ones(n) - for i in range(-len(X)): - acc += X[i] - return acc - self.prange_tester(test_impl, scheduler_type='unsigned', - check_fastmath=True) - - @skip_unsupported - def test_prange13(self): - def test_impl(n): - acc = 0 - for i in range(n): - acc += 1 - 
return acc - self.prange_tester(test_impl, np.int32(4), scheduler_type='unsigned', - check_fastmath=True) - - @skip_unsupported - def test_prange14(self): - def test_impl(A): - s = 3 - for i in range(len(A)): - s += A[i]*2 - return s - # this tests reduction detection well since the accumulated variable - # is initialized before the parfor and the value accessed from the array - # is updated before accumulation - self.prange_tester(test_impl, np.random.ranf(4), - scheduler_type='unsigned', - check_fastmath=True) - - @skip_unsupported - def test_prange15(self): - # from issue 2587 - # test parfor type inference when there is multi-dimensional indexing - def test_impl(N): - acc = 0 - for i in range(N): - x = np.ones((1, 1)) - acc += x[0, 0] - return acc - self.prange_tester(test_impl, 1024, scheduler_type='unsigned', - check_fastmath=True) - - # Tests for negative ranges - @skip_unsupported - def test_prange16(self): - def test_impl(N): - acc = 0 - for i in range(-N, N): - acc += 2 - return acc - self.prange_tester(test_impl, 1024, scheduler_type='signed', - check_fastmath=True) - - @skip_unsupported - def test_prange17(self): - def test_impl(N): - acc = 0 - X = np.ones(N) - for i in range(-N, N): - acc += X[i] - return acc - self.prange_tester(test_impl, 9, scheduler_type='signed', - check_fastmath=True) - - @skip_unsupported - def test_prange18(self): - def test_impl(N): - acc = 0 - X = np.ones(N) - for i in range(-N, 5): - acc -= X[i] - for j in range(-4, N): - acc += X[j] - return acc - self.prange_tester(test_impl, 9, scheduler_type='signed', - check_fastmath=True) - - @skip_unsupported - def test_prange19(self): - def test_impl(N): - acc = 0 - M = N + 4 - X = np.ones((N, M)) - for i in range(-N, N): - for j in range(-M, M): - acc += X[i, j] - return acc - self.prange_tester(test_impl, 9, scheduler_type='signed', - check_fastmath=True) - - @skip_unsupported - def test_prange20(self): - def test_impl(N): - acc = 0 - X = np.ones(N) - for i in range(-1, N): - acc 
+= X[i] - return acc - self.prange_tester(test_impl, 9, scheduler_type='signed', - check_fastmath=True) - - @skip_unsupported - def test_prange21(self): - def test_impl(N): - acc = 0 - for i in range(-3, -1): - acc += 3 - return acc - self.prange_tester(test_impl, 9, scheduler_type='signed', - check_fastmath=True) - - @skip_unsupported - def test_prange22(self): - def test_impl(): - a = 0 - b = 3 - A = np.empty(4) - for i in range(-2, 2): - if i == a: - A[i] = b - elif i < 1: - A[i] = -1 - else: - A[i] = 7 - return A - self.prange_tester(test_impl, scheduler_type='signed', - check_fastmath=True, check_fastmath_result=True) - - @skip_unsupported - def test_prange23(self): - # test non-contig input - def test_impl(A): - for i in range(len(A)): - A[i] = i - return A - A = np.zeros(32)[::2] - self.prange_tester(test_impl, A, scheduler_type='unsigned', - check_fastmath=True, check_fastmath_result=True) - - @skip_unsupported - def test_prange24(self): - # test non-contig input, signed range - def test_impl(A): - for i in range(-len(A), 0): - A[i] = i - return A - A = np.zeros(32)[::2] - self.prange_tester(test_impl, A, scheduler_type='signed', - check_fastmath=True, check_fastmath_result=True) - - # should this work? 
- @skip_unsupported - def test_prange25(self): - def test_impl(A): - B = A[::3] - for i in range(len(B)): - B[i] = i - return A - A = np.zeros(32)[::2] - self.prange_tester(test_impl, A, scheduler_type='unsigned', - check_fastmath=True, check_fastmath_result=True) - -# @skip_unsupported - @test_disabled - def test_check_error_model(self): - def test_impl(): - n = 32 - A = np.zeros(n) - for i in range(n): - A[i] = 1 / i # div-by-zero when i = 0 - return A - - with self.assertRaises(ZeroDivisionError) as raises: - test_impl() - - # compile parallel functions - pfunc = self.generate_prange_func(test_impl, None) - pcres = self.compile_parallel(pfunc, ()) - pfcres = self.compile_parallel_fastmath(pfunc, ()) - - # should raise - with self.assertRaises(ZeroDivisionError) as raises: - pcres.entry_point() - - # should not raise - result = pfcres.entry_point() - self.assertEqual(result[0], np.inf) - - - @skip_unsupported - def test_check_alias_analysis(self): - # check alias analysis reports ok - def test_impl(A): - for i in range(len(A)): - B = A[i] - B[:] = 1 - return A - A = np.zeros(32).reshape(4, 8) - self.prange_tester(test_impl, A, scheduler_type='unsigned', - check_fastmath=True, check_fastmath_result=True) - pfunc = self.generate_prange_func(test_impl, None) - sig = tuple([numba.typeof(A)]) - cres = self.compile_parallel_fastmath(pfunc, sig) - _ir = self._get_gufunc_ir(cres) - for k, v in _ir.items(): - for line in v.splitlines(): - # get the fn definition line - if 'define' in line and k in line: - # there should only be 2x noalias, one on each of the first - # 2 args (retptr, excinfo). - # Note: used to be 3x no noalias, but env arg is dropped. 
- self.assertEqual(line.count('noalias'), 2) - break - - @skip_unsupported - def test_prange_raises_invalid_step_size(self): - def test_impl(N): - acc = 0 - for i in range(0, N, 2): - acc += 2 - return acc - - with self.assertRaises(NotImplementedError) as raises: - self.prange_tester(test_impl, 1024) - msg = 'Only constant step size of 1 is supported for prange' - self.assertIn(msg, str(raises.exception)) - - @skip_unsupported - def test_prange_fastmath_check_works(self): - # this function will benefit from `fastmath`, the div will - # get optimised to a multiply by reciprocal and the accumulator - # then becomes an fmadd: A = A + i * 0.5 - def test_impl(): - n = 128 - A = 0 - for i in range(n): - A += i / 2.0 - return A - self.prange_tester(test_impl, scheduler_type='unsigned', - check_fastmath=True) - pfunc = self.generate_prange_func(test_impl, None) - cres = self.compile_parallel_fastmath(pfunc, ()) - ir = self._get_gufunc_ir(cres) - _id = '%[A-Z]?.[0-9]+[.]?[i]?' - recipr_str = '\s+%s = fmul fast double %s, 5.000000e-01' - reciprocal_inst = re.compile(recipr_str % (_id, _id)) - fadd_inst = re.compile('\s+%s = fadd fast double %s, %s' - % (_id, _id, _id)) - # check there is something like: - # %.329 = fmul fast double %.325, 5.000000e-01 - # %.337 = fadd fast double %A.07, %.329 - for name, kernel in ir.items(): - splitted = kernel.splitlines() - for i, x in enumerate(splitted): - if reciprocal_inst.match(x): - break - self.assertTrue(fadd_inst.match(splitted[i + 1])) - - @skip_unsupported - def test_kde_example(self): - def test_impl(X): - # KDE example - b = 0.5 - points = np.array([-1.0, 2.0, 5.0]) - N = points.shape[0] - n = X.shape[0] - exps = 0 - for i in range(n): - p = X[i] - d = (-(p - points)**2) / (2 * b**2) - m = np.min(d) - exps += m - np.log(b * N) + np.log(np.sum(np.exp(d - m))) - return exps - - n = 128 - X = np.random.ranf(n) - self.prange_tester(test_impl, X) - - @skip_unsupported - def test_parfor_alias1(self): - def test_impl(n): - b = 
np.zeros((n, n)) - a = b[0] - for j in range(n): - a[j] = j + 1 - return b.sum() - self.prange_tester(test_impl, 4) - - @skip_unsupported - def test_parfor_alias2(self): - def test_impl(n): - b = np.zeros((n, n)) - for i in range(n): - a = b[i] - for j in range(n): - a[j] = i + j - return b.sum() - self.prange_tester(test_impl, 4) - - @skip_unsupported - def test_parfor_alias3(self): - def test_impl(n): - b = np.zeros((n, n, n)) - for i in range(n): - a = b[i] - for j in range(n): - c = a[j] - for k in range(n): - c[k] = i + j + k - return b.sum() - self.prange_tester(test_impl, 4) - - -@x86_only -class TestParforsVectorizer(TestPrangeBase): - - # env mutating test - _numba_parallel_test_ = False - - def get_gufunc_asm(self, func, schedule_type, *args, **kwargs): - - fastmath = kwargs.pop('fastmath', False) - nthreads = kwargs.pop('nthreads', 2) - cpu_name = kwargs.pop('cpu_name', 'skylake-avx512') - assertions = kwargs.pop('assertions', True) - - env_opts = {'NUMBA_CPU_NAME': cpu_name, - 'NUMBA_CPU_FEATURES': '', - 'NUMBA_NUM_THREADS': str(nthreads) - } - - overrides = [] - for k, v in env_opts.items(): - overrides.append(override_env_config(k, v)) - - with overrides[0], overrides[1], overrides[2]: - sig = tuple([numba.typeof(x) for x in args]) - pfunc_vectorizable = self.generate_prange_func(func, None) - if fastmath == True: - cres = self.compile_parallel_fastmath(pfunc_vectorizable, sig) - else: - cres = self.compile_parallel(pfunc_vectorizable, sig) - - # get the gufunc asm - asm = self._get_gufunc_asm(cres) - - if assertions: - schedty = re.compile('call\s+\w+\*\s+@do_scheduling_(\w+)\(') - matches = schedty.findall(cres.library.get_llvm_str()) - self.assertEqual(len(matches), 2) # 1x decl, 1x call - self.assertEqual(matches[0], matches[1]) - self.assertTrue(asm != {}) - - return asm - - # this is a common match pattern for something like: - # \n\tvsqrtpd\t-192(%rbx,%rsi,8), %zmm0\n - # to check vsqrtpd operates on zmm - match_vsqrtpd_on_zmm = 
re.compile('\n\s+vsqrtpd\s+.*zmm.*\n') - - @linux_only - def test_vectorizer_fastmath_asm(self): - """ This checks that if fastmath is set and the underlying hardware - is suitable, and the function supplied is amenable to fastmath based - vectorization, that the vectorizer actually runs. - """ - - # This function will benefit from `fastmath` if run on a suitable - # target. The vectorizer should unwind the loop and generate - # packed dtype=double add and sqrt instructions. - def will_vectorize(A): - n = len(A) - acc = 0 - for i in range(n): - acc += np.sqrt(i) - return acc - - arg = np.zeros(10) - - fast_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg, - fastmath=True) - slow_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg, - fastmath=False) - - for v in fast_asm.values(): - # should unwind and call vector sqrt then vector add - # all on packed doubles using zmm's - self.assertTrue('vaddpd' in v) - self.assertTrue('vsqrtpd' in v) - self.assertTrue('zmm' in v) - # make sure vsqrtpd operates on zmm - self.assertTrue(len(self.match_vsqrtpd_on_zmm.findall(v)) > 1) - - for v in slow_asm.values(): - # vector variants should not be present - self.assertTrue('vaddpd' not in v) - self.assertTrue('vsqrtpd' not in v) - # check scalar variant is present - self.assertTrue('vsqrtsd' in v) - self.assertTrue('vaddsd' in v) - # check no zmm addressing is present - self.assertTrue('zmm' not in v) - - @linux_only - def test_unsigned_refusal_to_vectorize(self): - """ This checks that if fastmath is set and the underlying hardware - is suitable, and the function supplied is amenable to fastmath based - vectorization, that the vectorizer actually runs. 
- """ - - def will_not_vectorize(A): - n = len(A) - for i in range(-n, 0): - A[i] = np.sqrt(A[i]) - return A - - def will_vectorize(A): - n = len(A) - for i in range(n): - A[i] = np.sqrt(A[i]) - return A - - arg = np.zeros(10) - - novec_asm = self.get_gufunc_asm(will_not_vectorize, 'signed', arg, - fastmath=True) - - vec_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg, - fastmath=True) - - for v in novec_asm.values(): - # vector variant should not be present - self.assertTrue('vsqrtpd' not in v) - # check scalar variant is present - self.assertTrue('vsqrtsd' in v) - # check no zmm addressing is present - self.assertTrue('zmm' not in v) - - for v in vec_asm.values(): - # should unwind and call vector sqrt then vector mov - # all on packed doubles using zmm's - self.assertTrue('vsqrtpd' in v) - self.assertTrue('vmovupd' in v) - self.assertTrue('zmm' in v) - # make sure vsqrtpd operates on zmm - self.assertTrue(len(self.match_vsqrtpd_on_zmm.findall(v)) > 1) - - @linux_only - # needed as 32bit doesn't have equivalent signed/unsigned instruction generation - # for this function - @skip_unsupported - def test_signed_vs_unsigned_vec_asm(self): - """ This checks vectorization for signed vs unsigned variants of a - trivial accumulator, the only meaningful difference should be the - presence of signed vs. unsigned unpack instructions (for the - induction var). - """ - def signed_variant(): - n = 4096 - A = 0. - for i in range(-n, 0): - A += i - return A - - def unsigned_variant(): - n = 4096 - A = 0. 
- for i in range(n): - A += i - return A - - signed_asm = self.get_gufunc_asm(signed_variant, 'signed', - fastmath=True) - unsigned_asm = self.get_gufunc_asm(unsigned_variant, 'unsigned', - fastmath=True) - - def strip_instrs(asm): - acc = [] - for x in asm.splitlines(): - spd = x.strip() - # filter out anything that isn't a trivial instruction - # and anything with the gufunc id as it contains an address - if spd != '' and not (spd.startswith('.') - or spd.startswith('_') - or spd.startswith('"') - or '__numba_parfor_gufunc' in spd): - acc.append(re.sub('[\t]', '', spd)) - return acc - - for k, v in signed_asm.items(): - signed_instr = strip_instrs(v) - break - - for k, v in unsigned_asm.items(): - unsigned_instr = strip_instrs(v) - break - - from difflib import SequenceMatcher as sm - # make sure that the only difference in instruction (if there is a - # difference) is the char 'u'. For example: - # vcvtsi2sdq vs. vcvtusi2sdq - self.assertEqual(len(signed_instr), len(unsigned_instr)) - for a, b in zip(signed_instr, unsigned_instr): - if a == b: - continue - else: - s = sm(lambda x: x == '\t', a, b) - ops = s.get_opcodes() - for op in ops: - if op[0] == 'insert': - self.assertEqual(b[op[-2]:op[-1]], 'u') - - -class TestParforsSlice(TestParforsBase): - - def check(self, pyfunc, *args, **kwargs): - cfunc, cpfunc = self.compile_all(pyfunc, *args) - self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs) - - @skip_unsupported - def test_parfor_slice1(self): - def test_impl(a): - (n,) = a.shape - b = a[0:n-2] + a[1:n-1] - return b - - self.check(test_impl, np.ones(10)) - - @skip_unsupported - def test_parfor_slice2(self): - def test_impl(a, m): - (n,) = a.shape - b = a[0:n-2] + a[1:m] - return b - - # runtime assertion should succeed - self.check(test_impl, np.ones(10), 9) - # next we expect failure - with self.assertRaises(AssertionError) as raises: - njit(parallel=True)(test_impl)(np.ones(10),10) - self.assertIn("do not match", str(raises.exception)) - - 
@skip_unsupported - def test_parfor_slice3(self): - def test_impl(a): - (m,n) = a.shape - b = a[0:m-1,0:n-1] + a[1:m,1:n] - return b - - self.check(test_impl, np.ones((4,3))) - - @skip_unsupported - def test_parfor_slice4(self): - def test_impl(a): - (m,n) = a.shape - b = a[:,0:n-1] + a[:,1:n] - return b - - self.check(test_impl, np.ones((4,3))) - - @skip_unsupported - def test_parfor_slice5(self): - def test_impl(a): - (m,n) = a.shape - b = a[0:m-1,:] + a[1:m,:] - return b - - self.check(test_impl, np.ones((4,3))) - - @skip_unsupported - def test_parfor_slice6(self): - def test_impl(a): - b = a.transpose() - c = a[1,:] + b[:,1] - return c - - self.check(test_impl, np.ones((4,3))) - - @skip_unsupported - def test_parfor_slice7(self): - def test_impl(a): - b = a.transpose() - c = a[1,:] + b[1,:] - return c - - # runtime check should succeed - self.check(test_impl, np.ones((3,3))) - # next we expect failure - with self.assertRaises(AssertionError) as raises: - njit(parallel=True)(test_impl)(np.ones((3,4))) - self.assertIn("do not match", str(raises.exception)) - -# @skip_unsupported - @test_disabled - def test_parfor_slice8(self): - def test_impl(a): - (m,n) = a.shape - b = a.transpose() - b[1:m,1:n] = a[1:m,1:n] - return b - - self.check(test_impl, np.arange(9).reshape((3,3))) - -# @skip_unsupported - @test_disabled - def test_parfor_slice9(self): - def test_impl(a): - (m,n) = a.shape - b = a.transpose() - b[1:n,1:m] = a[:,1:m] - return b - - self.check(test_impl, np.arange(12).reshape((3,4))) - -# @skip_unsupported - @test_disabled - def test_parfor_slice10(self): - def test_impl(a): - (m,n) = a.shape - b = a.transpose() - b[2,1:m] = a[2,1:m] - return b - - self.check(test_impl, np.arange(9).reshape((3,3))) - - @skip_unsupported - def test_parfor_slice11(self): - def test_impl(a): - (m,n,l) = a.shape - b = a.copy() - b[:,1,1:l] = a[:,2,1:l] - return b - - self.check(test_impl, np.arange(27).reshape((3,3,3))) - - @skip_unsupported - def test_parfor_slice12(self): - 
def test_impl(a): - (m,n) = a.shape - b = a.copy() - b[1,1:-1] = a[0,:-2] - return b - - self.check(test_impl, np.arange(12).reshape((3,4))) - - @skip_unsupported - def test_parfor_slice13(self): - def test_impl(a): - (m,n) = a.shape - b = a.copy() - c = -1 - b[1,1:c] = a[0,-n:c-1] - return b - - self.check(test_impl, np.arange(12).reshape((3,4))) - - @skip_unsupported - def test_parfor_slice14(self): - def test_impl(a): - (m,n) = a.shape - b = a.copy() - c = -1 - b[1,:-1] = a[0,-3:4] - return b - - self.check(test_impl, np.arange(12).reshape((3,4))) - - @skip_unsupported - def test_parfor_slice15(self): - def test_impl(a): - (m,n) = a.shape - b = a.copy() - c = -1 - b[1,-(n-1):] = a[0,-3:4] - return b - - self.check(test_impl, np.arange(12).reshape((3,4))) - - @skip_unsupported - def test_parfor_slice16(self): - def test_impl(a, b, n): - assert(a.shape == b.shape) - a[1:n] = 10 - b[0:(n-1)] = 10 - return a * b - - self.check(test_impl, np.ones(10), np.zeros(10), 8) - args = (numba.float64[:], numba.float64[:], numba.int64) - self.assertEqual(countParfors(test_impl, args), 2) - - @skip_unsupported - def test_parfor_slice17(self): - def test_impl(m, A): - B = np.zeros(m) - n = len(A) - B[-n:] = A - return B - - self.check(test_impl, 10, np.ones(10)) - - -class TestParforsOptions(TestParforsBase): - - def check(self, pyfunc, *args, **kwargs): - cfunc, cpfunc = self.compile_all(pyfunc, *args) - self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs) - - @skip_unsupported - def test_parfor_options(self): - def test_impl(a): - n = a.shape[0] - b = np.ones(n) - c = np.array([ i for i in range(n) ]) - b[:n] = a + b * c - for i in prange(n): - c[i] = b[i] * a[i] - return reduce(lambda x,y:x+y, c, 0) - - self.check(test_impl, np.ones(10)) - args = (numba.float64[:],) - # everything should fuse with default option - self.assertEqual(countParfors(test_impl, args), 1) - # with no fusion - self.assertEqual(countParfors(test_impl, args, fusion=False), 6) - # with no 
fusion, comprehension - self.assertEqual(countParfors(test_impl, args, fusion=False, - comprehension=False), 5) - #with no fusion, comprehension, setitem - self.assertEqual(countParfors(test_impl, args, fusion=False, - comprehension=False, setitem=False), 4) - # with no fusion, comprehension, prange - self.assertEqual(countParfors(test_impl, args, fusion=False, - comprehension=False, setitem=False, prange=False), 3) - # with no fusion, comprehension, prange, reduction - self.assertEqual(countParfors(test_impl, args, fusion=False, - comprehension=False, setitem=False, prange=False, - reduction=False), 2) - # with no fusion, comprehension, prange, reduction, numpy - self.assertEqual(countParfors(test_impl, args, fusion=False, - comprehension=False, setitem=False, prange=False, - reduction=False, numpy=False), 0) - - -class TestParforsBitMask(TestParforsBase): - - def check(self, pyfunc, *args, **kwargs): - cfunc, cpfunc = self.compile_all(pyfunc, *args) - self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs) - - @skip_unsupported - def test_parfor_bitmask1(self): - def test_impl(a, n): - b = a > n - a[b] = 0 - return a - - self.check(test_impl, np.arange(10), 5) - - @skip_unsupported - def test_parfor_bitmask2(self): - def test_impl(a, b): - a[b] = 0 - return a - - a = np.arange(10) - b = a > 5 - self.check(test_impl, a, b) - - @skip_unsupported - def test_parfor_bitmask3(self): - def test_impl(a, b): - a[b] = a[b] - return a - - a = np.arange(10) - b = a > 5 - self.check(test_impl, a, b) - - @skip_unsupported - def test_parfor_bitmask4(self): - def test_impl(a, b): - a[b] = (2 * a)[b] - return a - - a = np.arange(10) - b = a > 5 - self.check(test_impl, a, b) - - @skip_unsupported - def test_parfor_bitmask5(self): - def test_impl(a, b): - a[b] = a[b] * a[b] - return a - - a = np.arange(10) - b = a > 5 - self.check(test_impl, a, b) - - @skip_unsupported - def test_parfor_bitmask6(self): - def test_impl(a, b, c): - a[b] = c - return a - - a = 
np.arange(10) - b = a > 5 - c = np.zeros(sum(b)) - - # expect failure due to lack of parallelism - with self.assertRaises(AssertionError) as raises: - self.check(test_impl, a, b, c) - self.assertIn("\'@do_scheduling\' not found", str(raises.exception)) - -class TestParforsMisc(TestCase): - """ - Tests miscellaneous parts of ParallelAccelerator use. - """ - - @skip_unsupported - def test_warn_if_cache_set(self): - - def pyfunc(): - return - - with warnings.catch_warnings(record=True) as raised_warnings: - warnings.simplefilter('always') - cfunc = njit(parallel=True, cache=True)(pyfunc) - cfunc() - - self.assertEqual(len(raised_warnings), 1) - - warning_obj = raised_warnings[0] - - expected_msg = ("Caching is not available when the 'parallel' target " - "is in use. Caching is now being disabled to allow " - "execution to continue.") - - # check warning message appeared - self.assertIn(expected_msg, str(warning_obj.message)) - - # make sure the cache is set to false, cf. NullCache - self.assertTrue(isinstance(cfunc._cache, numba.caching.NullCache)) - -if __name__ == "__main__": - unittest.main() diff --git a/numba/numba/tests/test_pipeline.py b/numba/numba/tests/test_pipeline.py deleted file mode 100644 index aa7460bd0..000000000 --- a/numba/numba/tests/test_pipeline.py +++ /dev/null @@ -1,47 +0,0 @@ -from __future__ import print_function - -from numba.compiler import Pipeline -from numba import jit, generated_jit, types -from .support import TestCase - - -class TestCustomPipeline(TestCase): - def setUp(self): - super(TestCustomPipeline, self).setUp() - - # Define custom pipeline class - class CustomPipeline(Pipeline): - custom_pipeline_cache = [] - - def compile_extra(self, func): - # Store the compiled function - self.custom_pipeline_cache.append(func) - return super(CustomPipeline, self).compile_extra(func) - - self.pipeline_class = CustomPipeline - - def test_jit_custom_pipeline(self): - self.assertListEqual(self.pipeline_class.custom_pipeline_cache, []) - - 
@jit(pipeline_class=self.pipeline_class) - def foo(x): - return x - - self.assertEqual(foo(4), 4) - self.assertListEqual(self.pipeline_class.custom_pipeline_cache, - [foo.py_func]) - - def test_generated_jit_custom_pipeline(self): - self.assertListEqual(self.pipeline_class.custom_pipeline_cache, []) - - def inner(x): - return x - - @generated_jit(pipeline_class=self.pipeline_class) - def foo(x): - if isinstance(x, types.Integer): - return inner - - self.assertEqual(foo(5), 5) - self.assertListEqual(self.pipeline_class.custom_pipeline_cache, - [inner]) diff --git a/numba/numba/tests/test_polynomial.py b/numba/numba/tests/test_polynomial.py deleted file mode 100644 index 1eed9cedb..000000000 --- a/numba/numba/tests/test_polynomial.py +++ /dev/null @@ -1,119 +0,0 @@ -from __future__ import division, print_function - -import gc -from itertools import product - -import numpy as np - -from numba import unittest_support as unittest -from numba import jit -from .support import TestCase, tag -from .test_linalg import needs_lapack - - -def roots_fn(p): - return np.roots(p) - - -class TestPolynomialBase(TestCase): - """ - Provides setUp and common data/error modes for testing polynomial functions. - """ - - # supported dtypes - dtypes = (np.float64, np.float32, np.complex128, np.complex64) - - def setUp(self): - # Collect leftovers from previous test cases before checking for leaks - gc.collect() - - def assert_error(self, cfunc, args, msg, err=ValueError): - with self.assertRaises(err) as raises: - cfunc(*args) - self.assertIn(msg, str(raises.exception)) - - def assert_1d_input(self, cfunc, args): - msg = "Input must be a 1d array." - self.assert_error(cfunc, args, msg) - - -class TestPoly1D(TestPolynomialBase): - - def assert_no_domain_change(self, name, cfunc, args): - msg = name + "() argument must not cause a domain change." 
- self.assert_error(cfunc, args, msg) - - @needs_lapack - def test_roots(self): - - cfunc = jit(nopython=True)(roots_fn) - - default_resolution = np.finfo(np.float64).resolution - - def check(a, **kwargs): - expected = roots_fn(a, **kwargs) - got = cfunc(a, **kwargs) - - # eigen decomposition used so type specific impl - # will be used in numba whereas a wide type impl - # will be used in numpy, so compare using a more - # fuzzy comparator - - if a.dtype in self.dtypes: - resolution = np.finfo(a.dtype).resolution - else: - # this is for integer types when roots() will cast to float64 - resolution = default_resolution - - np.testing.assert_allclose( - expected, - got, - rtol=10 * resolution, - atol=100 * resolution # zeros tend to be fuzzy - ) - - # Ensure proper resource management - with self.assertNoNRTLeak(): - cfunc(a, **kwargs) - - # test vectors in real space - # contrived examples to trip branches - r_vectors = ( - np.array([1]), - np.array([1, 3, 2]), - np.array([0, 0, 0]), - np.array([1, 6, 11, 6]), - np.array([0, 0, 0, 1, 3, 2]), - np.array([1, 1, 0, 0, 0]), - np.array([0, 0, 1, 0, 0, 0]) - ) - - # test loop real space - for v, dtype in \ - product(r_vectors, [np.int32, np.int64] + list(self.dtypes)): - a = v.astype(dtype) - check(a) - - c_vectors = ( - np.array([1 + 1j]), - np.array([1, 3 + 1j, 2]), - np.array([0, 0 + 0j, 0]), - np.array([1, 6 + 1j, 11, 6]), - np.array([0, 0, 0, 1 + 1j, 3, 2]), - np.array([1 + 1j, 1, 0, 0, 0]), - np.array([0, 0, 1 + 1j, 0, 0, 0]) - ) - - # test loop complex space - for v, dtype in product(c_vectors, self.dtypes[2:]): - a = v.astype(dtype) - check(a) - - # check input with dimension > 1 raises - self.assert_1d_input(cfunc, (np.arange(4.).reshape(2, 2),)) - - # check real input with complex roots raises - x = np.array([7., 2., 0., 1.]) - self.assert_no_domain_change("eigvals", cfunc, (x,)) - # but works fine if type conv to complex first - cfunc(x.astype(np.complex128)) diff --git a/numba/numba/tests/test_print.py 
b/numba/numba/tests/test_print.py deleted file mode 100644 index b99ffe710..000000000 --- a/numba/numba/tests/test_print.py +++ /dev/null @@ -1,184 +0,0 @@ -from __future__ import print_function - -import sys - -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import jit, types -from .support import captured_stdout, tag, TestCase - - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -force_pyobj_flags = Flags() -force_pyobj_flags.set("force_pyobject") - - -def print_value(x): - print(x) - -def print_array_item(arr, i): - print(arr[i].x) - -def print_values(a, b, c): - print(a, b, c) - -def print_empty(): - print() - -def print_string(x): - print(x, "hop!", 3.5) - -def print_vararg(a, b, c): - print(a, b, *c) - -def print_string_vararg(a, b, c): - print(a, "hop!", b, *c) - - -def make_print_closure(x): - def print_closure(): - return x - return jit(nopython=True)(x) - - -class TestPrint(TestCase): - - @tag('important') - def test_print_values(self): - """ - Test printing a single argument value. 
- """ - pyfunc = print_value - - def check_values(typ, values): - cr = compile_isolated(pyfunc, (typ,)) - cfunc = cr.entry_point - for val in values: - with captured_stdout(): - cfunc(val) - self.assertEqual(sys.stdout.getvalue(), str(val) + '\n') - - # Various scalars - check_values(types.int32, (1, -234)) - check_values(types.int64, (1, -234, - 123456789876543210, -123456789876543210)) - check_values(types.uint64, (1, 234, - 123456789876543210, 2**63 + 123)) - check_values(types.boolean, (True, False)) - check_values(types.float64, (1.5, 100.0**10.0, float('nan'))) - check_values(types.complex64, (1+1j,)) - check_values(types.NPTimedelta('ms'), (np.timedelta64(100, 'ms'),)) - - cr = compile_isolated(pyfunc, (types.float32,)) - cfunc = cr.entry_point - with captured_stdout(): - cfunc(1.1) - # Float32 will lose precision - got = sys.stdout.getvalue() - expect = '1.10000002384' - self.assertTrue(got.startswith(expect)) - self.assertTrue(got.endswith('\n')) - - # NRT-enabled type - with self.assertNoNRTLeak(): - x = [1, 3, 5, 7] - with self.assertRefCount(x): - check_values(types.List(types.int32), (x,)) - - # Array will have to use object mode - arraytype = types.Array(types.int32, 1, 'C') - cr = compile_isolated(pyfunc, (arraytype,), flags=enable_pyobj_flags) - cfunc = cr.entry_point - with captured_stdout(): - cfunc(np.arange(10)) - self.assertEqual(sys.stdout.getvalue(), - '[0 1 2 3 4 5 6 7 8 9]\n') - - @tag('important') - def test_print_array_item(self): - """ - Test printing a Numpy character sequence - """ - dtype = np.dtype([('x', 'S4')]) - arr = np.frombuffer(bytearray(range(1, 9)), dtype=dtype) - - pyfunc = print_array_item - cfunc = jit(nopython=True)(pyfunc) - for i in range(len(arr)): - with captured_stdout(): - cfunc(arr, i) - self.assertEqual(sys.stdout.getvalue(), str(arr[i]['x']) + '\n') - - @tag('important') - def test_print_multiple_values(self): - pyfunc = print_values - cr = compile_isolated(pyfunc, (types.int32,) * 3) - cfunc = cr.entry_point - 
with captured_stdout(): - cfunc(1, 2, 3) - self.assertEqual(sys.stdout.getvalue(), '1 2 3\n') - - def test_print_nogil(self): - pyfunc = print_values - cfunc = jit(nopython=True, nogil=True)(pyfunc) - with captured_stdout(): - cfunc(1, 2, 3) - self.assertEqual(sys.stdout.getvalue(), '1 2 3\n') - - @tag('important') - def test_print_empty(self): - pyfunc = print_empty - cr = compile_isolated(pyfunc, ()) - cfunc = cr.entry_point - with captured_stdout(): - cfunc() - self.assertEqual(sys.stdout.getvalue(), '\n') - - @tag('important') - def test_print_strings(self): - pyfunc = print_string - cr = compile_isolated(pyfunc, (types.int32,)) - cfunc = cr.entry_point - with captured_stdout(): - cfunc(1) - self.assertEqual(sys.stdout.getvalue(), '1 hop! 3.5\n') - - def test_print_vararg(self): - # Test *args support for print(). This is desired since - # print() can use a dedicated IR node. - pyfunc = print_vararg - cfunc = jit(nopython=True)(pyfunc) - with captured_stdout(): - cfunc(1, (2, 3), (4, 5j)) - self.assertEqual(sys.stdout.getvalue(), '1 (2, 3) 4 5j\n') - - pyfunc = print_string_vararg - cfunc = jit(nopython=True)(pyfunc) - with captured_stdout(): - cfunc(1, (2, 3), (4, 5j)) - self.assertEqual(sys.stdout.getvalue(), '1 hop! (2, 3) 4 5j\n') - - def test_inner_fn_print(self): - @jit(nopython=True) - def foo(x): - print(x) - - @jit(nopython=True) - def bar(x): - foo(x) - foo('hello') - - # Printing an array requires the Env. - # We need to make sure the inner function can obtain the Env. 
- x = np.arange(5) - with captured_stdout(): - bar(x) - self.assertEqual(sys.stdout.getvalue(), '[0 1 2 3 4]\nhello\n') - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_profiler.py b/numba/numba/tests/test_profiler.py deleted file mode 100644 index 31b222ae3..000000000 --- a/numba/numba/tests/test_profiler.py +++ /dev/null @@ -1,79 +0,0 @@ -import cProfile as profiler -import os -import pstats -import subprocess -import sys - -import numpy as np - -from numba import jit -from numba import unittest_support as unittest -from .test_linalg import needs_blas - - -def dot(a, b): - sum = 0 - for i in range(len(a)): - sum += a[i]*b[i] - return sum - -def np_dot(a, b): - return np.dot(a, b) - - -class TestProfiler(unittest.TestCase): - - def check_profiler_dot(self, pyfunc): - """ - Make sure the jit-compiled function shows up in the profile stats - as a regular Python function. - """ - a = np.arange(16, dtype=np.float32) - b = np.arange(16, dtype=np.float32) - cfunc = jit(nopython=True)(pyfunc) - # Warm up JIT - cfunc(a, b) - p = profiler.Profile() - p.enable() - try: - cfunc(a, b) - finally: - p.disable() - stats = pstats.Stats(p).strip_dirs() - code = pyfunc.__code__ - expected_key = (os.path.basename(code.co_filename), - code.co_firstlineno, - code.co_name, - ) - self.assertIn(expected_key, stats.stats) - - def test_profiler(self): - self.check_profiler_dot(dot) - - @needs_blas - def test_profiler_np_dot(self): - # Issue #1786: initializing BLAS would crash when profiling - code = """if 1: - import cProfile as profiler - - import numpy as np - - from numba import jit - from numba.tests.test_profiler import np_dot - - cfunc = jit(nopython=True)(np_dot) - - a = np.arange(16, dtype=np.float32) - b = np.arange(16, dtype=np.float32) - - p = profiler.Profile() - p.enable() - cfunc(a, b) - cfunc(a, b) - p.disable() - """ - subprocess.check_call([sys.executable, "-c", code]) - - -if __name__ == '__main__': - unittest.main() diff --git 
a/numba/numba/tests/test_pycc.py b/numba/numba/tests/test_pycc.py deleted file mode 100644 index 791798390..000000000 --- a/numba/numba/tests/test_pycc.py +++ /dev/null @@ -1,335 +0,0 @@ -from __future__ import print_function - -import contextlib -import imp -import os -import shutil -import subprocess -import sys -import tempfile -from unittest import skip -from ctypes import * - -import numpy as np -try: - import setuptools -except ImportError: - setuptools = None - -import llvmlite.binding as ll - -from numba import unittest_support as unittest -from numba.pycc import main -from numba.pycc.decorators import clear_export_registry -from numba.pycc.platform import find_shared_ending, find_pyext_ending -from numba.pycc.platform import _external_compiler_ok - -# if suitable compilers are not present then skip. -_skip_reason = 'AOT compatible compilers missing' -_skip_missing_compilers = unittest.skipIf(not _external_compiler_ok, - _skip_reason) - -from .matmul_usecase import has_blas -from .support import TestCase, tag, import_dynamic, temp_directory - - -base_path = os.path.dirname(os.path.abspath(__file__)) - - -def unset_macosx_deployment_target(): - """Unset MACOSX_DEPLOYMENT_TARGET because we are not building portable - libraries - """ - if 'MACOSX_DEPLOYMENT_TARGET' in os.environ: - del os.environ['MACOSX_DEPLOYMENT_TARGET'] - - -class BasePYCCTest(TestCase): - - def setUp(self): - unset_macosx_deployment_target() - - self.tmpdir = temp_directory('test_pycc') - # Make sure temporary files and directories created by - # distutils don't clutter the top-level /tmp - tempfile.tempdir = self.tmpdir - - def tearDown(self): - tempfile.tempdir = None - # Since we're executing the module-under-test several times - # from the same process, we must clear the exports registry - # between invocations. 
- clear_export_registry() - - @contextlib.contextmanager - def check_c_ext(self, extdir, name): - sys.path.append(extdir) - try: - lib = import_dynamic(name) - yield lib - finally: - sys.path.remove(extdir) - sys.modules.pop(name, None) - - -@_skip_missing_compilers -class TestLegacyAPI(BasePYCCTest): - - def test_pycc_ctypes_lib(self): - """ - Test creating a C shared library object using pycc. - """ - source = os.path.join(base_path, 'compile_with_pycc.py') - cdll_modulename = 'test_dll_legacy' + find_shared_ending() - cdll_path = os.path.join(self.tmpdir, cdll_modulename) - if os.path.exists(cdll_path): - os.unlink(cdll_path) - - main(args=['--debug', '-o', cdll_path, source]) - lib = CDLL(cdll_path) - lib.mult.argtypes = [POINTER(c_double), c_void_p, - c_double, c_double] - lib.mult.restype = c_int - - lib.multf.argtypes = [POINTER(c_float), c_void_p, - c_float, c_float] - lib.multf.restype = c_int - - res = c_double() - lib.mult(byref(res), None, 123, 321) - self.assertEqual(res.value, 123 * 321) - - res = c_float() - lib.multf(byref(res), None, 987, 321) - self.assertEqual(res.value, 987 * 321) - - def test_pycc_pymodule(self): - """ - Test creating a CPython extension module using pycc. - """ - self.skipTest("lack of environment can make the extension crash") - - source = os.path.join(base_path, 'compile_with_pycc.py') - modulename = 'test_pyext_legacy' - out_modulename = os.path.join(self.tmpdir, - modulename + find_pyext_ending()) - if os.path.exists(out_modulename): - os.unlink(out_modulename) - - main(args=['--debug', '--python', '-o', out_modulename, source]) - - with self.check_c_ext(self.tmpdir, modulename) as lib: - res = lib.multi(123, 321) - self.assertPreciseEqual(res, 123 * 321) - res = lib.multf(987, 321) - self.assertPreciseEqual(res, 987.0 * 321.0) - - def test_pycc_bitcode(self): - """ - Test creating a LLVM bitcode file using pycc. 
- """ - modulename = os.path.join(base_path, 'compile_with_pycc') - bitcode_modulename = os.path.join(self.tmpdir, 'test_bitcode_legacy.bc') - if os.path.exists(bitcode_modulename): - os.unlink(bitcode_modulename) - - main(args=['--debug', '--llvm', '-o', bitcode_modulename, - modulename + '.py']) - - # Sanity check bitcode file contents - with open(bitcode_modulename, "rb") as f: - bc = f.read() - - bitcode_wrapper_magic = b'\xde\xc0\x17\x0b' - bitcode_magic = b'BC\xc0\xde' - self.assertTrue(bc.startswith((bitcode_magic, bitcode_wrapper_magic)), bc) - - -@_skip_missing_compilers -class TestCC(BasePYCCTest): - - def setUp(self): - super(TestCC, self).setUp() - from . import compile_with_pycc - self._test_module = compile_with_pycc - imp.reload(self._test_module) - - @contextlib.contextmanager - def check_cc_compiled(self, cc): - #cc.verbose = True - cc.output_dir = self.tmpdir - cc.compile() - - with self.check_c_ext(self.tmpdir, cc.name) as lib: - yield lib - - def check_cc_compiled_in_subprocess(self, lib, code): - prolog = """if 1: - import sys - sys.path.insert(0, %(path)r) - import %(name)s as lib - """ % {'name': lib.__name__, - 'path': os.path.dirname(lib.__file__)} - code = prolog.strip(' ') + code - subprocess.check_call([sys.executable, '-c', code]) - - def test_cc_properties(self): - cc = self._test_module.cc - self.assertEqual(cc.name, 'pycc_test_simple') - - # Inferred output directory - d = self._test_module.cc.output_dir - self.assertTrue(os.path.isdir(d), d) - - # Inferred output filename - f = self._test_module.cc.output_file - self.assertFalse(os.path.exists(f), f) - self.assertTrue(os.path.basename(f).startswith('pycc_test_simple.'), f) - if sys.platform.startswith('linux'): - self.assertTrue(f.endswith('.so'), f) - if sys.version_info >= (3,): - self.assertIn('.cpython', f) - - def test_compile(self): - with self.check_cc_compiled(self._test_module.cc) as lib: - res = lib.multi(123, 321) - self.assertPreciseEqual(res, 123 * 321) - res = 
lib.multf(987, 321) - self.assertPreciseEqual(res, 987.0 * 321.0) - res = lib.square(5) - self.assertPreciseEqual(res, 25) - self.assertIs(lib.get_none(), None) - with self.assertRaises(ZeroDivisionError): - lib.div(1, 0) - - def check_compile_for_cpu(self, cpu_name): - cc = self._test_module.cc - cc.target_cpu = cpu_name - - with self.check_cc_compiled(cc) as lib: - res = lib.multi(123, 321) - self.assertPreciseEqual(res, 123 * 321) - self.assertEqual(lib.multi.__module__, 'pycc_test_simple') - - def test_compile_for_cpu(self): - # Compiling for the host CPU should always succeed - self.check_compile_for_cpu(ll.get_host_cpu_name()) - - def test_compile_for_cpu_host(self): - # Compiling for the host CPU should always succeed - self.check_compile_for_cpu("host") - - @tag('important') - def test_compile_helperlib(self): - with self.check_cc_compiled(self._test_module.cc_helperlib) as lib: - res = lib.power(2, 7) - self.assertPreciseEqual(res, 128) - for val in (-1, -1 + 0j, np.complex128(-1)): - res = lib.sqrt(val) - self.assertPreciseEqual(res, 1j) - for val in (4, 4.0, np.float64(4)): - res = lib.np_sqrt(val) - self.assertPreciseEqual(res, 2.0) - res = lib.spacing(1.0) - self.assertPreciseEqual(res, 2**-52) - # Implicit seeding at startup should guarantee a non-pathological - # start state. 
- self.assertNotEqual(lib.random(-1), lib.random(-1)) - res = lib.random(42) - expected = np.random.RandomState(42).random_sample() - self.assertPreciseEqual(res, expected) - res = lib.size(np.float64([0] * 3)) - self.assertPreciseEqual(res, 3) - - code = """if 1: - from numpy.testing import assert_equal, assert_allclose - res = lib.power(2, 7) - assert res == 128 - res = lib.random(42) - assert_allclose(res, %(expected)s) - res = lib.spacing(1.0) - assert_allclose(res, 2**-52) - """ % {'expected': expected} - self.check_cc_compiled_in_subprocess(lib, code) - - @tag('important') - def test_compile_nrt(self): - with self.check_cc_compiled(self._test_module.cc_nrt) as lib: - # Sanity check - self.assertPreciseEqual(lib.zero_scalar(1), 0.0) - res = lib.zeros(3) - self.assertEqual(list(res), [0, 0, 0]) - if has_blas: - res = lib.vector_dot(4) - self.assertPreciseEqual(res, 30.0) - - code = """if 1: - res = lib.zero_scalar(1) - assert res == 0.0 - res = lib.zeros(3) - assert list(res) == [0, 0, 0] - if %(has_blas)s: - res = lib.vector_dot(4) - assert res == 30.0 - """ % dict(has_blas=has_blas) - self.check_cc_compiled_in_subprocess(lib, code) - - -@_skip_missing_compilers -class TestDistutilsSupport(TestCase): - - def setUp(self): - unset_macosx_deployment_target() - - # Copy the test project into a temp directory to avoid - # keeping any build leftovers in the source tree - self.tmpdir = temp_directory('test_pycc_distutils') - source_dir = os.path.join(base_path, 'pycc_distutils_usecase') - self.usecase_dir = os.path.join(self.tmpdir, 'work') - shutil.copytree(source_dir, self.usecase_dir) - - def check_setup_py(self, setup_py_file): - # Compute PYTHONPATH to ensure the child processes see this Numba - import numba - numba_path = os.path.abspath(os.path.dirname( - os.path.dirname(numba.__file__))) - env = dict(os.environ) - if env.get('PYTHONPATH', ''): - env['PYTHONPATH'] = numba_path + os.pathsep + env['PYTHONPATH'] - else: - env['PYTHONPATH'] = numba_path - - def 
run_python(args): - p = subprocess.Popen([sys.executable] + args, - cwd=self.usecase_dir, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - env=env) - out, _ = p.communicate() - rc = p.wait() - if rc != 0: - self.fail("python failed with the following output:\n%s" - % out.decode('utf-8', 'ignore')) - - run_python([setup_py_file, "build_ext", "--inplace"]) - code = """if 1: - import pycc_compiled_module as lib - assert lib.get_const() == 42 - res = lib.ones(3) - assert list(res) == [1.0, 1.0, 1.0] - """ - run_python(["-c", code]) - - def test_setup_py_distutils(self): - if sys.version_info < (3,) and sys.platform == "win32": - # See e.g. https://stackoverflow.com/questions/28931875/problems-finding-vcvarsall-bat-when-using-distutils - self.skipTest("must use setuptools to build extensions for Python 2") - self.check_setup_py("setup_distutils.py") - - @unittest.skipIf(setuptools is None, "test needs setuptools") - def test_setup_py_setuptools(self): - self.check_setup_py("setup_setuptools.py") - - -if __name__ == "__main__": - unittest.main() diff --git a/numba/numba/tests/test_python_int.py b/numba/numba/tests/test_python_int.py deleted file mode 100644 index b32f31e17..000000000 --- a/numba/numba/tests/test_python_int.py +++ /dev/null @@ -1,56 +0,0 @@ -from __future__ import print_function - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import types - - -force_pyobj_flags = Flags() -force_pyobj_flags.set("force_pyobject") - -no_pyobj_flags = Flags() - - -def return_int(a, b): - return a + b - - -class TestPythonInt(unittest.TestCase): - - # Issue #474: ints should be returned rather than longs under Python 2, - # as much as possible. 
- - def test_int_return_type(self, flags=force_pyobj_flags, - int_type=types.int64, operands=(3, 4)): - pyfunc = return_int - cr = compile_isolated(pyfunc, (int_type, int_type), flags=flags) - cfunc = cr.entry_point - expected = pyfunc(*operands) - got = cfunc(*operands) - self.assertIs(type(got), type(expected)) - self.assertEqual(got, expected) - - def test_int_return_type_npm(self): - self.test_int_return_type(flags=no_pyobj_flags) - - def test_unsigned_int_return_type(self, flags=force_pyobj_flags): - self.test_int_return_type(int_type=types.uint64, flags=flags) - - def test_unsigned_int_return_type_npm(self): - self.test_unsigned_int_return_type(flags=no_pyobj_flags) - - def test_long_int_return_type(self, flags=force_pyobj_flags): - # Same but returning a 64-bit integer. The return type should be - # `int` on 64-bit builds, `long` on 32-bit ones (or Windows). - self.test_int_return_type(flags=flags, operands=(2**33, 2**40)) - - def test_long_int_return_type_npm(self): - self.test_long_int_return_type(flags=no_pyobj_flags) - - def test_longer_int_return_type(self, flags=force_pyobj_flags): - # This won't be supported in nopython mode. 
- self.test_int_return_type(flags=flags, operands=(2**70, 2**75)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_random.py b/numba/numba/tests/test_random.py deleted file mode 100644 index aa276bacb..000000000 --- a/numba/numba/tests/test_random.py +++ /dev/null @@ -1,1521 +0,0 @@ -from __future__ import print_function - -import collections -import functools -import math -import multiprocessing -import os -import random -import subprocess -import sys -import threading - -import numpy as np - -import numba.unittest_support as unittest -from numba import jit, _helperlib, types -from numba.compiler import compile_isolated -from .support import TestCase, compile_function, tag - - -# State size of the Mersenne Twister -N = 624 - - -def get_py_state_ptr(): - return _helperlib.rnd_get_py_state_ptr() - -def get_np_state_ptr(): - return _helperlib.rnd_get_np_state_ptr() - - -def numpy_randint1(a): - return np.random.randint(a) - -def numpy_randint2(a, b): - return np.random.randint(a, b) - -def random_randint(a, b): - return random.randint(a, b) - -def random_randrange1(a): - return random.randrange(a) - -def random_randrange2(a, b): - return random.randrange(a, b) - -def random_randrange3(a, b, c): - return random.randrange(a, b, c) - -def numpy_choice1(a): - return np.random.choice(a) - -def numpy_choice2(a, size): - return np.random.choice(a, size=size) - -def numpy_choice3(a, size, replace): - return np.random.choice(a, size=size, replace=replace) - -def numpy_multinomial2(n, pvals): - return np.random.multinomial(n, pvals) - -def numpy_multinomial3(n, pvals, size): - return np.random.multinomial(n, pvals=pvals, size=size) - -def numpy_check_rand(seed, a, b): - np.random.seed(seed) - expected = np.random.random((a, b)) - np.random.seed(seed) - got = np.random.rand(a, b) - return expected, got - -def numpy_check_randn(seed, a, b): - np.random.seed(seed) - expected = np.random.standard_normal((a, b)) - np.random.seed(seed) - got = 
np.random.randn(a, b) - return expected, got - -def jit_with_args(name, argstring): - code = """def func(%(argstring)s): - return %(name)s(%(argstring)s) -""" % locals() - pyfunc = compile_function("func", code, globals()) - return jit(nopython=True)(pyfunc) - -def jit_nullary(name): - return jit_with_args(name, "") - -def jit_unary(name): - return jit_with_args(name, "a") - -def jit_binary(name): - return jit_with_args(name, "a, b") - -def jit_ternary(name): - return jit_with_args(name, "a, b, c") - - -random_gauss = jit_binary("random.gauss") -random_random = jit_nullary("random.random") -random_seed = jit_unary("random.seed") - -numpy_normal = jit_binary("np.random.normal") -numpy_random = jit_nullary("np.random.random") -numpy_seed = jit_unary("np.random.seed") - - -def _copy_py_state(r, ptr): - """ - Copy state of Python random *r* to Numba state *ptr*. - """ - mt = r.getstate()[1] - ints, index = mt[:-1], mt[-1] - _helperlib.rnd_set_state(ptr, (index, list(ints))) - return ints, index - -def _copy_np_state(r, ptr): - """ - Copy state of Numpy random *r* to Numba state *ptr*. - """ - ints, index = r.get_state()[1:3] - _helperlib.rnd_set_state(ptr, (index, [int(x) for x in ints])) - return ints, index - -def sync_to_numpy(r): - _ver, mt_st, _gauss_next = r.getstate() - mt_pos = mt_st[-1] - mt_ints = mt_st[:-1] - assert len(mt_ints) == 624 - - np_st = ('MT19937', np.array(mt_ints, dtype='uint32'), mt_pos) - if _gauss_next is None: - np_st += (0, 0.0) - else: - np_st += (1, _gauss_next) - - np.random.set_state(np_st) - -# Pure Python equivalents of some of the Numpy distributions, using -# Python's basic generators. 
- -def py_chisquare(r, df): - return 2.0 * r.gammavariate(df / 2.0, 1.0) - -def py_f(r, num, denom): - return ((py_chisquare(r, num) * denom) / - (py_chisquare(r, denom) * num)) - - -class BaseTest(TestCase): - - def _follow_cpython(self, ptr, seed=2): - r = random.Random(seed) - _copy_py_state(r, ptr) - return r - - def _follow_numpy(self, ptr, seed=2): - r = np.random.RandomState(seed) - _copy_np_state(r, ptr) - return r - - -class TestInternals(BaseTest): - """ - Test low-level internals of the implementation. - """ - - def _check_get_set_state(self, ptr): - state = _helperlib.rnd_get_state(ptr) - i, ints = state - self.assertIsInstance(i, int) - self.assertIsInstance(ints, list) - self.assertEqual(len(ints), N) - j = (i * 100007) % N - ints = [i * 3 for i in range(N)] - # Roundtrip - _helperlib.rnd_set_state(ptr, (j, ints)) - self.assertEqual(_helperlib.rnd_get_state(ptr), (j, ints)) - - def _check_shuffle(self, ptr): - # We test shuffling against CPython - r = random.Random() - ints, index = _copy_py_state(r, ptr) - # Force shuffling in CPython generator - for i in range(index, N + 1, 2): - r.random() - _helperlib.rnd_shuffle(ptr) - # Check new integer keys - mt = r.getstate()[1] - ints, index = mt[:-1], mt[-1] - self.assertEqual(_helperlib.rnd_get_state(ptr)[1], list(ints)) - - def _check_init(self, ptr): - # We use the same integer seeding as Numpy - # (CPython is different: it treats the integer as a byte array) - r = np.random.RandomState() - for i in [0, 1, 125, 2**32 - 5]: - # Need to cast to a C-sized int (for Numpy <= 1.7) - r.seed(np.uint32(i)) - st = r.get_state() - ints = list(st[1]) - index = st[2] - assert index == N # sanity check - _helperlib.rnd_seed(ptr, i) - self.assertEqual(_helperlib.rnd_get_state(ptr), (index, ints)) - - def _check_perturb(self, ptr): - states = [] - for i in range(10): - # Initialize with known state - _helperlib.rnd_seed(ptr, 0) - # Perturb with entropy - _helperlib.rnd_seed(ptr, os.urandom(512)) - 
states.append(tuple(_helperlib.rnd_get_state(ptr)[1])) - # No two identical states - self.assertEqual(len(set(states)), len(states)) - - def test_get_set_state(self): - self._check_get_set_state(get_py_state_ptr()) - - def test_shuffle(self): - self._check_shuffle(get_py_state_ptr()) - - def test_init(self): - self._check_init(get_py_state_ptr()) - - def test_perturb(self): - self._check_perturb(get_py_state_ptr()) - - -class TestRandom(BaseTest): - - # NOTE: there may be cascading imprecision issues (e.g. between x87-using - # C code and SSE-using LLVM code), which is especially brutal for some - # iterative algorithms with sensitive exit conditions. - # Therefore we stick to hardcoded integers for seed values. - - def _check_random_seed(self, seedfunc, randomfunc): - """ - Check seed()- and random()-like functions. - """ - # Our seed() mimicks Numpy's. - r = np.random.RandomState() - for i in [0, 1, 125, 2**32 - 1]: - # Need to cast to a C-sized int (for Numpy <= 1.7) - r.seed(np.uint32(i)) - seedfunc(i) - # Be sure to trigger a reshuffle - for j in range(N + 10): - self.assertPreciseEqual(randomfunc(), r.uniform(0.0, 1.0)) - - @tag('important') - def test_random_random(self): - self._check_random_seed(random_seed, random_random) - - @tag('important') - def test_numpy_random(self): - self._check_random_seed(numpy_seed, numpy_random) - # Test aliases - self._check_random_seed(numpy_seed, jit_nullary("np.random.random_sample")) - self._check_random_seed(numpy_seed, jit_nullary("np.random.ranf")) - self._check_random_seed(numpy_seed, jit_nullary("np.random.sample")) - self._check_random_seed(numpy_seed, jit_nullary("np.random.rand")) - - def test_independent_generators(self): - # PRNGs for Numpy and Python are independent. 
- N = 10 - random_seed(1) - py_numbers = [random_random() for i in range(N)] - numpy_seed(2) - np_numbers = [numpy_random() for i in range(N)] - random_seed(1) - numpy_seed(2) - pairs = [(random_random(), numpy_random()) for i in range(N)] - self.assertPreciseEqual([p[0] for p in pairs], py_numbers) - self.assertPreciseEqual([p[1] for p in pairs], np_numbers) - - def _check_getrandbits(self, func, ptr): - """ - Check a getrandbits()-like function. - """ - # Our implementation follows CPython's for bits <= 64. - r = self._follow_cpython(ptr) - for nbits in range(1, 65): - expected = r.getrandbits(nbits) - got = func(nbits) - self.assertPreciseEqual(expected, got) - self.assertRaises(OverflowError, func, 65) - self.assertRaises(OverflowError, func, 9999999) - self.assertRaises(OverflowError, func, -1) - - @tag('important') - def test_random_getrandbits(self): - self._check_getrandbits(jit_unary("random.getrandbits"), get_py_state_ptr()) - - # Explanation for the large ulps value: on 32-bit platforms, our - # LLVM-compiled functions use SSE but they are compared against - # C functions which use x87. - # On some distributions, the errors seem to accumulate dramatically. - - def _check_dist(self, func, pyfunc, argslist, niters=3, - prec='double', ulps=12, pydtype=None): - assert len(argslist) - for args in argslist: - results = [func(*args) for i in range(niters)] - pyresults = [(pyfunc(*args, dtype=pydtype) if pydtype else pyfunc(*args)) - for i in range(niters)] - self.assertPreciseEqual(results, pyresults, prec=prec, ulps=ulps, - msg="for arguments %s" % (args,)) - - def _check_gauss(self, func2, func1, func0, ptr): - """ - Check a gauss()-like function. - """ - # Our implementation follows Numpy's. 
- r = self._follow_numpy(ptr) - if func2 is not None: - self._check_dist(func2, r.normal, - [(1.0, 1.0), (2.0, 0.5), (-2.0, 0.5)], - niters=N // 2 + 10) - if func1 is not None: - self._check_dist(func1, r.normal, [(0.5,)]) - if func0 is not None: - self._check_dist(func0, r.normal, [()]) - - @tag('important') - def test_random_gauss(self): - self._check_gauss(jit_binary("random.gauss"), None, None, get_py_state_ptr()) - - def test_random_normalvariate(self): - # normalvariate() is really an alias to gauss() in Numba - # (not in Python, though - they use different algorithms) - self._check_gauss(jit_binary("random.normalvariate"), None, None, - get_py_state_ptr()) - - @tag('important') - def test_numpy_normal(self): - self._check_gauss(jit_binary("np.random.normal"), - jit_unary("np.random.normal"), - jit_nullary("np.random.normal"), - get_np_state_ptr()) - - @tag('important') - def test_numpy_standard_normal(self): - self._check_gauss(None, None, jit_nullary("np.random.standard_normal"), - get_np_state_ptr()) - - @tag('important') - def test_numpy_randn(self): - self._check_gauss(None, None, jit_nullary("np.random.randn"), - get_np_state_ptr()) - - def _check_lognormvariate(self, func2, func1, func0, ptr): - """ - Check a lognormvariate()-like function. - """ - # Our implementation follows Numpy's. 
- r = self._follow_numpy(ptr) - if func2 is not None: - self._check_dist(func2, r.lognormal, - [(1.0, 1.0), (2.0, 0.5), (-2.0, 0.5)], - niters=N // 2 + 10) - if func1 is not None: - self._check_dist(func1, r.lognormal, [(0.5,)]) - if func0 is not None: - self._check_dist(func0, r.lognormal, [()]) - - def test_random_lognormvariate(self): - self._check_lognormvariate(jit_binary("random.lognormvariate"), - None, None, get_py_state_ptr()) - - def test_numpy_lognormal(self): - self._check_lognormvariate(jit_binary("np.random.lognormal"), - jit_unary("np.random.lognormal"), - jit_nullary("np.random.lognormal"), - get_np_state_ptr()) - - def _check_randrange(self, func1, func2, func3, ptr, max_width, is_numpy, tp=None): - """ - Check a randrange()-like function. - """ - # Sanity check - ints = [] - for i in range(10): - ints.append(func1(500000000)) - ints.append(func2(5, 500000000)) - if func3 is not None: - ints.append(func3(5, 500000000, 3)) - self.assertEqual(len(ints), len(set(ints)), ints) - # Our implementation follows Python 3's. 
- if sys.version_info >= (3,): - if is_numpy: - rr = self._follow_numpy(ptr).randint - else: - rr = self._follow_cpython(ptr).randrange - widths = [w for w in [1, 5, 8, 5000, 2**40, 2**62 + 2**61] if w < max_width] - pydtype = tp if is_numpy and np.__version__ >= '1.11.0' else None - for width in widths: - self._check_dist(func1, rr, [(width,)], niters=10, - pydtype=pydtype) - self._check_dist(func2, rr, [(-2, 2 +width)], niters=10, - pydtype=pydtype) - if func3 is not None: - self.assertPreciseEqual(func3(-2, 2 + width, 6), - rr(-2, 2 + width, 6)) - self.assertPreciseEqual(func3(2 + width, 2, -3), - rr(2 + width, 2, -3)) - # Empty ranges - self.assertRaises(ValueError, func1, 0) - self.assertRaises(ValueError, func1, -5) - self.assertRaises(ValueError, func2, 5, 5) - self.assertRaises(ValueError, func2, 5, 2) - if func3 is not None: - self.assertRaises(ValueError, func3, 5, 7, -1) - self.assertRaises(ValueError, func3, 7, 5, 1) - - @tag('important') - def test_random_randrange(self): - for tp, max_width in [(types.int64, 2**63), (types.int32, 2**31)]: - cr1 = compile_isolated(random_randrange1, (tp,)) - cr2 = compile_isolated(random_randrange2, (tp, tp)) - cr3 = compile_isolated(random_randrange3, (tp, tp, tp)) - self._check_randrange(cr1.entry_point, cr2.entry_point, - cr3.entry_point, get_py_state_ptr(), - max_width, False) - - @tag('important') - def test_numpy_randint(self): - for tp, np_tp, max_width in [(types.int64, np.int64, 2**63), - (types.int32, np.int32, 2**31)]: - cr1 = compile_isolated(numpy_randint1, (tp,)) - cr2 = compile_isolated(numpy_randint2, (tp, tp)) - self._check_randrange(cr1.entry_point, cr2.entry_point, - None, get_np_state_ptr(), max_width, True, np_tp) - - def _check_randint(self, func, ptr, max_width): - """ - Check a randint()-like function. - """ - # Sanity check - ints = [] - for i in range(10): - ints.append(func(5, 500000000)) - self.assertEqual(len(ints), len(set(ints)), ints) - # Our implementation follows Python 3's. 
- if sys.version_info >= (3,): - r = self._follow_cpython(ptr) - for args in [(1, 5), (13, 5000), (20, 2**62 + 2**61)]: - if args[1] > max_width: - continue - self._check_dist(func, r.randint, [args], niters=10) - # Empty ranges - self.assertRaises(ValueError, func, 5, 4) - self.assertRaises(ValueError, func, 5, 2) - - @tag('important') - def test_random_randint(self): - for tp, max_width in [(types.int64, 2**63), (types.int32, 2**31)]: - cr = compile_isolated(random_randint, (tp, tp)) - self._check_randint(cr.entry_point, get_py_state_ptr(), max_width) - - def _check_uniform(self, func, ptr): - """ - Check a uniform()-like function. - """ - # Our implementation follows Python's. - r = self._follow_cpython(ptr) - self._check_dist(func, r.uniform, - [(1.5, 1e6), (-2.5, 1e3), (1.5, -2.5)]) - - @tag('important') - def test_random_uniform(self): - self._check_uniform(jit_binary("random.uniform"), get_py_state_ptr()) - - @tag('important') - def test_numpy_uniform(self): - self._check_uniform(jit_binary("np.random.uniform"), get_np_state_ptr()) - - def _check_triangular(self, func2, func3, ptr): - """ - Check a triangular()-like function. - """ - # Our implementation follows Python's. - r = self._follow_cpython(ptr) - if func2 is not None: - self._check_dist(func2, r.triangular, - [(1.5, 3.5), (-2.5, 1.5), (1.5, 1.5)]) - self._check_dist(func3, r.triangular, [(1.5, 3.5, 2.2)]) - - def test_random_triangular(self): - self._check_triangular(jit_binary("random.triangular"), - jit_ternary("random.triangular"), - get_py_state_ptr()) - - def test_numpy_triangular(self): - triangular = jit_ternary("np.random.triangular") - fixed_triangular = lambda l, r, m: triangular(l, m, r) - self._check_triangular(None, fixed_triangular, get_np_state_ptr()) - - def _check_gammavariate(self, func2, func1, ptr): - """ - Check a gammavariate()-like function. - """ - # Our implementation follows Python's. 
- r = self._follow_cpython(ptr) - if func2 is not None: - self._check_dist(func2, r.gammavariate, - [(0.5, 2.5), (1.0, 1.5), (1.5, 3.5)]) - if func1 is not None: - self.assertPreciseEqual(func1(1.5), r.gammavariate(1.5, 1.0)) - # Invalid inputs - if func2 is not None: - self.assertRaises(ValueError, func2, 0.0, 1.0) - self.assertRaises(ValueError, func2, 1.0, 0.0) - self.assertRaises(ValueError, func2, -0.5, 1.0) - self.assertRaises(ValueError, func2, 1.0, -0.5) - if func1 is not None: - self.assertRaises(ValueError, func1, 0.0) - self.assertRaises(ValueError, func1, -0.5) - - def test_random_gammavariate(self): - self._check_gammavariate(jit_binary("random.gammavariate"), None, - get_py_state_ptr()) - - def test_numpy_gamma(self): - self._check_gammavariate(jit_binary("np.random.gamma"), - jit_unary("np.random.gamma"), - get_np_state_ptr()) - self._check_gammavariate(None, - jit_unary("np.random.standard_gamma"), - get_np_state_ptr()) - - def _check_betavariate(self, func, ptr): - """ - Check a betavariate()-like function. - """ - # Our implementation follows Python's. - r = self._follow_cpython(ptr) - self._check_dist(func, r.betavariate, [(0.5, 2.5)]) - # Invalid inputs - self.assertRaises(ValueError, func, 0.0, 1.0) - self.assertRaises(ValueError, func, 1.0, 0.0) - self.assertRaises(ValueError, func, -0.5, 1.0) - self.assertRaises(ValueError, func, 1.0, -0.5) - - def test_random_betavariate(self): - self._check_betavariate(jit_binary("random.betavariate"), get_py_state_ptr()) - - def test_numpy_beta(self): - self._check_betavariate(jit_binary("np.random.beta"), get_np_state_ptr()) - - def _check_vonmisesvariate(self, func, ptr): - """ - Check a vonmisesvariate()-like function. - """ - # Our implementation follows Python 2.7+'s. 
- r = self._follow_cpython(ptr) - self._check_dist(func, r.vonmisesvariate, [(0.5, 2.5)]) - - def test_random_vonmisesvariate(self): - self._check_vonmisesvariate(jit_binary("random.vonmisesvariate"), - get_py_state_ptr()) - - def test_numpy_vonmises(self): - self._check_vonmisesvariate(jit_binary("np.random.vonmises"), - get_np_state_ptr()) - - def _check_expovariate(self, func, ptr): - """ - Check a expovariate()-like function. Note the second argument - is inversed compared to np.random.exponential(). - """ - # Our implementation follows Numpy's (and Python 2.7+'s). - r = self._follow_numpy(ptr) - for lambd in (0.2, 0.5, 1.5): - for i in range(3): - self.assertPreciseEqual(func(lambd), r.exponential(1 / lambd), - prec='double') - - def test_random_expovariate(self): - self._check_expovariate(jit_unary("random.expovariate"), get_py_state_ptr()) - - def _check_exponential(self, func1, func0, ptr): - """ - Check a exponential()-like function. - """ - # Our implementation follows Numpy's (and Python 2.7+'s). - r = self._follow_numpy(ptr) - if func1 is not None: - self._check_dist(func1, r.exponential, [(0.5,), (1.0,), (1.5,)]) - if func0 is not None: - self._check_dist(func0, r.exponential, [()]) - - def test_numpy_exponential(self): - self._check_exponential(jit_unary("np.random.exponential"), - jit_nullary("np.random.exponential"), - get_np_state_ptr()) - - def test_numpy_standard_exponential(self): - self._check_exponential(None, - jit_nullary("np.random.standard_exponential"), - get_np_state_ptr()) - - def _check_paretovariate(self, func, ptr): - """ - Check a paretovariate()-like function. - """ - # Our implementation follows Python's. 
- r = self._follow_cpython(ptr) - self._check_dist(func, r.paretovariate, [(0.5,), (3.5,)]) - - def test_random_paretovariate(self): - self._check_paretovariate(jit_unary("random.paretovariate"), get_py_state_ptr()) - - def test_numpy_pareto(self): - pareto = jit_unary("np.random.pareto") - fixed_pareto = lambda a: pareto(a) + 1.0 - self._check_paretovariate(fixed_pareto, get_np_state_ptr()) - - def _check_weibullvariate(self, func2, func1, ptr): - """ - Check a weibullvariate()-like function. - """ - # Our implementation follows Python's. - r = self._follow_cpython(ptr) - if func2 is not None: - self._check_dist(func2, r.weibullvariate, [(0.5, 2.5)]) - if func1 is not None: - for i in range(3): - self.assertPreciseEqual(func1(2.5), - r.weibullvariate(1.0, 2.5)) - - def test_random_weibullvariate(self): - self._check_weibullvariate(jit_binary("random.weibullvariate"), - None, get_py_state_ptr()) - - def test_numpy_weibull(self): - self._check_weibullvariate(None, jit_unary("np.random.weibull"), - get_np_state_ptr()) - - @tag('important') - def test_numpy_binomial(self): - # We follow Numpy's algorithm up to n*p == 30 - binomial = jit_binary("np.random.binomial") - r = self._follow_numpy(get_np_state_ptr(), 0) - self._check_dist(binomial, r.binomial, [(18, 0.25)]) - # Sanity check many values - for n in (100, 1000, 10000): - self.assertEqual(binomial(n, 0.0), 0) - self.assertEqual(binomial(n, 1.0), n) - for p in (0.0001, 0.1, 0.4, 0.49999, 0.5, 0.50001, 0.8, 0.9, 0.9999): - r = binomial(n, p) - if p > 0.5: - r = n - r - p = 1 - p - self.assertGreaterEqual(r, 0) - self.assertLessEqual(r, n) - expected = p * n - tol = 3 * n / math.sqrt(n) - self.assertGreaterEqual(r, expected - tol, (p, n, r)) - self.assertLessEqual(r, expected + tol, (p, n, r)) - # Invalid values - self.assertRaises(ValueError, binomial, -1, 0.5) - self.assertRaises(ValueError, binomial, 10, -0.1) - self.assertRaises(ValueError, binomial, 10, 1.1) - - @tag('important') - def 
test_numpy_chisquare(self): - chisquare = jit_unary("np.random.chisquare") - r = self._follow_cpython(get_np_state_ptr()) - self._check_dist(chisquare, - functools.partial(py_chisquare, r), - [(1.5,), (2.5,)]) - - def test_numpy_f(self): - f = jit_binary("np.random.f") - r = self._follow_cpython(get_np_state_ptr()) - self._check_dist(f, functools.partial(py_f, r), - [(0.5, 1.5), (1.5, 0.8)]) - - def test_numpy_geometric(self): - geom = jit_unary("np.random.geometric") - # p out of domain - self.assertRaises(ValueError, geom, -1.0) - self.assertRaises(ValueError, geom, 0.0) - self.assertRaises(ValueError, geom, 1.001) - # Some basic checks - N = 200 - r = [geom(1.0) for i in range(N)] - self.assertPreciseEqual(r, [1] * N) - r = [geom(0.9) for i in range(N)] - n = r.count(1) - self.assertGreaterEqual(n, N // 2) - self.assertLess(n, N) - self.assertFalse([i for i in r if i > 1000]) # unlikely - r = [geom(0.4) for i in range(N)] - self.assertTrue([i for i in r if i > 4]) # likely - r = [geom(0.01) for i in range(N)] - self.assertTrue([i for i in r if i > 50]) # likely - r = [geom(1e-15) for i in range(N)] - self.assertTrue([i for i in r if i > 2**32]) # likely - - def test_numpy_gumbel(self): - gumbel = jit_binary("np.random.gumbel") - r = self._follow_numpy(get_np_state_ptr()) - self._check_dist(gumbel, r.gumbel, [(0.0, 1.0), (-1.5, 3.5)]) - - def test_numpy_hypergeometric(self): - # Our implementation follows Numpy's up to nsamples = 10. 
- hg = jit_ternary("np.random.hypergeometric") - r = self._follow_numpy(get_np_state_ptr()) - self._check_dist(hg, r.hypergeometric, - [(1000, 5000, 10), (5000, 1000, 10)], - niters=30) - # Sanity checks - r = [hg(1000, 1000, 100) for i in range(100)] - self.assertTrue(all(x >= 0 and x <= 100 for x in r), r) - self.assertGreaterEqual(np.mean(r), 40.0) - self.assertLessEqual(np.mean(r), 60.0) - r = [hg(1000, 100000, 100) for i in range(100)] - self.assertTrue(all(x >= 0 and x <= 100 for x in r), r) - self.assertLessEqual(np.mean(r), 10.0) - r = [hg(100000, 1000, 100) for i in range(100)] - self.assertTrue(all(x >= 0 and x <= 100 for x in r), r) - self.assertGreaterEqual(np.mean(r), 90.0) - - def test_numpy_laplace(self): - r = self._follow_numpy(get_np_state_ptr()) - self._check_dist(jit_binary("np.random.laplace"), r.laplace, - [(0.0, 1.0), (-1.5, 3.5)]) - self._check_dist(jit_unary("np.random.laplace"), r.laplace, - [(0.0,), (-1.5,)]) - self._check_dist(jit_nullary("np.random.laplace"), r.laplace, [()]) - - @tag('important') - def test_numpy_logistic(self): - r = self._follow_numpy(get_np_state_ptr()) - self._check_dist(jit_binary("np.random.logistic"), r.logistic, - [(0.0, 1.0), (-1.5, 3.5)]) - self._check_dist(jit_unary("np.random.logistic"), r.logistic, - [(0.0,), (-1.5,)]) - self._check_dist(jit_nullary("np.random.logistic"), r.logistic, [()]) - - def test_numpy_logseries(self): - r = self._follow_numpy(get_np_state_ptr()) - logseries = jit_unary("np.random.logseries") - self._check_dist(logseries, r.logseries, - [(0.1,), (0.99,), (0.9999,)], - niters=50) - # Numpy's logseries overflows on 32-bit builds, so instead - # hardcode Numpy's (correct) output on 64-bit builds. 
- r = self._follow_numpy(get_np_state_ptr(), seed=1) - self.assertEqual([logseries(0.9999999999999) for i in range(10)], - [2022733531, 77296, 30, 52204, 9341294, 703057324, - 413147702918, 1870715907, 16009330, 738]) - self.assertRaises(ValueError, logseries, 0.0) - self.assertRaises(ValueError, logseries, -0.1) - self.assertRaises(ValueError, logseries, 1.1) - - def test_numpy_poisson(self): - r = self._follow_numpy(get_np_state_ptr()) - poisson = jit_unary("np.random.poisson") - # Our implementation follows Numpy's. - self._check_dist(poisson, r.poisson, - [(0.0,), (0.5,), (2.0,), (10.0,), (900.5,)], - niters=50) - self.assertRaises(ValueError, poisson, -0.1) - - def test_numpy_negative_binomial(self): - self._follow_numpy(get_np_state_ptr(), 0) - negbin = jit_binary("np.random.negative_binomial") - self.assertEqual([negbin(10, 0.9) for i in range(10)], - [2, 3, 1, 5, 2, 1, 0, 1, 0, 0]) - self.assertEqual([negbin(10, 0.1) for i in range(10)], - [55, 71, 56, 57, 56, 56, 34, 55, 101, 67]) - self.assertEqual([negbin(1000, 0.1) for i in range(10)], - [9203, 8640, 9081, 9292, 8938, - 9165, 9149, 8774, 8886, 9117]) - m = np.mean([negbin(1000000000, 0.1) - for i in range(50)]) - self.assertGreater(m, 9e9 * 0.99) - self.assertLess(m, 9e9 * 1.01) - self.assertRaises(ValueError, negbin, 0, 0.5) - self.assertRaises(ValueError, negbin, -1, 0.5) - self.assertRaises(ValueError, negbin, 10, -0.1) - self.assertRaises(ValueError, negbin, 10, 1.1) - - @tag('important') - def test_numpy_power(self): - r = self._follow_numpy(get_np_state_ptr()) - power = jit_unary("np.random.power") - self._check_dist(power, r.power, - [(0.1,), (0.5,), (0.9,), (6.0,)]) - self.assertRaises(ValueError, power, 0.0) - self.assertRaises(ValueError, power, -0.1) - - def test_numpy_rayleigh(self): - r = self._follow_numpy(get_np_state_ptr()) - rayleigh1 = jit_unary("np.random.rayleigh") - rayleigh0 = jit_nullary("np.random.rayleigh") - self._check_dist(rayleigh1, r.rayleigh, - [(0.1,), (0.8,), (25.,), 
(1e3,)]) - self._check_dist(rayleigh0, r.rayleigh, [()]) - self.assertRaises(ValueError, rayleigh1, 0.0) - self.assertRaises(ValueError, rayleigh1, -0.1) - - def test_numpy_standard_cauchy(self): - r = self._follow_numpy(get_np_state_ptr()) - cauchy = jit_nullary("np.random.standard_cauchy") - self._check_dist(cauchy, r.standard_cauchy, [()]) - - def test_numpy_standard_t(self): - # We use CPython's algorithm for the gamma dist and numpy's - # for the normal dist. Standard T calls both so we can't check - # against either generator's output. - r = self._follow_cpython(get_np_state_ptr()) - standard_t = jit_unary("np.random.standard_t") - avg = np.mean([standard_t(5) for i in range(5000)]) - # Sanity check - self.assertLess(abs(avg), 0.5) - - def test_numpy_wald(self): - r = self._follow_numpy(get_np_state_ptr()) - wald = jit_binary("np.random.wald") - self._check_dist(wald, r.wald, [(1.0, 1.0), (2.0, 5.0)]) - self.assertRaises(ValueError, wald, 0.0, 1.0) - self.assertRaises(ValueError, wald, -0.1, 1.0) - self.assertRaises(ValueError, wald, 1.0, 0.0) - self.assertRaises(ValueError, wald, 1.0, -0.1) - - def test_numpy_zipf(self): - r = self._follow_numpy(get_np_state_ptr()) - zipf = jit_unary("np.random.zipf") - self._check_dist(zipf, r.zipf, [(1.5,), (2.5,)], niters=100) - for val in (1.0, 0.5, 0.0, -0.1): - self.assertRaises(ValueError, zipf, val) - - def _check_shuffle(self, func, ptr, is_numpy): - """ - Check a shuffle()-like function for arrays. - """ - # Our implementation follows Python 3's. 
- arrs = [np.arange(20), np.arange(32).reshape((8, 4))] - if sys.version_info >= (3,): - if is_numpy: - r = self._follow_numpy(ptr) - else: - r = self._follow_cpython(ptr) - for a in arrs: - for i in range(3): - got = a.copy() - expected = a.copy() - func(got) - if is_numpy or len(a.shape) == 1: - r.shuffle(expected) - self.assertPreciseEqual(got, expected) - else: - # Sanity check - for a in arrs: - for i in range(3): - b = a.copy() - func(b) - self.assertFalse(np.array_equal(a, b)) - self.assertTrue(np.array_equal(np.sort(a, axis=0), - np.sort(b, axis=0))) - a = b - # Test with an arbitrary buffer-providing object - a = arrs[0] - b = a.copy() - func(memoryview(b)) - self.assertNotEqual(list(a), list(b)) - self.assertEqual(sorted(a), sorted(b)) - # Read-only object - with self.assertTypingError(): - func(memoryview(b"xyz")) - - @tag('important') - def test_random_shuffle(self): - self._check_shuffle(jit_unary("random.shuffle"), get_py_state_ptr(), False) - - @tag('important') - def test_numpy_shuffle(self): - self._check_shuffle(jit_unary("np.random.shuffle"), get_np_state_ptr(), True) - - def _check_startup_randomness(self, func_name, func_args): - """ - Check that the state is properly randomized at startup. 
- """ - code = """if 1: - from numba.tests import test_random - func = getattr(test_random, %(func_name)r) - print(func(*%(func_args)r)) - """ % (locals()) - numbers = set() - for i in range(3): - popen = subprocess.Popen([sys.executable, "-c", code], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = popen.communicate() - if popen.returncode != 0: - raise AssertionError("process failed with code %s: stderr follows\n%s\n" - % (popen.returncode, err.decode())) - numbers.add(float(out.strip())) - self.assertEqual(len(numbers), 3, numbers) - - def test_random_random_startup(self): - self._check_startup_randomness("random_random", ()) - - def test_random_gauss_startup(self): - self._check_startup_randomness("random_gauss", (1.0, 1.0)) - - def test_numpy_random_startup(self): - self._check_startup_randomness("numpy_random", ()) - - def test_numpy_gauss_startup(self): - self._check_startup_randomness("numpy_normal", (1.0, 1.0)) - - def test_numpy_random_permutation(self): - # Our implementation follows Python 3's. 
- func = jit_unary("np.random.permutation") - if sys.version_info >= (3,): - r = self._follow_numpy(get_np_state_ptr()) - for s in [5, 10, 15, 20]: - a = np.arange(s) - b = a.copy() - # Test array version - self.assertPreciseEqual(func(a), r.permutation(a)) - # Test int version - self.assertPreciseEqual(func(s), r.permutation(s)) - # Permutation should not modify its argument - self.assertPreciseEqual(a, b) - # Check multi-dimensional arrays - arrs = [np.arange(10).reshape(2, 5), - np.arange(27).reshape(3, 3, 3), - np.arange(36).reshape(2, 3, 3, 2)] - for a in arrs: - b = a.copy() - self.assertPreciseEqual(func(a), r.permutation(a)) - self.assertPreciseEqual(a, b) - else: - # Sanity check - arrs = [np.arange(20), np.arange(20).reshape(5, 2, 2)] - for a in arrs: - checked = 0 - while checked < 3: - b = func(a) - # check that permuted arrays are equal when sorted - # account for the possibility of the identity permutation - if not np.array_equal(a, b): - self.assertTrue(np.array_equal(np.sort(a, axis=0), - np.sort(b, axis=0))) - checked += 1 - - -class TestRandomArrays(BaseTest): - """ - Test array-producing variants of np.random.* functions. - """ - - def _compile_array_dist(self, funcname, nargs): - qualname = "np.random.%s" % (funcname,) - argstring = ', '.join('abcd'[:nargs]) - return jit_with_args(qualname, argstring) - - def _check_array_dist(self, funcname, scalar_args): - """ - Check returning an array according to a given distribution. 
- """ - cfunc = self._compile_array_dist(funcname, len(scalar_args) + 1) - r = self._follow_numpy(get_np_state_ptr()) - pyfunc = getattr(r, funcname) - for size in (8, (2, 3)): - args = scalar_args + (size,) - expected = pyfunc(*args) - got = cfunc(*args) - # Numpy may return int32s where we return int64s, adjust - if (expected.dtype == np.dtype('int32') - and got.dtype == np.dtype('int64')): - expected = expected.astype(got.dtype) - self.assertPreciseEqual(expected, got, prec='double', ulps=5) - - def test_numpy_randint(self): - cfunc = self._compile_array_dist("randint", 3) - low, high = 1000, 10000 - size = (30, 30) - res = cfunc(low, high, size) - self.assertIsInstance(res, np.ndarray) - self.assertEqual(res.shape, size) - self.assertIn(res.dtype, (np.dtype('int32'), np.dtype('int64'))) - self.assertTrue(np.all(res >= low)) - self.assertTrue(np.all(res < high)) - # Crude statistical tests - mean = (low + high) / 2 - tol = (high - low) / 20 - self.assertGreaterEqual(res.mean(), mean - tol) - self.assertLessEqual(res.mean(), mean + tol) - - def test_numpy_random_random(self): - cfunc = self._compile_array_dist("random", 1) - size = (30, 30) - res = cfunc(size) - self.assertIsInstance(res, np.ndarray) - self.assertEqual(res.shape, size) - self.assertEqual(res.dtype, np.dtype('float64')) - # Results are within expected bounds - self.assertTrue(np.all(res >= 0.0)) - self.assertTrue(np.all(res < 1.0)) - # Crude statistical tests - self.assertTrue(np.any(res <= 0.1)) - self.assertTrue(np.any(res >= 0.9)) - mean = res.mean() - self.assertGreaterEqual(mean, 0.45) - self.assertLessEqual(mean, 0.55) - - # Sanity-check various distributions. For convenience, we only check - # those distributions that produce the exact same values as Numpy's. 
- - def test_numpy_binomial(self): - self._check_array_dist("binomial", (20, 0.5)) - - def test_numpy_exponential(self): - self._check_array_dist("exponential", (1.5,)) - - def test_numpy_gumbel(self): - self._check_array_dist("gumbel", (1.5, 0.5)) - - def test_numpy_laplace(self): - self._check_array_dist("laplace", (1.5, 0.5)) - - def test_numpy_logistic(self): - self._check_array_dist("logistic", (1.5, 0.5)) - - def test_numpy_lognormal(self): - self._check_array_dist("lognormal", (1.5, 2.0)) - - def test_numpy_logseries(self): - self._check_array_dist("logseries", (0.8,)) - - @tag('important') - def test_numpy_normal(self): - self._check_array_dist("normal", (0.5, 2.0)) - - def test_numpy_poisson(self): - self._check_array_dist("poisson", (0.8,)) - - def test_numpy_power(self): - self._check_array_dist("power", (0.8,)) - - @tag('important') - def test_numpy_rand(self): - cfunc = jit(nopython=True)(numpy_check_rand) - expected, got = cfunc(42, 2, 3) - self.assertEqual(got.shape, (2, 3)) - self.assertPreciseEqual(expected, got) - - @tag('important') - def test_numpy_randn(self): - cfunc = jit(nopython=True)(numpy_check_randn) - expected, got = cfunc(42, 2, 3) - self.assertEqual(got.shape, (2, 3)) - self.assertPreciseEqual(expected, got) - - def test_numpy_rayleigh(self): - self._check_array_dist("rayleigh", (0.8,)) - - def test_numpy_standard_cauchy(self): - self._check_array_dist("standard_cauchy", ()) - - def test_numpy_standard_exponential(self): - self._check_array_dist("standard_exponential", ()) - - def test_numpy_standard_normal(self): - self._check_array_dist("standard_normal", ()) - - def test_numpy_uniform(self): - self._check_array_dist("uniform", (0.1, 0.4)) - - def test_numpy_wald(self): - self._check_array_dist("wald", (0.1, 0.4)) - - def test_numpy_zipf(self): - self._check_array_dist("zipf", (2.5,)) - - -class TestRandomChoice(BaseTest): - """ - Test np.random.choice. 
- """ - - def _check_results(self, pop, res, replace=True): - """ - Check basic expectations about a batch of samples. - """ - spop = set(pop) - sres = set(res) - # All results are in the population - self.assertLessEqual(sres, spop) - # Sorted results are unlikely - self.assertNotEqual(sorted(res), list(res)) - if replace: - # Duplicates are likely - self.assertLess(len(sres), len(res), res) - else: - # No duplicates - self.assertEqual(len(sres), len(res), res) - - def _check_dist(self, pop, samples): - """ - Check distribution of some samples. - """ - # Sanity check that we have enough samples - self.assertGreaterEqual(len(samples), len(pop) * 100) - # Check equidistribution of samples - expected_frequency = len(samples) / len(pop) - c = collections.Counter(samples) - for value in pop: - n = c[value] - self.assertGreaterEqual(n, expected_frequency * 0.5) - self.assertLessEqual(n, expected_frequency * 2.0) - - def _accumulate_array_results(self, func, nresults): - """ - Accumulate array results produced by *func* until they reach - *nresults* elements. - """ - res = [] - while len(res) < nresults: - res += list(func().flat) - return res[:nresults] - - def _check_choice_1(self, a, pop): - """ - Check choice(a) against pop. - """ - cfunc = jit(nopython=True)(numpy_choice1) - n = len(pop) - res = [cfunc(a) for i in range(n)] - self._check_results(pop, res) - dist = [cfunc(a) for i in range(n * 100)] - self._check_dist(pop, dist) - - def test_choice_scalar_1(self): - """ - Test choice(int) - """ - n = 50 - pop = list(range(n)) - self._check_choice_1(n, pop) - - def test_choice_array_1(self): - """ - Test choice(array) - """ - pop = np.arange(50) * 2 + 100 - self._check_choice_1(pop, pop) - - def _check_array_results(self, func, pop, replace=True): - """ - Check array results produced by *func* and their distribution. 
- """ - n = len(pop) - res = list(func().flat) - self._check_results(pop, res, replace) - dist = self._accumulate_array_results(func, n * 100) - self._check_dist(pop, dist) - - def _check_choice_2(self, a, pop): - """ - Check choice(a, size) against pop. - """ - cfunc = jit(nopython=True)(numpy_choice2) - n = len(pop) - # Final sizes should be large enough, so as to stress - # replacement - sizes = [n - 10, (3, (n - 1) // 3), n * 10] - - for size in sizes: - # Check result shape - res = cfunc(a, size) - expected_shape = size if isinstance(size, tuple) else (size,) - self.assertEqual(res.shape, expected_shape) - # Check results and their distribution - self._check_array_results(lambda: cfunc(a, size), pop) - - def test_choice_scalar_2(self): - """ - Test choice(int, size) - """ - n = 50 - pop = np.arange(n) - self._check_choice_2(n, pop) - - def test_choice_array_2(self): - """ - Test choice(array, size) - """ - pop = np.arange(50) * 2 + 100 - self._check_choice_2(pop, pop) - - def _check_choice_3(self, a, pop): - """ - Check choice(a, size, replace) against pop. 
- """ - cfunc = jit(nopython=True)(numpy_choice3) - n = len(pop) - # Final sizes should be close but slightly <= n, so as to stress - # replacement (or not) - sizes = [n - 10, (3, (n - 1) // 3)] - replaces = [True, False] - - # Check result shapes - for size in sizes: - for replace in [True, False]: - res = cfunc(a, size, replace) - expected_shape = size if isinstance(size, tuple) else (size,) - self.assertEqual(res.shape, expected_shape) - - # Check results for replace=True - for size in sizes: - self._check_array_results(lambda: cfunc(a, size, True), pop) - # Check results for replace=False - for size in sizes: - self._check_array_results(lambda: cfunc(a, size, False), pop, False) - - # Can't ask for more samples than population size with replace=False - for size in [n + 1, (3, n // 3 + 1)]: - with self.assertRaises(ValueError): - cfunc(a, size, False) - - def test_choice_scalar_3(self): - """ - Test choice(int, size, replace) - """ - n = 50 - pop = np.arange(n) - self._check_choice_3(n, pop) - - def test_choice_array_3(self): - """ - Test choice(array, size, replace) - """ - pop = np.arange(50) * 2 + 100 - self._check_choice_3(pop, pop) - - -class TestRandomMultinomial(BaseTest): - """ - Test np.random.multinomial. - """ - # A biased dice - pvals = np.array([1, 1, 1, 2, 3, 1], dtype=np.float64) - pvals /= pvals.sum() - - def _check_sample(self, n, pvals, sample): - """ - Check distribution of some samples. 
- """ - self.assertIsInstance(sample, np.ndarray) - self.assertEqual(sample.shape, (len(pvals),)) - self.assertIn(sample.dtype, (np.dtype('int32'), np.dtype('int64'))) - # Statistical properties - self.assertEqual(sample.sum(), n) - for p, nexp in zip(pvals, sample): - self.assertGreaterEqual(nexp, 0) - self.assertLessEqual(nexp, n) - pexp = float(nexp) / n - self.assertGreaterEqual(pexp, p * 0.5) - self.assertLessEqual(pexp, p * 2.0) - - def test_multinomial_2(self): - """ - Test multinomial(n, pvals) - """ - cfunc = jit(nopython=True)(numpy_multinomial2) - n, pvals = 1000, self.pvals - res = cfunc(n, pvals) - self._check_sample(n, pvals, res) - # pvals as list - pvals = list(pvals) - res = cfunc(n, pvals) - self._check_sample(n, pvals, res) - # A case with extreme probabilities - n = 1000000 - pvals = np.array([1, 0, n // 100, 1], dtype=np.float64) - pvals /= pvals.sum() - res = cfunc(n, pvals) - self._check_sample(n, pvals, res) - - def test_multinomial_3_int(self): - """ - Test multinomial(n, pvals, size: int) - """ - cfunc = jit(nopython=True)(numpy_multinomial3) - n, pvals = 1000, self.pvals - k = 10 - res = cfunc(n, pvals, k) - self.assertEqual(res.shape[0], k) - for sample in res: - self._check_sample(n, pvals, sample) - - def test_multinomial_3_tuple(self): - """ - Test multinomial(n, pvals, size: tuple) - """ - cfunc = jit(nopython=True)(numpy_multinomial3) - n, pvals = 1000, self.pvals - k = (3, 4) - res = cfunc(n, pvals, k) - self.assertEqual(res.shape[:-1], k) - for sample in res.reshape((-1, res.shape[-1])): - self._check_sample(n, pvals, sample) - - - -@jit(nopython=True, nogil=True) -def py_extract_randomness(seed, out): - if seed != 0: - random.seed(seed) - for i in range(out.size): - out[i] = random.getrandbits(32) - -_randint_limit = 1 << 32 - -@jit(nopython=True, nogil=True) -def np_extract_randomness(seed, out): - if seed != 0: - np.random.seed(seed) - s = 0 - for i in range(out.size): - out[i] = np.random.randint(_randint_limit) - - - -class 
ConcurrencyBaseTest(TestCase): - - # Enough iterations for: - # 1. Mersenne-Twister state shuffles to occur (once every 624) - # 2. Race conditions to be plausible - # 3. Nice statistical properties to emerge - _extract_iterations = 100000 - - def setUp(self): - # Warm up, to avoid compiling in the threads - args = (42, self._get_output(1)) - py_extract_randomness(*args) - np_extract_randomness(*args) - - def _get_output(self, size): - return np.zeros(size, dtype=np.uint32) - - def check_output(self, out): - """ - Check statistical properties of output. - """ - # Output should follow a uniform distribution in [0, 1<<32) - expected_avg = 1 << 31 - expected_std = (1 << 32) / np.sqrt(12) - rtol = 0.05 # given enough iterations - np.testing.assert_allclose(out.mean(), expected_avg, rtol=rtol) - np.testing.assert_allclose(out.std(), expected_std, rtol=rtol) - - def check_several_outputs(self, results, same_expected): - # Outputs should have the expected statistical properties - # (an unitialized PRNG or a PRNG whose internal state was - # corrupted by a race condition could produce bogus randomness) - for out in results: - self.check_output(out) - - # Check all threads gave either the same sequence or - # distinct sequences - if same_expected: - expected_distinct = 1 - else: - expected_distinct = len(results) - - heads = {tuple(out[:5]) for out in results} - tails = {tuple(out[-5:]) for out in results} - sums = {out.sum() for out in results} - self.assertEqual(len(heads), expected_distinct, heads) - self.assertEqual(len(tails), expected_distinct, tails) - self.assertEqual(len(sums), expected_distinct, sums) - - -class TestThreads(ConcurrencyBaseTest): - """ - Check the PRNG behaves well with threads. - """ - - def extract_in_threads(self, nthreads, extract_randomness, seed): - """ - Run *nthreads* threads extracting randomness with the given *seed* - (no seeding if 0). 
- """ - results = [self._get_output(self._extract_iterations) - for i in range(nthreads + 1)] - - def target(i): - # The PRNG will be seeded in thread - extract_randomness(seed=seed, out=results[i]) - - threads = [threading.Thread(target=target, args=(i,)) - for i in range(nthreads)] - - for th in threads: - th.start() - # Exercise main thread as well - target(nthreads) - for th in threads: - th.join() - - return results - - def check_thread_safety(self, extract_randomness): - """ - When initializing the PRNG the same way, each thread - should produce the same sequence of random numbers, - using independent states, regardless of parallel - execution. - """ - # Note the seed value doesn't matter, as long as it's - # the same for all threads - results = self.extract_in_threads(15, extract_randomness, seed=42) - - # All threads gave the same sequence - self.check_several_outputs(results, same_expected=True) - - def check_implicit_initialization(self, extract_randomness): - """ - The PRNG in new threads should be implicitly initialized with - system entropy, if seed() wasn't called. - """ - results = self.extract_in_threads(4, extract_randomness, seed=0) - - # All threads gave a different, valid random sequence - self.check_several_outputs(results, same_expected=False) - - def test_py_thread_safety(self): - self.check_thread_safety(py_extract_randomness) - - def test_np_thread_safety(self): - self.check_thread_safety(np_extract_randomness) - - def test_py_implicit_initialization(self): - self.check_implicit_initialization(py_extract_randomness) - - def test_np_implicit_initialization(self): - self.check_implicit_initialization(np_extract_randomness) - - -@unittest.skipIf(os.name == 'nt', "Windows is not affected by fork() issues") -class TestProcesses(ConcurrencyBaseTest): - """ - Check the PRNG behaves well in child processes. 
- """ - - # Avoid nested multiprocessing AssertionError - # ("daemonic processes are not allowed to have children") - _numba_parallel_test_ = False - - - def extract_in_processes(self, nprocs, extract_randomness): - """ - Run *nprocs* processes extracting randomness - without explicit seeding. - """ - q = multiprocessing.Queue() - results = [] - - def target_inner(): - out = self._get_output(self._extract_iterations) - extract_randomness(seed=0, out=out) - return out - - def target(): - try: - out = target_inner() - q.put(out) - except Exception as e: - # Ensure an exception in a child gets reported - # in the parent. - q.put(e) - raise - - procs = [multiprocessing.Process(target=target) - for i in range(nprocs)] - for p in procs: - p.start() - # Need to dequeue before joining, otherwise the large size of the - # enqueued objects will lead to deadlock. - for i in range(nprocs): - results.append(q.get(timeout=5)) - for p in procs: - p.join() - - # Exercise parent process as well; this will detect if the - # same state was reused for one of the children. - results.append(target_inner()) - for res in results: - if isinstance(res, Exception): - self.fail("Exception in child: %s" % (res,)) - - return results - - def check_implicit_initialization(self, extract_randomness): - """ - The PRNG in new processes should be implicitly initialized - with system entropy, to avoid reproducing the same sequences. 
- """ - results = self.extract_in_processes(2, extract_randomness) - - # All processes gave a different, valid random sequence - self.check_several_outputs(results, same_expected=False) - - def test_py_implicit_initialization(self): - self.check_implicit_initialization(py_extract_randomness) - - def test_np_implicit_initialization(self): - self.check_implicit_initialization(np_extract_randomness) - - -if __name__ == "__main__": - unittest.main() diff --git a/numba/numba/tests/test_range.py b/numba/numba/tests/test_range.py deleted file mode 100644 index ef73243e6..000000000 --- a/numba/numba/tests/test_range.py +++ /dev/null @@ -1,157 +0,0 @@ -from __future__ import print_function - -import numba.unittest_support as unittest - -import sys - -import numpy - -from numba.compiler import compile_isolated -from numba import types -from .support import tag - - -def loop1(n): - s = 0 - for i in range(n): - s += i - return s - - -def loop2(a, b): - s = 0 - for i in range(a, b): - s += i - return s - - -def loop3(a, b, c): - s = 0 - for i in range(a, b, c): - s += i - return s - - -def xrange_usecase(n): - s = 0 - for i in xrange(n): - s += i - return s - -def range_len1(n): - return len(range(n)) - -def range_len2(a, b): - return len(range(a, b)) - -def range_len3(a, b, c): - return len(range(a, b, c)) - -from numba.targets.rangeobj import range_iter_len -def range_iter_len1(a): - return range_iter_len(iter(range(a))) - -def range_iter_len2(a): - return range_iter_len(iter(a)) - -class TestRange(unittest.TestCase): - - @tag('important') - def test_loop1_int16(self): - pyfunc = loop1 - cres = compile_isolated(pyfunc, [types.int16]) - cfunc = cres.entry_point - self.assertTrue(cfunc(5), pyfunc(5)) - - @tag('important') - def test_loop2_int16(self): - pyfunc = loop2 - cres = compile_isolated(pyfunc, [types.int16, types.int16]) - cfunc = cres.entry_point - self.assertTrue(cfunc(1, 6), pyfunc(1, 6)) - - @tag('important') - def test_loop3_int32(self): - pyfunc = loop3 - cres = 
compile_isolated(pyfunc, [types.int32] * 3) - cfunc = cres.entry_point - arglist = [ - (1, 2, 1), - (2, 8, 3), - (-10, -11, -10), - (-10, -10, -2), - ] - for args in arglist: - self.assertEqual(cfunc(*args), pyfunc(*args)) - - @tag('important') - @unittest.skipIf(sys.version_info >= (3,), "test is Python 2-specific") - def test_xrange(self): - pyfunc = xrange_usecase - cres = compile_isolated(pyfunc, (types.int32,)) - cfunc = cres.entry_point - self.assertEqual(cfunc(5), pyfunc(5)) - - @tag('important') - def test_range_len1(self): - pyfunc = range_len1 - typelist = [types.int16, types.int32, types.int64] - arglist = [5, 0, -5] - for typ in typelist: - cres = compile_isolated(pyfunc, [typ]) - cfunc = cres.entry_point - for arg in arglist: - self.assertEqual(cfunc(typ(arg)), pyfunc(typ(arg))) - - @tag('important') - def test_range_len2(self): - pyfunc = range_len2 - typelist = [types.int16, types.int32, types.int64] - arglist = [(1,6), (6,1), (-5, -1)] - for typ in typelist: - cres = compile_isolated(pyfunc, [typ] * 2) - cfunc = cres.entry_point - for args in arglist: - args_ = tuple(typ(x) for x in args) - self.assertEqual(cfunc(*args_), pyfunc(*args_)) - - @tag('important') - def test_range_len3(self): - pyfunc = range_len3 - typelist = [types.int16, types.int32, types.int64] - arglist = [ - (1, 2, 1), - (2, 8, 3), - (-10, -11, -10), - (-10, -10, -2), - ] - for typ in typelist: - cres = compile_isolated(pyfunc, [typ] * 3) - cfunc = cres.entry_point - for args in arglist: - args_ = tuple(typ(x) for x in args) - self.assertEqual(cfunc(*args_), pyfunc(*args_)) - - @tag('important') - def test_range_iter_len1(self): - range_func = range_len1 - range_iter_func = range_iter_len1 - typelist = [types.int16, types.int32, types.int64] - arglist = [5, 0, -5] - for typ in typelist: - cres = compile_isolated(range_iter_func, [typ]) - cfunc = cres.entry_point - for arg in arglist: - self.assertEqual(cfunc(typ(arg)), range_func(typ(arg))) - - @tag('important') - def 
test_range_iter_list(self): - range_iter_func = range_iter_len2 - cres = compile_isolated(range_iter_func, [types.List(types.intp)]) - cfunc = cres.entry_point - arglist = [1, 2, 3, 4, 5] - self.assertEqual(cfunc(arglist), len(arglist)) - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/tests/test_recarray_usecases.py b/numba/numba/tests/test_recarray_usecases.py deleted file mode 100644 index 26d153dbc..000000000 --- a/numba/numba/tests/test_recarray_usecases.py +++ /dev/null @@ -1,150 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import sys - -import numpy as np - -from numba import numpy_support, types -from numba.compiler import compile_isolated -from numba import unittest_support as unittest -from .support import captured_stdout, tag, TestCase - - -def usecase1(arr1, arr2): - """Base on https://github.com/numba/numba/issues/370 - - Modified to add test-able side effect. - """ - n1 = arr1.size - n2 = arr2.size - - for i1 in range(n1): - st1 = arr1[i1] - for i2 in range(n2): - st2 = arr2[i2] - st2.row += st1.p * st2.p + st1.row - st1.col - - st1.p += st2.p - st1.col -= st2.col - - -def usecase2(x, N): - """ - Base on test1 of https://github.com/numba/numba/issues/381 - """ - for k in range(N): - y = x[k] - print(y.f1, y.s1, y.f2) - - -def usecase3(x, N): - """ - Base on test2 of https://github.com/numba/numba/issues/381 - """ - for k in range(N): - print(x.f1[k], x.s1[k], x.f2[k]) - - -def usecase4(x, N): - """ - Base on test3 of https://github.com/numba/numba/issues/381 - """ - for k in range(N): - y = x[k] - print(y.f1, x.s1[k], y.f2) - - -def usecase5(x, N): - """ - Base on test4 of https://github.com/numba/numba/issues/381 - """ - for k in range(N): - print(x[k].f1, x.s1[k], x[k].f2) - - -class TestRecordUsecase(TestCase): - - def setUp(self): - fields = [('f1', ' 0: - y = z + w - else: - y = 0 - a = 2 * x - return a < b - -def null_func(a,b,c,d): - False - -def findLhsAssign(func_ir, var): - for label, 
block in func_ir.blocks.items(): - for i, inst in enumerate(block.body): - if isinstance(inst, ir.Assign) and inst.target.name==var: - return True - - return False - -class TestRemoveDead(unittest.TestCase): - def compile_parallel(self, func, arg_types): - fast_pflags = Flags() - fast_pflags.set('auto_parallel', cpu.ParallelOptions(True)) - fast_pflags.set('nrt') - fast_pflags.set('fastmath') - return compile_isolated(func, arg_types, flags=fast_pflags).entry_point - - def test1(self): - typingctx = typing.Context() - targetctx = cpu.CPUContext(typingctx) - test_ir = compiler.run_frontend(test_will_propagate) - #print("Num blocks = ", len(test_ir.blocks)) - #print(test_ir.dump()) - with cpu_target.nested_context(typingctx, targetctx): - typingctx.refresh() - targetctx.refresh() - args = (types.int64, types.int64, types.int64) - typemap, return_type, calltypes = compiler.type_inference_stage(typingctx, test_ir, args, None) - #print("typemap = ", typemap) - #print("return_type = ", return_type) - type_annotation = type_annotations.TypeAnnotation( - func_ir=test_ir, - typemap=typemap, - calltypes=calltypes, - lifted=(), - lifted_from=None, - args=args, - return_type=return_type, - html_output=config.HTML) - remove_dels(test_ir.blocks) - in_cps, out_cps = copy_propagate(test_ir.blocks, typemap) - apply_copy_propagate(test_ir.blocks, in_cps, get_name_var_table(test_ir.blocks), typemap, calltypes) - - remove_dead(test_ir.blocks, test_ir.arg_names, test_ir) - self.assertFalse(findLhsAssign(test_ir, "x")) - - def test2(self): - def call_np_random_seed(): - np.random.seed(2) - - def seed_call_exists(func_ir): - for inst in func_ir.blocks[0].body: - if (isinstance(inst, ir.Assign) and - isinstance(inst.value, ir.Expr) and - inst.value.op == 'call' and - func_ir.get_definition(inst.value.func).attr == 'seed'): - return True - return False - - test_ir = compiler.run_frontend(call_np_random_seed) - remove_dead(test_ir.blocks, test_ir.arg_names, test_ir) - 
self.assertTrue(seed_call_exists(test_ir)) - - def run_array_index_test(self, func): - A1 = np.arange(6).reshape(2,3) - A2 = A1.copy() - i = 0 - pfunc = self.compile_parallel(func, (numba.typeof(A1), numba.typeof(i))) - - func(A1, i) - pfunc(A2, i) - np.testing.assert_array_equal(A1, A2) - - def test_alias_ravel(self): - def func(A, i): - B = A.ravel() - B[i] = 3 - - self.run_array_index_test(func) - - def test_alias_flat(self): - def func(A, i): - B = A.flat - B[i] = 3 - - self.run_array_index_test(func) - - def test_alias_transpose1(self): - def func(A, i): - B = A.T - B[i,0] = 3 - - self.run_array_index_test(func) - - def test_alias_transpose2(self): - def func(A, i): - B = A.transpose() - B[i,0] = 3 - - self.run_array_index_test(func) - - def test_alias_transpose3(self): - def func(A, i): - B = np.transpose(A) - B[i,0] = 3 - - self.run_array_index_test(func) - - @needs_blas - def test_alias_ctypes(self): - # use xxnrm2 to test call a C function with ctypes - from numba.targets.linalg import _BLAS - xxnrm2 = _BLAS().numba_xxnrm2(types.float64) - - def remove_dead_xxnrm2(rhs, lives, call_list): - if call_list == [xxnrm2]: - return rhs.args[4].name not in lives - return False - - # adding this handler has no-op effect since this function won't match - # anything else but it's a bit cleaner to save the state and recover - old_remove_handlers = remove_call_handlers[:] - remove_call_handlers.append(remove_dead_xxnrm2) - - def func(ret): - a = np.ones(4) - xxnrm2(100, 4, a.ctypes, 1, ret.ctypes) - - A1 = np.zeros(1) - A2 = A1.copy() - - try: - pfunc = self.compile_parallel(func, (numba.typeof(A1),)) - numba.njit(func)(A1) - pfunc(A2) - finally: - # recover global state - remove_call_handlers[:] = old_remove_handlers - - self.assertEqual(A1[0], A2[0]) - - def test_alias_reshape1(self): - def func(A, i): - B = np.reshape(A, (3,2)) - B[i,0] = 3 - - self.run_array_index_test(func) - - def test_alias_reshape2(self): - def func(A, i): - B = A.reshape(3,2) - B[i,0] = 3 - - 
self.run_array_index_test(func) - -if __name__ == "__main__": - unittest.main() diff --git a/numba/numba/tests/test_return_values.py b/numba/numba/tests/test_return_values.py deleted file mode 100644 index 41b4b6d56..000000000 --- a/numba/numba/tests/test_return_values.py +++ /dev/null @@ -1,80 +0,0 @@ -""" -Test return values -""" - -from __future__ import print_function - -import math - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba.utils import PYVERSION -from numba import types -from numba.errors import TypingError - - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") -no_pyobj_flags = Flags() - - -def get_nopython_func(): - return abs - -def get_pyobj_func(): - return open - -def get_module_func(): - return math.floor - - -class TestReturnValues(unittest.TestCase): - - def test_nopython_func(self, flags=enable_pyobj_flags): - # Test returning func that is supported in nopython mode - pyfunc = get_nopython_func - cr = compile_isolated(pyfunc, (), flags=flags) - cfunc = cr.entry_point - if flags == enable_pyobj_flags: - result = cfunc() - self.assertEqual(result, abs) - else: - result = cfunc() - - def test_nopython_func_npm(self): - with self.assertRaises(TypeError): - self.test_nopython_func(flags=no_pyobj_flags) - - def test_pyobj_func(self, flags=enable_pyobj_flags): - # Test returning func that is only supported in object mode - pyfunc = get_pyobj_func - cr = compile_isolated(pyfunc, (), flags=flags) - cfunc = cr.entry_point - if flags == enable_pyobj_flags: - result = cfunc() - self.assertEqual(result, open) - else: - result = cfunc() - - def test_pyobj_func_npm(self): - with self.assertRaises(TypingError): - self.test_pyobj_func(flags=no_pyobj_flags) - - def test_module_func(self, flags=enable_pyobj_flags): - # Test returning imported func that is only supported in object mode - pyfunc = get_module_func - cr = compile_isolated(pyfunc, (), flags=flags) - cfunc = 
cr.entry_point - if flags == enable_pyobj_flags: - result = cfunc() - self.assertEqual(result, math.floor) - else: - result = cfunc() - - def test_module_func_npm(self): - with self.assertRaises(TypeError): - self.test_module_func(flags=no_pyobj_flags) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_runtests.py b/numba/numba/tests/test_runtests.py deleted file mode 100755 index 48b64a42e..000000000 --- a/numba/numba/tests/test_runtests.py +++ /dev/null @@ -1,110 +0,0 @@ -from __future__ import division, print_function - -import sys -import subprocess - -from numba import unittest_support as unittest -from numba import cuda - - -class TestCase(unittest.TestCase): - """These test cases are meant to test the Numba test infrastructure itself. - Therefore, the logic used here shouldn't use numba.testing, but only the upstream - unittest, and run the numba test suite only in a subprocess.""" - - def get_testsuite_listing(self, args): - cmd = ['python', '-m', 'numba.runtests', '-l'] + list(args) - lines = subprocess.check_output(cmd).decode('UTF-8').splitlines() - lines = [line for line in lines if line.strip()] - return lines - - def check_listing_prefix(self, prefix): - listing = self.get_testsuite_listing([prefix]) - for ln in listing[:-1]: - errmsg = '{!r} not startswith {!r}'.format(ln, prefix) - self.assertTrue(ln.startswith(prefix), msg=errmsg) - - def check_testsuite_size(self, args, minsize, maxsize=None): - """ - Check that the reported numbers of tests are in the - (minsize, maxsize) range, or are equal to minsize if maxsize is None. - """ - lines = self.get_testsuite_listing(args) - last_line = lines[-1] - self.assertTrue(last_line.endswith('tests found')) - number = int(last_line.split(' ')[0]) - # There may be some "skipped" messages at the beginning, - # so do an approximate check. 
- try: - self.assertIn(len(lines), range(number + 1, number + 10)) - if maxsize is None: - self.assertEqual(number, minsize) - else: - self.assertGreaterEqual(number, minsize) - self.assertLessEqual(number, maxsize) - except AssertionError: - # catch any error in the above, chances are test discovery - # has failed due to a syntax error or import problem. - # run the actual test suite to try and find the cause to - # inject into the error message for the user - try: - cmd = ['python', '-m', 'numba.runtests'] + list(args) - subprocess.check_output(cmd, stderr=subprocess.STDOUT) - except Exception as e: - msg = ("Test discovery has failed, the reported cause of the " - " failure is:\n\n:") - indented = '\n'.join(['\t' + x for x in - e.output.decode('UTF-8').splitlines()]) - raise RuntimeError(msg + indented) - return lines - - def check_all(self, ids): - lines = self.check_testsuite_size(ids, 5000, 8000) - # CUDA should be included by default - self.assertTrue(any('numba.cuda.tests.' in line for line in lines)) - # As well as subpackage - self.assertTrue(any('numba.tests.npyufunc.test_' in line for line in lines)) - - def test_default(self): - self.check_all([]) - - def test_all(self): - self.check_all(['numba.tests']) - - def test_cuda(self): - # Even without CUDA enabled, there is at least one test - # (in numba.cuda.tests.nocuda) - self.check_testsuite_size(['numba.cuda.tests'], 1, 470) - - @unittest.skipIf(not cuda.is_available(), "NO CUDA") - def test_cuda_submodules(self): - self.check_listing_prefix('numba.cuda.tests.cudadrv') - self.check_listing_prefix('numba.cuda.tests.cudapy') - self.check_listing_prefix('numba.cuda.tests.nocuda') - self.check_listing_prefix('numba.cuda.tests.cudasim') - - def test_module(self): - self.check_testsuite_size(['numba.tests.test_utils'], 3, 15) - self.check_testsuite_size(['numba.tests.test_nested_calls'], 5, 15) - # Several modules - self.check_testsuite_size(['numba.tests.test_nested_calls', - 'numba.tests.test_utils'], 13, 
30) - - def test_subpackage(self): - self.check_testsuite_size(['numba.tests.npyufunc'], 50, 200) - - @unittest.skipIf(sys.version_info < (3, 4), - "'--random' only supported on Python 3.4 or higher") - def test_random(self): - self.check_testsuite_size(['--random', '0.1', 'numba.tests.npyufunc'], - 5, 20) - - @unittest.skipIf(sys.version_info < (3, 4), - "'--tags' only supported on Python 3.4 or higher") - def test_tags(self): - self.check_testsuite_size(['--tags', 'important', 'numba.tests.npyufunc'], - 20, 50) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_serialize.py b/numba/numba/tests/test_serialize.py deleted file mode 100644 index 19efeb643..000000000 --- a/numba/numba/tests/test_serialize.py +++ /dev/null @@ -1,177 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import contextlib -import gc -import pickle -import subprocess -import sys - -from numba import unittest_support as unittest -from numba.errors import TypingError -from numba.targets import registry -from .support import TestCase, tag -from .serialize_usecases import * - - -class TestDispatcherPickling(TestCase): - - def run_with_protocols(self, meth, *args, **kwargs): - for proto in range(pickle.HIGHEST_PROTOCOL + 1): - meth(proto, *args, **kwargs) - - @contextlib.contextmanager - def simulate_fresh_target(self): - dispatcher_cls = registry.dispatcher_registry['cpu'] - old_descr = dispatcher_cls.targetdescr - # Simulate fresh targetdescr - dispatcher_cls.targetdescr = type(dispatcher_cls.targetdescr)() - try: - yield - finally: - # Be sure to reinstantiate old descriptor, otherwise other - # objects may be out of sync. 
- dispatcher_cls.targetdescr = old_descr - - def check_call(self, proto, func, expected_result, args): - def check_result(func): - if (isinstance(expected_result, type) - and issubclass(expected_result, Exception)): - self.assertRaises(expected_result, func, *args) - else: - self.assertPreciseEqual(func(*args), expected_result) - - # Control - check_result(func) - pickled = pickle.dumps(func, proto) - with self.simulate_fresh_target(): - new_func = pickle.loads(pickled) - check_result(new_func) - - @tag('important') - def test_call_with_sig(self): - self.run_with_protocols(self.check_call, add_with_sig, 5, (1, 4)) - # Compilation has been disabled => float inputs will be coerced to int - self.run_with_protocols(self.check_call, add_with_sig, 5, (1.2, 4.2)) - - @tag('important') - def test_call_without_sig(self): - self.run_with_protocols(self.check_call, add_without_sig, 5, (1, 4)) - self.run_with_protocols(self.check_call, add_without_sig, 5.5, (1.2, 4.3)) - # Object mode is enabled - self.run_with_protocols(self.check_call, add_without_sig, "abc", ("a", "bc")) - - @tag('important') - def test_call_nopython(self): - self.run_with_protocols(self.check_call, add_nopython, 5.5, (1.2, 4.3)) - # Object mode is disabled - self.run_with_protocols(self.check_call, add_nopython, TypingError, ("a", "bc")) - - def test_call_nopython_fail(self): - # Compilation fails - self.run_with_protocols(self.check_call, add_nopython_fail, TypingError, (1, 2)) - - def test_call_objmode_with_global(self): - self.run_with_protocols(self.check_call, get_global_objmode, 7.5, (2.5,)) - - def test_call_closure(self): - inner = closure(1) - self.run_with_protocols(self.check_call, inner, 6, (2, 3)) - - def check_call_closure_with_globals(self, **jit_args): - inner = closure_with_globals(3.0, **jit_args) - self.run_with_protocols(self.check_call, inner, 7.0, (4.0,)) - - def test_call_closure_with_globals_nopython(self): - self.check_call_closure_with_globals(nopython=True) - - def 
test_call_closure_with_globals_objmode(self): - self.check_call_closure_with_globals(forceobj=True) - - def test_call_closure_calling_other_function(self): - inner = closure_calling_other_function(3.0) - self.run_with_protocols(self.check_call, inner, 11.0, (4.0, 6.0)) - - def test_call_closure_calling_other_closure(self): - inner = closure_calling_other_closure(3.0) - self.run_with_protocols(self.check_call, inner, 8.0, (4.0,)) - - def test_call_dyn_func(self): - # Check serializing a dynamically-created function - self.run_with_protocols(self.check_call, dyn_func, 36, (6,)) - - def test_call_dyn_func_objmode(self): - # Same with an object mode function - self.run_with_protocols(self.check_call, dyn_func_objmode, 36, (6,)) - - def test_renamed_module(self): - # Issue #1559: using a renamed module (e.g. `import numpy as np`) - # should not fail serializing - expected = get_renamed_module(0.0) - self.run_with_protocols(self.check_call, get_renamed_module, - expected, (0.0,)) - - def test_call_generated(self): - self.run_with_protocols(self.check_call, generated_add, - 46, (1, 2)) - self.run_with_protocols(self.check_call, generated_add, - 1j + 7, (1j, 2)) - - @tag('important') - def test_other_process(self): - """ - Check that reconstructing doesn't depend on resources already - instantiated in the original process. - """ - func = closure_calling_other_closure(3.0) - pickled = pickle.dumps(func) - code = """if 1: - import pickle - - data = {pickled!r} - func = pickle.loads(data) - res = func(4.0) - assert res == 8.0, res - """.format(**locals()) - subprocess.check_call([sys.executable, "-c", code]) - - @tag('important') - def test_reuse(self): - """ - Check that deserializing the same function multiple times re-uses - the same dispatcher object. - - Note that "same function" is intentionally under-specified. 
- """ - func = closure(5) - pickled = pickle.dumps(func) - func2 = closure(6) - pickled2 = pickle.dumps(func2) - - f = pickle.loads(pickled) - g = pickle.loads(pickled) - h = pickle.loads(pickled2) - self.assertIs(f, g) - self.assertEqual(f(2, 3), 10) - g.disable_compile() - self.assertEqual(g(2, 4), 11) - - self.assertIsNot(f, h) - self.assertEqual(h(2, 3), 11) - - # Now make sure the original object doesn't exist when deserializing - func = closure(7) - func(42, 43) - pickled = pickle.dumps(func) - del func - gc.collect() - - f = pickle.loads(pickled) - g = pickle.loads(pickled) - self.assertIs(f, g) - self.assertEqual(f(2, 3), 12) - g.disable_compile() - self.assertEqual(g(2, 4), 13) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_sets.py b/numba/numba/tests/test_sets.py deleted file mode 100644 index 9fc55b8f8..000000000 --- a/numba/numba/tests/test_sets.py +++ /dev/null @@ -1,816 +0,0 @@ -from __future__ import print_function - -import numba.unittest_support as unittest - -from collections import namedtuple -import contextlib -import itertools -import math -import random -import sys - -import numpy as np - -from numba.compiler import compile_isolated, Flags -from numba import jit, types -import numba.unittest_support as unittest -from .support import (TestCase, enable_pyobj_flags, MemoryLeakMixin, tag, - compile_function) - - -Point = namedtuple('Point', ('a', 'b')) - - -def _build_set_literal_usecase(code, args): - code = code % {'initializer': ', '.join(repr(arg) for arg in args)} - return compile_function('build_set', code, globals()) - -def set_literal_return_usecase(args): - code = """if 1: - def build_set(): - return {%(initializer)s} - """ - return _build_set_literal_usecase(code, args) - -def set_literal_convert_usecase(args): - code = """if 1: - def build_set(): - my_set = {%(initializer)s} - return list(my_set) - """ - return _build_set_literal_usecase(code, args) - - -def empty_constructor_usecase(): - s = set() 
- s.add(1) - return len(s) - -def constructor_usecase(arg): - s = set(arg) - return len(s) - -def iterator_usecase(arg): - s = set(arg) - l = [] - for v in s: - l.append(v) - return l - -def update_usecase(a, b, c): - s = set() - s.update(a) - s.update(b) - s.update(c) - return list(s) - -def remove_usecase(a, b): - s = set(a) - for v in b: - s.remove(v) - return list(s) - -def discard_usecase(a, b): - s = set(a) - for v in b: - s.discard(v) - return list(s) - -def add_discard_usecase(a, u, v): - s = set(a) - for i in range(1000): - s.add(u) - s.discard(v) - return list(s) - -def pop_usecase(a): - s = set(a) - l = [] - while len(s) > 0: - l.append(s.pop()) - return l - -def contains_usecase(a, b): - s = set(a) - l = [] - for v in b: - l.append(v in s) - return l - -def difference_update_usecase(a, b): - s = set(a) - s.difference_update(set(b)) - return list(s) - -def intersection_update_usecase(a, b): - s = set(a) - s.intersection_update(set(b)) - return list(s) - -def symmetric_difference_update_usecase(a, b): - s = set(a) - s.symmetric_difference_update(set(b)) - return list(s) - -def isdisjoint_usecase(a, b): - return set(a).isdisjoint(set(b)) - -def issubset_usecase(a, b): - return set(a).issubset(set(b)) - -def issuperset_usecase(a, b): - return set(a).issuperset(set(b)) - -def clear_usecase(a): - s = set(a) - s.clear() - return len(s), list(s) - -def copy_usecase(a): - s = set(a) - ss = s.copy() - s.pop() - return len(ss), list(ss) - -def copy_usecase_empty(a): - s = set(a) - s.clear() - ss = s.copy() - s.add(42) - return len(ss), list(ss) - -def copy_usecase_deleted(a, b): - s = set(a) - s.remove(b) - ss = s.copy() - s.pop() - return len(ss), list(ss) - -def difference_usecase(a, b): - sa = set(a) - s = sa.difference(set(b)) - return list(s) - -def intersection_usecase(a, b): - sa = set(a) - s = sa.intersection(set(b)) - return list(s) - -def symmetric_difference_usecase(a, b): - sa = set(a) - s = sa.symmetric_difference(set(b)) - return list(s) - -def 
union_usecase(a, b): - sa = set(a) - s = sa.union(set(b)) - return list(s) - -def set_return_usecase(a): - s = set(a) - return s - - -def make_operator_usecase(op): - code = """if 1: - def operator_usecase(a, b): - s = set(a) %(op)s set(b) - return list(s) - """ % dict(op=op) - return compile_function('operator_usecase', code, globals()) - -def make_inplace_operator_usecase(op): - code = """if 1: - def inplace_operator_usecase(a, b): - sa = set(a) - sb = set(b) - sc = sa - sc %(op)s sb - return list(sc), list(sa) - """ % dict(op=op) - return compile_function('inplace_operator_usecase', code, globals()) - -def make_comparison_usecase(op): - code = """if 1: - def comparison_usecase(a, b): - return set(a) %(op)s set(b) - """ % dict(op=op) - return compile_function('comparison_usecase', code, globals()) - - -def noop(x): - pass - -def unbox_usecase(x): - """ - Expect a set of numbers - """ - res = 0 - for v in x: - res += v - return res - -def unbox_usecase2(x): - """ - Expect a set of tuples - """ - res = 0 - for v in x: - res += len(v) - return res - -def unbox_usecase3(x): - """ - Expect a (number, set of numbers) tuple. - """ - a, b = x - res = a - for v in b: - res += v - return res - -def unbox_usecase4(x): - """ - Expect a (number, set of tuples) tuple. - """ - a, b = x - res = a - for v in b: - res += len(v) - return res - - -def reflect_simple(sa, sb): - sa.add(42) - sa.update(sb) - return sa, len(sa), len(sb) - -def reflect_conditional(sa, sb): - # `sa` may or may not actually reflect a Python set - if len(sb) > 1: - sa = set((11., 22., 33., 44.)) - sa.add(42.) 
- sa.update(sb) - # Combine with a non-reflected set (to check method typing) - sc = set((55., 66.)) - sa.symmetric_difference_update(sc) - return sa, len(sa), len(sb) - -def reflect_exception(s): - s.add(42) - raise ZeroDivisionError - -def reflect_dual(sa, sb): - sa.add(sb.pop()) - return sa is sb - - -def unique_usecase(src): - seen = set() - res = [] - for v in src: - if v not in seen: - seen.add(v) - res.append(v) - return res - - -needs_set_literals = unittest.skipIf(sys.version_info < (2, 7), - "set literals unavailable before Python 2.7") - - -class BaseTest(MemoryLeakMixin, TestCase): - - def setUp(self): - super(BaseTest, self).setUp() - self.rnd = random.Random(42) - - def _range(self, stop): - return np.arange(int(stop)) - - def _random_choice(self, seq, n): - """ - Choose *n* possibly duplicate items from sequence. - """ - l = [self.rnd.choice(list(seq)) for i in range(n)] - if isinstance(seq, np.ndarray): - return np.array(l, dtype=seq.dtype) - else: - return l - - def duplicates_array(self, n): - """ - Get a 1d array with many duplicate values. - """ - a = self._range(np.sqrt(n)) - return self._random_choice(a, n) - - def sparse_array(self, n): - """ - Get a 1d array with values spread around. 
- """ - # Note two calls to sparse_array() should generate reasonable overlap - a = self._range(n ** 1.3) - return self._random_choice(a, n) - - def _assert_equal_unordered(self, a, b): - if isinstance(a, tuple): - self.assertIsInstance(b, tuple) - for u, v in zip(a, b): - self._assert_equal_unordered(u, v) - elif isinstance(a, list): - self.assertIsInstance(b, list) - self.assertPreciseEqual(sorted(a), sorted(b)) - else: - self.assertPreciseEqual(a, b) - - def unordered_checker(self, pyfunc): - cfunc = jit(nopython=True)(pyfunc) - def check(*args): - expected = pyfunc(*args) - got = cfunc(*args) - self._assert_equal_unordered(expected, got) - return check - - -class TestSetLiterals(BaseTest): - - @needs_set_literals - def test_build_set(self, flags=enable_pyobj_flags): - pyfunc = set_literal_return_usecase((1, 2, 3, 2)) - self.run_nullary_func(pyfunc, flags=flags) - - @needs_set_literals - def test_build_heterogeneous_set(self, flags=enable_pyobj_flags): - pyfunc = set_literal_return_usecase((1, 2.0, 3j, 2)) - self.run_nullary_func(pyfunc, flags=flags) - pyfunc = set_literal_return_usecase((2.0, 2)) - got, expected = self.run_nullary_func(pyfunc, flags=flags) - - # Check that items are inserted in the right order (here the - # result will be {2.0}, not {2}) - # Note: http://bugs.python.org/issue26020 changed the previously invalid - # ordering. 
- if ((sys.version_info[:2] == (2, 7) and sys.version_info[2] >= 13) or - (sys.version_info[:2] == (3, 5) and sys.version_info[2] >= 3) or - (sys.version_info[:2] >= (3, 6))): - self.assertIs(type(got.pop()), type(expected.pop())) - - @tag('important') - @needs_set_literals - def test_build_set_nopython(self): - arg = list(self.sparse_array(50)) - pyfunc = set_literal_convert_usecase(arg) - cfunc = jit(nopython=True)(pyfunc) - - expected = pyfunc() - got = cfunc() - self.assertPreciseEqual(sorted(expected), sorted(got)) - - -class TestSets(BaseTest): - - def test_constructor(self): - pyfunc = empty_constructor_usecase - cfunc = jit(nopython=True)(pyfunc) - self.assertPreciseEqual(cfunc(), pyfunc()) - - pyfunc = constructor_usecase - cfunc = jit(nopython=True)(pyfunc) - def check(arg): - self.assertPreciseEqual(pyfunc(arg), cfunc(arg)) - - check((1, 2, 3, 2, 7)) - check(self.duplicates_array(200)) - check(self.sparse_array(200)) - - @tag('important') - def test_set_return(self): - pyfunc = set_return_usecase - cfunc = jit(nopython=True)(pyfunc) - - arg = (1, 2, 3, 2, 7) - self.assertEqual(cfunc(arg), set(arg)) - - @tag('important') - def test_iterator(self): - pyfunc = iterator_usecase - check = self.unordered_checker(pyfunc) - - check((1, 2, 3, 2, 7)) - check(self.duplicates_array(200)) - check(self.sparse_array(200)) - - @tag('important') - def test_update(self): - pyfunc = update_usecase - check = self.unordered_checker(pyfunc) - - a, b, c = (1, 2, 4, 9), (2, 3, 5, 11, 42), (4, 5, 6, 42) - check(a, b, c) - - a = self.sparse_array(50) - b = self.duplicates_array(50) - c = self.sparse_array(50) - check(a, b, c) - - def test_remove(self): - pyfunc = remove_usecase - check = self.unordered_checker(pyfunc) - - a = (1, 2, 3, 5, 8, 42) - b = (5, 2, 8) - check(a, b) - - def test_remove_error(self): - # References are leaked on exception - self.disable_leak_check() - - pyfunc = remove_usecase - cfunc = jit(nopython=True)(pyfunc) - with self.assertRaises(KeyError) as 
raises: - cfunc((1, 2, 3), (5, )) - - @tag('important') - def test_discard(self): - pyfunc = discard_usecase - check = self.unordered_checker(pyfunc) - - a = (1, 2, 3, 5, 8, 42) - b = (5, 2, 8) - check(a, b) - a = self.sparse_array(50) - b = self.sparse_array(50) - check(a, b) - - def test_add_discard(self): - """ - Check that the insertion logic does not create an infinite lookup - chain with deleted entries (insertion should happen at the first - deleted entry, not at the free entry at the end of the chain). - See issue #1913. - """ - pyfunc = add_discard_usecase - check = self.unordered_checker(pyfunc) - check((1,), 5, 5) - - @tag('important') - def test_pop(self): - pyfunc = pop_usecase - check = self.unordered_checker(pyfunc) - - check((2, 3, 55, 11, 8, 42)) - check(self.sparse_array(50)) - - @tag('important') - def test_contains(self): - pyfunc = contains_usecase - cfunc = jit(nopython=True)(pyfunc) - def check(a, b): - self.assertPreciseEqual(pyfunc(a, b), cfunc(a, b)) - - a = (1, 2, 3, 5, 42) - b = (5, 2, 8, 3) - check(a, b) - - def _test_xxx_update(self, pyfunc): - check = self.unordered_checker(pyfunc) - - a, b = (1, 2, 4, 11), (2, 3, 5, 11, 42) - check(a, b) - - sizes = (0, 50, 500) - for na, nb in itertools.product(sizes, sizes): - a = self.sparse_array(na) - b = self.sparse_array(nb) - check(a, b) - - def test_difference_update(self): - self._test_xxx_update(difference_update_usecase) - - def test_intersection_update(self): - self._test_xxx_update(intersection_update_usecase) - - def test_symmetric_difference_update(self): - self._test_xxx_update(symmetric_difference_update_usecase) - - def _test_comparator(self, pyfunc): - cfunc = jit(nopython=True)(pyfunc) - def check(a, b): - self.assertPreciseEqual(pyfunc(a, b), cfunc(a, b)) - - a, b = map(set, [(1, 2, 4, 11), (2, 3, 5, 11, 42)]) - args = [a & b, a - b, a | b, a ^ b] - args = [tuple(x) for x in args] - for a, b in itertools.product(args, args): - check(a, b) - - def test_isdisjoint(self): - 
self._test_comparator(isdisjoint_usecase) - - def test_issubset(self): - self._test_comparator(issubset_usecase) - - def test_issuperset(self): - self._test_comparator(issuperset_usecase) - - def test_clear(self): - pyfunc = clear_usecase - check = self.unordered_checker(pyfunc) - - check((1, 2, 4, 11)) - check(self.sparse_array(50)) - - def test_copy(self): - # Source set doesn't have any deleted entries - pyfunc = copy_usecase - check = self.unordered_checker(pyfunc) - check((1, 2, 4, 11)) - check(self.sparse_array(50)) - - pyfunc = copy_usecase_empty - check = self.unordered_checker(pyfunc) - check((1,)) - - # Source set has deleted entries - pyfunc = copy_usecase_deleted - check = self.unordered_checker(pyfunc) - check((1, 2, 4, 11), 2) - a = self.sparse_array(50) - check(a, a[len(a) // 2]) - - def _test_set_operator(self, pyfunc): - check = self.unordered_checker(pyfunc) - - a, b = (1, 2, 4, 11), (2, 3, 5, 11, 42) - check(a, b) - - sizes = (0, 50, 500) - for na, nb in itertools.product(sizes, sizes): - a = self.sparse_array(na) - b = self.sparse_array(nb) - check(a, b) - - def test_difference(self): - self._test_set_operator(difference_usecase) - - def test_intersection(self): - self._test_set_operator(intersection_usecase) - - def test_symmetric_difference(self): - self._test_set_operator(symmetric_difference_usecase) - - def test_union(self): - self._test_set_operator(union_usecase) - - def test_and(self): - self._test_set_operator(make_operator_usecase('&')) - - def test_or(self): - self._test_set_operator(make_operator_usecase('|')) - - def test_sub(self): - self._test_set_operator(make_operator_usecase('-')) - - def test_xor(self): - self._test_set_operator(make_operator_usecase('^')) - - def test_eq(self): - self._test_set_operator(make_comparison_usecase('==')) - - def test_ne(self): - self._test_set_operator(make_comparison_usecase('!=')) - - def test_le(self): - self._test_set_operator(make_comparison_usecase('<=')) - - def test_lt(self): - 
self._test_set_operator(make_comparison_usecase('<')) - - def test_ge(self): - self._test_set_operator(make_comparison_usecase('>=')) - - def test_gt(self): - self._test_set_operator(make_comparison_usecase('>')) - - def test_iand(self): - self._test_set_operator(make_inplace_operator_usecase('&=')) - - def test_ior(self): - self._test_set_operator(make_inplace_operator_usecase('|=')) - - def test_isub(self): - self._test_set_operator(make_inplace_operator_usecase('-=')) - - def test_ixor(self): - self._test_set_operator(make_inplace_operator_usecase('^=')) - - -class OtherTypesTest(object): - - def test_constructor(self): - pyfunc = empty_constructor_usecase - cfunc = jit(nopython=True)(pyfunc) - self.assertPreciseEqual(cfunc(), pyfunc()) - - pyfunc = constructor_usecase - cfunc = jit(nopython=True)(pyfunc) - def check(arg): - self.assertPreciseEqual(pyfunc(arg), cfunc(arg)) - - check(self.duplicates_array(200)) - check(self.sparse_array(200)) - - def test_iterator(self): - pyfunc = iterator_usecase - check = self.unordered_checker(pyfunc) - - check(self.duplicates_array(200)) - check(self.sparse_array(200)) - - @tag('important') - def test_update(self): - pyfunc = update_usecase - check = self.unordered_checker(pyfunc) - - a = self.sparse_array(50) - b = self.duplicates_array(50) - c = self.sparse_array(50) - check(a, b, c) - - -class TestFloatSets(OtherTypesTest, BaseTest): - """ - Test sets with floating-point keys. - """ - # Only a few basic tests here, as the sanity of most operations doesn't - # depend on the key type. - - def _range(self, stop): - return np.arange(stop, dtype=np.float32) * np.float32(0.1) - - -class TestTupleSets(OtherTypesTest, BaseTest): - """ - Test sets with tuple keys. 
- """ - def _range(self, stop): - a = np.arange(stop, dtype=np.int64) - b = a & 0x5555555555555555 - c = (a & 0xaaaaaaaa).astype(np.int32) - d = ((a >> 32) & 1).astype(np.bool_) - return list(zip(b, c, d)) - - -class TestUnboxing(BaseTest): - """ - Test unboxing of Python sets into native Numba sets. - """ - - @contextlib.contextmanager - def assert_type_error(self, msg): - with self.assertRaises(TypeError) as raises: - yield - if msg is not None: - self.assertRegexpMatches(str(raises.exception), msg) - - def check_unary(self, pyfunc): - cfunc = jit(nopython=True)(pyfunc) - def check(arg): - expected = pyfunc(arg) - got = cfunc(arg) - self.assertPreciseEqual(got, expected) - return check - - @tag('important') - def test_numbers(self): - check = self.check_unary(unbox_usecase) - check(set([1, 2])) - check(set([1j, 2.5j])) - # Check allocation and sizing - check(set(range(100))) - - def test_tuples(self): - check = self.check_unary(unbox_usecase2) - check(set([(1, 2), (3, 4)])) - check(set([(1, 2j), (3, 4j)])) - - @tag('important') - def test_set_inside_tuple(self): - check = self.check_unary(unbox_usecase3) - check((1, set([2, 3, 4]))) - - def test_set_of_tuples_inside_tuple(self): - check = self.check_unary(unbox_usecase4) - check((1, set([(2,), (3,)]))) - - def test_errors(self): - # Error checking should ensure the set is homogeneous - msg = "can't unbox heterogeneous set" - pyfunc = noop - cfunc = jit(nopython=True)(pyfunc) - val = set([1, 2.5]) - with self.assert_type_error(msg): - cfunc(val) - # The set hasn't been changed (bogus reflecting) - self.assertEqual(val, set([1, 2.5])) - with self.assert_type_error(msg): - cfunc(set([1, 2j])) - # Same when the set is nested in a tuple or namedtuple - with self.assert_type_error(msg): - cfunc((1, set([1, 2j]))) - with self.assert_type_error(msg): - cfunc(Point(1, set([1, 2j]))) - # Tuples of different size. - # Note the check is really on the tuple side. 
- lst = set([(1,), (2, 3)]) - # Depending on which tuple is examined first, we could get - # a IndexError or a ValueError. - with self.assertRaises((IndexError, ValueError)) as raises: - cfunc(lst) - - -class TestSetReflection(BaseTest): - """ - Test reflection of native Numba sets on Python set objects. - """ - - def check_reflection(self, pyfunc): - cfunc = jit(nopython=True)(pyfunc) - samples = [(set([1., 2., 3., 4.]), set([0.])), - (set([1., 2., 3., 4.]), set([5., 6., 7., 8., 9.])), - ] - for dest, src in samples: - expected = set(dest) - got = set(dest) - pyres = pyfunc(expected, src) - with self.assertRefCount(got, src): - cres = cfunc(got, src) - self.assertPreciseEqual(cres, pyres) - self.assertPreciseEqual(expected, got) - self.assertEqual(pyres[0] is expected, cres[0] is got) - del pyres, cres - - def test_reflect_simple(self): - self.check_reflection(reflect_simple) - - def test_reflect_conditional(self): - self.check_reflection(reflect_conditional) - - def test_reflect_exception(self): - """ - When the function exits with an exception, sets should still be - reflected. - """ - pyfunc = reflect_exception - cfunc = jit(nopython=True)(pyfunc) - s = set([1, 2, 3]) - with self.assertRefCount(s): - with self.assertRaises(ZeroDivisionError): - cfunc(s) - self.assertPreciseEqual(s, set([1, 2, 3, 42])) - - @tag('important') - def test_reflect_same_set(self): - """ - When the same set object is reflected twice, behaviour should - be consistent. - """ - pyfunc = reflect_dual - cfunc = jit(nopython=True)(pyfunc) - pyset = set([1, 2, 3]) - cset = pyset.copy() - expected = pyfunc(pyset, pyset) - got = cfunc(cset, cset) - self.assertPreciseEqual(expected, got) - self.assertPreciseEqual(pyset, cset) - self.assertPreciseEqual(sys.getrefcount(pyset), sys.getrefcount(cset)) - - def test_reflect_clean(self): - """ - When the set wasn't mutated, no reflection should take place. 
- """ - cfunc = jit(nopython=True)(noop) - # Use a complex, as Python integers can be cached - s = set([12.5j]) - ids = [id(x) for x in s] - cfunc(s) - self.assertEqual([id(x) for x in s], ids) - - -class TestExamples(BaseTest): - """ - Examples of using sets. - """ - - @tag('important') - def test_unique(self): - pyfunc = unique_usecase - check = self.unordered_checker(pyfunc) - - check(self.duplicates_array(200)) - check(self.sparse_array(200)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_slices.py b/numba/numba/tests/test_slices.py deleted file mode 100644 index 979549ef7..000000000 --- a/numba/numba/tests/test_slices.py +++ /dev/null @@ -1,83 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import itertools -import sys - -from numba import unittest_support as unittest -from numba import jit, typeof -from .support import TestCase - - -def slice_passing(sl): - return sl.start, sl.stop, sl.step - -def slice_constructor(*args): - sl = slice(*args) - return sl.start, sl.stop, sl.step - - -class TestSlices(TestCase): - - def test_slice_passing(self): - """ - Check passing a slice object to a Numba function. 
- """ - # NOTE this also checks slice attributes - def check(a, b, c, d, e, f): - sl = slice(a, b, c) - got = cfunc(sl) - self.assertPreciseEqual(got, (d, e, f)) - - maxposint = sys.maxsize - maxnegint = -maxposint - 1 - cfunc = jit(nopython=True)(slice_passing) - - # Positive steps - start_cases = [(None, 0), (42, 42), (-1, -1)] - stop_cases = [(None, maxposint), (9, 9), (-11, -11)] - step_cases = [(None, 1), (12, 12)] - for (a, d), (b, e), (c, f) in itertools.product(start_cases, - stop_cases, - step_cases): - check(a, b, c, d, e, f) - - # Negative steps - start_cases = [(None, maxposint), (42, 42), (-1, -1)] - stop_cases = [(None, maxnegint), (9, 9), (-11, -11)] - step_cases = [(-1, -1), (-12, -12)] - for (a, d), (b, e), (c, f) in itertools.product(start_cases, - stop_cases, - step_cases): - check(a, b, c, d, e, f) - - # Some member is neither integer nor None - with self.assertRaises(TypeError): - cfunc(slice(1.5, 1, 1)) - - def test_slice_constructor(self): - """ - Test the slice() constructor in nopython mode. 
- """ - maxposint = sys.maxsize - maxnegint = -maxposint - 1 - cfunc = jit(nopython=True)(slice_constructor) - for args, expected in [((), (0, maxposint, 1)), - ((None, None), (0, maxposint, 1)), - ((1, None), (1, maxposint, 1)), - ((None, 2), (0, 2, 1)), - ((1, 2), (1, 2, 1)), - ((None, None, 3), (0, maxposint, 3)), - ((None, 2, 3), (0, 2, 3)), - ((1, None, 3), (1, maxposint, 3)), - ((1, 2, 3), (1, 2, 3)), - ((None, None, -1), (maxposint, maxnegint, -1)), - ((10, None, -1), (10, maxnegint, -1)), - ((None, 5, -1), (maxposint, 5, -1)), - ((10, 5, -1), (10, 5, -1)), - ]: - got = cfunc(*args) - self.assertPreciseEqual(got, expected) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_smart_array.py b/numba/numba/tests/test_smart_array.py deleted file mode 100644 index 0d657f81e..000000000 --- a/numba/numba/tests/test_smart_array.py +++ /dev/null @@ -1,85 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import numpy as np - -from numba import unittest_support as unittest -from numba import jit, types, errors, typeof, numpy_support, cgutils -from numba.compiler import compile_isolated -from .support import TestCase, captured_stdout -from numba import SmartArray - -def len_usecase(x): - return len(x) - -def print_usecase(x): - print(x) - -def getitem_usecase(x, key): - return x[key] - -def shape_usecase(x): - return x.shape - -def npyufunc_usecase(x): - return np.cos(np.sin(x)) - -def identity(x): return x - -class TestJIT(TestCase): - - def test_identity(self): - # make sure unboxing and boxing works. 
- a = SmartArray(np.arange(3)) - cfunc = jit(nopython=True)(identity) - self.assertIs(cfunc(a),a) - - def test_len(self): - a = SmartArray(np.arange(3)) - cfunc = jit(nopython=True)(len_usecase) - self.assertPreciseEqual(cfunc(a), 3) - - def test_shape(self): - a = SmartArray(np.arange(3)) - cfunc = jit(nopython=True)(shape_usecase) - self.assertPreciseEqual(cfunc(a), (3,)) - - def test_getitem(self): - a = SmartArray(np.int32([42, 8, -5])) - cfunc = jit(nopython=True)(getitem_usecase) - self.assertPreciseEqual(cfunc(a, 1), 8) - aa = cfunc(a, slice(1, None)) - self.assertIsInstance(aa, SmartArray) - self.assertEqual(list(aa), [8, -5]) - - def test_ufunc(self): - a = SmartArray(np.int32([42, 8, -5])) - cfunc = jit(nopython=True)(npyufunc_usecase) - aa = cfunc(a) - self.assertIsInstance(aa, SmartArray) - self.assertPreciseEqual(aa.get('host'), np.cos(np.sin(a.get('host')))) - - def test_astype(self): - a = SmartArray(np.int32([42, 8, -5])) - aa = a.astype(np.float64) - self.assertIsInstance(aa, SmartArray) - # verify that SmartArray.astype() operates like ndarray.astype()... - self.assertPreciseEqual(aa.get('host'), a.get('host').astype(np.float64)) - # ...and that both actually yield the expected dtype. 
- self.assertPreciseEqual(aa.get('host').dtype.type, np.float64) - self.assertIs(aa.dtype.type, np.float64) - -class TestInterface(TestCase): - - def test_interface(self): - # show that the SmartArray type supports all ndarray operations transparently - - a = np.arange(16).reshape(4,4) - ap = SmartArray(a) - ap[:,:] = 1 - ref = SmartArray(np.ones(dtype=ap.dtype, shape=(4,4))) - eq = ap == ref - self.assertIsInstance(eq, SmartArray) - self.assertTrue(eq.all()) - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_sort.py b/numba/numba/tests/test_sort.py deleted file mode 100644 index 80a45d91b..000000000 --- a/numba/numba/tests/test_sort.py +++ /dev/null @@ -1,927 +0,0 @@ -from __future__ import print_function - -import copy -import itertools -import math -import random -import sys - -import numpy as np - -from numba.compiler import compile_isolated, Flags -from numba import jit, types, utils, njit -import numba.unittest_support as unittest -from numba import testing -from .support import TestCase, MemoryLeakMixin, tag - -from numba.targets.quicksort import make_py_quicksort, make_jit_quicksort -from numba.targets.mergesort import make_jit_mergesort -from .timsort import make_py_timsort, make_jit_timsort, MergeRun - - -def make_temp_list(keys, n): - return [keys[0]] * n - -def make_temp_array(keys, n): - return np.empty(n, keys.dtype) - - -py_list_timsort = make_py_timsort(make_temp_list) - -py_array_timsort = make_py_timsort(make_temp_array) - -jit_list_timsort = make_jit_timsort(make_temp_list) - -jit_array_timsort = make_jit_timsort(make_temp_array) - -py_quicksort = make_py_quicksort() - -jit_quicksort = make_jit_quicksort() - - -def sort_usecase(val): - val.sort() - -def argsort_usecase(val): - return val.argsort() - -def argsort_kind_usecase(val, is_stable=False): - if is_stable: - return val.argsort(kind='mergesort') - else: - return val.argsort(kind='quicksort') - -def sorted_usecase(val): - return sorted(val) - -def 
sorted_reverse_usecase(val, b): - return sorted(val, reverse=b) - -def np_sort_usecase(val): - return np.sort(val) - -def np_argsort_usecase(val): - return np.argsort(val) - -def np_argsort_kind_usecase(val, is_stable=False): - if is_stable: - return np.argsort(val, kind='mergesort') - else: - return np.argsort(val, kind='quicksort') - -def list_sort_usecase(n): - np.random.seed(42) - l = [] - for i in range(n): - l.append(np.random.random()) - ll = l[:] - ll.sort() - return l, ll - -def list_sort_reverse_usecase(n, b): - np.random.seed(42) - l = [] - for i in range(n): - l.append(np.random.random()) - ll = l[:] - ll.sort(reverse=b) - return l, ll - - -class BaseSortingTest(object): - - def random_list(self, n, offset=10): - random.seed(42) - l = list(range(offset, offset + n)) - random.shuffle(l) - return l - - def sorted_list(self, n, offset=10): - return list(range(offset, offset + n)) - - def revsorted_list(self, n, offset=10): - return list(range(offset, offset + n))[::-1] - - def initially_sorted_list(self, n, m=None, offset=10): - if m is None: - m = n // 2 - l = self.sorted_list(m, offset) - l += self.random_list(n - m, offset=l[-1] + offset) - return l - - def duprandom_list(self, n, factor=None, offset=10): - random.seed(42) - if factor is None: - factor = int(math.sqrt(n)) - l = (list(range(offset, offset + (n // factor) + 1)) * (factor + 1))[:n] - assert len(l) == n - random.shuffle(l) - return l - - def dupsorted_list(self, n, factor=None, offset=10): - if factor is None: - factor = int(math.sqrt(n)) - l = (list(range(offset, offset + (n // factor) + 1)) * (factor + 1))[:n] - assert len(l) == n, (len(l), n) - l.sort() - return l - - def assertSorted(self, orig, result): - self.assertEqual(len(result), len(orig)) - # sorted() returns a list, so make sure we compare to another list - self.assertEqual(list(result), sorted(orig)) - - def assertSortedValues(self, orig, orig_values, result, result_values): - self.assertEqual(len(result), len(orig)) - 
self.assertEqual(list(result), sorted(orig)) - zip_sorted = sorted(zip(orig, orig_values), key=lambda x: x[0]) - zip_result = list(zip(result, result_values)) - self.assertEqual(zip_sorted, zip_result) - # Check stability - for i in range(len(zip_result) - 1): - (k1, v1), (k2, v2) = zip_result[i], zip_result[i + 1] - if k1 == k2: - # Assuming values are unique, which is enforced by the tests - self.assertLess(orig_values.index(v1), orig_values.index(v2)) - - def fibo(self): - a = 1 - b = 1 - while True: - yield a - a, b = b, a + b - - def make_sample_sorted_lists(self, n): - lists = [] - for offset in (20, 120): - lists.append(self.sorted_list(n, offset)) - lists.append(self.dupsorted_list(n, offset)) - return lists - - def make_sample_lists(self, n): - lists = [] - for offset in (20, 120): - lists.append(self.sorted_list(n, offset)) - lists.append(self.dupsorted_list(n, offset)) - lists.append(self.revsorted_list(n, offset)) - lists.append(self.duprandom_list(n, offset)) - return lists - - -class BaseTimsortTest(BaseSortingTest): - - def merge_init(self, keys): - f = self.timsort.merge_init - return f(keys) - - @tag('important') - def test_binarysort(self): - n = 20 - def check(l, n, start=0): - res = self.array_factory(l) - f(res, res, 0, n, start) - self.assertSorted(l, res) - - f = self.timsort.binarysort - l = self.sorted_list(n) - check(l, n) - check(l, n, n//2) - l = self.revsorted_list(n) - check(l, n) - l = self.initially_sorted_list(n, n//2) - check(l, n) - check(l, n, n//2) - l = self.revsorted_list(n) - check(l, n) - l = self.random_list(n) - check(l, n) - l = self.duprandom_list(n) - check(l, n) - - def test_binarysort_with_values(self): - n = 20 - v = list(range(100, 100+n)) - - def check(l, n, start=0): - res = self.array_factory(l) - res_v = self.array_factory(v) - f(res, res_v, 0, n, start) - self.assertSortedValues(l, v, res, res_v) - - f = self.timsort.binarysort - l = self.sorted_list(n) - check(l, n) - check(l, n, n//2) - l = 
self.revsorted_list(n) - check(l, n) - l = self.initially_sorted_list(n, n//2) - check(l, n) - check(l, n, n//2) - l = self.revsorted_list(n) - check(l, n) - l = self.random_list(n) - check(l, n) - l = self.duprandom_list(n) - check(l, n) - - def test_count_run(self): - n = 16 - f = self.timsort.count_run - - def check(l, lo, hi): - n, desc = f(self.array_factory(l), lo, hi) - # Fully check invariants - if desc: - for k in range(lo, lo + n - 1): - a, b = l[k], l[k + 1] - self.assertGreater(a, b) - if lo + n < hi: - self.assertLessEqual(l[lo + n - 1], l[lo + n]) - else: - for k in range(lo, lo + n - 1): - a, b = l[k], l[k + 1] - self.assertLessEqual(a, b) - if lo + n < hi: - self.assertGreater(l[lo + n - 1], l[lo + n], l) - - - l = self.sorted_list(n, offset=100) - check(l, 0, n) - check(l, 1, n - 1) - check(l, 1, 2) - l = self.revsorted_list(n, offset=100) - check(l, 0, n) - check(l, 1, n - 1) - check(l, 1, 2) - l = self.random_list(n, offset=100) - for i in range(len(l) - 1): - check(l, i, n) - l = self.duprandom_list(n, offset=100) - for i in range(len(l) - 1): - check(l, i, n) - - @tag('important') - def test_gallop_left(self): - n = 20 - f = self.timsort.gallop_left - - def check(l, key, start, stop, hint): - k = f(key, l, start, stop, hint) - # Fully check invariants - self.assertGreaterEqual(k, start) - self.assertLessEqual(k, stop) - if k > start: - self.assertLess(l[k - 1], key) - if k < stop: - self.assertGreaterEqual(l[k], key) - - def check_all_hints(l, key, start, stop): - for hint in range(start, stop): - check(l, key, start, stop, hint) - - def check_sorted_list(l): - l = self.array_factory(l) - for key in (l[5], l[15], l[0], -1000, l[-1], 1000): - check_all_hints(l, key, 0, n) - check_all_hints(l, key, 1, n - 1) - check_all_hints(l, key, 8, n - 8) - - l = self.sorted_list(n, offset=100) - check_sorted_list(l) - l = self.dupsorted_list(n, offset=100) - check_sorted_list(l) - - def test_gallop_right(self): - n = 20 - f = self.timsort.gallop_right - - 
def check(l, key, start, stop, hint): - k = f(key, l, start, stop, hint) - # Fully check invariants - self.assertGreaterEqual(k, start) - self.assertLessEqual(k, stop) - if k > start: - self.assertLessEqual(l[k - 1], key) - if k < stop: - self.assertGreater(l[k], key) - - def check_all_hints(l, key, start, stop): - for hint in range(start, stop): - check(l, key, start, stop, hint) - - def check_sorted_list(l): - l = self.array_factory(l) - for key in (l[5], l[15], l[0], -1000, l[-1], 1000): - check_all_hints(l, key, 0, n) - check_all_hints(l, key, 1, n - 1) - check_all_hints(l, key, 8, n - 8) - - l = self.sorted_list(n, offset=100) - check_sorted_list(l) - l = self.dupsorted_list(n, offset=100) - check_sorted_list(l) - - def test_merge_compute_minrun(self): - f = self.timsort.merge_compute_minrun - - for i in range(0, 64): - self.assertEqual(f(i), i) - for i in range(6, 63): - if 2**i > sys.maxsize: - break - self.assertEqual(f(2**i), 32) - for i in self.fibo(): - if i < 64: - continue - if i >= sys.maxsize: - break - k = f(i) - self.assertGreaterEqual(k, 32) - self.assertLessEqual(k, 64) - if i > 500: - # i/k is close to, but strictly less than, an exact power of 2 - quot = i // k - p = 2 ** utils.bit_length(quot) - self.assertLess(quot, p) - self.assertGreaterEqual(quot, 0.9 * p) - - def check_merge_lo_hi(self, func, a, b): - na = len(a) - nb = len(b) - - # Add sentinels at start and end, to check they weren't moved - orig_keys = [42] + a + b + [-42] - keys = self.array_factory(orig_keys) - ms = self.merge_init(keys) - ssa = 1 - ssb = ssa + na - - #new_ms = func(ms, keys, [], ssa, na, ssb, nb) - new_ms = func(ms, keys, keys, ssa, na, ssb, nb) - self.assertEqual(keys[0], orig_keys[0]) - self.assertEqual(keys[-1], orig_keys[-1]) - self.assertSorted(orig_keys[1:-1], keys[1:-1]) - # Check the MergeState result - self.assertGreaterEqual(len(new_ms.keys), len(ms.keys)) - self.assertGreaterEqual(len(new_ms.values), len(ms.values)) - self.assertIs(new_ms.pending, 
ms.pending) - self.assertGreaterEqual(new_ms.min_gallop, 1) - - def test_merge_lo_hi(self): - f_lo = self.timsort.merge_lo - f_hi = self.timsort.merge_hi - - # The larger sizes exercise galloping - for (na, nb) in [(12, 16), (40, 40), (100, 110), (1000, 1100)]: - for a, b in itertools.product(self.make_sample_sorted_lists(na), - self.make_sample_sorted_lists(nb)): - self.check_merge_lo_hi(f_lo, a, b) - self.check_merge_lo_hi(f_hi, b, a) - - def check_merge_at(self, a, b): - f = self.timsort.merge_at - # Prepare the array to be sorted - na = len(a) - nb = len(b) - # Add sentinels at start and end, to check they weren't moved - orig_keys = [42] + a + b + [-42] - ssa = 1 - ssb = ssa + na - - stack_sentinel = MergeRun(-42, -42) - - def run_merge_at(ms, keys, i): - new_ms = f(ms, keys, keys, i) - self.assertEqual(keys[0], orig_keys[0]) - self.assertEqual(keys[-1], orig_keys[-1]) - self.assertSorted(orig_keys[1:-1], keys[1:-1]) - # Check stack state - self.assertIs(new_ms.pending, ms.pending) - self.assertEqual(ms.pending[i], (ssa, na + nb)) - self.assertEqual(ms.pending[0], stack_sentinel) - return new_ms - - # First check with i == len(stack) - 2 - keys = self.array_factory(orig_keys) - ms = self.merge_init(keys) - # Push sentinel on stack, to check it was't touched - ms = self.timsort.merge_append(ms, stack_sentinel) - i = ms.n - ms = self.timsort.merge_append(ms, MergeRun(ssa, na)) - ms = self.timsort.merge_append(ms, MergeRun(ssb, nb)) - ms = run_merge_at(ms, keys, i) - self.assertEqual(ms.n, i + 1) - - # Now check with i == len(stack) - 3 - keys = self.array_factory(orig_keys) - ms = self.merge_init(keys) - # Push sentinel on stack, to check it was't touched - ms = self.timsort.merge_append(ms, stack_sentinel) - i = ms.n - ms = self.timsort.merge_append(ms, MergeRun(ssa, na)) - ms = self.timsort.merge_append(ms, MergeRun(ssb, nb)) - # A last run (trivial here) - last_run = MergeRun(ssb + nb, 1) - ms = self.timsort.merge_append(ms, last_run) - ms = run_merge_at(ms, 
keys, i) - self.assertEqual(ms.n, i + 2) - self.assertEqual(ms.pending[ms.n - 1], last_run) - - def test_merge_at(self): - # The larger sizes exercise galloping - for (na, nb) in [(12, 16), (40, 40), (100, 110), (500, 510)]: - for a, b in itertools.product(self.make_sample_sorted_lists(na), - self.make_sample_sorted_lists(nb)): - self.check_merge_at(a, b) - self.check_merge_at(b, a) - - def test_merge_force_collapse(self): - f = self.timsort.merge_force_collapse - - # Test with runs of ascending sizes, then descending sizes - sizes_list = [(8, 10, 15, 20)] - sizes_list.append(sizes_list[0][::-1]) - - for sizes in sizes_list: - for chunks in itertools.product(*(self.make_sample_sorted_lists(n) - for n in sizes)): - # Create runs of the given sizes - orig_keys = sum(chunks, []) - keys = self.array_factory(orig_keys) - ms = self.merge_init(keys) - pos = 0 - for c in chunks: - ms = self.timsort.merge_append(ms, MergeRun(pos, len(c))) - pos += len(c) - # Sanity check - self.assertEqual(sum(ms.pending[ms.n - 1]), len(keys)) - # Now merge the runs - ms = f(ms, keys, keys) - # Remaining run is the whole list - self.assertEqual(ms.n, 1) - self.assertEqual(ms.pending[0], MergeRun(0, len(keys))) - # The list is now sorted - self.assertSorted(orig_keys, keys) - - def test_run_timsort(self): - f = self.timsort.run_timsort - - for size_factor in (1, 10): - # Make lists to be sorted from three chunks of different kinds. 
- sizes = (15, 30, 20) - - all_lists = [self.make_sample_lists(n * size_factor) for n in sizes] - for chunks in itertools.product(*all_lists): - orig_keys = sum(chunks, []) - keys = self.array_factory(orig_keys) - f(keys) - # The list is now sorted - self.assertSorted(orig_keys, keys) - - def test_run_timsort_with_values(self): - # Run timsort, but also with a values array - f = self.timsort.run_timsort_with_values - - for size_factor in (1, 5): - chunk_size = 80 * size_factor - a = self.dupsorted_list(chunk_size) - b = self.duprandom_list(chunk_size) - c = self.revsorted_list(chunk_size) - orig_keys = a + b + c - orig_values = list(range(1000, 1000 + len(orig_keys))) - - keys = self.array_factory(orig_keys) - values = self.array_factory(orig_values) - f(keys, values) - # This checks sort stability - self.assertSortedValues(orig_keys, orig_values, keys, values) - - -class TestTimsortPurePython(BaseTimsortTest, TestCase): - - timsort = py_list_timsort - - # Much faster than a Numpy array in pure Python - array_factory = list - - -class TestTimsortArraysPurePython(BaseTimsortTest, TestCase): - - timsort = py_array_timsort - - def array_factory(self, lst): - return np.array(lst, dtype=np.int32) - - -class JITTimsortMixin(object): - - timsort = jit_array_timsort - - test_merge_at = None - test_merge_force_collapse = None - - def wrap_with_mergestate(self, timsort, func, _cache={}): - """ - Wrap *func* into another compiled function inserting a runtime-created - mergestate as the first function argument. 
- """ - key = timsort, func - if key in _cache: - return _cache[key] - - merge_init = timsort.merge_init - - @timsort.compile - def wrapper(keys, values, *args): - ms = merge_init(keys) - res = func(ms, keys, values, *args) - return res - - _cache[key] = wrapper - return wrapper - - -class TestTimsortArrays(JITTimsortMixin, BaseTimsortTest, TestCase): - - def array_factory(self, lst): - return np.array(lst, dtype=np.int32) - - def check_merge_lo_hi(self, func, a, b): - na = len(a) - nb = len(b) - - func = self.wrap_with_mergestate(self.timsort, func) - - # Add sentinels at start and end, to check they weren't moved - orig_keys = [42] + a + b + [-42] - keys = self.array_factory(orig_keys) - ssa = 1 - ssb = ssa + na - - new_ms = func(keys, keys, ssa, na, ssb, nb) - self.assertEqual(keys[0], orig_keys[0]) - self.assertEqual(keys[-1], orig_keys[-1]) - self.assertSorted(orig_keys[1:-1], keys[1:-1]) - - - -class BaseQuicksortTest(BaseSortingTest): - - # NOTE these tests assume a non-argsort quicksort. 
- - def test_insertion_sort(self): - n = 20 - def check(l, n): - res = self.array_factory([9999] + l + [-9999]) - f(res, res, 1, n) - self.assertEqual(res[0], 9999) - self.assertEqual(res[-1], -9999) - self.assertSorted(l, res[1:-1]) - - f = self.quicksort.insertion_sort - l = self.sorted_list(n) - check(l, n) - l = self.revsorted_list(n) - check(l, n) - l = self.initially_sorted_list(n, n//2) - check(l, n) - l = self.revsorted_list(n) - check(l, n) - l = self.random_list(n) - check(l, n) - l = self.duprandom_list(n) - check(l, n) - - def test_partition(self): - n = 20 - def check(l, n): - res = self.array_factory([9999] + l + [-9999]) - index = f(res, res, 1, n) - self.assertEqual(res[0], 9999) - self.assertEqual(res[-1], -9999) - pivot = res[index] - for i in range(1, index): - self.assertLessEqual(res[i], pivot) - for i in range(index + 1, n): - self.assertGreaterEqual(res[i], pivot) - - f = self.quicksort.partition - l = self.sorted_list(n) - check(l, n) - l = self.revsorted_list(n) - check(l, n) - l = self.initially_sorted_list(n, n//2) - check(l, n) - l = self.revsorted_list(n) - check(l, n) - l = self.random_list(n) - check(l, n) - l = self.duprandom_list(n) - check(l, n) - - def test_partition3(self): - # Test the unused partition3() function - n = 20 - def check(l, n): - res = self.array_factory([9999] + l + [-9999]) - lt, gt = f(res, 1, n) - self.assertEqual(res[0], 9999) - self.assertEqual(res[-1], -9999) - pivot = res[lt] - for i in range(1, lt): - self.assertLessEqual(res[i], pivot) - for i in range(lt, gt + 1): - self.assertEqual(res[i], pivot) - for i in range(gt + 1, n): - self.assertGreater(res[i], pivot) - - f = self.quicksort.partition3 - l = self.sorted_list(n) - check(l, n) - l = self.revsorted_list(n) - check(l, n) - l = self.initially_sorted_list(n, n//2) - check(l, n) - l = self.revsorted_list(n) - check(l, n) - l = self.random_list(n) - check(l, n) - l = self.duprandom_list(n) - check(l, n) - - @tag('important') - def 
test_run_quicksort(self): - f = self.quicksort.run_quicksort - - for size_factor in (1, 5): - # Make lists to be sorted from two chunks of different kinds. - sizes = (15, 20) - - all_lists = [self.make_sample_lists(n * size_factor) for n in sizes] - for chunks in itertools.product(*all_lists): - orig_keys = sum(chunks, []) - keys = self.array_factory(orig_keys) - f(keys) - # The list is now sorted - self.assertSorted(orig_keys, keys) - - def test_run_quicksort_lt(self): - def lt(a, b): - return a > b - - f = self.make_quicksort(lt=lt).run_quicksort - - for size_factor in (1, 5): - # Make lists to be sorted from two chunks of different kinds. - sizes = (15, 20) - - all_lists = [self.make_sample_lists(n * size_factor) for n in sizes] - for chunks in itertools.product(*all_lists): - orig_keys = sum(chunks, []) - keys = self.array_factory(orig_keys) - f(keys) - # The list is now rev-sorted - self.assertSorted(orig_keys, keys[::-1]) - - # An imperfect comparison function, as LT(a, b) does not imply not LT(b, a). - # The sort should handle it gracefully. 
- def lt_floats(a, b): - return math.isnan(b) or a < b - - f = self.make_quicksort(lt=lt_floats).run_quicksort - - np.random.seed(42) - for size in (5, 20, 50, 500): - orig = np.random.random(size=size) * 100 - orig[np.random.random(size=size) < 0.1] = float('nan') - orig_keys = list(orig) - keys = self.array_factory(orig_keys) - f(keys) - non_nans = orig[~np.isnan(orig)] - # Non-NaNs are sorted at the front - self.assertSorted(non_nans, keys[:len(non_nans)]) - - -class TestQuicksortPurePython(BaseQuicksortTest, TestCase): - - quicksort = py_quicksort - make_quicksort = staticmethod(make_py_quicksort) - - # Much faster than a Numpy array in pure Python - array_factory = list - - -class TestQuicksortArrays(BaseQuicksortTest, TestCase): - - quicksort = jit_quicksort - make_quicksort = staticmethod(make_jit_quicksort) - - def array_factory(self, lst): - return np.array(lst, dtype=np.float64) - - -class TestNumpySort(TestCase): - - def setUp(self): - np.random.seed(42) - - def int_arrays(self): - for size in (5, 20, 50, 500): - yield np.random.randint(99, size=size) - - def float_arrays(self): - for size in (5, 20, 50, 500): - yield np.random.random(size=size) * 100 - # Now with NaNs. Numpy sorts them at the end. - for size in (5, 20, 50, 500): - orig = np.random.random(size=size) * 100 - orig[np.random.random(size=size) < 0.1] = float('nan') - yield orig - - def has_duplicates(self, arr): - """ - Whether the array has duplicates. Takes NaNs into account. 
- """ - if np.count_nonzero(np.isnan(arr)) > 1: - return True - if np.unique(arr).size < arr.size: - return True - return False - - def check_sort_inplace(self, pyfunc, cfunc, val): - expected = copy.copy(val) - got = copy.copy(val) - pyfunc(expected) - cfunc(got) - self.assertPreciseEqual(got, expected) - - def check_sort_copy(self, pyfunc, cfunc, val): - orig = copy.copy(val) - expected = pyfunc(val) - got = cfunc(val) - self.assertPreciseEqual(got, expected) - # The original wasn't mutated - self.assertPreciseEqual(val, orig) - - def check_argsort(self, pyfunc, cfunc, val, kwargs={}): - orig = copy.copy(val) - expected = pyfunc(val, **kwargs) - got = cfunc(val, **kwargs) - self.assertPreciseEqual(orig[got], np.sort(orig), - msg="the array wasn't argsorted") - # Numba and Numpy results may differ if there are duplicates - # in the array - if not self.has_duplicates(orig): - self.assertPreciseEqual(got, expected) - # The original wasn't mutated - self.assertPreciseEqual(val, orig) - - def test_array_sort_int(self): - pyfunc = sort_usecase - cfunc = jit(nopython=True)(pyfunc) - - for orig in self.int_arrays(): - self.check_sort_inplace(pyfunc, cfunc, orig) - - @tag('important') - def test_array_sort_float(self): - pyfunc = sort_usecase - cfunc = jit(nopython=True)(pyfunc) - - for orig in self.float_arrays(): - self.check_sort_inplace(pyfunc, cfunc, orig) - - def test_np_sort_int(self): - pyfunc = np_sort_usecase - cfunc = jit(nopython=True)(pyfunc) - - for orig in self.int_arrays(): - self.check_sort_copy(pyfunc, cfunc, orig) - - def test_np_sort_float(self): - pyfunc = np_sort_usecase - cfunc = jit(nopython=True)(pyfunc) - - for size in (5, 20, 50, 500): - orig = np.random.random(size=size) * 100 - orig[np.random.random(size=size) < 0.1] = float('nan') - self.check_sort_copy(pyfunc, cfunc, orig) - - def test_argsort_int(self): - def check(pyfunc): - cfunc = jit(nopython=True)(pyfunc) - for orig in self.int_arrays(): - self.check_argsort(pyfunc, cfunc, orig) - - 
check(argsort_usecase) - check(np_argsort_usecase) - - def test_argsort_kind_int(self): - def check(pyfunc, is_stable): - cfunc = jit(nopython=True)(pyfunc) - for orig in self.int_arrays(): - self.check_argsort(pyfunc, cfunc, orig, - dict(is_stable=is_stable)) - - check(argsort_kind_usecase, is_stable=True) - check(np_argsort_kind_usecase, is_stable=True) - check(argsort_kind_usecase, is_stable=False) - check(np_argsort_kind_usecase, is_stable=False) - - @tag('important') - def test_argsort_float(self): - def check(pyfunc): - cfunc = jit(nopython=True)(pyfunc) - for orig in self.float_arrays(): - self.check_argsort(pyfunc, cfunc, orig) - - check(argsort_usecase) - check(np_argsort_usecase) - - @tag('important') - def test_argsort_float(self): - def check(pyfunc, is_stable): - cfunc = jit(nopython=True)(pyfunc) - for orig in self.float_arrays(): - self.check_argsort(pyfunc, cfunc, orig, - dict(is_stable=is_stable)) - - check(argsort_kind_usecase, is_stable=True) - check(np_argsort_kind_usecase, is_stable=True) - check(argsort_kind_usecase, is_stable=False) - check(np_argsort_kind_usecase, is_stable=False) - - -class TestPythonSort(TestCase): - - @tag('important') - def test_list_sort(self): - pyfunc = list_sort_usecase - cfunc = jit(nopython=True)(pyfunc) - - for size in (20, 50, 500): - orig, ret = cfunc(size) - self.assertEqual(sorted(orig), ret) - self.assertNotEqual(orig, ret) # sanity check - - def test_list_sort_reverse(self): - pyfunc = list_sort_reverse_usecase - cfunc = jit(nopython=True)(pyfunc) - - for size in (20, 50, 500): - for b in (False, True): - orig, ret = cfunc(size, b) - self.assertEqual(sorted(orig, reverse=b), ret) - self.assertNotEqual(orig, ret) # sanity check - - def test_sorted(self): - pyfunc = sorted_usecase - cfunc = jit(nopython=True)(pyfunc) - - for size in (20, 50, 500): - orig = np.random.random(size=size) * 100 - expected = sorted(orig) - got = cfunc(orig) - self.assertPreciseEqual(got, expected) - self.assertNotEqual(list(orig), 
got) # sanity check - - def test_sorted_reverse(self): - pyfunc = sorted_reverse_usecase - cfunc = jit(nopython=True)(pyfunc) - size = 20 - - orig = np.random.random(size=size) * 100 - for b in (False, True): - expected = sorted(orig, reverse=b) - got = cfunc(orig, b) - self.assertPreciseEqual(got, expected) - self.assertNotEqual(list(orig), got) # sanity check - - -class TestMergeSort(unittest.TestCase): - def setUp(self): - np.random.seed(321) - - def check_argsort_stable(self, sorter, low, high, count): - # make data with high possibility of duplicated key - data = np.random.randint(low, high, count) - expect = np.argsort(data, kind='mergesort') - got = sorter(data) - np.testing.assert_equal(expect, got) - - def test_argsort_stable(self): - arglist = [ - (-2, 2, 5), - (-5, 5, 10), - (0, 10, 101), - (0, 100, 1003), - ] - imp = make_jit_mergesort(is_argsort=True) - toplevel = imp.run_mergesort - sorter = njit(lambda arr: toplevel(arr)) - for args in arglist: - self.check_argsort_stable(sorter, *args) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_stencils.py b/numba/numba/tests/test_stencils.py deleted file mode 100644 index 6c67d0f39..000000000 --- a/numba/numba/tests/test_stencils.py +++ /dev/null @@ -1,2401 +0,0 @@ -# -# Copyright (c) 2017 Intel Corporation -# SPDX-License-Identifier: BSD-2-Clause -# - -from __future__ import print_function, division, absolute_import - -import sys -import numpy as np -import ast -import inspect -import operator -import types as pytypes -from contextlib import contextmanager -from copy import deepcopy - -import numba -from numba import unittest_support as unittest -from numba import njit, stencil, types -from numba.compiler import compile_extra, Flags -from numba.targets import registry -from numba.targets.cpu import ParallelOptions -from .support import tag -from numba.errors import LoweringError, TypingError - - -# for decorating tests, marking that Windows with Python 2.7 is not 
supported -_py27 = sys.version_info[:2] == (2, 7) -_windows_py27 = (sys.platform.startswith('win32') and _py27) -_32bit = sys.maxsize <= 2 ** 32 -_reason = 'parfors not supported' -_unsupported = _32bit or _windows_py27 -skip_unsupported = unittest.skipIf(_unsupported, _reason) - - -@stencil -def stencil1_kernel(a): - return 0.25 * (a[0, 1] + a[1, 0] + a[0, -1] + a[-1, 0]) - - -@stencil(neighborhood=((-5, 0), )) -def stencil2_kernel(a): - cum = a[-5] - for i in range(-4, 1): - cum += a[i] - return 0.3 * cum - - -@stencil(cval=1.0) -def stencil3_kernel(a): - return 0.25 * a[-2, 2] - - -@stencil -def stencil_multiple_input_kernel(a, b): - return 0.25 * (a[0, 1] + a[1, 0] + a[0, -1] + a[-1, 0] + - b[0, 1] + b[1, 0] + b[0, -1] + b[-1, 0]) - - -@stencil -def stencil_multiple_input_kernel_var(a, b, w): - return w * (a[0, 1] + a[1, 0] + a[0, -1] + a[-1, 0] + - b[0, 1] + b[1, 0] + b[0, -1] + b[-1, 0]) - - -@stencil(standard_indexing=("b",)) -def stencil_with_standard_indexing_1d(a, b): - return a[-1] * b[0] + a[0] * b[1] - - -@stencil(standard_indexing=("b",)) -def stencil_with_standard_indexing_2d(a, b): - return (a[0, 1] * b[0, 1] + a[1, 0] * b[1, 0] - + a[0, -1] * b[0, -1] + a[-1, 0] * b[-1, 0]) - - -@njit -def addone_njit(a): - return a + 1 - -# guard against the decorator being run on unsupported platforms -# as it will raise and stop test discovery from working -if not _unsupported: - @njit(parallel=True) - def addone_pjit(a): - return a + 1 - - -class TestStencilBase(unittest.TestCase): - - def __init__(self, *args): - # flags for njit() - self.cflags = Flags() - self.cflags.set('nrt') - - super(TestStencilBase, self).__init__(*args) - - def _compile_this(self, func, sig, flags): - return compile_extra(registry.cpu_target.typing_context, - registry.cpu_target.target_context, func, sig, - None, flags, {}) - - def compile_parallel(self, func, sig, **kws): - flags = Flags() - flags.set('nrt') - options = True if not kws else kws - flags.set('auto_parallel', 
ParallelOptions(options)) - return self._compile_this(func, sig, flags) - - def compile_njit(self, func, sig): - return self._compile_this(func, sig, flags=self.cflags) - - def compile_all(self, pyfunc, *args, **kwargs): - sig = tuple([numba.typeof(x) for x in args]) - # compile with parallel=True - cpfunc = self.compile_parallel(pyfunc, sig) - # compile a standard njit of the original function - cfunc = self.compile_njit(pyfunc, sig) - return cfunc, cpfunc - - def check(self, no_stencil_func, pyfunc, *args): - cfunc, cpfunc = self.compile_all(pyfunc, *args) - # results without stencil macro - expected = no_stencil_func(*args) - # python result - py_output = pyfunc(*args) - - # njit result - njit_output = cfunc.entry_point(*args) - - # parfor result - parfor_output = cpfunc.entry_point(*args) - - np.testing.assert_almost_equal(py_output, expected, decimal=3) - np.testing.assert_almost_equal(njit_output, expected, decimal=3) - np.testing.assert_almost_equal(parfor_output, expected, decimal=3) - - # make sure parfor set up scheduling - self.assertIn('@do_scheduling', cpfunc.library.get_llvm_str()) - - -class TestStencil(TestStencilBase): - - def __init__(self, *args, **kwargs): - super(TestStencil, self).__init__(*args, **kwargs) - - @skip_unsupported - @tag('important') - def test_stencil1(self): - """Tests whether the optional out argument to stencil calls works. 
- """ - def test_with_out(n): - A = np.arange(n**2).reshape((n, n)) - B = np.zeros(n**2).reshape((n, n)) - B = stencil1_kernel(A, out=B) - return B - - def test_without_out(n): - A = np.arange(n**2).reshape((n, n)) - B = stencil1_kernel(A) - return B - - def test_impl_seq(n): - A = np.arange(n**2).reshape((n, n)) - B = np.zeros(n**2).reshape((n, n)) - for i in range(1, n - 1): - for j in range(1, n - 1): - B[i, j] = 0.25 * (A[i, j + 1] + - A[i + 1, j] + A[i, j - 1] + A[i - 1, j]) - return B - - n = 100 - self.check(test_impl_seq, test_with_out, n) - self.check(test_impl_seq, test_without_out, n) - - @skip_unsupported - @tag('important') - def test_stencil2(self): - """Tests whether the optional neighborhood argument to the stencil - decorate works. - """ - def test_seq(n): - A = np.arange(n) - B = stencil2_kernel(A) - return B - - def test_impl_seq(n): - A = np.arange(n) - B = np.zeros(n) - for i in range(5, len(A)): - B[i] = 0.3 * sum(A[i - 5:i + 1]) - return B - - n = 100 - self.check(test_impl_seq, test_seq, n) - # variable length neighborhood in numba.stencil call - # only supported in parallel path - - def test_seq(n, w): - A = np.arange(n) - - def stencil2_kernel(a, w): - cum = a[-w] - for i in range(-w + 1, w + 1): - cum += a[i] - return 0.3 * cum - B = numba.stencil(stencil2_kernel, neighborhood=((-w, w), ))(A, w) - return B - - def test_impl_seq(n, w): - A = np.arange(n) - B = np.zeros(n) - for i in range(w, len(A) - w): - B[i] = 0.3 * sum(A[i - w:i + w + 1]) - return B - n = 100 - w = 5 - cpfunc = self.compile_parallel(test_seq, (types.intp, types.intp)) - expected = test_impl_seq(n, w) - # parfor result - parfor_output = cpfunc.entry_point(n, w) - np.testing.assert_almost_equal(parfor_output, expected, decimal=3) - self.assertIn('@do_scheduling', cpfunc.library.get_llvm_str()) - # test index_offsets - - def test_seq(n, w, offset): - A = np.arange(n) - - def stencil2_kernel(a, w): - cum = a[-w + 1] - for i in range(-w + 1, w + 1): - cum += a[i + 1] - 
return 0.3 * cum - B = numba.stencil(stencil2_kernel, neighborhood=((-w, w), ), - index_offsets=(-offset, ))(A, w) - return B - - offset = 1 - cpfunc = self.compile_parallel(test_seq, (types.intp, types.intp, - types.intp)) - parfor_output = cpfunc.entry_point(n, w, offset) - np.testing.assert_almost_equal(parfor_output, expected, decimal=3) - self.assertIn('@do_scheduling', cpfunc.library.get_llvm_str()) - # test slice in kernel - - def test_seq(n, w, offset): - A = np.arange(n) - - def stencil2_kernel(a, w): - return 0.3 * np.sum(a[-w + 1:w + 2]) - B = numba.stencil(stencil2_kernel, neighborhood=((-w, w), ), - index_offsets=(-offset, ))(A, w) - return B - - offset = 1 - cpfunc = self.compile_parallel(test_seq, (types.intp, types.intp, - types.intp)) - parfor_output = cpfunc.entry_point(n, w, offset) - np.testing.assert_almost_equal(parfor_output, expected, decimal=3) - self.assertIn('@do_scheduling', cpfunc.library.get_llvm_str()) - - @skip_unsupported - @tag('important') - def test_stencil3(self): - """Tests whether a non-zero optional cval argument to the stencil - decorator works. Also tests integer result type. - """ - def test_seq(n): - A = np.arange(n**2).reshape((n, n)) - B = stencil3_kernel(A) - return B - - test_njit = njit(test_seq) - test_par = njit(test_seq, parallel=True) - - n = 5 - seq_res = test_seq(n) - njit_res = test_njit(n) - par_res = test_par(n) - - self.assertTrue(seq_res[0, 0] == 1.0 and seq_res[4, 4] == 1.0) - self.assertTrue(njit_res[0, 0] == 1.0 and njit_res[4, 4] == 1.0) - self.assertTrue(par_res[0, 0] == 1.0 and par_res[4, 4] == 1.0) - - @skip_unsupported - @tag('important') - def test_stencil_standard_indexing_1d(self): - """Tests standard indexing with a 1d array. 
- """ - def test_seq(n): - A = np.arange(n) - B = [3.0, 7.0] - C = stencil_with_standard_indexing_1d(A, B) - return C - - def test_impl_seq(n): - A = np.arange(n) - B = [3.0, 7.0] - C = np.zeros(n) - - for i in range(1, n): - C[i] = A[i - 1] * B[0] + A[i] * B[1] - return C - - n = 100 - self.check(test_impl_seq, test_seq, n) - - @skip_unsupported - @tag('important') - def test_stencil_standard_indexing_2d(self): - """Tests standard indexing with a 2d array and multiple stencil calls. - """ - def test_seq(n): - A = np.arange(n**2).reshape((n, n)) - B = np.ones((3, 3)) - C = stencil_with_standard_indexing_2d(A, B) - D = stencil_with_standard_indexing_2d(C, B) - return D - - def test_impl_seq(n): - A = np.arange(n**2).reshape((n, n)) - B = np.ones((3, 3)) - C = np.zeros(n**2).reshape((n, n)) - D = np.zeros(n**2).reshape((n, n)) - - for i in range(1, n - 1): - for j in range(1, n - 1): - C[i, j] = (A[i, j + 1] * B[0, 1] + A[i + 1, j] * B[1, 0] + - A[i, j - 1] * B[0, -1] + A[i - 1, j] * B[-1, 0]) - for i in range(1, n - 1): - for j in range(1, n - 1): - D[i, j] = (C[i, j + 1] * B[0, 1] + C[i + 1, j] * B[1, 0] + - C[i, j - 1] * B[0, -1] + C[i - 1, j] * B[-1, 0]) - return D - - n = 5 - self.check(test_impl_seq, test_seq, n) - - @skip_unsupported - @tag('important') - def test_stencil_multiple_inputs(self): - """Tests whether multiple inputs of the same size work. 
- """ - def test_seq(n): - A = np.arange(n**2).reshape((n, n)) - B = np.arange(n**2).reshape((n, n)) - C = stencil_multiple_input_kernel(A, B) - return C - - def test_impl_seq(n): - A = np.arange(n**2).reshape((n, n)) - B = np.arange(n**2).reshape((n, n)) - C = np.zeros(n**2).reshape((n, n)) - for i in range(1, n - 1): - for j in range(1, n - 1): - C[i, j] = 0.25 * \ - (A[i, j + 1] + A[i + 1, j] - + A[i, j - 1] + A[i - 1, j] - + B[i, j + 1] + B[i + 1, j] - + B[i, j - 1] + B[i - 1, j]) - return C - - n = 3 - self.check(test_impl_seq, test_seq, n) - # test stencil with a non-array input - - def test_seq(n): - A = np.arange(n**2).reshape((n, n)) - B = np.arange(n**2).reshape((n, n)) - w = 0.25 - C = stencil_multiple_input_kernel_var(A, B, w) - return C - self.check(test_impl_seq, test_seq, n) - - @skip_unsupported - @tag('important') - def test_stencil_call(self): - """Tests 2D numba.stencil calls. - """ - def test_impl1(n): - A = np.arange(n**2).reshape((n, n)) - B = np.zeros(n**2).reshape((n, n)) - numba.stencil(lambda a: 0.25 * (a[0, 1] + a[1, 0] + a[0, -1] - + a[-1, 0]))(A, out=B) - return B - - def test_impl2(n): - A = np.arange(n**2).reshape((n, n)) - B = np.zeros(n**2).reshape((n, n)) - - def sf(a): - return 0.25 * (a[0, 1] + a[1, 0] + a[0, -1] + a[-1, 0]) - B = numba.stencil(sf)(A) - return B - - def test_impl_seq(n): - A = np.arange(n**2).reshape((n, n)) - B = np.zeros(n**2).reshape((n, n)) - for i in range(1, n - 1): - for j in range(1, n - 1): - B[i, j] = 0.25 * (A[i, j + 1] + A[i + 1, j] - + A[i, j - 1] + A[i - 1, j]) - return B - - n = 100 - self.check(test_impl_seq, test_impl1, n) - self.check(test_impl_seq, test_impl2, n) - - @skip_unsupported - @tag('important') - def test_stencil_call_1D(self): - """Tests 1D numba.stencil calls. 
- """ - def test_impl(n): - A = np.arange(n) - B = np.zeros(n) - numba.stencil(lambda a: 0.3 * (a[-1] + a[0] + a[1]))(A, out=B) - return B - - def test_impl_seq(n): - A = np.arange(n) - B = np.zeros(n) - for i in range(1, n - 1): - B[i] = 0.3 * (A[i - 1] + A[i] + A[i + 1]) - return B - - n = 100 - self.check(test_impl_seq, test_impl, n) - - @skip_unsupported - @tag('important') - def test_stencil_parallel_off(self): - """Tests 1D numba.stencil calls without parallel translation - turned off. - """ - def test_impl(A): - return numba.stencil(lambda a: 0.3 * (a[-1] + a[0] + a[1]))(A) - - cpfunc = self.compile_parallel(test_impl, (numba.float64[:],), stencil=False) - self.assertNotIn('@do_scheduling', cpfunc.library.get_llvm_str()) - - - -class pyStencilGenerator: - """ - Holds the classes and methods needed to generate a python stencil - implementation from a kernel purely using AST transforms. - """ - - class Builder: - """ - Provides code generation for the AST manipulation pipeline. - The class methods largely produce AST nodes/trees. - """ - - def __init__(self): - self.__state = 0 - - ids = [chr(ord(v) + x) for v in ['a', 'A'] for x in range(26)] - - def varidx(self): - """ - a monotonically increasing index for use in labelling variables. 
- """ - tmp = self.__state - self.__state = self.__state + 1 - return tmp - - # builder functions - def gen_alloc_return(self, orig, var, dtype_var, init_val=0): - """ - Generates an AST equivalent to: - `var = np.full(orig.shape, init_val, dtype = dtype_var)` - """ - new = ast.Assign( - targets=[ - ast.Name( - id=var, - ctx=ast.Store())], - value=ast.Call( - func=ast.Attribute( - value=ast.Name( - id='np', - ctx=ast.Load()), - attr='full', - ctx=ast.Load()), - args=[ - ast.Attribute( - value=ast.Name( - id=orig, - ctx=ast.Load()), - attr='shape', - ctx=ast.Load()), - self.gen_num(init_val)], - keywords=[ast.keyword(arg='dtype', - value=self.gen_call('type', [dtype_var.id]).value)], - starargs=None, - kwargs=None), - ) - return new - - def gen_assign(self, var, value, index_names): - """ - Generates an AST equivalent to: - `retvar[(*index_names,)] = value[]` - """ - elts_info = [ast.Name(id=x, ctx=ast.Load()) for x in index_names] - new = ast.Assign( - targets=[ - ast.Subscript( - value=ast.Name( - id=var, - ctx=ast.Load()), - slice=ast.Index( - value=ast.Tuple( - elts=elts_info, - ctx=ast.Load())), - ctx=ast.Store())], - value=value) - return new - - def gen_loop(self, var, start=0, stop=0, body=None): - """ - Generates an AST equivalent to a loop in `var` from - `start` to `stop` with body `body`. 
- """ - if isinstance(start, int): - start_val = ast.Num(n=start) - else: - start_val = start - if isinstance(stop, int): - stop_val = ast.Num(n=stop) - else: - stop_val = stop - return ast.For( - target=ast.Name(id=var, ctx=ast.Store()), - iter=ast.Call( - func=ast.Name(id='range', ctx=ast.Load()), - args=[start_val, stop_val], - keywords=[], - starargs=None, kwargs=None), - body=body, orelse=[]) - - def gen_return(self, var): - """ - Generates an AST equivalent to `return var` - """ - return ast.Return(value=ast.Name(id=var, ctx=ast.Load())) - - def gen_slice(self, value): - """Generates an Index with the given value""" - return ast.Index(value=ast.Num(n=value)) - - def gen_attr(self, name, attr): - """ - Generates AST equivalent to `name.attr` - """ - return ast.Attribute( - value=ast.Name(id=name, ctx=ast.Load()), - attr=attr, ctx=ast.Load()) - - def gen_subscript(self, name, attr, index, offset=None): - """ - Generates an AST equivalent to a subscript, something like: - name.attr[slice(index) +/- offset] - """ - attribute = self.gen_attr(name, attr) - slise = self.gen_slice(index) - ss = ast.Subscript(value=attribute, slice=slise, ctx=ast.Load()) - if offset: - pm = ast.Add() if offset >= 0 else ast.Sub() - ss = ast.BinOp(left=ss, op=pm, right=ast.Num(n=abs(offset))) - return ss - - def gen_num(self, value): - """ - Generates an ast.Num of value `value` - """ - if abs(value) >= 0: - return ast.Num(value) - else: - return ast.UnaryOp(ast.USub(), ast.Num(-value)) - - def gen_call(self, call_name, args, kwargs=None): - """ - Generates an AST equivalent to a call, something like: - `call_name(*args, **kwargs) - """ - fixed_args = [ast.Name(id='%s' % x, ctx=ast.Load()) for x in args] - if kwargs is not None: - keywords = [ast.keyword( - arg='%s' % - x, value=ast.parse(str(x)).body[0].value) - for x in kwargs] - else: - keywords = [] - func = ast.Name(id=call_name, ctx=ast.Load()) - return ast.Expr(value=ast.Call( - func=func, args=fixed_args, - keywords=keywords, - 
starargs=None, kwargs=None), ctx=ast.Load()) - - # AST transformers - class FoldConst(ast.NodeTransformer, Builder): - """ - Folds const expr, this is so const expressions in the relidx are - more easily handled - """ - - # just support a few for testing purposes - supported_ops = { - ast.Add: operator.add, - ast.Sub: operator.sub, - ast.Mult: operator.mul, - } - - def visit_BinOp(self, node): - # does const expr folding - node = self.generic_visit(node) - - op = self.supported_ops.get(node.op.__class__) - lhs = getattr(node, 'left', None) - rhs = getattr(node, 'right', None) - - if not (lhs and rhs and op): - return node - - if (isinstance(lhs, ast.Num) and - isinstance(rhs, ast.Num)): - return ast.Num(op(node.left.n, node.right.n)) - else: - return node - - class FixRelIndex(ast.NodeTransformer, Builder): - """ Fixes the relative indexes to be written in as - induction index + relative index - """ - - def __init__(self, argnames, const_assigns, - standard_indexing, neighborhood, *args, **kwargs): - ast.NodeTransformer.__init__(self, *args, **kwargs) - pyStencilGenerator.Builder.__init__(self, *args, **kwargs) - self._argnames = argnames - self._const_assigns = const_assigns - self._idx_len = -1 - self._mins = None - self._maxes = None - self._imin = np.iinfo(int).min - self._imax = np.iinfo(int).max - self._standard_indexing = standard_indexing \ - if standard_indexing else [] - self._neighborhood = neighborhood - self._id_pat = '__%sn' if neighborhood else '__%s' - - def get_val_from_num(self, node): - """ - Gets the literal value from a Num or UnaryOp - """ - if isinstance(node, ast.Num): - return node.n - elif isinstance(node, ast.UnaryOp): - return -node.operand.n - else: - raise ValueError( - "get_val_from_num: Unknown indexing operation") - - def visit_Subscript(self, node): - """ - Transforms subscripts of the form `a[x]` and `a[x, y, z, ...]` - where `x, y, z` are relative indexes, to forms such as: - `a[x + i]` and `a[x + i, y + j, z + k]` for use in 
loop induced - indexing. - """ - - node = self.generic_visit(node) - if (node.value.id in self._argnames) and ( - node.value.id not in self._standard_indexing): - # 2D index - if isinstance(node.slice.value, ast.Tuple): - idx = [] - for x, val in enumerate(node.slice.value.elts): - useval = self._const_assigns.get(val, val) - idx.append( - ast.BinOp( - left=ast.Name( - id=self._id_pat % - self.ids[x], - ctx=ast.Load()), - op=ast.Add(), - right=useval, - ctx=ast.Load())) - if self._idx_len == -1: - self._idx_len = len(idx) - else: - if(self._idx_len != len(idx)): - raise ValueError( - "Relative indexing mismatch detected") - if isinstance(node.ctx, ast.Store): - msg = ("Assignments to array passed to " - "stencil kernels is not allowed") - raise ValueError(msg) - context = ast.Load() - newnode = ast.Subscript( - value=node.value, - slice=ast.Index( - value=ast.Tuple( - elts=idx, - ctx=ast.Load()), - ctx=ast.Load()), - ctx=context) - ast.copy_location(newnode, node) - ast.fix_missing_locations(newnode) - - # now work out max/min for index ranges i.e. 
stencil size - if self._mins is None and self._maxes is None: - # first pass - self._mins = [self._imax] * self._idx_len - self._maxes = [self._imin] * self._idx_len - - if not self._neighborhood: - for x, lnode in enumerate(node.slice.value.elts): - if isinstance(lnode, ast.Num) or\ - isinstance(lnode, ast.UnaryOp): - relvalue = self.get_val_from_num(lnode) - elif (hasattr(lnode, 'id') and - lnode.id in self._const_assigns): - relvalue = self._const_assigns[lnode.id] - else: - raise ValueError( - "Cannot interpret indexing value") - if relvalue < self._mins[x]: - self._mins[x] = relvalue - if relvalue > self._maxes[x]: - self._maxes[x] = relvalue - else: - for x, lnode in enumerate(self._neighborhood): - self._mins[x] = self._neighborhood[x][0] - self._maxes[x] = self._neighborhood[x][1] - - return newnode - # 1D index - elif isinstance(node.slice, ast.Index): - useval = self._const_assigns.get( - node.slice.value, node.slice.value) - idx = ast.BinOp(left=ast.Name( - id=self._id_pat % - self.ids[0], - ctx=ast.Load()), - op=ast.Add(), - right=useval, - ctx=ast.Load()) - if self._idx_len == -1: - self._idx_len = 1 - else: - if(self._idx_len != 1): - raise ValueError( - "Relative indexing mismatch detected") - if isinstance(node.ctx, ast.Store): - msg = ("Assignments to array passed to " - "stencil kernels is not allowed") - raise ValueError(msg) - context = ast.Load() - newnode = ast.Subscript( - value=node.value, - slice=ast.Index( - value=idx, - ctx=ast.Load()), - ctx=context) - ast.copy_location(newnode, node) - ast.fix_missing_locations(newnode) - - # now work out max/min for index ranges i.e. 
stencil size - if self._mins is None and self._maxes is None: - # first pass - self._mins = [self._imax, ] - self._maxes = [self._imin, ] - - if not self._neighborhood: - if isinstance(node.slice.value, ast.Num) or\ - isinstance(node.slice.value, ast.UnaryOp): - relvalue = self.get_val_from_num(node.slice.value) - elif (hasattr(node.slice.value, 'id') and - node.slice.value.id in self._const_assigns): - relvalue = self._const_assigns[node.slice.value.id] - else: - raise ValueError("Cannot interpret indexing value") - if relvalue < self._mins[0]: - self._mins[0] = relvalue - if relvalue > self._maxes[0]: - self._maxes[0] = relvalue - else: - self._mins[0] = self._neighborhood[0][0] - self._maxes[0] = self._neighborhood[0][1] - - return newnode - else: # unknown - raise ValueError("Unhandled subscript") - else: - return node - - @property - def idx_len(self): - if self._idx_len == -1: - raise ValueError( - 'Transform has not been run/no indexes found') - else: - return self._idx_len - - @property - def maxes(self): - return self._maxes - - @property - def mins(self): - return self._mins - - @property - def id_pattern(self): - return self._id_pat - - class TransformReturns(ast.NodeTransformer, Builder): - """ - Transforms return nodes into assignments. 
- """ - - def __init__(self, relidx_info, *args, **kwargs): - ast.NodeTransformer.__init__(self, *args, **kwargs) - pyStencilGenerator.Builder.__init__(self, *args, **kwargs) - self._relidx_info = relidx_info - self._ret_var_idx = self.varidx() - retvar = '__b%s' % self._ret_var_idx - self._retvarname = retvar - - def visit_Return(self, node): - self.generic_visit(node) - nloops = self._relidx_info.idx_len - var_pattern = self._relidx_info.id_pattern - return self.gen_assign( - self._retvarname, node.value, - [var_pattern % self.ids[l] for l in range(nloops)]) - - @property - def ret_var_name(self): - return self._retvarname - - class FixFunc(ast.NodeTransformer, Builder): - """ The main function rewriter, takes the body of the kernel and generates: - * checking function calls - * return value allocation - * loop nests - * return site - * Function definition as an entry point - """ - - def __init__(self, kprops, relidx_info, ret_info, - cval, standard_indexing, neighborhood, *args, **kwargs): - ast.NodeTransformer.__init__(self, *args, **kwargs) - pyStencilGenerator.Builder.__init__(self, *args, **kwargs) - self._original_kernel = kprops.original_kernel - self._argnames = kprops.argnames - self._retty = kprops.retty - self._relidx_info = relidx_info - self._ret_info = ret_info - self._standard_indexing = standard_indexing \ - if standard_indexing else [] - self._neighborhood = neighborhood if neighborhood else tuple() - self._relidx_args = [ - x for x in self._argnames if x not in self._standard_indexing] - # switch cval to python type - if hasattr(cval, 'dtype'): - self.cval = cval.tolist() - else: - self.cval = cval - self.stencil_arr = self._argnames[0] - - def visit_FunctionDef(self, node): - """ - Transforms the kernel function into a function that will perform - the stencil like behaviour on the kernel. 
- """ - self.generic_visit(node) - - # this function validates arguments and is injected into the top - # of the stencil call - def check_stencil_arrays(*args, **kwargs): - # the first has to be an array due to parfors requirements - neighborhood = kwargs.get('neighborhood') - init_shape = args[0].shape - if neighborhood is not None: - if len(init_shape) != len(neighborhood): - raise ValueError("Invalid neighborhood supplied") - for x in args[1:]: - if hasattr(x, 'shape'): - if init_shape != x.shape: - raise ValueError( - "Input stencil arrays do not commute") - - checksrc = inspect.getsource(check_stencil_arrays) - check_impl = ast.parse( - checksrc.strip()).body[0] # don't need module - ast.fix_missing_locations(check_impl) - - checker_call = self.gen_call( - 'check_stencil_arrays', - self._relidx_args, - kwargs=['neighborhood']) - - nloops = self._relidx_info.idx_len - - def computebound(mins, maxs): - minlim = 0 if mins >= 0 else -mins - maxlim = -maxs if maxs > 0 else 0 - return (minlim, maxlim) - - var_pattern = self._relidx_info.id_pattern - - loop_body = node.body - - # create loop nests - loop_count = 0 - for l in range(nloops): - minlim, maxlim = computebound( - self._relidx_info.mins[loop_count], - self._relidx_info.maxes[loop_count]) - minbound = minlim - maxbound = self.gen_subscript( - self.stencil_arr, 'shape', loop_count, maxlim) - loops = self.gen_loop( - var_pattern % self.ids[loop_count], - minbound, maxbound, body=loop_body) - loop_body = [loops] - loop_count += 1 - - # patch loop location - ast.copy_location(loops, node) - _rettyname = self._retty.targets[0] - - # allocate a return - retvar = self._ret_info.ret_var_name - allocate = self.gen_alloc_return( - self.stencil_arr, retvar, _rettyname, self.cval) - ast.copy_location(allocate, node) - - # generate the return - returner = self.gen_return(retvar) - ast.copy_location(returner, node) - - if _py27: - add_kwarg = [ast.Name('neighborhood', ast.Param())] - else: - add_kwarg = 
[ast.arg('neighborhood', None)] - defaults = [ast.Name(id='None', ctx=ast.Load())] - - newargs = ast.arguments( - args=node.args.args + - add_kwarg, - defaults=defaults, - vararg=None, - kwarg=None, - kwonlyargs=[], - kw_defaults=[]) - new = ast.FunctionDef( - name='__%s' % - node.name, - args=newargs, - body=[ - check_impl, - checker_call, - self._original_kernel, - self._retty, - allocate, - loops, - returner], - decorator_list=[]) - ast.copy_location(new, node) - return new - - class GetKernelProps(ast.NodeVisitor, Builder): - """ Gets the argument names and other properties - of the original kernel. - """ - - def __init__(self, *args, **kwargs): - ast.NodeVisitor.__init__(self, *args, **kwargs) - pyStencilGenerator.Builder.__init__(self, *args, **kwargs) - self._argnames = None - self._kwargnames = None - self._retty = None - self._original_kernel = None - self._const_assigns = {} - - def visit_FunctionDef(self, node): - if self._argnames is not None or self._kwargnames is not None: - raise RuntimeError("multiple definition of function/args?") - - if _py27: - attr = 'id' - else: - attr = 'arg' - - self._argnames = [getattr(x, attr) for x in node.args.args] - if node.args.kwarg: - self._kwargnames = [x.arg for x in node.args.kwarg] - compute_retdtype = self.gen_call(node.name, self._argnames) - self._retty = ast.Assign(targets=[ast.Name( - id='__retdtype', - ctx=ast.Store())], value=compute_retdtype.value) - self._original_kernel = ast.fix_missing_locations(deepcopy(node)) - self.generic_visit(node) - - def visit_Assign(self, node): - self.generic_visit(node) - tgt = node.targets - if len(tgt) == 1: - target = tgt[0] - if isinstance(target, ast.Name): - if isinstance(node.value, ast.Num): - self._const_assigns[target.id] = node.value.n - elif isinstance(node.value, ast.UnaryOp): - if isinstance(node.value, ast.UAdd): - self._const_assigns[target.id] = node.value.n - else: - self._const_assigns[target.id] = -node.value.n - - @property - def argnames(self): - """ 
- The names of the arguments to the function - """ - return self._argnames - - @property - def const_assigns(self): - """ - A map of variable name to constant for variables that are simple - constant assignments - """ - return self._const_assigns - - @property - def retty(self): - """ - The return type - """ - return self._retty - - @property - def original_kernel(self): - """ - The original unmutated kernel - """ - return self._original_kernel - - class FixCalls(ast.NodeTransformer): - """ Fixes call sites for astor (in case it is in use) """ - - def visit_Call(self, node): - self.generic_visit(node) - # Add in starargs and kwargs to calls - new = ast.Call( - func=node.func, - args=node.args, - keywords=node.keywords, - starargs=None, - kwargs=None) - return new - - def generate_stencil_tree( - self, func, cval, standard_indexing, neighborhood): - """ - Generates the AST tree for a stencil from: - func - a python stencil kernel - cval, standard_indexing and neighborhood as per the @stencil decorator - """ - src = inspect.getsource(func) - tree = ast.parse(src.strip()) - - # Prints debugging information if True. 
- # If astor is installed the decompilation of the AST is also printed - DEBUG = False - if DEBUG: - print("ORIGINAL") - print(ast.dump(tree)) - - def pipeline(tree): - """ the pipeline of manipulations """ - - # get the arg names - kernel_props = self.GetKernelProps() - kernel_props.visit(tree) - argnm = kernel_props.argnames - const_asgn = kernel_props.const_assigns - - if standard_indexing: - for x in standard_indexing: - if x not in argnm: - msg = ("Non-existent variable " - "specified in standard_indexing") - raise ValueError(msg) - - # fold consts - fold_const = self.FoldConst() - fold_const.visit(tree) - - # rewrite the relative indices as induced indices - relidx_fixer = self.FixRelIndex( - argnm, const_asgn, standard_indexing, neighborhood) - relidx_fixer.visit(tree) - - # switch returns into assigns - return_transformer = self.TransformReturns(relidx_fixer) - return_transformer.visit(tree) - - # generate the function body and loop nests and assemble - fixer = self.FixFunc( - kernel_props, - relidx_fixer, - return_transformer, - cval, - standard_indexing, - neighborhood) - fixer.visit(tree) - - # fix up the call sites so they work better with astor - callFixer = self.FixCalls() - callFixer.visit(tree) - ast.fix_missing_locations(tree.body[0]) - - # run the pipeline of transforms on the tree - pipeline(tree) - - if DEBUG: - print("\n\n\nNEW") - print(ast.dump(tree, include_attributes=True)) - try: - import astor - print(astor.to_source(tree)) - except ImportError: - pass - - return tree - - -def pyStencil(func_or_mode='constant', **options): - """ - A pure python implementation of (a large subset of) stencil functionality, - equivalent to StencilFunc. 
- """ - - if not isinstance(func_or_mode, str): - mode = 'constant' # default style - func = func_or_mode - else: - assert isinstance(func_or_mode, str), """stencil mode should be - a string""" - mode = func_or_mode - func = None - - for option in options: - if option not in ["cval", "standard_indexing", "neighborhood"]: - raise ValueError("Unknown stencil option " + option) - - if mode != 'constant': - raise ValueError("Unsupported mode style " + mode) - - cval = options.get('cval', 0) - standard_indexing = options.get('standard_indexing', None) - neighborhood = options.get('neighborhood', None) - - # generate a new AST tree from the kernel func - gen = pyStencilGenerator() - tree = gen.generate_stencil_tree(func, cval, standard_indexing, - neighborhood) - - # breathe life into the tree - mod_code = compile(tree, filename="", mode="exec") - func_code = mod_code.co_consts[0] - full_func = pytypes.FunctionType(func_code, globals()) - - return full_func - - -@skip_unsupported -class TestManyStencils(TestStencilBase): - - def __init__(self, *args, **kwargs): - super(TestManyStencils, self).__init__(*args, **kwargs) - - def check(self, pyfunc, *args, **kwargs): - """ - For a given kernel: - - The expected result is computed from a pyStencil version of the - stencil. - - The following results are then computed: - * from a pure @stencil decoration of the kernel. - * from the njit of a trivial wrapper function around the pure @stencil - decorated function. - * from the njit(parallel=True) of a trivial wrapper function around - the pure @stencil decorated function. - - The results are then compared. 
- """ - - options = kwargs.get('options', dict()) - expected_exception = kwargs.get('expected_exception') - - # DEBUG print output arrays - DEBUG_OUTPUT = False - - # collect fails - should_fail = [] - should_not_fail = [] - - # runner that handles fails - @contextmanager - def errorhandler(exty=None, usecase=None): - try: - yield - except Exception as e: - if exty is not None: - lexty = exty if hasattr(exty, '__iter__') else [exty, ] - found = False - for ex in lexty: - found |= isinstance(e, ex) - if not found: - raise - else: - should_not_fail.append( - (usecase, "%s: %s" % - (type(e), str(e)))) - else: - if exty is not None: - should_fail.append(usecase) - - if isinstance(expected_exception, dict): - pystencil_ex = expected_exception['pyStencil'] - stencil_ex = expected_exception['stencil'] - njit_ex = expected_exception['njit'] - parfor_ex = expected_exception['parfor'] - else: - pystencil_ex = expected_exception - stencil_ex = expected_exception - njit_ex = expected_exception - parfor_ex = expected_exception - - stencil_args = {'func_or_mode': pyfunc} - stencil_args.update(options) - - expected_present = True - try: - # ast impl - ast_impl = pyStencil(func_or_mode=pyfunc, **options) - expected = ast_impl( - *args, neighborhood=options.get('neighborhood')) - if DEBUG_OUTPUT: - print("\nExpected:\n", expected) - except Exception as ex: - # check exception is expected - with errorhandler(pystencil_ex, "pyStencil"): - raise ex - pyStencil_unhandled_ex = ex - expected_present = False - stencilfunc_output = None - with errorhandler(stencil_ex, "@stencil"): - stencil_func_impl = stencil(**stencil_args) - # stencil result - stencilfunc_output = stencil_func_impl(*args) - - # wrapped stencil impl, could this be generated? 
- if len(args) == 1: - def wrap_stencil(arg0): - return stencil_func_impl(arg0) - elif len(args) == 2: - def wrap_stencil(arg0, arg1): - return stencil_func_impl(arg0, arg1) - elif len(args) == 3: - def wrap_stencil(arg0, arg1, arg2): - return stencil_func_impl(arg0, arg1, arg2) - else: - raise ValueError( - "Up to 3 arguments can be provided, found %s" % - len(args)) - - sig = tuple([numba.typeof(x) for x in args]) - - njit_output = None - with errorhandler(njit_ex, "njit"): - wrapped_cfunc = self.compile_njit(wrap_stencil, sig) - # njit result - njit_output = wrapped_cfunc.entry_point(*args) - - parfor_output = None - with errorhandler(parfor_ex, "parfors"): - wrapped_cpfunc = self.compile_parallel(wrap_stencil, sig) - # parfor result - parfor_output = wrapped_cpfunc.entry_point(*args) - - if DEBUG_OUTPUT: - print("\n@stencil_output:\n", stencilfunc_output) - print("\nnjit_output:\n", njit_output) - print("\nparfor_output:\n", parfor_output) - - if expected_present: - try: - if not stencil_ex: - np.testing.assert_almost_equal( - stencilfunc_output, expected, decimal=1) - self.assertEqual(expected.dtype, stencilfunc_output.dtype) - except Exception as e: - should_not_fail.append( - ('@stencil', "%s: %s" % - (type(e), str(e)))) - print("@stencil failed: %s" % str(e)) - - try: - if not njit_ex: - np.testing.assert_almost_equal( - njit_output, expected, decimal=1) - self.assertEqual(expected.dtype, njit_output.dtype) - except Exception as e: - should_not_fail.append(('njit', "%s: %s" % (type(e), str(e)))) - print("@njit failed: %s" % str(e)) - - try: - if not parfor_ex: - np.testing.assert_almost_equal( - parfor_output, expected, decimal=1) - self.assertEqual(expected.dtype, parfor_output.dtype) - try: - self.assertIn( - '@do_scheduling', - wrapped_cpfunc.library.get_llvm_str()) - except AssertionError: - msg = 'Could not find `@do_scheduling` in LLVM IR' - raise AssertionError(msg) - except Exception as e: - should_not_fail.append( - ('parfors', "%s: %s" % - 
(type(e), str(e)))) - print("@njit(parallel=True) failed: %s" % str(e)) - - if DEBUG_OUTPUT: - print("\n\n") - - if should_fail: - msg = ["%s" % x for x in should_fail] - raise RuntimeError(("The following implementations should have " - "raised an exception but did not:\n%s") % msg) - - if should_not_fail: - impls = ["%s" % x[0] for x in should_not_fail] - errs = ''.join(["%s: Message: %s\n\n" % - x for x in should_not_fail]) - str1 = ("The following implementations should not have raised an " - "exception but did:\n%s\n" % impls) - str2 = "Errors were:\n\n%s" % errs - raise RuntimeError(str1 + str2) - - if not expected_present: - if expected_exception is None: - raise RuntimeError( - "pyStencil failed, was not caught/expected", - pyStencil_unhandled_ex) - - def exception_dict(self, **kwargs): - d = dict() - d['pyStencil'] = None - d['stencil'] = None - d['njit'] = None - d['parfor'] = None - for k, v in kwargs.items(): - d[k] = v - return d - - def test_basic00(self): - """rel index""" - def kernel(a): - return a[0, 0] - a = np.arange(12).reshape(3, 4) - self.check(kernel, a) - - def test_basic01(self): - """rel index add const""" - def kernel(a): - return a[0, 1] - a = np.arange(12.).reshape(3, 4) - self.check(kernel, a) - - def test_basic02(self): - """rel index add const""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[0, -1] - self.check(kernel, a) - - def test_basic03(self): - """rel index add const""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[1, 0] - self.check(kernel, a) - - def test_basic04(self): - """rel index add const""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[-1, 0] - self.check(kernel, a) - - def test_basic05(self): - """rel index add const""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[-1, 1] - self.check(kernel, a) - - def test_basic06(self): - """rel index add const""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[1, -1] - self.check(kernel, 
a) - - def test_basic07(self): - """rel index add const""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[1, 1] - self.check(kernel, a) - - def test_basic08(self): - """rel index add const""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[-1, -1] - self.check(kernel, a) - - def test_basic09(self): - """rel index add const""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[-2, 2] - self.check(kernel, a) - - def test_basic10(self): - """rel index add const""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[0, 0] + a[1, 0] - self.check(kernel, a) - - def test_basic11(self): - """rel index add const""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[-1, 0] + a[1, 0] - self.check(kernel, a) - - def test_basic12(self): - """rel index add const""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[-1, 1] + a[1, -1] - self.check(kernel, a) - - def test_basic13(self): - """rel index add const""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[-1, -1] + a[1, 1] - self.check(kernel, a) - - def test_basic14(self): - """rel index add domain change const""" - a = np.arange(12).reshape(3, 4) - - def kernel(a): - return a[0, 0] + 1j - self.check(kernel, a) - - def test_basic14b(self): - """rel index add domain change const""" - a = np.arange(12).reshape(3, 4) - - def kernel(a): - t = 1.j - return a[0, 0] + t - self.check(kernel, a) - - def test_basic15(self): - """two rel index, add const""" - a = np.arange(12).reshape(3, 4) - - def kernel(a): - return a[0, 0] + a[1, 0] + 1. - self.check(kernel, a) - - def test_basic16(self): - """two rel index OOB, add const""" - a = np.arange(12).reshape(3, 4) - - def kernel(a): - return a[0, 0] + a[10, 0] + 1. 
- - # only pyStencil bounds checks - ex = self.exception_dict(pyStencil=IndexError) - self.check(kernel, a, expected_exception=ex) - - def test_basic17(self): - """two rel index boundary test, add const""" - a = np.arange(12).reshape(3, 4) - - def kernel(a): - return a[0, 0] + a[2, 0] + 1. - self.check(kernel, a) - - def test_basic18(self): - """two rel index boundary test, add const""" - a = np.arange(12).reshape(3, 4) - - def kernel(a): - return a[0, 0] + a[-2, 0] + 1. - self.check(kernel, a) - - def test_basic19(self): - """two rel index boundary test, add const""" - a = np.arange(12).reshape(3, 4) - - def kernel(a): - return a[0, 0] + a[0, 3] + 1. - self.check(kernel, a) - - def test_basic20(self): - """two rel index boundary test, add const""" - a = np.arange(12).reshape(3, 4) - - def kernel(a): - return a[0, 0] + a[0, -3] + 1. - self.check(kernel, a) - - def test_basic21(self): - """same rel, add const""" - a = np.arange(12).reshape(3, 4) - - def kernel(a): - return a[0, 0] + a[0, 0] + 1. - self.check(kernel, a) - - def test_basic22(self): - """rel idx const expr folding, add const""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[1 + 0, 0] + a[0, 0] + 1. - self.check(kernel, a) - - def test_basic23(self): - """rel idx, work in body""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - x = np.sin(10 + a[2, 1]) - return a[1 + 0, 0] + a[0, 0] + x - self.check(kernel, a) - - def test_basic23a(self): - """rel idx, dead code should not impact rel idx""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - x = np.sin(10 + a[2, 1]) - return a[1 + 0, 0] + a[0, 0] - self.check(kernel, a) - - def test_basic24(self): - """1d idx on 2d arr""" - a = np.arange(12).reshape(3, 4) - - def kernel(a): - return a[0] + 1. - self.check(kernel, a, expected_exception=[ValueError, TypingError]) - - def test_basic25(self): - """no idx on 2d arr""" - a = np.arange(12).reshape(3, 4) - - def kernel(a): - return 1. 
- self.check(kernel, a, expected_exception=[ValueError, LoweringError]) - - def test_basic26(self): - """3d arr""" - a = np.arange(64).reshape(4, 8, 2) - - def kernel(a): - return a[0, 0, 0] - a[0, 1, 0] + 1. - self.check(kernel, a) - - def test_basic27(self): - """4d arr""" - a = np.arange(128).reshape(4, 8, 2, 2) - - def kernel(a): - return a[0, 0, 0, 0] - a[0, 1, 0, -1] + 1. - self.check(kernel, a) - - def test_basic28(self): - """type widen """ - a = np.arange(12).reshape(3, 4).astype(np.float32) - - def kernel(a): - return a[0, 0] + np.float64(10.) - self.check(kernel, a) - - def test_basic29(self): - """const index from func """ - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[0, int(np.cos(0))] - self.check(kernel, a, expected_exception=[ValueError, LoweringError]) - - def test_basic30(self): - """signed zeros""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[-0, -0] - self.check(kernel, a) - - def test_basic31(self): - """does a const propagate? 2D""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - t = 1 - return a[t, 0] - self.check(kernel, a) - - @unittest.skip("constant folding not implemented") - def test_basic31b(self): - """does a const propagate?""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - s = 1 - t = 1 - s - return a[t, 0] - self.check(kernel, a) - - def test_basic31c(self): - """does a const propagate? 1D""" - a = np.arange(12.) - - def kernel(a): - t = 1 - return a[t] - self.check(kernel, a) - - def test_basic32(self): - """typed int index""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[np.int8(1), 0] - self.check(kernel, a, expected_exception=[ValueError, LoweringError]) - - def test_basic33(self): - """add 0d array""" - a = np.arange(12.).reshape(3, 4) - - def kernel(a): - return a[0, 0] + np.array(1) - self.check(kernel, a) - - def test_basic34(self): - """More complex rel index with dependency on addition rel index""" - def kernel(a): - g = 4. 
+ a[0, 1] - return g + (a[0, 1] + a[1, 0] + a[0, -1] + np.sin(a[-2, 0])) - a = np.arange(144).reshape(12, 12) - self.check(kernel, a) - - def test_basic35(self): - """simple cval """ - def kernel(a): - return a[0, 1] - a = np.arange(12.).reshape(3, 4) - ex = self.exception_dict( - stencil=ValueError, - parfor=ValueError, - njit=LoweringError) - self.check(kernel, a, options={'cval': 5}, expected_exception=ex) - - def test_basic36(self): - """more complex with cval""" - def kernel(a): - return a[0, 1] + a[0, -1] + a[1, -1] + a[1, -1] - a = np.arange(12.).reshape(3, 4) - self.check(kernel, a, options={'cval': 5.}) - - def test_basic37(self): - """cval is expr""" - def kernel(a): - return a[0, 1] + a[0, -1] + a[1, -1] + a[1, -1] - a = np.arange(12.).reshape(3, 4) - self.check(kernel, a, options={'cval': 5 + 63.}) - - def test_basic38(self): - """cval is complex""" - def kernel(a): - return a[0, 1] + a[0, -1] + a[1, -1] + a[1, -1] - a = np.arange(12.).reshape(3, 4) - ex = self.exception_dict( - stencil=ValueError, - parfor=ValueError, - njit=LoweringError) - self.check(kernel, a, options={'cval': 1.j}, expected_exception=ex) - - def test_basic39(self): - """cval is func expr""" - def kernel(a): - return a[0, 1] + a[0, -1] + a[1, -1] + a[1, -1] - a = np.arange(12.).reshape(3, 4) - self.check(kernel, a, options={'cval': np.sin(3.) + np.cos(2)}) - - def test_basic40(self): - """2 args!""" - def kernel(a, b): - return a[0, 1] + b[0, -2] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - self.check(kernel, a, b) - - def test_basic41(self): - """2 args! rel arrays wildly not same size!""" - def kernel(a, b): - return a[0, 1] + b[0, -2] - a = np.arange(12.).reshape(3, 4) - b = np.arange(1.).reshape(1, 1) - self.check( - kernel, a, b, expected_exception=[ - ValueError, AssertionError]) - - def test_basic42(self): - """2 args! 
rel arrays very close in size""" - def kernel(a, b): - return a[0, 1] + b[0, -2] - a = np.arange(12.).reshape(3, 4) - b = np.arange(9.).reshape(3, 3) - self.check( - kernel, a, b, expected_exception=[ - ValueError, AssertionError]) - - def test_basic43(self): - """2 args more complexity""" - def kernel(a, b): - return a[0, 1] + a[1, 2] + b[-2, 0] + b[0, -1] - a = np.arange(30.).reshape(5, 6) - b = np.arange(30.).reshape(5, 6) - self.check(kernel, a, b) - - def test_basic44(self): - """2 args, has assignment before use""" - def kernel(a, b): - a[0, 1] = 12 - return a[0, 1] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - self.check( - kernel, a, b, expected_exception=[ - ValueError, LoweringError]) - - def test_basic45(self): - """2 args, has assignment and then cross dependency""" - def kernel(a, b): - a[0, 1] = 12 - return a[0, 1] + a[1, 0] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - self.check( - kernel, a, b, expected_exception=[ - ValueError, LoweringError]) - - def test_basic46(self): - """2 args, has cross relidx assignment""" - def kernel(a, b): - a[0, 1] = b[1, 2] - return a[0, 1] + a[1, 0] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - self.check( - kernel, a, b, expected_exception=[ - ValueError, LoweringError]) - - def test_basic47(self): - """3 args""" - def kernel(a, b, c): - return a[0, 1] + b[1, 0] + c[-1, 0] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - c = np.arange(12.).reshape(3, 4) - self.check(kernel, a, b, c) - - # matches pyStencil, but all ought to fail - # probably hard to detect? 
- def test_basic48(self): - """2 args, has assignment before use via memory alias""" - def kernel(a): - c = a.T - c[:, :] = 10 - return a[0, 1] - a = np.arange(12.).reshape(3, 4) - self.check(kernel, a) - - def test_basic49(self): - """2 args, standard_indexing on second""" - def kernel(a, b): - return a[0, 1] + b[0, 3] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - self.check(kernel, a, b, options={'standard_indexing': 'b'}) - - @unittest.skip("dynamic range checking not implemented") - def test_basic50(self): - """2 args, standard_indexing OOB""" - def kernel(a, b): - return a[0, 1] + b[0, 15] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - self.check( - kernel, - a, - b, - options={ - 'standard_indexing': 'b'}, - expected_exception=IndexError) - - def test_basic51(self): - """2 args, standard_indexing, no relidx""" - def kernel(a, b): - return a[0, 1] + b[0, 2] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - self.check( - kernel, a, b, options={ - 'standard_indexing': [ - 'a', 'b']}, expected_exception=[ - ValueError, LoweringError]) - - def test_basic52(self): - """3 args, standard_indexing on middle arg """ - def kernel(a, b, c): - return a[0, 1] + b[0, 1] + c[1, 2] - a = np.arange(12.).reshape(3, 4) - b = np.arange(4.).reshape(2, 2) - c = np.arange(12.).reshape(3, 4) - self.check(kernel, a, b, c, options={'standard_indexing': 'b'}) - - def test_basic53(self): - """2 args, standard_indexing on variable that does not exist""" - def kernel(a, b): - return a[0, 1] + b[0, 2] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - ex = self.exception_dict( - pyStencil=ValueError, - stencil=Exception, - parfor=ValueError, - njit=Exception) - self.check( - kernel, - a, - b, - options={ - 'standard_indexing': 'c'}, - expected_exception=ex) - - def test_basic54(self): - """2 args, standard_indexing, index from var""" - def kernel(a, b): - t = 2 - return a[0, 1] + b[0, t] - a = 
np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - self.check(kernel, a, b, options={'standard_indexing': 'b'}) - - def test_basic55(self): - """2 args, standard_indexing, index from more complex var""" - def kernel(a, b): - s = 1 - t = 2 - s - return a[0, 1] + b[0, t] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - self.check(kernel, a, b, options={'standard_indexing': 'b'}) - - def test_basic56(self): - """2 args, standard_indexing, added complexity """ - def kernel(a, b): - s = 1 - acc = 0 - for k in b[0, :]: - acc += k - t = 2 - s - 1 - return a[0, 1] + b[0, t] + acc - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - self.check(kernel, a, b, options={'standard_indexing': 'b'}) - - def test_basic57(self): - """2 args, standard_indexing, split index operation """ - def kernel(a, b): - c = b[0] - return a[0, 1] + c[1] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - self.check(kernel, a, b, options={'standard_indexing': 'b'}) - - def test_basic58(self): - """2 args, standard_indexing, split index with broadcast mutation """ - def kernel(a, b): - c = b[0] + 1 - return a[0, 1] + c[1] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - self.check(kernel, a, b, options={'standard_indexing': 'b'}) - - def test_basic59(self): - """3 args, mix of array, relative and standard indexing and const""" - def kernel(a, b, c): - return a[0, 1] + b[1, 1] + c - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - c = 10 - self.check(kernel, a, b, c, options={'standard_indexing': ['b', 'c']}) - - def test_basic60(self): - """3 args, mix of array, relative and standard indexing, - tuple pass through""" - def kernel(a, b, c): - return a[0, 1] + b[1, 1] + c[0] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - c = (10,) - # parfors does not support tuple args for stencil kernels - ex = self.exception_dict(parfor=ValueError) - self.check( - 
kernel, a, b, c, options={ - 'standard_indexing': [ - 'b', 'c']}, expected_exception=ex) - - def test_basic61(self): - """2 args, standard_indexing on first""" - def kernel(a, b): - return a[0, 1] + b[1, 1] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - self.check( - kernel, - a, - b, - options={ - 'standard_indexing': 'a'}, - expected_exception=Exception) - - def test_basic62(self): - """2 args, standard_indexing and cval""" - def kernel(a, b): - return a[0, 1] + b[1, 1] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12.).reshape(3, 4) - self.check( - kernel, - a, - b, - options={ - 'standard_indexing': 'b', - 'cval': 10.}) - - def test_basic63(self): - """2 args, standard_indexing applied to relative, should fail, - non-const idx""" - def kernel(a, b): - return a[0, b[0, 1]] - a = np.arange(12.).reshape(3, 4) - b = np.arange(12).reshape(3, 4) - ex = self.exception_dict( - pyStencil=ValueError, - stencil=ValueError, - parfor=ValueError, - njit=LoweringError) - self.check( - kernel, - a, - b, - options={ - 'standard_indexing': 'b'}, - expected_exception=ex) - - # stencil, njit, parfors all fail. Does this make sense? - def test_basic64(self): - """1 arg that uses standard_indexing""" - def kernel(a): - return a[0, 0] - a = np.arange(12.).reshape(3, 4) - self.check( - kernel, - a, - options={ - 'standard_indexing': 'a'}, - expected_exception=[ - ValueError, - LoweringError]) - - def test_basic65(self): - """basic induced neighborhood test""" - def kernel(a): - cumul = 0 - for i in range(-29, 1): - cumul += a[i] - return cumul / 30 - a = np.arange(60.) - self.check(kernel, a, options={'neighborhood': ((-29, 0),)}) - - # Should this work? a[0] is out of neighborhood? - def test_basic66(self): - """basic const neighborhood test""" - def kernel(a): - cumul = 0 - for i in range(-29, 1): - cumul += a[0] - return cumul / 30 - a = np.arange(60.) 
- self.check(kernel, a, options={'neighborhood': ((-29, 0),)}) - - def test_basic67(self): - """basic 2d induced neighborhood test""" - def kernel(a): - cumul = 0 - for i in range(-5, 1): - for j in range(-10, 1): - cumul += a[i, j] - return cumul / (10 * 5) - a = np.arange(10. * 20.).reshape(10, 20) - self.check(kernel, a, options={'neighborhood': ((-5, 0), (-10, 0),)}) - - def test_basic67b(self): - """basic 2d induced 1D neighborhood""" - def kernel(a): - cumul = 0 - for j in range(-10, 1): - cumul += a[0, j] - return cumul / (10 * 5) - a = np.arange(10. * 20.).reshape(10, 20) - self.check( - kernel, - a, - options={ - 'neighborhood': ( - (-10, - 0), - )}, - expected_exception=[ - TypingError, - ValueError]) - - # Should this work or is it UB? a[i, 0] is out of neighborhood? - def test_basic68(self): - """basic 2d one induced, one cost neighborhood test""" - def kernel(a): - cumul = 0 - for i in range(-5, 1): - for j in range(-10, 1): - cumul += a[i, 0] - return cumul / (10 * 5) - a = np.arange(10. * 20.).reshape(10, 20) - self.check(kernel, a, options={'neighborhood': ((-5, 0), (-10, 0),)}) - - # Should this work or is it UB? a[0, 0] is out of neighborhood? - def test_basic69(self): - """basic 2d two cost neighborhood test""" - def kernel(a): - cumul = 0 - for i in range(-5, 1): - for j in range(-10, 1): - cumul += a[0, 0] - return cumul / (10 * 5) - a = np.arange(10. * 20.).reshape(10, 20) - self.check(kernel, a, options={'neighborhood': ((-5, 0), (-10, 0),)}) - - def test_basic70(self): - """neighborhood adding complexity""" - def kernel(a): - cumul = 0 - zz = 12. - for i in range(-5, 1): - t = zz + i - for j in range(-10, 1): - cumul += a[i, j] + t - return cumul / (10 * 5) - a = np.arange(10. * 20.).reshape(10, 20) - self.check(kernel, a, options={'neighborhood': ((-5, 0), (-10, 0),)}) - - def test_basic71(self): - """neighborhood, type change""" - def kernel(a): - cumul = 0 - for i in range(-29, 1): - k = 0. 
- if i > -15: - k = 1j - cumul += a[i] + k - return cumul / 30 - a = np.arange(60.) - self.check(kernel, a, options={'neighborhood': ((-29, 0),)}) - - def test_basic72(self): - """neighborhood, narrower range than specified""" - def kernel(a): - cumul = 0 - for i in range(-19, -3): - cumul += a[i] - return cumul / 30 - a = np.arange(60.) - self.check(kernel, a, options={'neighborhood': ((-29, 0),)}) - - def test_basic73(self): - """neighborhood, +ve range""" - def kernel(a): - cumul = 0 - for i in range(5, 11): - cumul += a[i] - return cumul / 30 - a = np.arange(60.) - self.check(kernel, a, options={'neighborhood': ((5, 10),)}) - - def test_basic73b(self): - """neighborhood, -ve range""" - def kernel(a): - cumul = 0 - for i in range(-10, -4): - cumul += a[i] - return cumul / 30 - a = np.arange(60.) - self.check(kernel, a, options={'neighborhood': ((-10, -5),)}) - - def test_basic74(self): - """neighborhood, -ve->+ve range span""" - def kernel(a): - cumul = 0 - for i in range(-5, 11): - cumul += a[i] - return cumul / 30 - a = np.arange(60.) - self.check(kernel, a, options={'neighborhood': ((-5, 10),)}) - - def test_basic75(self): - """neighborhood, -ve->-ve range span""" - def kernel(a): - cumul = 0 - for i in range(-10, -1): - cumul += a[i] - return cumul / 30 - a = np.arange(60.) - self.check(kernel, a, options={'neighborhood': ((-10, -2),)}) - - def test_basic76(self): - """neighborhood, mixed range span""" - def kernel(a): - cumul = 0 - zz = 12. - for i in range(-3, 0): - t = zz + i - for j in range(-3, 4): - cumul += a[i, j] + t - return cumul / (10 * 5) - a = np.arange(10. * 20.).reshape(10, 20) - self.check(kernel, a, options={'neighborhood': ((-3, -1), (-3, 3),)}) - - def test_basic77(self): - """ neighborhood, two args """ - def kernel(a, b): - cumul = 0 - for i in range(-3, 1): - for j in range(-3, 1): - cumul += a[i, j] + b[i, j] - return cumul / (9.) - a = np.arange(10. * 20.).reshape(10, 20) - b = np.arange(10. 
* 20.).reshape(10, 20) - self.check(kernel, a, b, options={'neighborhood': ((-3, 0), (-3, 0),)}) - - def test_basic78(self): - """ neighborhood, two args, -ve range, -ve range """ - def kernel(a, b): - cumul = 0 - for i in range(-6, -2): - for j in range(-7, -1): - cumul += a[i, j] + b[i, j] - return cumul / (9.) - a = np.arange(15. * 20.).reshape(15, 20) - b = np.arange(15. * 20.).reshape(15, 20) - self.check( - kernel, a, b, options={ - 'neighborhood': ( - (-6, -3), (-7, -2),)}) - - def test_basic78b(self): - """ neighborhood, two args, -ve range, +ve range """ - def kernel(a, b): - cumul = 0 - for i in range(-6, -2): - for j in range(2, 10): - cumul += a[i, j] + b[i, j] - return cumul / (9.) - a = np.arange(15. * 20.).reshape(15, 20) - b = np.arange(15. * 20.).reshape(15, 20) - self.check(kernel, a, b, options={'neighborhood': ((-6, -3), (2, 9),)}) - - def test_basic79(self): - """ neighborhood, two incompatible args """ - def kernel(a, b): - cumul = 0 - for i in range(-3, 1): - for j in range(-3, 1): - cumul += a[i, j] + b[i, j] - return cumul / (9.) - a = np.arange(10. * 20.).reshape(10, 20) - b = np.arange(10. * 20.).reshape(10, 10, 2) - ex = self.exception_dict( - pyStencil=ValueError, - stencil=TypingError, - parfor=TypingError, - njit=TypingError) - self.check( - kernel, a, b, options={ - 'neighborhood': ( - (-3, 0), (-3, 0),)}, expected_exception=ex) - - def test_basic80(self): - """ neighborhood, type change """ - def kernel(a, b): - cumul = 0 - for i in range(-3, 1): - for j in range(-3, 1): - cumul += a[i, j] + b - return cumul / (9.) - a = np.arange(10. * 20.).reshape(10, 20) - b = 12.j - self.check(kernel, a, b, options={'neighborhood': ((-3, 0), (-3, 0))}) - - def test_basic81(self): - """ neighborhood, dimensionally incompatible arrays """ - def kernel(a, b): - cumul = 0 - for i in range(-3, 1): - for j in range(-3, 1): - cumul += a[i, j] + b[i] - return cumul / (9.) - a = np.arange(10. 
* 20.).reshape(10, 20) - b = a[0].copy() - ex = self.exception_dict( - pyStencil=ValueError, - stencil=TypingError, - parfor=AssertionError, - njit=TypingError) - self.check( - kernel, a, b, options={ - 'neighborhood': ( - (-3, 0), (-3, 0))}, expected_exception=ex) - - def test_basic82(self): - """ neighborhood, with standard_indexing""" - def kernel(a, b): - cumul = 0 - for i in range(-3, 1): - for j in range(-3, 1): - cumul += a[i, j] + b[1, 3] - return cumul / (9.) - a = np.arange(10. * 20.).reshape(10, 20) - b = a.copy() - self.check( - kernel, a, b, options={ - 'neighborhood': ( - (-3, 0), (-3, 0)), 'standard_indexing': 'b'}) - - def test_basic83(self): - """ neighborhood, with standard_indexing and cval""" - def kernel(a, b): - cumul = 0 - for i in range(-3, 1): - for j in range(-3, 1): - cumul += a[i, j] + b[1, 3] - return cumul / (9.) - a = np.arange(10. * 20.).reshape(10, 20) - b = a.copy() - self.check( - kernel, a, b, options={ - 'neighborhood': ( - (-3, 0), (-3, 0)), 'standard_indexing': 'b', 'cval': 1.5}) - - def test_basic84(self): - """ kernel calls njit """ - def kernel(a): - return a[0, 0] + addone_njit(a[0, 1]) - - a = np.arange(10. * 20.).reshape(10, 20) - self.check(kernel, a) - - def test_basic85(self): - """ kernel calls njit(parallel=True)""" - def kernel(a): - return a[0, 0] + addone_pjit(a[0, 1]) - - a = np.arange(10. * 20.).reshape(10, 20) - self.check(kernel, a) - - # njit/parfors fail correctly, but the error message isn't very informative - def test_basic86(self): - """ bad kwarg """ - def kernel(a): - return a[0, 0] - - a = np.arange(10. * 20.).reshape(10, 20) - self.check(kernel, a, options={'bad': 10}, - expected_exception=[ValueError, TypingError]) - - def test_basic87(self): - """ reserved arg name in use """ - def kernel(__sentinel__): - return __sentinel__[0, 0] - - a = np.arange(10. 
* 20.).reshape(10, 20) - self.check(kernel, a) - - def test_basic88(self): - """ use of reserved word """ - def kernel(a, out): - return out * a[0, 1] - a = np.arange(12.).reshape(3, 4) - ex = self.exception_dict( - pyStencil=ValueError, - stencil=ValueError, - parfor=ValueError, - njit=LoweringError) - self.check( - kernel, - a, - 1.0, - options={}, - expected_exception=ex) - - def test_basic89(self): - """ basic multiple return""" - def kernel(a): - if a[0, 1] > 10: - return 10. - elif a[0, 3] < 8: - return a[0, 0] - else: - return 7. - - a = np.arange(10. * 20.).reshape(10, 20) - self.check(kernel, a) - - def test_basic90(self): - """ neighborhood, with standard_indexing and cval, multiple returns""" - def kernel(a, b): - cumul = 0 - for i in range(-3, 1): - for j in range(-3, 1): - cumul += a[i, j] + b[1, 3] - res = cumul / (9.) - if res > 200.0: - return res + 1.0 - else: - return res - a = np.arange(10. * 20.).reshape(10, 20) - b = a.copy() - self.check( - kernel, a, b, options={ - 'neighborhood': ( - (-3, 0), (-3, 0)), 'standard_indexing': 'b', 'cval': 1.5}) - - -if __name__ == "__main__": - unittest.main() diff --git a/numba/numba/tests/test_storeslice.py b/numba/numba/tests/test_storeslice.py deleted file mode 100644 index e03fd6954..000000000 --- a/numba/numba/tests/test_storeslice.py +++ /dev/null @@ -1,73 +0,0 @@ -from __future__ import print_function - -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import types, errors -from .support import TestCase - - -def setitem_slice(a, start, stop, step, scalar): - a[start:stop:step] = scalar - - -def usecase(obs, nPoints): - center = nPoints // 2 - obs[0:center] = np.arange(center) - obs[center] = 321 - obs[(center + 1):] = np.arange(nPoints - center - 1) - - -class TestStoreSlice(TestCase): - - def test_usecase(self): - n = 10 - obs_got = np.zeros(n) - obs_expected = obs_got.copy() - - flags = Flags() - flags.set("nrt") - cres = 
compile_isolated(usecase, (types.float64[:], types.intp), - flags=flags) - cres.entry_point(obs_got, n) - usecase(obs_expected, n) - - self.assertPreciseEqual(obs_got, obs_expected) - - def test_array_slice_setitem(self): - n = 10 - argtys = (types.int64[:], types.int64, types.int64, types.int64, - types.int64) - cres = compile_isolated(setitem_slice, argtys) - a = np.arange(n, dtype=np.int64) - # tuple is (start, stop, step, scalar) - tests = ((2, 6, 1, 7), - (2, 6, -1, 7), - (-2, len(a), 2, 77), - (-2, 2 * len(a), 2, 77), - (-2, -6, 3, 88), - (-2, -6, -3, 9999), - (-6, -2, 4, 88), - (-6, -2, -4, 88), - (16, 20, 2, 88), - (16, 20, -2, 88), - ) - - for start, stop, step, scalar in tests: - a = np.arange(n, dtype=np.int64) - b = np.arange(n, dtype=np.int64) - cres.entry_point(a, start, stop, step, scalar) - setitem_slice(b, start, stop, step, scalar) - self.assertPreciseEqual(a, b) - - # test if step = 0 - a = np.arange(n, dtype=np.int64) - with self.assertRaises(ValueError) as cm: - cres.entry_point(a, 3, 6, 0, 88) - self.assertEqual(str(cm.exception), "slice step cannot be zero") - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/tests/test_support.py b/numba/numba/tests/test_support.py deleted file mode 100644 index 6a20f3e67..000000000 --- a/numba/numba/tests/test_support.py +++ /dev/null @@ -1,349 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import itertools - -import numpy as np - -from numba import jit, utils -from numba import unittest_support as unittest -from .support import TestCase, forbid_codegen -from .enum_usecases import * - -DBL_EPSILON = 2**-52 -FLT_EPSILON = 2**-23 - -INF = float('inf') -NAN = float('nan') - - -class TestAssertPreciseEqual(TestCase): - """ - Tests for TestCase.assertPreciseEqual(). 
- """ - - int_types = [int] - if utils.PYVERSION < (3,): - int_types.append(long) - np_float_types = [np.float32, np.float64] - float_types = [float] + np_float_types - np_complex_types = [np.complex64, np.complex128] - complex_types = [complex] + np_complex_types - bool_types = [bool, np.bool_] - - def eq(self, left, right, **kwargs): - def assert_succeed(left, right): - self.assertPreciseEqual(left, right, **kwargs) - self.assertPreciseEqual(right, left, **kwargs) - assert_succeed(left, right) - assert_succeed((left, left), (right, right)) - assert_succeed([left, left], [right, right]) - - def ne(self, left, right, **kwargs): - def assert_fail(left, right): - try: - self.assertPreciseEqual(left, right, **kwargs) - except AssertionError: - pass - else: - self.fail("%s and %s unexpectedly considered equal" % (left, right)) - assert_fail(left, right) - assert_fail(right, left) - assert_fail((left, left), (right, right)) - assert_fail((right, right), (left, left)) - assert_fail([left, left], [right, right]) - assert_fail([right, right], [left, left]) - - def test_types(self): - # assertPreciseEqual() should test for type compatibility - # int-like, float-like, complex-like are not compatible - for i, f, c in itertools.product(self.int_types, self.float_types, - self.complex_types): - self.ne(i(1), f(1)) - self.ne(f(1), c(1)) - self.ne(i(1), c(1)) - # int and long are compatible between each other - for u, v in itertools.product(self.int_types, self.int_types): - self.eq(u(1), v(1)) - # int and bool are not compatible between each other - for u, v in itertools.product(self.int_types, self.bool_types): - self.ne(u(1), v(1)) - # NumPy float types are not compatible between each other - for u, v in itertools.product(self.np_float_types, self.np_float_types): - if u is v: - self.eq(u(1), v(1)) - else: - self.ne(u(1), v(1)) - # NumPy complex types are not compatible between each other - for u, v in itertools.product(self.np_complex_types, self.np_complex_types): - if u is 
v: - self.eq(u(1), v(1)) - else: - self.ne(u(1), v(1)) - - def test_int_values(self): - for tp in self.int_types: - for prec in ['exact', 'single', 'double']: - self.eq(tp(0), tp(0), prec=prec) - self.ne(tp(0), tp(1), prec=prec) - self.ne(tp(-1), tp(1), prec=prec) - self.ne(tp(2**80), tp(1+2**80), prec=prec) - - def test_bool_values(self): - for tpa, tpb in itertools.product(self.bool_types, self.bool_types): - self.eq(tpa(True), tpb(True)) - self.eq(tpa(False), tpb(False)) - self.ne(tpa(True), tpb(False)) - - def test_abs_tol_parse(self): - # check invalid values in abs_tol kwarg raises - with self.assertRaises(ValueError): - self.eq(np.float64(1e-17), np.float64(1e-17), abs_tol="invalid") - with self.assertRaises(ValueError): - self.eq(np.float64(1), np.float64(2), abs_tol=int(7)) - - def test_float_values(self): - for tp in self.float_types: - for prec in ['exact', 'single', 'double']: - self.eq(tp(1.5), tp(1.5), prec=prec) - # Signed zeros - self.eq(tp(0.0), tp(0.0), prec=prec) - self.eq(tp(-0.0), tp(-0.0), prec=prec) - self.ne(tp(0.0), tp(-0.0), prec=prec) - self.eq(tp(0.0), tp(-0.0), prec=prec, ignore_sign_on_zero=True) - # Infinities - self.eq(tp(INF), tp(INF), prec=prec) - self.ne(tp(INF), tp(1e38), prec=prec) - self.eq(tp(-INF), tp(-INF), prec=prec) - self.ne(tp(INF), tp(-INF), prec=prec) - # NaNs - self.eq(tp(NAN), tp(NAN), prec=prec) - self.ne(tp(NAN), tp(0), prec=prec) - self.ne(tp(NAN), tp(INF), prec=prec) - self.ne(tp(NAN), tp(-INF), prec=prec) - - def test_float64_values(self): - for tp in [float, np.float64]: - self.ne(tp(1.0 + DBL_EPSILON), tp(1.0)) - - def test_float32_values(self): - tp = np.float32 - self.ne(tp(1.0 + FLT_EPSILON), tp(1.0)) - - def test_float64_values_inexact(self): - for tp in [float, np.float64]: - for scale in [1.0, -2**3, 2**-4, -2**-20]: - a = scale * 1.0 - b = scale * (1.0 + DBL_EPSILON) - c = scale * (1.0 + DBL_EPSILON * 2) - d = scale * (1.0 + DBL_EPSILON * 4) - self.ne(tp(a), tp(b)) - self.ne(tp(a), tp(b), prec='exact') 
- self.eq(tp(a), tp(b), prec='double') - self.eq(tp(a), tp(b), prec='double', ulps=1) - self.ne(tp(a), tp(c), prec='double') - self.eq(tp(a), tp(c), prec='double', ulps=2) - self.ne(tp(a), tp(d), prec='double', ulps=2) - self.eq(tp(a), tp(c), prec='double', ulps=3) - self.eq(tp(a), tp(d), prec='double', ulps=3) - # test absolute tolerance based on eps - self.eq(tp(1e-16), tp(3e-16), prec='double', abs_tol="eps") - self.ne(tp(1e-16), tp(4e-16), prec='double', abs_tol="eps") - # test absolute tolerance based on value - self.eq(tp(1e-17), tp(1e-18), prec='double', abs_tol=1e-17) - self.ne(tp(1e-17), tp(3e-17), prec='double', abs_tol=1e-17) - - def test_float32_values_inexact(self): - tp = np.float32 - for scale in [1.0, -2**3, 2**-4, -2**-20]: - # About the choice of 0.9: there seem to be issues when - # converting - a = scale * 1.0 - b = scale * (1.0 + FLT_EPSILON) - c = scale * (1.0 + FLT_EPSILON * 2) - d = scale * (1.0 + FLT_EPSILON * 4) - self.ne(tp(a), tp(b)) - self.ne(tp(a), tp(b), prec='exact') - self.ne(tp(a), tp(b), prec='double') - self.eq(tp(a), tp(b), prec='single') - self.ne(tp(a), tp(c), prec='single') - self.eq(tp(a), tp(c), prec='single', ulps=2) - self.ne(tp(a), tp(d), prec='single', ulps=2) - self.eq(tp(a), tp(c), prec='single', ulps=3) - self.eq(tp(a), tp(d), prec='single', ulps=3) - # test absolute tolerance based on eps - self.eq(tp(1e-7), tp(2e-7), prec='single', abs_tol="eps") - self.ne(tp(1e-7), tp(3e-7), prec='single', abs_tol="eps") - # test absolute tolerance based on value - self.eq(tp(1e-7), tp(1e-8), prec='single', abs_tol=1e-7) - self.ne(tp(1e-7), tp(3e-7), prec='single', abs_tol=1e-7) - - def test_complex_values(self): - # Complex literals with signed zeros are confusing, better use - # the explicit constructor. 
- c_pp, c_pn, c_np, c_nn = [complex(0.0, 0.0), complex(0.0, -0.0), - complex(-0.0, 0.0), complex(-0.0, -0.0)] - for tp in self.complex_types: - for prec in ['exact', 'single', 'double']: - self.eq(tp(1 + 2j), tp(1 + 2j), prec=prec) - self.ne(tp(1 + 1j), tp(1 + 2j), prec=prec) - self.ne(tp(2 + 2j), tp(1 + 2j), prec=prec) - # Signed zeros - self.eq(tp(c_pp), tp(c_pp), prec=prec) - self.eq(tp(c_np), tp(c_np), prec=prec) - self.eq(tp(c_nn), tp(c_nn), prec=prec) - self.ne(tp(c_pp), tp(c_pn), prec=prec) - self.ne(tp(c_pn), tp(c_nn), prec=prec) - # Infinities - self.eq(tp(complex(INF, INF)), tp(complex(INF, INF)), prec=prec) - self.eq(tp(complex(INF, -INF)), tp(complex(INF, -INF)), prec=prec) - self.eq(tp(complex(-INF, -INF)), tp(complex(-INF, -INF)), prec=prec) - self.ne(tp(complex(INF, INF)), tp(complex(INF, -INF)), prec=prec) - self.ne(tp(complex(INF, INF)), tp(complex(-INF, INF)), prec=prec) - self.eq(tp(complex(INF, 0)), tp(complex(INF, 0)), prec=prec) - # NaNs - self.eq(tp(complex(NAN, 0)), tp(complex(NAN, 0)), prec=prec) - self.eq(tp(complex(0, NAN)), tp(complex(0, NAN)), prec=prec) - self.eq(tp(complex(NAN, NAN)), tp(complex(NAN, NAN)), prec=prec) - self.eq(tp(complex(INF, NAN)), tp(complex(INF, NAN)), prec=prec) - self.eq(tp(complex(NAN, -INF)), tp(complex(NAN, -INF)), prec=prec) - # FIXME - #self.ne(tp(complex(NAN, INF)), tp(complex(NAN, -INF))) - #self.ne(tp(complex(NAN, 0)), tp(complex(NAN, 1))) - #self.ne(tp(complex(INF, NAN)), tp(complex(-INF, NAN))) - #self.ne(tp(complex(0, NAN)), tp(complex(1, NAN))) - #self.ne(tp(complex(NAN, 0)), tp(complex(0, NAN))) - # XXX should work with other precisions as well? 
- self.ne(tp(complex(INF, 0)), tp(complex(INF, 1)), prec='exact') - - def test_complex128_values_inexact(self): - for tp in [complex, np.complex128]: - for scale in [1.0, -2**3, 2**-4, -2**-20]: - a = scale * 1.0 - b = scale * (1.0 + DBL_EPSILON) - c = scale * (1.0 + DBL_EPSILON * 2) - aa = tp(complex(a, a)) - ab = tp(complex(a, b)) - bb = tp(complex(b, b)) - self.ne(tp(aa), tp(ab)) - self.eq(tp(aa), tp(ab), prec='double') - self.eq(tp(ab), tp(bb), prec='double') - self.eq(tp(aa), tp(bb), prec='double') - ac = tp(complex(a, c)) - cc = tp(complex(c, c)) - self.ne(tp(aa), tp(ac), prec='double') - self.ne(tp(ac), tp(cc), prec='double') - self.eq(tp(aa), tp(ac), prec='double', ulps=2) - self.eq(tp(ac), tp(cc), prec='double', ulps=2) - self.eq(tp(aa), tp(cc), prec='double', ulps=2) - self.eq(tp(aa), tp(cc), prec='single') - - def test_complex64_values_inexact(self): - tp = np.complex64 - for scale in [1.0, -2**3, 2**-4, -2**-20]: - a = scale * 1.0 - b = scale * (1.0 + FLT_EPSILON) - c = scale * (1.0 + FLT_EPSILON * 2) - aa = tp(complex(a, a)) - ab = tp(complex(a, b)) - bb = tp(complex(b, b)) - self.ne(tp(aa), tp(ab)) - self.ne(tp(aa), tp(ab), prec='double') - self.eq(tp(aa), tp(ab), prec='single') - self.eq(tp(ab), tp(bb), prec='single') - self.eq(tp(aa), tp(bb), prec='single') - ac = tp(complex(a, c)) - cc = tp(complex(c, c)) - self.ne(tp(aa), tp(ac), prec='single') - self.ne(tp(ac), tp(cc), prec='single') - self.eq(tp(aa), tp(ac), prec='single', ulps=2) - self.eq(tp(ac), tp(cc), prec='single', ulps=2) - self.eq(tp(aa), tp(cc), prec='single', ulps=2) - - def test_enums(self): - values = [Color.red, Color.green, Color.blue, Shake.mint, - Shape.circle, Shape.square, Planet.EARTH, Planet.MERCURY] - for val in values: - self.eq(val, val) - self.ne(val, val.value) - for a, b in itertools.combinations(values, 2): - self.ne(a, b) - - def test_arrays(self): - a = np.arange(1, 7, dtype=np.int16).reshape((2, 3)) - b = a.copy() - self.eq(a, b) - # Different values - self.ne(a, b 
+ 1) - self.ne(a, b[:-1]) - self.ne(a, b.T) - # Different dtypes - self.ne(a, b.astype(np.int32)) - # Different layout - self.ne(a, b.T.copy().T) - # Different ndim - self.ne(a, b.flatten()) - # Different writeability - b.flags.writeable = False - self.ne(a, b) - # Precision - a = np.arange(1, 3, dtype=np.float64) - b = a * (1.0 + DBL_EPSILON) - c = a * (1.0 + DBL_EPSILON * 2) - self.ne(a, b) - self.eq(a, b, prec='double') - self.ne(a, c, prec='double') - - def test_npdatetime(self): - a = np.datetime64('1900', 'Y') - b = np.datetime64('1900', 'Y') - c = np.datetime64('1900-01-01', 'D') - d = np.datetime64('1901', 'Y') - self.eq(a, b) - # Different unit - self.ne(a, c) - # Different value - self.ne(a, d) - - def test_nptimedelta(self): - a = np.timedelta64(1, 'h') - b = np.timedelta64(1, 'h') - c = np.timedelta64(60, 'm') - d = np.timedelta64(2, 'h') - self.eq(a, b) - # Different unit - self.ne(a, c) - # Different value - self.ne(a, d) - - -class TestMisc(TestCase): - - def test_assertRefCount(self): - # Use floats to avoid integer interning - x = 55. - y = 66. - l = [] - with self.assertRefCount(x, y): - pass - with self.assertRaises(AssertionError) as cm: - # y gains a reference - with self.assertRefCount(x, y): - l.append(y) - self.assertIn("66", str(cm.exception)) - - def test_forbid_codegen(self): - """ - Test that forbid_codegen() prevents code generation using the @jit - decorator. 
- """ - def f(): - return 1 - with forbid_codegen(): - with self.assertRaises(RuntimeError) as raises: - cfunc = jit(nopython=True)(f) - cfunc() - self.assertIn("codegen forbidden by test case", str(raises.exception)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_svml.py b/numba/numba/tests/test_svml.py deleted file mode 100644 index 9b83ddba3..000000000 --- a/numba/numba/tests/test_svml.py +++ /dev/null @@ -1,404 +0,0 @@ -from __future__ import division, print_function - -import math -import numpy as np -import subprocess -import numbers -import importlib -import sys -import re -from itertools import chain, combinations - -import numba -from numba import prange, unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba.six import exec_ -from .support import TestCase, tag, override_env_config - -needs_svml = unittest.skipUnless(numba.config.USING_SVML, - "SVML tests need SVML to be present") - -# a map of float64 vector lenghs with corresponding CPU architecture -vlen2cpu = {2: 'nehalem', 4: 'haswell', 8: 'skylake-avx512'} - -# K: SVML functions, V: python functions which are expected to be SIMD-vectorized -# using SVML, explicit references to Python functions here are mostly for sake of -# instant import checks. 
-# TODO: [] and comments below mean unused/untested SVML function, it's to be -# either enabled or to be replaced with the explanation why the function -# cannot be used in Numba -# TODO: this test does not support functions with more than 1 arguments yet -# The test logic should be modified if there is an SVML function being used under -# different name or module from Python -svml_funcs = { - "sin": [np.sin, math.sin], - "cos": [np.cos, math.cos], - "pow": [], # pow, math.pow], - "exp": [np.exp, math.exp], - "log": [np.log, math.log], - "acos": [math.acos], - "acosh": [math.acosh], - "asin": [math.asin], - "asinh": [math.asinh], - "atan2": [], # math.atan2], - "atan": [math.atan], - "atanh": [math.atanh], - "cbrt": [], # np.cbrt], - "cdfnorm": [], - "cdfnorminv": [], - "ceil": [], # np.ceil, math.ceil], - "cosd": [], - "cosh": [np.cosh, math.cosh], - "erf": [math.erf], # np.erf is available in Intel Distribution - "erfc": [math.erfc], - "erfcinv": [], - "erfinv": [], - "exp10": [], - "exp2": [], # np.exp2], - "expm1": [np.expm1, math.expm1], - "floor": [], # np.floor, math.floor], - "fmod": [], # np.fmod, math.fmod], - "hypot": [], # np.hypot, math.hypot], - "invsqrt": [], # available in Intel Distribution - "log10": [np.log10, math.log10], - "log1p": [np.log1p, math.log1p], - "log2": [], # np.log2], - "logb": [], - "nearbyint": [], - "rint": [], # np.rint], - "round": [], # round], - "sind": [], - "sinh": [np.sinh, math.sinh], - "sqrt": [np.sqrt, math.sqrt], - "tan": [np.tan, math.tan], - "tanh": [np.tanh, math.tanh], - "trunc": [], # np.trunc, math.trunc], -} -# TODO: these functions are not vectorizable with complex types -complex_funcs_exclude = ["sqrt", "tan", "log10", "expm1", "log1p", "tanh", "log"] - -# remove untested entries -svml_funcs = {k: v for k, v in svml_funcs.items() if len(v) > 0} -# lists for functions which belong to numpy and math modules correpondently -numpy_funcs = [f for f, v in svml_funcs.items() if " 2: - for n in ( 
"test_int32_range4_usecase", # issue #3016 - ): - setattr(cls, n, tag("important")(getattr(cls, n))) - - -TestSVMLGeneration.autogenerate() - - -def math_sin_scalar(x): - return math.sin(x) - - -def math_sin_loop(n): - ret = np.empty(n, dtype=np.float64) - for x in range(n): - ret[x] = math.sin(np.float64(x)) - return ret - - -@needs_svml -class TestSVML(TestCase): - """ Tests SVML behaves as expected """ - - # env mutating, must not run in parallel - _numba_parallel_test_ = False - - def __init__(self, *args): - self.flags = Flags() - self.flags.set('nrt') - - # flags for njit(fastmath=True) - self.fastflags = Flags() - self.fastflags.set('nrt') - self.fastflags.set('fastmath') - super(TestSVML, self).__init__(*args) - - def compile(self, func, *args, **kwargs): - assert not kwargs - sig = tuple([numba.typeof(x) for x in args]) - - std = compile_isolated(func, sig, flags=self.flags) - fast = compile_isolated(func, sig, flags=self.fastflags) - - return std, fast - - def copy_args(self, *args): - if not args: - return tuple() - new_args = [] - for x in args: - if isinstance(x, np.ndarray): - new_args.append(x.copy('k')) - elif isinstance(x, np.number): - new_args.append(x.copy()) - elif isinstance(x, numbers.Number): - new_args.append(x) - else: - raise ValueError('Unsupported argument type encountered') - return tuple(new_args) - - def check(self, pyfunc, *args, **kwargs): - - jitstd, jitfast = self.compile(pyfunc, *args) - - std_pattern = kwargs.pop('std_pattern', None) - fast_pattern = kwargs.pop('fast_pattern', None) - cpu_name = kwargs.pop('cpu_name', 'skylake-avx512') - - # python result - py_expected = pyfunc(*self.copy_args(*args)) - - # jit result - jitstd_result = jitstd.entry_point(*self.copy_args(*args)) - - # fastmath result - jitfast_result = jitfast.entry_point(*self.copy_args(*args)) - - # assert numerical equality - np.testing.assert_almost_equal(jitstd_result, py_expected, **kwargs) - np.testing.assert_almost_equal(jitfast_result, py_expected, 
**kwargs) - - # look for specific patters in the asm for a given target - with override_env_config('NUMBA_CPU_NAME', cpu_name), \ - override_env_config('NUMBA_CPU_FEATURES', ''): - # recompile for overridden CPU - jitstd, jitfast = self.compile(pyfunc, *args) - if std_pattern: - self.check_svml_presence(jitstd, std_pattern) - if fast_pattern: - self.check_svml_presence(jitfast, fast_pattern) - - def check_svml_presence(self, func, pattern): - asm = func.library.get_asm_str() - self.assertIn(pattern, asm) - - def test_scalar_context(self): - # SVML will not be used. - pat = '$_sin' if numba.config.IS_OSX else '$sin' - self.check(math_sin_scalar, 7., std_pattern=pat) - self.check(math_sin_scalar, 7., fast_pattern=pat) - - @tag('important') - def test_svml(self): - # loops both with and without fastmath should use SVML. - # The high accuracy routines are dropped if `fastmath` is set - std = "__svml_sin8_ha," - fast = "__svml_sin8," # No `_ha`! - self.check(math_sin_loop, 10, std_pattern=std, fast_pattern=fast) - - def test_svml_disabled(self): - code = """if 1: - import os - import numpy as np - import math - - def math_sin_loop(n): - ret = np.empty(n, dtype=np.float64) - for x in range(n): - ret[x] = math.sin(np.float64(x)) - return ret - - def check_no_svml(): - try: - # ban the use of SVML - os.environ['NUMBA_DISABLE_INTEL_SVML'] = '1' - - # delay numba imports to account for env change as - # numba.__init__ picks up SVML and it is too late by - # then to override using `numba.config` - import numba - from numba import config - from numba.tests.support import override_env_config - from numba.compiler import compile_isolated, Flags - - # compile for overridden CPU, with and without fastmath - with override_env_config('NUMBA_CPU_NAME', 'skylake-avx512'), \ - override_env_config('NUMBA_CPU_FEATURES', ''): - sig = (numba.int32,) - f = Flags() - f.set('nrt') - std = compile_isolated(math_sin_loop, sig, flags=f) - f.set('fastmath') - fast = 
compile_isolated(math_sin_loop, sig, flags=f) - fns = std, fast - - # assert no SVML call is present in the asm - for fn in fns: - asm = fn.library.get_asm_str() - assert '__svml_sin' not in asm - finally: - # not really needed as process is separate - os.environ['NUMBA_DISABLE_INTEL_SVML'] = '0' - config.reload_config() - check_no_svml() - """ - popen = subprocess.Popen( - [sys.executable, "-c", code], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = popen.communicate() - if popen.returncode != 0: - raise AssertionError( - "process failed with code %s: stderr follows\n%s\n" % - (popen.returncode, err.decode())) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_sys_stdin_assignment.py b/numba/numba/tests/test_sys_stdin_assignment.py deleted file mode 100644 index 1990b1a8f..000000000 --- a/numba/numba/tests/test_sys_stdin_assignment.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import print_function, absolute_import, division - -import sys - -import unittest -from numba import njit - - -@njit -def f0(a, b): - return a + b - - -@njit -def f1(begin1, end1, begin2, end2): - if begin1 > begin2: return f1(begin2, end2, begin1, end1) - return end1 + 1 >= begin2 - - -@njit -def f0_2(a, b): - return a + b - - -@njit -def f1_2(begin1, end1, begin2, end2): - if begin1 > begin2: return f1_2(begin2, end2, begin1, end1) - return end1 + 1 >= begin2 - - -class TestSysStdinAssignment(unittest.TestCase): - - def test_no_reassignment_of_stdout(self): - """ - https://github.com/numba/numba/issues/3027 - Older versions of colorama break stdout/err when recursive functions - are compiled. - - This test should work irrespective of colorama version, or indeed its - presence. If the version is too low, it should be disabled and the test - should work anyway, if it is a sufficiently high version or it is not - present, it should work anyway. 
- """ - - originally = sys.stdout, sys.stderr - - try: - sys.stdout = None - f0(0, 1) # Not changed stdout? - self.assertEqual(sys.stdout, None) - f1(0, 1, 2, 3) # Not changed stdout? - self.assertEqual(sys.stdout, None) - - sys.stderr = None - f0_2(0, 1) # Not changed stderr? - self.assertEqual(sys.stderr, None) - f1_2(0, 1, 2, 3) # Not changed stderr? - self.assertEqual(sys.stderr, None) - - finally: - sys.stdout, sys.stderr = originally - - self.assertNotEqual(sys.stderr, None) - self.assertNotEqual(sys.stdout, None) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/tests/test_target_overloadselector.py b/numba/numba/tests/test_target_overloadselector.py deleted file mode 100644 index fafc48386..000000000 --- a/numba/numba/tests/test_target_overloadselector.py +++ /dev/null @@ -1,149 +0,0 @@ -from __future__ import print_function - -from itertools import product, permutations -from collections import defaultdict - -import numba.unittest_support as unittest -from numba.targets.base import OverloadSelector -from numba.targets.registry import cpu_target -from numba.targets.imputils import builtin_registry, RegistryLoader -from numba import types - - -class TestOverloadSelector(unittest.TestCase): - def test_select_and_sort_1(self): - os = OverloadSelector() - os.append(1, (types.Any, types.Boolean)) - os.append(2, (types.Boolean, types.Integer)) - os.append(3, (types.Boolean, types.Any)) - os.append(4, (types.Boolean, types.Boolean)) - compats = os._select_compatible((types.boolean, types.boolean)) - self.assertEqual(len(compats), 3) - ordered, scoring = os._sort_signatures(compats) - self.assertEqual(len(ordered), 3) - self.assertEqual(len(scoring), 3) - self.assertEqual(ordered[0], (types.Boolean, types.Boolean)) - self.assertEqual(scoring[types.Boolean, types.Boolean], 0) - self.assertEqual(scoring[types.Boolean, types.Any], 1) - self.assertEqual(scoring[types.Any, types.Boolean], 1) - - def test_select_and_sort_2(self): - os = 
OverloadSelector() - os.append(1, (types.Container,)) - os.append(2, (types.Sequence,)) - os.append(3, (types.MutableSequence,)) - os.append(4, (types.List,)) - compats = os._select_compatible((types.List,)) - self.assertEqual(len(compats), 4) - ordered, scoring = os._sort_signatures(compats) - self.assertEqual(len(ordered), 4) - self.assertEqual(len(scoring), 4) - self.assertEqual(ordered[0], (types.List,)) - self.assertEqual(scoring[(types.List,)], 0) - self.assertEqual(scoring[(types.MutableSequence,)], 1) - self.assertEqual(scoring[(types.Sequence,)], 2) - self.assertEqual(scoring[(types.Container,)], 3) - - def test_match(self): - os = OverloadSelector() - self.assertTrue(os._match(formal=types.Boolean, actual=types.boolean)) - self.assertTrue(os._match(formal=types.Boolean, actual=types.Boolean)) - # test subclass - self.assertTrue(issubclass(types.Sequence, types.Container)) - self.assertTrue(os._match(formal=types.Container, - actual=types.Sequence)) - self.assertFalse(os._match(formal=types.Sequence, - actual=types.Container)) - # test any - self.assertTrue(os._match(formal=types.Any, actual=types.Any)) - self.assertTrue(os._match(formal=types.Any, actual=types.Container)) - self.assertFalse(os._match(formal=types.Container, actual=types.Any)) - - def test_ambiguous_detection(self): - os = OverloadSelector() - # unambiguous signatures - os.append(1, (types.Any, types.Boolean)) - os.append(2, (types.Integer, types.Boolean)) - self.assertEqual(os.find((types.boolean, types.boolean)), 1) - # not implemented - with self.assertRaises(NotImplementedError) as raises: - os.find((types.boolean, types.int32)) - # generic - os.append(3, (types.Any, types.Any)) - self.assertEqual(os.find((types.boolean, types.int32)), 3) - self.assertEqual(os.find((types.boolean, types.boolean)), 1) - # add ambiguous signature; can match (bool, any) and (any, bool) - os.append(4, (types.Boolean, types.Any)) - with self.assertRaises(TypeError) as raises: - os.find((types.boolean, 
types.boolean)) - self.assertIn('2 ambiguous signatures', str(raises.exception)) - # disambiguous - os.append(5, (types.boolean, types.boolean)) - self.assertEqual(os.find((types.boolean, types.boolean)), 5) - - def test_subclass_specialization(self): - os = OverloadSelector() - self.assertTrue(issubclass(types.Sequence, types.Container)) - os.append(1, (types.Container, types.Container,)) - lstty = types.List(types.boolean) - self.assertEqual(os.find((lstty, lstty)), 1) - os.append(2, (types.Container, types.Sequence,)) - self.assertEqual(os.find((lstty, lstty)), 2) - - def test_cache(self): - os = OverloadSelector() - self.assertEqual(len(os._cache), 0) - os.append(1, (types.Any,)) - self.assertEqual(os.find((types.int32,)), 1) - self.assertEqual(len(os._cache), 1) - os.append(2, (types.Integer,)) - self.assertEqual(len(os._cache), 0) - self.assertEqual(os.find((types.int32,)), 2) - self.assertEqual(len(os._cache), 1) - - -class TestAmbiguousOverloads(unittest.TestCase): - - @classmethod - def setUpClass(cls): - # ensure all impls are loaded - cpu_target.target_context.refresh() - - def create_overload_selector(self, kind): - os = OverloadSelector() - loader = RegistryLoader(builtin_registry) - for impl, sig in loader.new_registrations(kind): - os.append(impl, sig) - return os - - def test_ambiguous_casts(self): - os = self.create_overload_selector(kind='casts') - all_types = set(t for sig, impl in os.versions for t in sig) - # ensure there are no ambiguous cast overloads - # note: using permutations to avoid testing cast to the same type - for sig in permutations(all_types, r=2): - try: - os.find(sig) - except NotImplementedError: - pass # ignore not implemented cast - - def test_ambiguous_functions(self): - loader = RegistryLoader(builtin_registry) - selectors = defaultdict(OverloadSelector) - for impl, fn, sig in loader.new_registrations('functions'): - os = selectors[fn] - os.append(impl, sig) - - for fn, os in selectors.items(): - all_types = set(t for sig, 
impl in os.versions for t in sig) - # ensure there are no ambiguous overloads - for sig in product(all_types, all_types): - try: - os.find(sig) - except NotImplementedError: - pass # ignore not implemented cast - - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_threadsafety.py b/numba/numba/tests/test_threadsafety.py deleted file mode 100644 index fab1f0412..000000000 --- a/numba/numba/tests/test_threadsafety.py +++ /dev/null @@ -1,97 +0,0 @@ -""" -Test threadsafety for compiler. -These tests will cause segfault if fail. -""" -import threading -import random - -import numpy as np - -from numba import config -from numba import unittest_support as unittest -from numba import jit, vectorize, guvectorize - -from .support import temp_directory, override_config - - -def foo(n, v): - return np.ones(n) - - -def ufunc_foo(a, b): - return a + b - - -def gufunc_foo(a, b, out): - out[0] = a + b - - - -class TestThreadSafety(unittest.TestCase): - - def run_jit(self, **options): - def runner(): - cfunc = jit(**options)(foo) - - return cfunc(4, 10) - return runner - - def run_compile(self, fnlist): - self._cache_dir = temp_directory(self.__class__.__name__) - with override_config('CACHE_DIR', self._cache_dir): - def chooser(): - for _ in range(10): - fn = random.choice(fnlist) - fn() - - ths = [threading.Thread(target=chooser) - for i in range(4)] - for th in ths: - th.start() - for th in ths: - th.join() - - def test_concurrent_jit(self): - self.run_compile([self.run_jit(nopython=True)]) - - def test_concurrent_jit_cache(self): - self.run_compile([self.run_jit(nopython=True, cache=True)]) - - def run_vectorize(self, **options): - def runner(): - cfunc = vectorize(['(f4, f4)'], **options)(ufunc_foo) - a = b = np.random.random(10).astype(np.float32) - return cfunc(a, b) - return runner - - def test_concurrent_vectorize(self): - self.run_compile([self.run_vectorize(nopython=True)]) - - def test_concurrent_vectorize_cache(self): - 
self.run_compile([self.run_vectorize(nopython=True, cache=True)]) - - def run_guvectorize(self, **options): - def runner(): - sig = ['(f4, f4, f4[:])'] - cfunc = guvectorize(sig, '(),()->()', **options)(gufunc_foo) - a = b = np.random.random(10).astype(np.float32) - return cfunc(a, b) - return runner - - def test_concurrent_guvectorize(self): - self.run_compile([self.run_guvectorize(nopython=True)]) - - def test_concurrent_guvectorize_cache(self): - self.run_compile([self.run_guvectorize(nopython=True, cache=True)]) - - def test_concurrent_mix_use(self): - self.run_compile([self.run_jit(nopython=True, cache=True), - self.run_jit(nopython=True), - self.run_vectorize(nopython=True, cache=True), - self.run_vectorize(nopython=True), - self.run_guvectorize(nopython=True, cache=True), - self.run_guvectorize(nopython=True)]) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_tracing.py b/numba/numba/tests/test_tracing.py deleted file mode 100644 index 528efeb59..000000000 --- a/numba/numba/tests/test_tracing.py +++ /dev/null @@ -1,182 +0,0 @@ -import numba.unittest_support as unittest -from numba import tracing -from numba.utils import StringIO -from numba.six import with_metaclass -import logging - -logger = logging.getLogger('trace') -logger.setLevel(logging.INFO) - -# Make sure tracing is enabled -orig_trace = tracing.trace -tracing.trace = tracing.dotrace - -class CapturedTrace: - """Capture the trace temporarily for validation.""" - - def __init__(self): - self.buffer = StringIO() - self.handler = logging.StreamHandler(self.buffer) - def __enter__(self): - self._handlers = logger.handlers - self.buffer = StringIO() - logger.handlers = [logging.StreamHandler(self.buffer)] - def __exit__(self, type, value, traceback): - logger.handlers = self._handlers - def getvalue(self): - - # Depending on how the tests are run, object names may be - # qualified by their containing module. 
- # Remove that to make the trace output independent from the testing mode. - log = self.buffer.getvalue() - log = log.replace(__name__ + '.','') - return log - -class Class(object): - - @tracing.trace - @classmethod - def class_method(cls): - pass - - @tracing.trace - @staticmethod - def static_method(): - pass - - __test = None - - def _test_get(self): - return self.__test - - def _test_set(self, value): - self.__test = value - - test = tracing.trace(property(_test_get, _test_set)) - - @tracing.trace - def method(self, some, other='value', *args, **kwds): - pass - - def __repr__(self): - """Generate a deterministic string for testing.""" - return '' - -class Class2(object): - @classmethod - def class_method(cls): - pass - - @staticmethod - def static_method(): - pass - - __test = None - @property - def test(self): - return self.__test - @test.setter - def test(self, value): - self.__test = value - - def method(self): - pass - - def __str__(self): - return 'Test(' + str(self.test) + ')' - - def __repr__(self): - """Generate a deterministic string for testing.""" - return '' - - -@tracing.trace -def test(x, y, z = True): - a = x + y - b = x * y - if z: return a - else: return b - -class TestTracing(unittest.TestCase): - - def __init__(self, *args): - super(TestTracing, self).__init__(*args) - - def setUp(self): - self.capture = CapturedTrace() - - def tearDown(self): - del self.capture - - def test_method(self): - - with self.capture: - Class().method('foo', bar='baz') - self.assertEqual(self.capture.getvalue(), - ">> Class.method(self=, some='foo', other='value', bar='baz')\n" + - "<< Class.method\n") - - def test_class_method(self): - - with self.capture: - Class.class_method() - self.assertEqual(self.capture.getvalue(), - ">> Class.class_method(cls=)\n" + - "<< Class.class_method\n") - - def test_static_method(self): - - with self.capture: - Class.static_method() - self.assertEqual(self.capture.getvalue(), - ">> static_method()\n" + - "<< static_method\n") - - - 
def test_property(self): - - with self.capture: - test = Class() - test.test = 1 - assert 1 == test.test - self.assertEqual(self.capture.getvalue(), - ">> Class._test_set(self=, value=1)\n" + - "<< Class._test_set\n" + - ">> Class._test_get(self=)\n" + - "<< Class._test_get -> 1\n") - - def test_function(self): - - with self.capture: - test(5, 5) - test(5, 5, False) - self.assertEqual(self.capture.getvalue(), - ">> test(x=5, y=5, z=True)\n" + - "<< test -> 10\n" + - ">> test(x=5, y=5, z=False)\n" + - "<< test -> 25\n") - - @unittest.skip("recursive decoration not yet implemented") - def test_injected(self): - - with self.capture: - tracing.trace(Class2, recursive=True) - Class2.class_method() - Class2.static_method() - test = Class2() - test.test = 1 - assert 1 == test.test - test.method() - - self.assertEqual(self.capture.getvalue(), - ">> Class2.class_method(cls=)\n" + - "<< Class2.class_method\n" - ">> static_method()\n" - "<< static_method\n") - - -# Reset tracing to its original value -tracing.trace = orig_trace - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_tuples.py b/numba/numba/tests/test_tuples.py deleted file mode 100644 index ee4a73029..000000000 --- a/numba/numba/tests/test_tuples.py +++ /dev/null @@ -1,504 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import collections -import itertools - -import numpy as np - -from numba import unittest_support as unittest -from numba.compiler import compile_isolated -from numba import jit, types, errors -from .support import TestCase, MemoryLeakMixin, tag - - -Rect = collections.namedtuple('Rect', ('width', 'height')) - -Point = collections.namedtuple('Point', ('x', 'y', 'z')) - -Empty = collections.namedtuple('Empty', ()) - -def tuple_return_usecase(a, b): - return a, b - -def tuple_first(tup): - a, b = tup - return a - -def tuple_second(tup): - a, b = tup - return b - -def tuple_index(tup, idx): - return tup[idx] - -def tuple_index_static(tup): - 
# Note the negative index - return tup[-2] - -def tuple_slice2(tup): - return tup[1:-1] - -def tuple_slice3(tup): - return tup[1::2] - -def len_usecase(tup): - return len(tup) - -def add_usecase(a, b): - return a + b - -def eq_usecase(a, b): - return a == b - -def ne_usecase(a, b): - return a != b - -def gt_usecase(a, b): - return a > b - -def ge_usecase(a, b): - return a >= b - -def lt_usecase(a, b): - return a < b - -def le_usecase(a, b): - return a <= b - -def in_usecase(a, b): - return a in b - -def bool_usecase(tup): - return bool(tup), (3 if tup else 2) - -def getattr_usecase(tup): - return tup.z, tup.y, tup.x - -def make_point(a, b, c): - return Point(a, b, c) - -def make_point_kws(a, b, c): - return Point(z=c, y=b, x=a) - -def make_point_nrt(n): - r = Rect(list(range(n)), np.zeros(n + 1)) - # This also exercises attribute access - p = Point(r, len(r.width), len(r.height)) - return p - -def type_usecase(tup, *args): - return type(tup)(*args) - -def identity(tup): - return tup - -def index_method_usecase(tup, value): - return tup.index(value) - - -class TestTupleReturn(TestCase): - - @tag('important') - def test_array_tuple(self): - aryty = types.Array(types.float64, 1, 'C') - cres = compile_isolated(tuple_return_usecase, (aryty, aryty)) - a = b = np.arange(5, dtype='float64') - ra, rb = cres.entry_point(a, b) - self.assertPreciseEqual(ra, a) - self.assertPreciseEqual(rb, b) - del a, b - self.assertPreciseEqual(ra, rb) - - def test_scalar_tuple(self): - scalarty = types.float32 - cres = compile_isolated(tuple_return_usecase, (scalarty, scalarty)) - a = b = 1 - ra, rb = cres.entry_point(a, b) - self.assertEqual(ra, a) - self.assertEqual(rb, b) - - @tag('important') - def test_hetero_tuple(self): - alltypes = [] - allvalues = [] - - alltypes.append((types.int32, types.int64)) - allvalues.append((1, 2)) - - alltypes.append((types.float32, types.float64)) - allvalues.append((1.125, .25)) - - alltypes.append((types.int32, types.float64)) - allvalues.append((1231, 
.5)) - - for (ta, tb), (a, b) in zip(alltypes, allvalues): - cres = compile_isolated(tuple_return_usecase, (ta, tb)) - ra, rb = cres.entry_point(a, b) - self.assertPreciseEqual((ra, rb), (a, b)) - - -class TestTuplePassing(TestCase): - - @tag('important') - def test_unituple(self): - tuple_type = types.UniTuple(types.int32, 2) - cr_first = compile_isolated(tuple_first, (tuple_type,)) - cr_second = compile_isolated(tuple_second, (tuple_type,)) - self.assertPreciseEqual(cr_first.entry_point((4, 5)), 4) - self.assertPreciseEqual(cr_second.entry_point((4, 5)), 5) - - @tag('important') - def test_hetero_tuple(self): - tuple_type = types.Tuple((types.int64, types.float32)) - cr_first = compile_isolated(tuple_first, (tuple_type,)) - cr_second = compile_isolated(tuple_second, (tuple_type,)) - self.assertPreciseEqual(cr_first.entry_point((2**61, 1.5)), 2**61) - self.assertPreciseEqual(cr_second.entry_point((2**61, 1.5)), 1.5) - - def test_size_mismatch(self): - # Issue #1638: tuple size should be checked when unboxing - tuple_type = types.UniTuple(types.int32, 2) - cr = compile_isolated(tuple_first, (tuple_type,)) - with self.assertRaises(ValueError) as raises: - cr.entry_point((4, 5, 6)) - self.assertEqual(str(raises.exception), - "size mismatch for tuple, expected 2 element(s) but got 3") - - -class TestOperations(TestCase): - - @tag('important') - def test_len(self): - pyfunc = len_usecase - cr = compile_isolated(pyfunc, - [types.Tuple((types.int64, types.float32))]) - self.assertPreciseEqual(cr.entry_point((4, 5)), 2) - cr = compile_isolated(pyfunc, - [types.UniTuple(types.int64, 3)]) - self.assertPreciseEqual(cr.entry_point((4, 5, 6)), 3) - - @tag('important') - def test_index(self): - pyfunc = tuple_index - cr = compile_isolated(pyfunc, - [types.UniTuple(types.int64, 3), types.int64]) - tup = (4, 3, 6) - for i in range(len(tup)): - self.assertPreciseEqual(cr.entry_point(tup, i), tup[i]) - - # Test empty tuple - cr = compile_isolated(pyfunc, - 
[types.UniTuple(types.int64, 0), types.int64]) - with self.assertRaises(IndexError) as raises: - cr.entry_point((), 0) - self.assertEqual("tuple index out of range", str(raises.exception)) - - # With a compile-time static index (the code generation path is different) - pyfunc = tuple_index_static - for typ in (types.UniTuple(types.int64, 4), - types.Tuple((types.int64, types.int32, types.int64, types.int32))): - cr = compile_isolated(pyfunc, (typ,)) - tup = (4, 3, 42, 6) - self.assertPreciseEqual(cr.entry_point(tup), pyfunc(tup)) - - typ = types.UniTuple(types.int64, 1) - with self.assertTypingError(): - cr = compile_isolated(pyfunc, (typ,)) - - - def test_in(self): - pyfunc = in_usecase - cr = compile_isolated(pyfunc, - [types.int64, types.UniTuple(types.int64, 3)]) - tup = (4, 1, 5) - for i in range(5): - self.assertPreciseEqual(cr.entry_point(i, tup), pyfunc(i, tup)) - - def check_slice(self, pyfunc): - tup = (4, 5, 6, 7) - cr = compile_isolated(pyfunc, - [types.UniTuple(types.int64, 4)]) - self.assertPreciseEqual(cr.entry_point(tup), pyfunc(tup)) - cr = compile_isolated( - pyfunc, - [types.Tuple((types.int64, types.int32, types.int64, types.int32))]) - self.assertPreciseEqual(cr.entry_point(tup), pyfunc(tup)) - - def test_slice2(self): - self.check_slice(tuple_slice2) - - def test_slice3(self): - self.check_slice(tuple_slice3) - - def test_bool(self): - pyfunc = bool_usecase - cr = compile_isolated(pyfunc, - [types.Tuple((types.int64, types.int32))]) - args = ((4, 5),) - self.assertPreciseEqual(cr.entry_point(*args), pyfunc(*args)) - cr = compile_isolated(pyfunc, - [types.UniTuple(types.int64, 3)]) - args = ((4, 5, 6),) - self.assertPreciseEqual(cr.entry_point(*args), pyfunc(*args)) - cr = compile_isolated(pyfunc, - [types.Tuple(())]) - self.assertPreciseEqual(cr.entry_point(()), pyfunc(())) - - @tag('important') - def test_add(self): - pyfunc = add_usecase - samples = [(types.Tuple(()), ()), - (types.UniTuple(types.int32, 0), ()), - 
(types.UniTuple(types.int32, 1), (42,)), - (types.Tuple((types.int64, types.float32)), (3, 4.5)), - ] - for (ta, a), (tb, b) in itertools.product(samples, samples): - cr = compile_isolated(pyfunc, (ta, tb)) - expected = pyfunc(a, b) - got = cr.entry_point(a, b) - self.assertPreciseEqual(got, expected, msg=(ta, tb)) - - def _test_compare(self, pyfunc): - def eq(pyfunc, cfunc, args): - self.assertIs(cfunc(*args), pyfunc(*args), - "mismatch for arguments %s" % (args,)) - - # Same-sized tuples - argtypes = [types.Tuple((types.int64, types.float32)), - types.UniTuple(types.int32, 2)] - for ta, tb in itertools.product(argtypes, argtypes): - cr = compile_isolated(pyfunc, (ta, tb)) - cfunc = cr.entry_point - for args in [((4, 5), (4, 5)), - ((4, 5), (4, 6)), - ((4, 6), (4, 5)), - ((4, 5), (5, 4))]: - eq(pyfunc, cfunc, args) - # Different-sized tuples - argtypes = [types.Tuple((types.int64, types.float32)), - types.UniTuple(types.int32, 3)] - cr = compile_isolated(pyfunc, tuple(argtypes)) - cfunc = cr.entry_point - for args in [((4, 5), (4, 5, 6)), - ((4, 5), (4, 4, 6)), - ((4, 5), (4, 6, 7))]: - eq(pyfunc, cfunc, args) - - @tag('important') - def test_eq(self): - self._test_compare(eq_usecase) - - @tag('important') - def test_ne(self): - self._test_compare(ne_usecase) - - @tag('important') - def test_gt(self): - self._test_compare(gt_usecase) - - @tag('important') - def test_ge(self): - self._test_compare(ge_usecase) - - @tag('important') - def test_lt(self): - self._test_compare(lt_usecase) - - @tag('important') - def test_le(self): - self._test_compare(le_usecase) - - -class TestNamedTuple(TestCase, MemoryLeakMixin): - - def test_unpack(self): - def check(p): - for pyfunc in tuple_first, tuple_second: - cfunc = jit(nopython=True)(pyfunc) - self.assertPreciseEqual(cfunc(p), pyfunc(p)) - - # Homogeneous - check(Rect(4, 5)) - # Heterogeneous - check(Rect(4, 5.5)) - - def test_len(self): - def check(p): - pyfunc = len_usecase - cfunc = jit(nopython=True)(pyfunc) - 
self.assertPreciseEqual(cfunc(p), pyfunc(p)) - - # Homogeneous - check(Rect(4, 5)) - check(Point(4, 5, 6)) - # Heterogeneous - check(Rect(4, 5.5)) - check(Point(4, 5.5, 6j)) - - def test_index(self): - pyfunc = tuple_index - cfunc = jit(nopython=True)(pyfunc) - - p = Point(4, 5, 6) - for i in range(len(p)): - self.assertPreciseEqual(cfunc(p, i), pyfunc(p, i)) - - def test_bool(self): - def check(p): - pyfunc = bool_usecase - cfunc = jit(nopython=True)(pyfunc) - self.assertPreciseEqual(cfunc(p), pyfunc(p)) - - # Homogeneous - check(Rect(4, 5)) - # Heterogeneous - check(Rect(4, 5.5)) - check(Empty()) - - def _test_compare(self, pyfunc): - def eq(pyfunc, cfunc, args): - self.assertIs(cfunc(*args), pyfunc(*args), - "mismatch for arguments %s" % (args,)) - - cfunc = jit(nopython=True)(pyfunc) - - # Same-sized named tuples - for a, b in [((4, 5), (4, 5)), - ((4, 5), (4, 6)), - ((4, 6), (4, 5)), - ((4, 5), (5, 4))]: - eq(pyfunc, cfunc, (Rect(*a), Rect(*b))) - - # Different-sized named tuples - for a, b in [((4, 5), (4, 5, 6)), - ((4, 5), (4, 4, 6)), - ((4, 5), (4, 6, 7))]: - eq(pyfunc, cfunc, (Rect(*a), Point(*b))) - - @tag('important') - def test_eq(self): - self._test_compare(eq_usecase) - - @tag('important') - def test_ne(self): - self._test_compare(ne_usecase) - - def test_gt(self): - self._test_compare(gt_usecase) - - def test_ge(self): - self._test_compare(ge_usecase) - - def test_lt(self): - self._test_compare(lt_usecase) - - def test_le(self): - self._test_compare(le_usecase) - - @tag('important') - def test_getattr(self): - pyfunc = getattr_usecase - cfunc = jit(nopython=True)(pyfunc) - - for args in (4, 5, 6), (4, 5.5, 6j): - p = Point(*args) - self.assertPreciseEqual(cfunc(p), pyfunc(p)) - - @tag('important') - def test_construct(self): - def check(pyfunc): - cfunc = jit(nopython=True)(pyfunc) - for args in (4, 5, 6), (4, 5.5, 6j): - expected = pyfunc(*args) - got = cfunc(*args) - self.assertIs(type(got), type(expected)) - self.assertPreciseEqual(got, expected) 
- - check(make_point) - check(make_point_kws) - - def test_type(self): - # Test the type() built-in on named tuples - pyfunc = type_usecase - cfunc = jit(nopython=True)(pyfunc) - - arg_tuples = [(4, 5, 6), (4, 5.5, 6j)] - for tup_args, args in itertools.product(arg_tuples, arg_tuples): - tup = Point(*tup_args) - expected = pyfunc(tup, *args) - got = cfunc(tup, *args) - self.assertIs(type(got), type(expected)) - self.assertPreciseEqual(got, expected) - - -class TestTupleNRT(TestCase, MemoryLeakMixin): - def test_tuple_add(self): - def pyfunc(x): - a = np.arange(3) - return (a,) + (x,) - - cfunc = jit(nopython=True)(pyfunc) - x = 123 - expect_a, expect_x = pyfunc(x) - got_a, got_x = cfunc(x) - np.testing.assert_equal(got_a, expect_a) - self.assertEqual(got_x, expect_x) - - -class TestNamedTupleNRT(TestCase, MemoryLeakMixin): - - def test_return(self): - # Check returning a namedtuple with a list inside it - pyfunc = make_point_nrt - cfunc = jit(nopython=True)(pyfunc) - - for arg in (3, 0): - expected = pyfunc(arg) - got = cfunc(arg) - self.assertIs(type(got), type(expected)) - self.assertPreciseEqual(got, expected) - - -class TestConversions(TestCase): - """ - Test implicit conversions between tuple types. 
- """ - - def check_conversion(self, fromty, toty, val): - pyfunc = identity - cr = compile_isolated(pyfunc, (fromty,), toty) - cfunc = cr.entry_point - res = cfunc(val) - self.assertEqual(res, val) - - def test_conversions(self): - check = self.check_conversion - fromty = types.UniTuple(types.int32, 2) - check(fromty, types.UniTuple(types.float32, 2), (4, 5)) - check(fromty, types.Tuple((types.float32, types.int16)), (4, 5)) - aty = types.UniTuple(types.int32, 0) - bty = types.Tuple(()) - check(aty, bty, ()) - check(bty, aty, ()) - - with self.assertRaises(errors.TypingError) as raises: - check(fromty, types.Tuple((types.float32,)), (4, 5)) - msg = "No conversion from tuple(int32 x 2) to tuple(float32 x 1)" - self.assertIn(msg, str(raises.exception)) - - -class TestMethods(TestCase): - - def test_index(self): - pyfunc = index_method_usecase - cfunc = jit(nopython=True)(pyfunc) - self.assertEqual(cfunc((1, 2, 3), 2), 1) - - with self.assertRaises(ValueError) as raises: - cfunc((1, 2, 3), 4) - msg = 'tuple.index(x): x not in tuple' - self.assertEqual(msg, str(raises.exception)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_typeconv.py b/numba/numba/tests/test_typeconv.py deleted file mode 100644 index cb1841b69..000000000 --- a/numba/numba/tests/test_typeconv.py +++ /dev/null @@ -1,259 +0,0 @@ -from __future__ import print_function -import itertools - -from numba import unittest_support as unittest -from numba import types -from numba.typeconv.typeconv import TypeManager, TypeCastingRules -from numba.typeconv import rules -from numba.typeconv import castgraph, Conversion - - -class CompatibilityTestMixin(unittest.TestCase): - - def check_number_compatibility(self, check_compatible): - b = types.boolean - i8 = types.int8 - i16 = types.int16 - i32 = types.int32 - i64 = types.int64 - u8 = types.uint8 - u16 = types.uint16 - u32 = types.uint32 - u64 = types.uint64 - f32 = types.float32 - f64 = types.float64 - c64 = types.complex64 
- c128 = types.complex128 - - self.assertEqual(check_compatible(i32, i32), Conversion.exact) - - self.assertEqual(check_compatible(b, i8), Conversion.safe) - self.assertEqual(check_compatible(b, u8), Conversion.safe) - self.assertEqual(check_compatible(i8, b), Conversion.unsafe) - self.assertEqual(check_compatible(u8, b), Conversion.unsafe) - - self.assertEqual(check_compatible(i32, i64), Conversion.promote) - self.assertEqual(check_compatible(i32, u32), Conversion.unsafe) - self.assertEqual(check_compatible(u32, i32), Conversion.unsafe) - self.assertEqual(check_compatible(u32, i64), Conversion.safe) - - self.assertEqual(check_compatible(i32, f32), Conversion.unsafe) - self.assertEqual(check_compatible(u32, f32), Conversion.unsafe) - self.assertEqual(check_compatible(i32, f64), Conversion.safe) - self.assertEqual(check_compatible(u32, f64), Conversion.safe) - # Note this is inconsistent with i32 -> f32... - self.assertEqual(check_compatible(i64, f64), Conversion.safe) - self.assertEqual(check_compatible(u64, f64), Conversion.safe) - - self.assertEqual(check_compatible(f32, c64), Conversion.safe) - self.assertEqual(check_compatible(f64, c128), Conversion.safe) - self.assertEqual(check_compatible(f64, c64), Conversion.unsafe) - - # Propagated compatibility relationships - self.assertEqual(check_compatible(i16, f64), Conversion.safe) - self.assertEqual(check_compatible(i16, i64), Conversion.promote) - self.assertEqual(check_compatible(i32, c64), Conversion.unsafe) - self.assertEqual(check_compatible(i32, c128), Conversion.safe) - self.assertEqual(check_compatible(i32, u64), Conversion.unsafe) - - for ta, tb in itertools.product(types.number_domain, - types.number_domain): - if ta in types.complex_domain and tb not in types.complex_domain: - continue - self.assertTrue(check_compatible(ta, tb) is not None, - msg="No cast from %s to %s" % (ta, tb)) - - -class TestTypeConv(CompatibilityTestMixin, unittest.TestCase): - - def test_typeconv(self): - tm = TypeManager() - - 
i32 = types.int32 - i64 = types.int64 - f32 = types.float32 - - tm.set_promote(i32, i64) - tm.set_unsafe_convert(i32, f32) - - sig = (i32, f32) - ovs = [ - (i32, i32), - (f32, f32), - (i64, i64), - ] - - # allow_unsafe = True => a conversion from i32 to f32 is chosen - sel = tm.select_overload(sig, ovs, True) - self.assertEqual(sel, 1) - # allow_unsafe = False => no overload available - with self.assertRaises(TypeError): - sel = tm.select_overload(sig, ovs, False) - - def test_default_rules(self): - tm = rules.default_type_manager - self.check_number_compatibility(tm.check_compatible) - - def test_overload1(self): - tm = rules.default_type_manager - - i32 = types.int32 - i64 = types.int64 - - sig = (i64, i32, i32) - ovs = [ - (i32, i32, i32), - (i64, i64, i64), - ] - # The first overload is unsafe, the second is safe => the second - # is always chosen, regardless of allow_unsafe. - self.assertEqual(tm.select_overload(sig, ovs, True), 1) - self.assertEqual(tm.select_overload(sig, ovs, False), 1) - - def test_overload2(self): - tm = rules.default_type_manager - - i16 = types.int16 - i32 = types.int32 - i64 = types.int64 - - sig = (i32, i16, i32) - ovs = [ - # Three promotes - (i64, i64, i64), - # One promotes, two exact types - (i32, i32, i32), - # Two unsafe converts, one exact type - (i16, i16, i16), - ] - self.assertEqual(tm.select_overload(sig, ovs, allow_unsafe=False), 1) - self.assertEqual(tm.select_overload(sig, ovs, allow_unsafe=True), 1) - - # The same in reverse order - ovs.reverse() - self.assertEqual(tm.select_overload(sig, ovs, allow_unsafe=False), 1) - self.assertEqual(tm.select_overload(sig, ovs, allow_unsafe=True), 1) - - def test_overload3(self): - # Promotes should be preferred over safe converts - tm = rules.default_type_manager - - i32 = types.int32 - i64 = types.int64 - f64 = types.float64 - - sig = (i32, i32) - ovs = [ - # Two promotes - (i64, i64), - # Two safe converts - (f64, f64), - ] - self.assertEqual(tm.select_overload(sig, ovs, 
allow_unsafe=False), 0) - self.assertEqual(tm.select_overload(sig, ovs, allow_unsafe=True), 0) - - # The same in reverse order - ovs.reverse() - self.assertEqual(tm.select_overload(sig, ovs, allow_unsafe=False), 1) - self.assertEqual(tm.select_overload(sig, ovs, allow_unsafe=True), 1) - - def test_type_casting_rules(self): - tm = TypeManager() - tcr = TypeCastingRules(tm) - - i32 = types.int32 - i64 = types.int64 - f64 = types.float64 - f32 = types.float32 - made_up = types.Dummy("made_up") - - tcr.promote_unsafe(i32, i64) - tcr.safe_unsafe(i32, f64) - tcr.promote_unsafe(f32, f64) - - def base_test(): - # As declared - self.assertEqual(tm.check_compatible(i32, i64), Conversion.promote) - self.assertEqual(tm.check_compatible(i32, f64), Conversion.safe) - self.assertEqual(tm.check_compatible(f32, f64), Conversion.promote) - self.assertEqual(tm.check_compatible(i64, i32), Conversion.unsafe) - self.assertEqual(tm.check_compatible(f64, i32), Conversion.unsafe) - self.assertEqual(tm.check_compatible(f64, f32), Conversion.unsafe) - - # Propagated - self.assertEqual(tm.check_compatible(i64, f64), Conversion.unsafe) - self.assertEqual(tm.check_compatible(f64, i64), Conversion.unsafe) - self.assertEqual(tm.check_compatible(i64, f32), Conversion.unsafe) - self.assertEqual(tm.check_compatible(i32, f32), Conversion.unsafe) - self.assertEqual(tm.check_compatible(f32, i32), Conversion.unsafe) - - # Test base graph - base_test() - - self.assertIsNone(tm.check_compatible(i64, made_up)) - self.assertIsNone(tm.check_compatible(i32, made_up)) - self.assertIsNone(tm.check_compatible(f32, made_up)) - self.assertIsNone(tm.check_compatible(made_up, f64)) - self.assertIsNone(tm.check_compatible(made_up, i64)) - - # Add new test - tcr.promote(f64, made_up) - tcr.unsafe(made_up, i32) - - # Ensure the graph did not change by adding the new type - base_test() - - # To "made up" type - self.assertEqual(tm.check_compatible(i64, made_up), Conversion.unsafe) - 
self.assertEqual(tm.check_compatible(i32, made_up), Conversion.safe) - self.assertEqual(tm.check_compatible(f32, made_up), Conversion.promote) - self.assertEqual(tm.check_compatible(made_up, f64), Conversion.unsafe) - self.assertEqual(tm.check_compatible(made_up, i64), Conversion.unsafe) - - def test_castgraph_propagate(self): - saved = [] - - def callback(src, dst, rel): - saved.append((src, dst, rel)) - - tg = castgraph.TypeGraph(callback) - - i32 = types.int32 - i64 = types.int64 - f64 = types.float64 - f32 = types.float32 - - tg.insert_rule(i32, i64, Conversion.promote) - tg.insert_rule(i64, i32, Conversion.unsafe) - - saved.append(None) - - tg.insert_rule(i32, f64, Conversion.safe) - tg.insert_rule(f64, i32, Conversion.unsafe) - - saved.append(None) - - tg.insert_rule(f32, f64, Conversion.promote) - tg.insert_rule(f64, f32, Conversion.unsafe) - - self.assertIn((i32, i64, Conversion.promote), saved[0:2]) - self.assertIn((i64, i32, Conversion.unsafe), saved[0:2]) - self.assertIs(saved[2], None) - - self.assertIn((i32, f64, Conversion.safe), saved[3:7]) - self.assertIn((f64, i32, Conversion.unsafe), saved[3:7]) - self.assertIn((i64, f64, Conversion.unsafe), saved[3:7]) - self.assertIn((i64, f64, Conversion.unsafe), saved[3:7]) - self.assertIs(saved[7], None) - - self.assertIn((f32, f64, Conversion.promote), saved[8:14]) - self.assertIn((f64, f32, Conversion.unsafe), saved[8:14]) - self.assertIn((f32, i32, Conversion.unsafe), saved[8:14]) - self.assertIn((i32, f32, Conversion.unsafe), saved[8:14]) - self.assertIn((f32, i64, Conversion.unsafe), saved[8:14]) - self.assertIn((i64, f32, Conversion.unsafe), saved[8:14]) - self.assertEqual(len(saved[14:]), 0) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_typeinfer.py b/numba/numba/tests/test_typeinfer.py deleted file mode 100644 index aff0e13b4..000000000 --- a/numba/numba/tests/test_typeinfer.py +++ /dev/null @@ -1,702 +0,0 @@ -from __future__ import print_function, division, 
absolute_import - -import os, sys, subprocess -import itertools - -import numpy as np - -from numba import unittest_support as unittest -from numba.compiler import compile_isolated -from numba import types, typeinfer, typing, jit, errors -from numba.typeconv import Conversion - -from .support import TestCase, tag -from .test_typeconv import CompatibilityTestMixin - - -i8 = types.int8 -i16 = types.int16 -i32 = types.int32 -i64 = types.int64 -u8 = types.uint8 -u16 = types.uint16 -u32 = types.uint32 -u64 = types.uint64 -f32 = types.float32 -f64 = types.float64 -c64 = types.complex64 -c128 = types.complex128 - - -class TestArgRetCasting(unittest.TestCase): - def test_arg_ret_casting(self): - def foo(x): - return x - - args = (i32,) - return_type = f32 - cres = compile_isolated(foo, args, return_type) - self.assertTrue(isinstance(cres.entry_point(123), float)) - self.assertEqual(cres.signature.args, args) - self.assertEqual(cres.signature.return_type, return_type) - - def test_arg_ret_mismatch(self): - def foo(x): - return x - - args = (types.Array(i32, 1, 'C'),) - return_type = f32 - try: - cres = compile_isolated(foo, args, return_type) - except errors.TypingError as e: - pass - else: - self.fail("Should complain about array casting to float32") - - def test_invalid_arg_type_forcing(self): - def foo(iters): - a = range(iters) - return iters - - args = (u32,) - return_type = u8 - cres = compile_isolated(foo, args, return_type) - typemap = cres.type_annotation.typemap - # Argument "iters" must be uint32 - self.assertEqual(typemap['iters'], u32) - - -class TestUnify(unittest.TestCase): - """ - Tests for type unification with a typing context. 
- """ - - int_unify = { - ('uint8', 'uint8'): 'uint8', - ('int8', 'int8'): 'int8', - ('uint16', 'uint16'): 'uint16', - ('int16', 'int16'): 'int16', - ('uint32', 'uint32'): 'uint32', - ('int32', 'int32'): 'int32', - ('uint64', 'uint64'): 'uint64', - ('int64', 'int64'): 'int64', - - ('int8', 'uint8'): 'int16', - ('int8', 'uint16'): 'int32', - ('int8', 'uint32'): 'int64', - - ('uint8', 'int32'): 'int32', - ('uint8', 'uint64'): 'uint64', - - ('int16', 'int8'): 'int16', - ('int16', 'uint8'): 'int16', - ('int16', 'uint16'): 'int32', - ('int16', 'uint32'): 'int64', - ('int16', 'int64'): 'int64', - ('int16', 'uint64'): 'float64', - - ('uint16', 'uint8'): 'uint16', - ('uint16', 'uint32'): 'uint32', - ('uint16', 'int32'): 'int32', - ('uint16', 'uint64'): 'uint64', - - ('int32', 'int8'): 'int32', - ('int32', 'int16'): 'int32', - ('int32', 'uint32'): 'int64', - ('int32', 'int64'): 'int64', - - ('uint32', 'uint8'): 'uint32', - ('uint32', 'int64'): 'int64', - ('uint32', 'uint64'): 'uint64', - - ('int64', 'int8'): 'int64', - ('int64', 'uint8'): 'int64', - ('int64', 'uint16'): 'int64', - - ('uint64', 'int8'): 'float64', - ('uint64', 'int32'): 'float64', - ('uint64', 'int64'): 'float64', - } - - def assert_unify(self, aty, bty, expected): - ctx = typing.Context() - template = "{0}, {1} -> {2} != {3}" - for unify_func in ctx.unify_types, ctx.unify_pairs: - unified = unify_func(aty, bty) - self.assertEqual(unified, expected, - msg=template.format(aty, bty, unified, expected)) - unified = unify_func(bty, aty) - self.assertEqual(unified, expected, - msg=template.format(bty, aty, unified, expected)) - - def assert_unify_failure(self, aty, bty): - self.assert_unify(aty, bty, None) - - @tag('important') - def test_integer(self): - ctx = typing.Context() - for aty, bty in itertools.product(types.integer_domain, - types.integer_domain): - key = (str(aty), str(bty)) - try: - expected = self.int_unify[key] - except KeyError: - expected = self.int_unify[key[::-1]] - self.assert_unify(aty, bty, 
getattr(types, expected)) - - @tag('important') - def test_bool(self): - aty = types.boolean - for bty in types.integer_domain: - self.assert_unify(aty, bty, bty) - # Not sure about this one, but it respects transitivity - for cty in types.real_domain: - self.assert_unify(aty, cty, cty) - - def unify_number_pair_test(self, n): - """ - Test all permutations of N-combinations of numeric types and ensure - that the order of types in the sequence is irrelevant. - """ - ctx = typing.Context() - for tys in itertools.combinations(types.number_domain, n): - res = [ctx.unify_types(*comb) - for comb in itertools.permutations(tys)] - first_result = res[0] - # Sanity check - self.assertIsInstance(first_result, types.Number) - # All results must be equal - for other in res[1:]: - self.assertEqual(first_result, other) - - def test_unify_number_pair(self): - self.unify_number_pair_test(2) - self.unify_number_pair_test(3) - - def test_none_to_optional(self): - """ - Test unification of `none` and multiple number types to optional type - """ - ctx = typing.Context() - for tys in itertools.combinations(types.number_domain, 2): - # First unify without none, to provide the control value - tys = list(tys) - expected = types.Optional(ctx.unify_types(*tys)) - results = [ctx.unify_types(*comb) - for comb in itertools.permutations(tys + [types.none])] - # All results must be equal - for res in results: - self.assertEqual(res, expected) - - @tag('important') - def test_none(self): - aty = types.none - bty = types.none - self.assert_unify(aty, bty, types.none) - - def test_optional(self): - aty = types.Optional(i32) - bty = types.none - self.assert_unify(aty, bty, aty) - aty = types.Optional(i32) - bty = types.Optional(i64) - self.assert_unify(aty, bty, bty) - aty = types.Optional(i32) - bty = i64 - self.assert_unify(aty, bty, types.Optional(i64)) - # Failure - aty = types.Optional(i32) - bty = types.Optional(types.slice3_type) - self.assert_unify_failure(aty, bty) - - @tag('important') - 
def test_tuple(self): - aty = types.UniTuple(i32, 3) - bty = types.UniTuple(i64, 3) - self.assert_unify(aty, bty, types.UniTuple(i64, 3)) - # (Tuple, UniTuple) -> Tuple - aty = types.UniTuple(i32, 2) - bty = types.Tuple((i16, i64)) - self.assert_unify(aty, bty, types.Tuple((i32, i64))) - aty = types.UniTuple(i64, 0) - bty = types.Tuple(()) - self.assert_unify(aty, bty, bty) - # (Tuple, Tuple) -> Tuple - aty = types.Tuple((i8, i16, i32)) - bty = types.Tuple((i32, i16, i8)) - self.assert_unify(aty, bty, types.Tuple((i32, i16, i32))) - aty = types.Tuple((i8, i32)) - bty = types.Tuple((i32, i8)) - self.assert_unify(aty, bty, types.Tuple((i32, i32))) - aty = types.Tuple((i8, i16)) - bty = types.Tuple((i16, i8)) - self.assert_unify(aty, bty, types.Tuple((i16, i16))) - # Different number kinds - aty = types.UniTuple(f64, 3) - bty = types.UniTuple(c64, 3) - self.assert_unify(aty, bty, types.UniTuple(c128, 3)) - # Tuples of tuples - aty = types.UniTuple(types.Tuple((u32, f32)), 2) - bty = types.UniTuple(types.Tuple((i16, f32)), 2) - self.assert_unify(aty, bty, - types.UniTuple(types.Tuple((i64, f32)), 2)) - # Failures - aty = types.UniTuple(i32, 1) - bty = types.UniTuple(types.slice3_type, 1) - self.assert_unify_failure(aty, bty) - aty = types.UniTuple(i32, 1) - bty = types.UniTuple(i32, 2) - self.assert_unify_failure(aty, bty) - aty = types.Tuple((i8, types.slice3_type)) - bty = types.Tuple((i32, i8)) - self.assert_unify_failure(aty, bty) - - def test_optional_tuple(self): - # Unify to optional tuple - aty = types.none - bty = types.UniTuple(i32, 2) - self.assert_unify(aty, bty, types.Optional(types.UniTuple(i32, 2))) - aty = types.Optional(types.UniTuple(i16, 2)) - bty = types.UniTuple(i32, 2) - self.assert_unify(aty, bty, types.Optional(types.UniTuple(i32, 2))) - # Unify to tuple of optionals - aty = types.Tuple((types.none, i32)) - bty = types.Tuple((i16, types.none)) - self.assert_unify(aty, bty, types.Tuple((types.Optional(i16), - types.Optional(i32)))) - aty = 
types.Tuple((types.Optional(i32), i64)) - bty = types.Tuple((i16, types.Optional(i8))) - self.assert_unify(aty, bty, types.Tuple((types.Optional(i32), - types.Optional(i64)))) - - @tag('important') - def test_arrays(self): - aty = types.Array(i32, 3, "C") - bty = types.Array(i32, 3, "A") - self.assert_unify(aty, bty, bty) - aty = types.Array(i32, 3, "C") - bty = types.Array(i32, 3, "F") - self.assert_unify(aty, bty, types.Array(i32, 3, "A")) - aty = types.Array(i32, 3, "C") - bty = types.Array(i32, 3, "C", readonly=True) - self.assert_unify(aty, bty, bty) - aty = types.Array(i32, 3, "A") - bty = types.Array(i32, 3, "C", readonly=True) - self.assert_unify(aty, bty, - types.Array(i32, 3, "A", readonly=True)) - # Failures - aty = types.Array(i32, 2, "C") - bty = types.Array(i32, 3, "C") - self.assert_unify_failure(aty, bty) - aty = types.Array(i32, 2, "C") - bty = types.Array(u32, 2, "C") - self.assert_unify_failure(aty, bty) - - @tag('important') - def test_list(self): - aty = types.List(types.undefined) - bty = types.List(i32) - self.assert_unify(aty, bty, bty) - aty = types.List(i16) - bty = types.List(i32) - self.assert_unify(aty, bty, bty) - aty = types.List(types.Tuple([i32, i16])) - bty = types.List(types.Tuple([i16, i64])) - cty = types.List(types.Tuple([i32, i64])) - self.assert_unify(aty, bty, cty) - # Different reflections - aty = types.List(i16, reflected=True) - bty = types.List(i32) - cty = types.List(i32, reflected=True) - self.assert_unify(aty, bty, cty) - # Incompatible dtypes - aty = types.List(i16) - bty = types.List(types.Tuple([i16])) - self.assert_unify_failure(aty, bty) - - def test_set(self): - # Different reflections - aty = types.Set(i16, reflected=True) - bty = types.Set(i32) - cty = types.Set(i32, reflected=True) - self.assert_unify(aty, bty, cty) - # Incompatible dtypes - aty = types.Set(i16) - bty = types.Set(types.Tuple([i16])) - self.assert_unify_failure(aty, bty) - - @tag('important') - def test_range(self): - aty = 
types.range_state32_type - bty = types.range_state64_type - self.assert_unify(aty, bty, bty) - - -class TestTypeConversion(CompatibilityTestMixin, unittest.TestCase): - """ - Test for conversion between types with a typing context. - """ - - def assert_can_convert(self, aty, bty, expected): - ctx = typing.Context() - got = ctx.can_convert(aty, bty) - self.assertEqual(got, expected) - - def assert_cannot_convert(self, aty, bty): - ctx = typing.Context() - got = ctx.can_convert(aty, bty) - self.assertIsNone(got) - - @tag('important') - def test_convert_number_types(self): - # Check that Context.can_convert() is compatible with the default - # number conversion rules registered in the typeconv module - # (which is used internally by the C _Dispatcher object). - ctx = typing.Context() - self.check_number_compatibility(ctx.can_convert) - - @tag('important') - def test_tuple(self): - # UniTuple -> UniTuple - aty = types.UniTuple(i32, 3) - bty = types.UniTuple(i64, 3) - self.assert_can_convert(aty, aty, Conversion.exact) - self.assert_can_convert(aty, bty, Conversion.promote) - aty = types.UniTuple(i32, 3) - bty = types.UniTuple(f64, 3) - self.assert_can_convert(aty, bty, Conversion.safe) - # Tuple -> Tuple - aty = types.Tuple((i32, i32)) - bty = types.Tuple((i32, i64)) - self.assert_can_convert(aty, bty, Conversion.promote) - # UniTuple <-> Tuple - aty = types.UniTuple(i32, 2) - bty = types.Tuple((i32, i64)) - self.assert_can_convert(aty, bty, Conversion.promote) - self.assert_can_convert(bty, aty, Conversion.unsafe) - # Empty tuples - aty = types.UniTuple(i64, 0) - bty = types.UniTuple(i32, 0) - cty = types.Tuple(()) - self.assert_can_convert(aty, bty, Conversion.safe) - self.assert_can_convert(bty, aty, Conversion.safe) - self.assert_can_convert(aty, cty, Conversion.safe) - self.assert_can_convert(cty, aty, Conversion.safe) - # Failures - aty = types.UniTuple(i64, 3) - bty = types.UniTuple(types.none, 3) - self.assert_cannot_convert(aty, bty) - aty = 
types.UniTuple(i64, 2) - bty = types.UniTuple(i64, 3) - - @tag('important') - def test_arrays(self): - # Different layouts - aty = types.Array(i32, 3, "C") - bty = types.Array(i32, 3, "A") - self.assert_can_convert(aty, bty, Conversion.safe) - aty = types.Array(i32, 2, "C") - bty = types.Array(i32, 2, "F") - self.assert_cannot_convert(aty, bty) - # Different mutabilities - aty = types.Array(i32, 3, "C") - bty = types.Array(i32, 3, "C", readonly=True) - self.assert_can_convert(aty, aty, Conversion.exact) - self.assert_can_convert(bty, bty, Conversion.exact) - self.assert_can_convert(aty, bty, Conversion.safe) - self.assert_cannot_convert(bty, aty) - # Various failures - aty = types.Array(i32, 2, "C") - bty = types.Array(i32, 3, "C") - self.assert_cannot_convert(aty, bty) - aty = types.Array(i32, 2, "C") - bty = types.Array(i64, 2, "C") - self.assert_cannot_convert(aty, bty) - - def test_optional(self): - aty = types.int32 - bty = types.Optional(i32) - self.assert_can_convert(types.none, bty, Conversion.promote) - self.assert_can_convert(aty, bty, Conversion.promote) - self.assert_cannot_convert(bty, types.none) - self.assert_can_convert(bty, aty, Conversion.safe) # XXX ??? - # Optional array - aty = types.Array(i32, 2, "C") - bty = types.Optional(aty) - self.assert_can_convert(types.none, bty, Conversion.promote) - self.assert_can_convert(aty, bty, Conversion.promote) - self.assert_can_convert(bty, aty, Conversion.safe) - aty = types.Array(i32, 2, "C") - bty = types.Optional(aty.copy(layout="A")) - self.assert_can_convert(aty, bty, Conversion.safe) # C -> A - self.assert_cannot_convert(bty, aty) # A -> C - aty = types.Array(i32, 2, "C") - bty = types.Optional(aty.copy(layout="F")) - self.assert_cannot_convert(aty, bty) - self.assert_cannot_convert(bty, aty) - - -class TestResolveOverload(unittest.TestCase): - """ - Tests for typing.Context.resolve_overload(). 
- """ - - def assert_resolve_overload(self, cases, args, expected): - ctx = typing.Context() - got = ctx.resolve_overload("foo", cases, args, {}) - self.assertEqual(got, expected) - - def test_non_ambiguous_match(self): - def check(args, expected): - self.assert_resolve_overload(cases, args, expected) - # Order shouldn't matter here - self.assert_resolve_overload(cases[::-1], args, expected) - - cases = [i8(i8, i8), i32(i32, i32), f64(f64, f64)] - # Exact match - check((i8, i8), cases[0]) - check((i32, i32), cases[1]) - check((f64, f64), cases[2]) - # "Promote" conversion - check((i8, i16), cases[1]) - check((i32, i8), cases[1]) - check((i32, i8), cases[1]) - check((f32, f32), cases[2]) - # "Safe" conversion - check((u32, u32), cases[2]) - # "Unsafe" conversion - check((i64, i64), cases[2]) - - def test_ambiguous_match(self): - # When the best match is ambiguous (there is a tie), the first - # best case in original sequence order should be returned. - def check(args, expected, expected_reverse): - self.assert_resolve_overload(cases, args, expected) - self.assert_resolve_overload(cases[::-1], args, expected_reverse) - - cases = [i16(i16, i16), i32(i32, i32), f64(f64, f64)] - # Two "promote" conversions - check((i8, i8), cases[0], cases[1]) - # Two "safe" conversions - check((u16, u16), cases[1], cases[2]) - - cases = [i32(i32, i32), f32(f32, f32)] - # Two "unsafe" conversions - check((u32, u32), cases[0], cases[1]) - - def test_ambiguous_error(self): - ctx = typing.Context() - cases = [i16(i16, i16), i32(i32, i32)] - with self.assertRaises(TypeError) as raises: - ctx.resolve_overload("foo", cases, (i8, i8), {}, - allow_ambiguous=False) - self.assertEqual(str(raises.exception).splitlines(), - ["Ambiguous overloading for foo (int8, int8):", - "(int16, int16) -> int16", - "(int32, int32) -> int32", - ]) - - -class TestUnifyUseCases(unittest.TestCase): - """ - Concrete cases where unification would fail. 
- """ - - @staticmethod - def _actually_test_complex_unify(): - def pyfunc(a): - res = 0.0 - for i in range(len(a)): - res += a[i] - return res - - argtys = [types.Array(c128, 1, 'C')] - cres = compile_isolated(pyfunc, argtys) - return (pyfunc, cres) - - @tag('important') - def test_complex_unify_issue599(self): - pyfunc, cres = self._actually_test_complex_unify() - arg = np.array([1.0j]) - cfunc = cres.entry_point - self.assertEqual(cfunc(arg), pyfunc(arg)) - - def test_complex_unify_issue599_multihash(self): - """ - Test issue #599 for multiple values of PYTHONHASHSEED. - """ - env = os.environ.copy() - for seedval in (1, 2, 1024): - env['PYTHONHASHSEED'] = str(seedval) - subproc = subprocess.Popen( - [sys.executable, '-c', - 'import numba.tests.test_typeinfer as test_mod\n' + - 'test_mod.TestUnifyUseCases._actually_test_complex_unify()'], - env=env) - subproc.wait() - self.assertEqual(subproc.returncode, 0, 'Child process failed.') - - @tag('important') - def test_int_tuple_unify(self): - """ - Test issue #493 - """ - def foo(an_int32, an_int64): - a = an_int32, an_int32 - while True: # infinite loop - a = an_int32, an_int64 - return a - - args = (i32, i64) - # Check if compilation is successful - cres = compile_isolated(foo, args) - - -def issue_797(x0, y0, x1, y1, grid): - nrows, ncols = grid.shape - - dx = abs(x1 - x0) - dy = abs(y1 - y0) - - sx = 0 - if x0 < x1: - sx = 1 - else: - sx = -1 - sy = 0 - if y0 < y1: - sy = 1 - else: - sy = -1 - - err = dx - dy - - while True: - if x0 == x1 and y0 == y1: - break - - if 0 <= x0 < nrows and 0 <= y0 < ncols: - grid[x0, y0] += 1 - - e2 = 2 * err - if e2 > -dy: - err -= dy - x0 += sx - if e2 < dx: - err += dx - y0 += sy - - -def issue_1080(a, b): - if not a: - return True - return b - - -def list_unify_usecase1(n): - res = 0 - x = [] - if n < 10: - x.append(np.int32(n)) - else: - for i in range(n): - x.append(np.int64(i)) - x.append(5.0) - - # Note `i` and `j` may have different types (int64 vs. 
int32) - for j in range(len(x)): - res += j * x[j] - for val in x: - res += int(val) & len(x) - while len(x) > 0: - res += x.pop() - return res - -def list_unify_usecase2(n): - res = [] - for i in range(n): - if i & 1: - res.append((i, 1.0)) - else: - res.append((2.0, i)) - res.append((123j, 42)) - return res - -def range_unify_usecase(v): - if v: - r = range(np.int32(3)) - else: - r = range(np.int64(5)) - for x in r: - return x - -def issue_1394(a): - if a: - for i in range(a): - a += i - i = 1.2 - else: - i = 3 - return a, i - - -class TestMiscIssues(TestCase): - - @tag('important') - def test_issue_797(self): - """https://github.com/numba/numba/issues/797#issuecomment-58592401 - - Undeterministic triggering of tuple coercion error - """ - foo = jit(nopython=True)(issue_797) - g = np.zeros(shape=(10, 10), dtype=np.int32) - foo(np.int32(0), np.int32(0), np.int32(1), np.int32(1), g) - - @tag('important') - def test_issue_1080(self): - """https://github.com/numba/numba/issues/1080 - - Erroneous promotion of boolean args to int64 - """ - foo = jit(nopython=True)(issue_1080) - foo(True, False) - - @tag('important') - def test_list_unify1(self): - """ - Exercise back-propagation of refined list type. - """ - pyfunc = list_unify_usecase1 - cfunc = jit(nopython=True)(pyfunc) - for n in [5, 100]: - res = cfunc(n) - self.assertPreciseEqual(res, pyfunc(n)) - - @tag('important') - def test_list_unify2(self): - pyfunc = list_unify_usecase2 - cfunc = jit(nopython=True)(pyfunc) - res = cfunc(3) - # NOTE: the types will differ (Numba returns a homogeneous list with - # converted values). 
- self.assertEqual(res, pyfunc(3)) - - @tag('important') - def test_range_unify(self): - pyfunc = range_unify_usecase - cfunc = jit(nopython=True)(pyfunc) - for v in (0, 1): - res = cfunc(v) - self.assertPreciseEqual(res, pyfunc(v)) - - @tag('important') - def test_issue_1394(self): - pyfunc = issue_1394 - cfunc = jit(nopython=True)(pyfunc) - for v in (0, 1, 2): - res = cfunc(v) - self.assertEqual(res, pyfunc(v)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_typenames.py b/numba/numba/tests/test_typenames.py deleted file mode 100644 index b0756570c..000000000 --- a/numba/numba/tests/test_typenames.py +++ /dev/null @@ -1,19 +0,0 @@ -from __future__ import print_function, absolute_import - -import numpy as np - -from numba import types -from numba import unittest_support as unittest - - -class TestTypeNames(unittest.TestCase): - def test_numpy_integers(self): - expect = getattr(types, "int%d" % (np.dtype("int").itemsize * 8)) - self.assertEqual(types.int_, expect) - - expect = getattr(types, "uint%d" % (np.dtype("uint").itemsize * 8)) - self.assertEqual(types.uint, expect) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_typeof.py b/numba/numba/tests/test_typeof.py deleted file mode 100644 index fbfc42c61..000000000 --- a/numba/numba/tests/test_typeof.py +++ /dev/null @@ -1,569 +0,0 @@ -""" -Tests for the typeof() machinery. 
-""" - -from __future__ import print_function - -import array -from collections import namedtuple -import enum -import mmap -import sys - -import numpy as np - -import numba.unittest_support as unittest -from numba import cffi_support, numpy_support, types -from numba.special import typeof -from numba.dispatcher import OmittedArg -from numba._dispatcher import compute_fingerprint - -from .support import TestCase, tag -from .test_numpy_support import ValueTypingTestBase -from .ctypes_usecases import * -from .enum_usecases import * - - -recordtype = np.dtype([('a', np.float64), - ('b', np.int32), - ('c', np.complex64), - ('d', (np.str, 5))]) - -recordtype2 = np.dtype([('e', np.int8), - ('f', np.float64)]) - -recordtype3 = np.dtype([('e', np.int8), - ('f', np.float64)], align=True) - -Point = namedtuple('Point', ('x', 'y')) - -Rect = namedtuple('Rect', ('width', 'height')) - - -class Custom(object): - - @property - def _numba_type_(self): - """ - Magic attribute expected by Numba to get the numba type that - represents this object. - """ - return types.UniTuple(types.boolean, 42) - - -class TestTypeof(ValueTypingTestBase, TestCase): - """ - Test typeof() and, implicitly, typing.Context.get_argument_type(). - """ - - @tag('important') - def test_number_values(self): - """ - Test special.typeof() with scalar number values. - """ - self.check_number_values(typeof) - # These values mirror Dispatcher semantics - self.assertEqual(typeof(1), types.intp) - self.assertEqual(typeof(-1), types.intp) - self.assertEqual(typeof(2**40), types.int64) - self.assertEqual(typeof(2**63), types.uint64) - self.assertEqual(typeof(2**63 - 1), types.int64) - self.assertEqual(typeof(-2**63), types.int64) - - @tag('important') - def test_datetime_values(self): - """ - Test special.typeof() with np.timedelta64 values. - """ - self.check_datetime_values(typeof) - - @tag('important') - def test_timedelta_values(self): - """ - Test special.typeof() with np.timedelta64 values. 
- """ - self.check_timedelta_values(typeof) - - @tag('important') - def test_array_values(self): - """ - Test special.typeof() with ndarray values. - """ - def check(arr, ndim, layout, mutable, aligned): - ty = typeof(arr) - self.assertIsInstance(ty, types.Array) - self.assertEqual(ty.ndim, ndim) - self.assertEqual(ty.layout, layout) - self.assertEqual(ty.mutable, mutable) - self.assertEqual(ty.aligned, aligned) - - a1 = np.arange(10) - check(a1, 1, 'C', True, True) - a2 = np.arange(10).reshape(2, 5) - check(a2, 2, 'C', True, True) - check(a2.T, 2, 'F', True, True) - a3 = (np.arange(60))[::2].reshape((2, 5, 3)) - check(a3, 3, 'A', True, True) - a4 = np.arange(1).reshape(()) - check(a4, 0, 'C', True, True) - a4.flags.writeable = False - check(a4, 0, 'C', False, True) - - # Unsupported dtype - a5 = a1.astype(a1.dtype.newbyteorder()) - with self.assertRaises(ValueError) as raises: - typeof(a5) - self.assertIn("Unsupported array dtype: %s" % (a5.dtype,), - str(raises.exception)) - - @tag('important') - def test_structured_arrays(self): - def check(arr, dtype, ndim, layout, aligned): - ty = typeof(arr) - self.assertIsInstance(ty, types.Array) - self.assertEqual(ty.dtype, dtype) - self.assertEqual(ty.ndim, ndim) - self.assertEqual(ty.layout, layout) - self.assertEqual(ty.aligned, aligned) - - dtype = np.dtype([('m', np.int32), ('n', 'S5')]) - rec_ty = numpy_support.from_struct_dtype(dtype) - - arr = np.empty(4, dtype=dtype) - check(arr, rec_ty, 1, "C", False) - arr = np.recarray(4, dtype=dtype) - check(arr, rec_ty, 1, "C", False) - - dtype = np.dtype([('m', np.int32), ('n', 'S5')], align=True) - rec_ty = numpy_support.from_struct_dtype(dtype) - - arr = np.empty(4, dtype=dtype) - check(arr, rec_ty, 1, "C", True) - arr = np.recarray(4, dtype=dtype) - check(arr, rec_ty, 1, "C", True) - - def test_buffers(self): - if sys.version_info >= (3,): - b = b"xx" - ty = typeof(b) - self.assertEqual(ty, types.Bytes(types.uint8, 1, "C")) - self.assertFalse(ty.mutable) - ty = 
typeof(memoryview(b)) - self.assertEqual(ty, types.MemoryView(types.uint8, 1, "C", - readonly=True)) - self.assertFalse(ty.mutable) - ty = typeof(array.array('i', [0, 1, 2])) - self.assertEqual(ty, types.PyArray(types.int32, 1, "C")) - self.assertTrue(ty.mutable) - - b = bytearray(10) - ty = typeof(b) - self.assertEqual(ty, types.ByteArray(types.uint8, 1, "C")) - self.assertTrue(ty.mutable) - - @tag('important') - def test_none(self): - ty = typeof(None) - self.assertEqual(ty, types.none) - - @tag('important') - def test_ellipsis(self): - ty = typeof(Ellipsis) - self.assertEqual(ty, types.ellipsis) - - def test_str(self): - ty = typeof("abc") - self.assertEqual(ty, types.string) - - @tag('important') - def test_slices(self): - for args in [(1,), (1, 2), (1, 2, 1), (1, 2, None)]: - v = slice(*args) - self.assertIs(typeof(v), types.slice2_type) - for args in [(1, 2, 2), (1, 2, -1), (None, None, -2)]: - v = slice(*args) - self.assertIs(typeof(v), types.slice3_type) - - @tag('important') - def test_tuples(self): - v = (1, 2) - self.assertEqual(typeof(v), types.UniTuple(types.intp, 2)) - v = (1, (2.0, 3)) - self.assertEqual(typeof(v), - types.Tuple((types.intp, - types.Tuple((types.float64, types.intp)))) - ) - - @tag('important') - def test_lists(self): - v = [1.0] * 100 - self.assertEqual(typeof(v), types.List(types.float64, reflected=True)) - - @tag('important') - def test_sets(self): - v = set([1.0, 2.0, 3.0]) - self.assertEqual(typeof(v), types.Set(types.float64, reflected=True)) - v = frozenset(v) - with self.assertRaises(ValueError): - typeof(v) - - @tag('important') - def test_namedtuple(self): - v = Point(1, 2) - tp_point = typeof(v) - self.assertEqual(tp_point, - types.NamedUniTuple(types.intp, 2, Point)) - v = Point(1, 2.0) - self.assertEqual(typeof(v), - types.NamedTuple([types.intp, types.float64], Point)) - w = Rect(3, 4) - tp_rect = typeof(w) - self.assertEqual(tp_rect, - types.NamedUniTuple(types.intp, 2, Rect)) - self.assertNotEqual(tp_rect, tp_point) - 
self.assertNotEqual(tp_rect, types.UniTuple(tp_rect.dtype, tp_rect.count)) - - @tag('important') - def test_enum(self): - tp_red = typeof(Color.red) - self.assertEqual(tp_red, types.EnumMember(Color, types.intp)) - self.assertEqual(tp_red, typeof(Color.blue)) - tp_choc = typeof(Shake.chocolate) - self.assertEqual(tp_choc, types.EnumMember(Shake, types.intp)) - self.assertEqual(tp_choc, typeof(Shake.mint)) - self.assertNotEqual(tp_choc, tp_red) - tp_404 = typeof(RequestError.not_found) - self.assertEqual(tp_404, types.IntEnumMember(RequestError, types.intp)) - self.assertEqual(tp_404, typeof(RequestError.internal_error)) - - with self.assertRaises(ValueError) as raises: - typeof(HeterogeneousEnum.red) - self.assertEqual(str(raises.exception), - "Cannot type heterogeneous enum: got value types complex128, float64") - - @tag('important') - def test_enum_class(self): - tp_color = typeof(Color) - self.assertEqual(tp_color, types.EnumClass(Color, types.intp)) - tp_shake = typeof(Shake) - self.assertEqual(tp_shake, types.EnumClass(Shake, types.intp)) - self.assertNotEqual(tp_shake, tp_color) - tp_shape = typeof(Shape) - self.assertEqual(tp_shape, types.IntEnumClass(Shape, types.intp)) - tp_error = typeof(RequestError) - self.assertEqual(tp_error, - types.IntEnumClass(RequestError, types.intp)) - self.assertNotEqual(tp_error, tp_shape) - - with self.assertRaises(ValueError) as raises: - typeof(HeterogeneousEnum) - self.assertEqual(str(raises.exception), - "Cannot type heterogeneous enum: got value types complex128, float64") - - @tag('important') - def test_dtype(self): - dtype = np.dtype('int64') - self.assertEqual(typeof(dtype), types.DType(types.int64)) - - dtype = np.dtype([('m', np.int32), ('n', 'S5')]) - rec_ty = numpy_support.from_struct_dtype(dtype) - self.assertEqual(typeof(dtype), types.DType(rec_ty)) - - @tag('important') - def test_ctypes(self): - ty_cos = typeof(c_cos) - ty_sin = typeof(c_sin) - self.assertIsInstance(ty_cos, types.ExternalFunctionPointer) - 
self.assertEqual(ty_cos.sig.args, (types.float64,)) - self.assertEqual(ty_cos.sig.return_type, types.float64) - self.assertEqual(ty_cos, ty_sin) - self.assertNotEqual(ty_cos.get_pointer(c_cos), - ty_sin.get_pointer(c_sin)) - - @tag('important') - @unittest.skipUnless(cffi_support.SUPPORTED, "CFFI not supported") - def test_cffi(self): - from . import cffi_usecases as mod - mod.init() - ty_cffi_cos = typeof(mod.cffi_cos) - ty_cffi_sin = typeof(mod.cffi_sin) - ty_cffi_boolean = typeof(mod.cffi_bool) - self.assertIsInstance(ty_cffi_cos, types.ExternalFunctionPointer) - self.assertEqual(ty_cffi_boolean.sig.return_type, types.boolean) - self.assertEqual(ty_cffi_cos.sig.args, (types.float64,)) - self.assertEqual(ty_cffi_cos.sig.return_type, types.float64) - self.assertEqual(ty_cffi_cos, ty_cffi_sin) - ty_ctypes_cos = typeof(c_cos) - self.assertNotEqual(ty_cffi_cos, ty_ctypes_cos) - self.assertNotEqual(ty_cffi_cos.get_pointer(mod.cffi_cos), - ty_cffi_sin.get_pointer(mod.cffi_sin)) - self.assertEqual(ty_cffi_cos.get_pointer(mod.cffi_cos), - ty_ctypes_cos.get_pointer(c_cos)) - - def test_custom(self): - ty = typeof(Custom()) - self.assertEqual(ty, types.UniTuple(types.boolean, 42)) - - def test_omitted_args(self): - ty0 = typeof(OmittedArg(0.0)) - ty1 = typeof(OmittedArg(1)) - ty2 = typeof(OmittedArg(1.0)) - ty3 = typeof(OmittedArg(1.0)) - self.assertEqual(ty0, types.Omitted(0.0)) - self.assertEqual(ty1, types.Omitted(1)) - self.assertEqual(ty2, types.Omitted(1.0)) - self.assertEqual(len({ty0, ty1, ty2}), 3) - self.assertEqual(ty3, ty2) - - -class DistinctChecker(object): - - def __init__(self): - self._distinct = set() - - def add(self, obj): - if obj in self._distinct: - raise AssertionError("%r already in %r" % (obj, self._distinct)) - self._distinct.add(obj) - - -class TestFingerprint(TestCase): - """ - Tests for _dispatcher.compute_fingerprint() - - Each fingerprint must denote values of only one Numba type (this is - the condition for correctness), but values of a 
Numba type may be - denoted by several distinct fingerprints (it only makes the cache - less efficient). - """ - - def test_floats(self): - s = compute_fingerprint(1.0) - self.assertEqual(compute_fingerprint(2.0), s) - s = compute_fingerprint(np.float32()) - self.assertEqual(compute_fingerprint(np.float32(2.0)), s) - self.assertNotEqual(compute_fingerprint(np.float64()), s) - - def test_ints(self): - s = compute_fingerprint(1) - for v in (-1, 2**60): - self.assertEqual(compute_fingerprint(v), s) - # Different int widths resolve to different fingerprints - distinct = set() - for tp in ('int8', 'int16', 'int32', 'int64', - 'uint8', 'uint16', 'uint32', 'uint64'): - tp = getattr(np, tp) - distinct.add(compute_fingerprint(tp())) - self.assertEqual(len(distinct), 8, distinct) - - def test_bool(self): - s = compute_fingerprint(True) - self.assertEqual(compute_fingerprint(False), s) - self.assertNotEqual(compute_fingerprint(1), s) - - def test_complex(self): - s = compute_fingerprint(1j) - self.assertEqual(s, compute_fingerprint(1+0j)) - s = compute_fingerprint(np.complex64()) - self.assertEqual(compute_fingerprint(np.complex64(2.0)), s) - self.assertNotEqual(compute_fingerprint(np.complex128()), s) - - def test_none(self): - compute_fingerprint(None) - - def test_enums(self): - # Enums should fail fingerprinting, even IntEnums - with self.assertRaises(NotImplementedError): - compute_fingerprint(Color.red) - with self.assertRaises(NotImplementedError): - compute_fingerprint(RequestError.not_found) - - def test_records(self): - d1 = np.dtype([('m', np.int32), ('n', np.int64)]) - d2 = np.dtype([('m', np.int32), ('n', np.int16)]) - v1 = np.empty(1, dtype=d1)[0] - v2 = np.empty(1, dtype=d2)[0] - self.assertNotEqual(compute_fingerprint(v1), - compute_fingerprint(v2)) - - def test_datetime(self): - a = np.datetime64(1, 'Y') - b = np.datetime64(2, 'Y') - c = np.datetime64(2, 's') - d = np.timedelta64(2, 's') - self.assertEqual(compute_fingerprint(a), - compute_fingerprint(b)) - 
distinct = set(compute_fingerprint(x) for x in (a, c, d)) - self.assertEqual(len(distinct), 3, distinct) - - def test_arrays(self): - distinct = DistinctChecker() - - # 1D - arr = np.empty(4, dtype=np.float64) - s = compute_fingerprint(arr) - distinct.add(s) - self.assertEqual(compute_fingerprint(arr[:1]), s) - # Non-contiguous - distinct.add(compute_fingerprint(arr[::2])) - # Other type - distinct.add(compute_fingerprint(arr.astype(np.complex64))) - # Readonly - arr.setflags(write=False) - distinct.add(compute_fingerprint(arr)) - - # 2D - arr = np.empty((4, 4), dtype=np.float64) - distinct.add(compute_fingerprint(arr)) - # F-contiguous - distinct.add(compute_fingerprint(arr.T)) - # Non-contiguous - distinct.add(compute_fingerprint(arr[::2])) - - # 0D - arr = np.empty((), dtype=np.float64) - distinct.add(compute_fingerprint(arr)) - - # Structured arrays - arr = np.empty(5, dtype=recordtype) - s = compute_fingerprint(arr) - distinct.add(s) - self.assertEqual(compute_fingerprint(arr[:1]), s) - arr = np.empty(5, dtype=recordtype2) - distinct.add(compute_fingerprint(arr)) - arr = np.empty(5, dtype=recordtype3) - distinct.add(compute_fingerprint(arr)) - - # np.recarray() is peculiar: it creates a new dtype instance in - # its constructor; check that the fingerprint remains efficient - a = np.recarray(1, dtype=recordtype) - b = np.recarray(1, dtype=recordtype) - self.assertEqual(compute_fingerprint(a), - compute_fingerprint(b)) - - def test_buffers(self): - distinct = DistinctChecker() - - s = compute_fingerprint(b'') - self.assertEqual(compute_fingerprint(b'xx'), s) - distinct.add(s) - distinct.add(compute_fingerprint(bytearray())) - distinct.add(compute_fingerprint(memoryview(b''))) - m_uint8_1d = compute_fingerprint(memoryview(bytearray())) - distinct.add(m_uint8_1d) - - if sys.version_info >= (3,): - arr = array.array('B', [42]) - distinct.add(compute_fingerprint(arr)) - self.assertEqual(compute_fingerprint(memoryview(arr)), m_uint8_1d) - for array_code in 'bi': - 
arr = array.array(array_code, [0, 1, 2]) - distinct.add(compute_fingerprint(arr)) - distinct.add(compute_fingerprint(memoryview(arr))) - - arr = np.empty(16, dtype=np.uint8) - distinct.add(compute_fingerprint(arr)) - self.assertEqual(compute_fingerprint(memoryview(arr)), m_uint8_1d) - arr = arr.reshape((4, 4)) - distinct.add(compute_fingerprint(arr)) - distinct.add(compute_fingerprint(memoryview(arr))) - arr = arr.T - distinct.add(compute_fingerprint(arr)) - distinct.add(compute_fingerprint(memoryview(arr))) - arr = arr[::2] - distinct.add(compute_fingerprint(arr)) - distinct.add(compute_fingerprint(memoryview(arr))) - - if sys.version_info >= (3,): - m = mmap.mmap(-1, 16384) - distinct.add(compute_fingerprint(m)) - self.assertEqual(compute_fingerprint(memoryview(m)), m_uint8_1d) - - def test_dtype(self): - distinct = DistinctChecker() - - s = compute_fingerprint(np.dtype('int64')) - self.assertEqual(compute_fingerprint(np.dtype('int64')), s) - distinct.add(s) - - for descr in ('int32', 'm8[s]', 'm8[W]', 'M8[s]'): - distinct.add(np.dtype(descr)) - - distinct.add(recordtype) - distinct.add(recordtype2) - - # np.recarray() is peculiar: it creates a new dtype instance in - # its constructor; check that the fingerprint remains efficient - a = np.recarray(1, dtype=recordtype) - b = np.recarray(1, dtype=recordtype) - self.assertEqual(compute_fingerprint(a.dtype), - compute_fingerprint(b.dtype)) - - def test_tuples(self): - distinct = DistinctChecker() - - s = compute_fingerprint((1,)) - self.assertEqual(compute_fingerprint((2,)), s) - distinct.add(s) - - distinct.add(compute_fingerprint(())) - distinct.add(compute_fingerprint((1, 2, 3))) - distinct.add(compute_fingerprint((1j, 2, 3))) - distinct.add(compute_fingerprint((1, (), np.empty(5)))) - distinct.add(compute_fingerprint((1, (), np.empty((5, 1))))) - - def test_lists(self): - distinct = DistinctChecker() - - s = compute_fingerprint([1]) - self.assertEqual(compute_fingerprint([2, 3]), s) - distinct.add(s) - - 
distinct.add(compute_fingerprint([1j])) - distinct.add(compute_fingerprint([4.5, 6.7])) - distinct.add(compute_fingerprint([(1,)])) - - with self.assertRaises(ValueError): - compute_fingerprint([]) - - def test_sets(self): - distinct = DistinctChecker() - - s = compute_fingerprint(set([1])) - self.assertEqual(compute_fingerprint(set([2, 3])), s) - distinct.add(s) - - distinct.add(compute_fingerprint([1])) - distinct.add(compute_fingerprint(set([1j]))) - distinct.add(compute_fingerprint(set([4.5, 6.7]))) - distinct.add(compute_fingerprint(set([(1,)]))) - - with self.assertRaises(ValueError): - compute_fingerprint(set()) - with self.assertRaises(NotImplementedError): - compute_fingerprint(frozenset([2, 3])) - - def test_omitted_args(self): - distinct = DistinctChecker() - - v0 = OmittedArg(0.0) - v1 = OmittedArg(1.0) - v2 = OmittedArg(1) - - s = compute_fingerprint(v0) - self.assertEqual(compute_fingerprint(v1), s) - distinct.add(s) - distinct.add(compute_fingerprint(v2)) - distinct.add(compute_fingerprint(0.0)) - distinct.add(compute_fingerprint(1)) - - def test_complicated_type(self): - # Generating a large fingerprint - t = None - for i in range(1000): - t = (t,) - s = compute_fingerprint(t) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_types.py b/numba/numba/tests/test_types.py deleted file mode 100644 index 1527ee032..000000000 --- a/numba/numba/tests/test_types.py +++ /dev/null @@ -1,564 +0,0 @@ -""" -Tests for numba.types. 
-""" - -from __future__ import print_function, absolute_import - -from collections import namedtuple -import gc -try: - import cPickle as pickle -except ImportError: - import pickle -import weakref - -import numpy as np - -from numba import unittest_support as unittest -from numba.utils import IS_PY3 -from numba import sigutils, types, typing -from numba.types.abstract import _typecache -from numba import jit, numpy_support, typeof -from .support import TestCase, tag -from .enum_usecases import * - - -Point = namedtuple('Point', ('x', 'y')) - -Rect = namedtuple('Rect', ('width', 'height')) - -def gen(x): - yield x + 1 - -class Dummy(object): - pass - - -class TestTypes(TestCase): - - @tag('important') - def test_equality(self): - self.assertEqual(types.int32, types.int32) - self.assertEqual(types.uint32, types.uint32) - self.assertEqual(types.complex64, types.complex64) - self.assertEqual(types.float32, types.float32) - # Different signedness - self.assertNotEqual(types.int32, types.uint32) - # Different width - self.assertNotEqual(types.int64, types.int32) - self.assertNotEqual(types.float64, types.float32) - self.assertNotEqual(types.complex64, types.complex128) - # Different domain - self.assertNotEqual(types.int64, types.float64) - self.assertNotEqual(types.uint64, types.float64) - self.assertNotEqual(types.complex64, types.float64) - # Same arguments but different return types - get_pointer = None - sig_a = typing.signature(types.intp, types.intp) - sig_b = typing.signature(types.voidptr, types.intp) - a = types.ExternalFunctionPointer(sig=sig_a, get_pointer=get_pointer) - b = types.ExternalFunctionPointer(sig=sig_b, get_pointer=get_pointer) - self.assertNotEqual(a, b) - # Different call convention - a = types.ExternalFunctionPointer(sig=sig_a, get_pointer=get_pointer) - b = types.ExternalFunctionPointer(sig=sig_a, get_pointer=get_pointer, - cconv='stdcall') - self.assertNotEqual(a, b) - # Different get_pointer - a = types.ExternalFunctionPointer(sig=sig_a, 
get_pointer=get_pointer) - b = types.ExternalFunctionPointer(sig=sig_a, get_pointer=object()) - self.assertNotEqual(a, b) - - # Different template classes bearing the same name - class DummyTemplate(object): - key = "foo" - a = types.BoundFunction(DummyTemplate, types.int32) - class DummyTemplate(object): - key = "bar" - b = types.BoundFunction(DummyTemplate, types.int32) - self.assertNotEqual(a, b) - - # Different dtypes - self.assertNotEqual(types.DType(types.int32), types.DType(types.int64)) - - def test_weaktype(self): - d = Dummy() - e = Dummy() - a = types.Dispatcher(d) - b = types.Dispatcher(d) - c = types.Dispatcher(e) - self.assertIs(a.dispatcher, d) - self.assertIs(b.dispatcher, d) - self.assertIs(c.dispatcher, e) - # Equality of alive references - self.assertTrue(a == b) - self.assertFalse(a != b) - self.assertTrue(a != c) - self.assertFalse(a == c) - z = types.int8 - self.assertFalse(a == z) - self.assertFalse(b == z) - self.assertFalse(c == z) - self.assertTrue(a != z) - self.assertTrue(b != z) - self.assertTrue(c != z) - # Hashing and mappings - s = set([a, b, c]) - self.assertEqual(len(s), 2) - self.assertIn(a, s) - self.assertIn(b, s) - self.assertIn(c, s) - # Kill the references - d = e = None - gc.collect() - with self.assertRaises(ReferenceError): - a.dispatcher - with self.assertRaises(ReferenceError): - b.dispatcher - with self.assertRaises(ReferenceError): - c.dispatcher - # Dead references are always unequal - self.assertFalse(a == b) - self.assertFalse(a == c) - self.assertFalse(b == c) - self.assertFalse(a == z) - self.assertTrue(a != b) - self.assertTrue(a != c) - self.assertTrue(b != c) - self.assertTrue(a != z) - - @tag('important') - def test_interning(self): - # Test interning and lifetime of dynamic types. 
- a = types.Dummy('xyzzyx') - code = a._code - b = types.Dummy('xyzzyx') - self.assertIs(b, a) - wr = weakref.ref(a) - del a - gc.collect() - c = types.Dummy('xyzzyx') - self.assertIs(c, b) - # The code is always the same - self.assertEqual(c._code, code) - del b, c - gc.collect() - self.assertIs(wr(), None) - d = types.Dummy('xyzzyx') - # The original code wasn't reused. - self.assertNotEqual(d._code, code) - - def test_cache_trimming(self): - # Test that the cache doesn't grow in size when types are - # created and disposed of. - cache = _typecache - gc.collect() - # Keep strong references to existing types, to avoid spurious failures - existing_types = [wr() for wr in cache] - cache_len = len(cache) - a = types.Dummy('xyzzyx') - b = types.Dummy('foox') - self.assertEqual(len(cache), cache_len + 2) - del a, b - gc.collect() - self.assertEqual(len(cache), cache_len) - - @tag('important') - def test_array_notation(self): - def check(arrty, scalar, ndim, layout): - self.assertIs(arrty.dtype, scalar) - self.assertEqual(arrty.ndim, ndim) - self.assertEqual(arrty.layout, layout) - scalar = types.int32 - check(scalar[:], scalar, 1, 'A') - check(scalar[::1], scalar, 1, 'C') - check(scalar[:,:], scalar, 2, 'A') - check(scalar[:,::1], scalar, 2, 'C') - check(scalar[::1,:], scalar, 2, 'F') - - def test_array_notation_for_dtype(self): - def check(arrty, scalar, ndim, layout): - self.assertIs(arrty.dtype, scalar) - self.assertEqual(arrty.ndim, ndim) - self.assertEqual(arrty.layout, layout) - scalar = types.int32 - dtyped = types.DType(scalar) - check(dtyped[:], scalar, 1, 'A') - check(dtyped[::1], scalar, 1, 'C') - check(dtyped[:,:], scalar, 2, 'A') - check(dtyped[:,::1], scalar, 2, 'C') - check(dtyped[::1,:], scalar, 2, 'F') - - @tag('important') - def test_call_notation(self): - # Function call signature - i = types.int32 - d = types.double - self.assertEqual(i(), typing.signature(i)) - self.assertEqual(i(d), typing.signature(i, d)) - self.assertEqual(i(d, d), 
typing.signature(i, d, d)) - # Value cast - self.assertPreciseEqual(i(42.5), 42) - self.assertPreciseEqual(d(-5), -5.0) - ty = types.NPDatetime('Y') - self.assertPreciseEqual(ty('1900'), np.datetime64('1900', 'Y')) - self.assertPreciseEqual(ty('NaT'), np.datetime64('NaT', 'Y')) - ty = types.NPTimedelta('s') - self.assertPreciseEqual(ty(5), np.timedelta64(5, 's')) - self.assertPreciseEqual(ty('NaT'), np.timedelta64('NaT', 's')) - ty = types.NPTimedelta('') - self.assertPreciseEqual(ty(5), np.timedelta64(5)) - self.assertPreciseEqual(ty('NaT'), np.timedelta64('NaT')) - - -class TestNumbers(TestCase): - """ - Tests for number types. - """ - - def test_bitwidth(self): - """ - All numeric types have bitwidth attribute - """ - for ty in types.number_domain: - self.assertTrue(hasattr(ty, "bitwidth")) - - def test_minval_maxval(self): - self.assertEqual(types.int8.maxval, 127) - self.assertEqual(types.int8.minval, -128) - self.assertEqual(types.uint8.maxval, 255) - self.assertEqual(types.uint8.minval, 0) - self.assertEqual(types.int64.maxval, (1<<63) - 1) - self.assertEqual(types.int64.minval, -(1<<63)) - self.assertEqual(types.uint64.maxval, (1<<64) - 1) - self.assertEqual(types.uint64.minval, 0) - - def test_from_bidwidth(self): - f = types.Integer.from_bitwidth - self.assertIs(f(32), types.int32) - self.assertIs(f(8, signed=False), types.uint8) - - def test_ordering(self): - def check_order(values): - for i in range(len(values)): - self.assertLessEqual(values[i], values[i]) - self.assertGreaterEqual(values[i], values[i]) - self.assertFalse(values[i] < values[i]) - self.assertFalse(values[i] > values[i]) - for j in range(i): - self.assertLess(values[j], values[i]) - self.assertLessEqual(values[j], values[i]) - self.assertGreater(values[i], values[j]) - self.assertGreaterEqual(values[i], values[j]) - self.assertFalse(values[i] < values[j]) - self.assertFalse(values[i] <= values[j]) - self.assertFalse(values[j] > values[i]) - self.assertFalse(values[j] >= values[i]) - - 
check_order([types.int8, types.int16, types.int32, types.int64]) - check_order([types.uint8, types.uint16, types.uint32, types.uint64]) - check_order([types.float32, types.float64]) - check_order([types.complex64, types.complex128]) - - if IS_PY3: - with self.assertRaises(TypeError): - types.int8 <= types.uint32 - with self.assertRaises(TypeError): - types.int8 <= types.float32 - with self.assertRaises(TypeError): - types.float64 <= types.complex128 - - -class TestNdIter(TestCase): - - def test_properties(self): - def check(ty, dtypes, ndim, layout, indexers=None): - self.assertEqual(ty.ndim, ndim) - self.assertEqual(ty.layout, layout) - self.assertEqual(ty.dtypes, dtypes) - views = [types.Array(dtype, 0, "C") for dtype in dtypes] - if len(views) > 1: - self.assertEqual(ty.yield_type, types.BaseTuple.from_types(views)) - else: - self.assertEqual(ty.yield_type, views[0]) - if indexers is not None: - self.assertEqual(ty.indexers, indexers) - - f32 = types.float32 - c64 = types.complex64 - i16 = types.int16 - a = types.Array(f32, 1, "C") - b = types.Array(f32, 2, "C") - c = types.Array(c64, 2, "F") - d = types.Array(i16, 2, "A") - e = types.Array(i16, 0, "C") - f = types.Array(f32, 1, "A") - g = types.Array(f32, 0, "C") - - # 0-dim iterator - ty = types.NumpyNdIterType((e,)) - check(ty, (i16,), 0, "C", [('0d', 0, 0, [0])]) - self.assertFalse(ty.need_shaped_indexing) - ty = types.NumpyNdIterType((e, g)) - check(ty, (i16, f32), 0, "C", [('0d', 0, 0, [0, 1])]) - self.assertFalse(ty.need_shaped_indexing) - ty = types.NumpyNdIterType((e, c64)) - check(ty, (i16, c64), 0, "C", - [('0d', 0, 0, [0]), ('scalar', 0, 0, [1])]) - self.assertFalse(ty.need_shaped_indexing) - - # 1-dim iterator - ty = types.NumpyNdIterType((a,)) - check(ty, (f32,), 1, "C", - [('flat', 0, 1, [0])]) - self.assertFalse(ty.need_shaped_indexing) - ty = types.NumpyNdIterType((a, a)) - check(ty, (f32, f32), 1, "C", - [('flat', 0, 1, [0, 1])]) - self.assertFalse(ty.need_shaped_indexing) - ty = 
types.NumpyNdIterType((a, e, e, c64)) - check(ty, (f32, i16, i16, c64), 1, "C", - [('flat', 0, 1, [0]), # a - ('0d', 0, 0, [1, 2]), # e, e - ('scalar', 0, 0, [3]), # c64 - ]) - self.assertFalse(ty.need_shaped_indexing) - ty = types.NumpyNdIterType((a, f)) - check(ty, (f32, f32), 1, "C", - [('flat', 0, 1, [0]), ('indexed', 0, 1, [1])]) - self.assertTrue(ty.need_shaped_indexing) - ty = types.NumpyNdIterType((f,)) - check(ty, (f32,), 1, "C", [('indexed', 0, 1, [0])]) - self.assertTrue(ty.need_shaped_indexing) - - # 2-dim C-order iterator - ty = types.NumpyNdIterType((b,)) - check(ty, (f32,), 2, "C", [('flat', 0, 2, [0])]) - self.assertFalse(ty.need_shaped_indexing) - ty = types.NumpyNdIterType((b, c)) - check(ty, (f32, c64), 2, "C", [('flat', 0, 2, [0]), ('indexed', 0, 2, [1])]) - self.assertTrue(ty.need_shaped_indexing) - ty = types.NumpyNdIterType((d,)) - check(ty, (i16,), 2, "C", [('indexed', 0, 2, [0])]) - self.assertTrue(ty.need_shaped_indexing) - ty = types.NumpyNdIterType((b, c, d, d, e)) - check(ty, (f32, c64, i16, i16, i16), 2, "C", - [('flat', 0, 2, [0]), # b - ('indexed', 0, 2, [1, 2, 3]), # c, d, d - ('0d', 0, 0, [4]), # e - ]) - self.assertTrue(ty.need_shaped_indexing) - ty = types.NumpyNdIterType((a, b, c, d, d, f)) - check(ty, (f32, f32, c64, i16, i16, f32), 2, "C", - [('flat', 1, 2, [0]), # a - ('flat', 0, 2, [1]), # b - ('indexed', 0, 2, [2, 3, 4]), # c, d, d - ('indexed', 1, 2, [5]), # f - ]) - self.assertTrue(ty.need_shaped_indexing) - - # 2-dim F-order iterator - ty = types.NumpyNdIterType((c,)) - check(ty, (c64,), 2, "F", [('flat', 0, 2, [0])]) - self.assertFalse(ty.need_shaped_indexing) - ty = types.NumpyNdIterType((c, b, c, f)) - check(ty, (c64, f32, c64, f32), 2, "F", - [('flat', 0, 2, [0, 2]), # c, c - ('indexed', 0, 2, [1]), # b - ('indexed', 0, 1, [3]), # f - ]) - self.assertTrue(ty.need_shaped_indexing) - ty = types.NumpyNdIterType((b, c, c, d, d, a, e)) - check(ty, (f32, c64, c64, i16, i16, f32, i16), 2, "F", - [('indexed', 0, 2, [0, 3, 
4]), # b, d, d - ('flat', 0, 2, [1, 2]), # c, c - ('flat', 0, 1, [5]), # a - ('0d', 0, 0, [6]), # e - ]) - self.assertTrue(ty.need_shaped_indexing) - - -class TestPickling(TestCase): - """ - Pickling and unpickling should preserve type identity (singleton-ness) - and the _code attribute. This is only a requirement for types that - can be part of function signatures. - """ - - def predefined_types(self): - """ - Yield all predefined type instances - """ - for ty in types.__dict__.values(): - if isinstance(ty, types.Type): - yield ty - - def check_pickling(self, orig): - pickled = pickle.dumps(orig, protocol=-1) - ty = pickle.loads(pickled) - self.assertIs(ty, orig) - self.assertGreaterEqual(ty._code, 0) - - def test_predefined_types(self): - tys = list(self.predefined_types()) - self.assertIn(types.int16, tys) - for ty in tys: - self.check_pickling(ty) - - def test_atomic_types(self): - for unit in ('M', 'ms'): - ty = types.NPDatetime(unit) - self.check_pickling(ty) - ty = types.NPTimedelta(unit) - self.check_pickling(ty) - - def test_arrays(self): - for ndim in (0, 1, 2): - for layout in ('A', 'C', 'F'): - ty = types.Array(types.int16, ndim, layout) - self.check_pickling(ty) - - def test_records(self): - recordtype = np.dtype([('a', np.float64), - ('b', np.int32), - ('c', np.complex64), - ('d', (np.str, 5))]) - ty = numpy_support.from_dtype(recordtype) - self.check_pickling(ty) - self.check_pickling(types.Array(ty, 1, 'A')) - - def test_optional(self): - ty = types.Optional(types.int32) - self.check_pickling(ty) - - def test_tuples(self): - ty1 = types.UniTuple(types.int32, 3) - self.check_pickling(ty1) - ty2 = types.Tuple((types.int32, ty1)) - self.check_pickling(ty2) - - def test_namedtuples(self): - ty1 = types.NamedUniTuple(types.intp, 2, Point) - self.check_pickling(ty1) - ty2 = types.NamedTuple((types.intp, types.float64), Point) - self.check_pickling(ty2) - - def test_enums(self): - ty1 = types.EnumMember(Color, types.int32) - self.check_pickling(ty1) - ty2 
= types.EnumMember(Shake, types.int64) - self.check_pickling(ty2) - ty3 = types.IntEnumMember(Shape, types.int64) - self.check_pickling(ty3) - - def test_lists(self): - ty = types.List(types.int32) - self.check_pickling(ty) - - def test_generator(self): - cfunc = jit("(int32,)", nopython=True)(gen) - sigs = list(cfunc.nopython_signatures) - ty = sigs[0].return_type - self.assertIsInstance(ty, types.Generator) - self.check_pickling(ty) - - # call templates are not picklable - @unittest.expectedFailure - def test_external_function_pointers(self): - from numba.typing import ctypes_utils - from .ctypes_usecases import c_sin, c_cos - for fnptr in (c_sin, c_cos): - ty = ctypes_utils.make_function_type(fnptr) - self.assertIsInstance(ty, types.ExternalFunctionPointer) - self.check_pickling(ty) - - -class TestSignatures(TestCase): - - def test_normalize_signature(self): - f = sigutils.normalize_signature - - def check(sig, args, return_type): - self.assertEqual(f(sig), (args, return_type)) - - def check_error(sig, msg): - with self.assertRaises(TypeError) as raises: - f(sig) - self.assertIn(msg, str(raises.exception)) - - f32 = types.float32 - c64 = types.complex64 - i16 = types.int16 - a = types.Array(f32, 1, "C") - - check((c64,), (c64,), None) - check((f32, i16), (f32, i16), None) - check(a(i16), (i16,), a) - check("int16(complex64)", (c64,), i16) - check("(complex64, int16)", (c64, i16), None) - check(typing.signature(i16, c64), (c64,), i16) - - check_error((types.Integer,), "invalid signature") - check_error((None,), "invalid signature") - check_error([], "invalid signature") - - -class TestRecordDtype(unittest.TestCase): - def test_record_type_equiv(self): - rec_dt = np.dtype([('a', np.int32), ('b', np.float32)]) - rec_ty = typeof(rec_dt) - art1 = rec_ty[::1] - arr = np.zeros(5, dtype=rec_dt) - art2 = typeof(arr) - self.assertEqual(art2.dtype.dtype, rec_ty) - self.assertEqual(art1, art2) - - def test_user_specified(self): - rec_dt = np.dtype([('a', np.int32), ('b', 
np.float32)]) - rec_type = typeof(rec_dt) - - @jit((rec_type[:],), nopython=True) - def foo(x): - return x['a'], x['b'] - - arr = np.zeros(1, dtype=rec_dt) - arr[0]['a'] = 123 - arr[0]['b'] = 32.1 - - a, b = foo(arr) - - self.assertEqual(a, arr[0]['a']) - self.assertEqual(b, arr[0]['b']) - - -class TestDType(TestCase): - def test_type_attr(self): - # Test .type attribute of dtype - def conv(arr, val): - return arr.dtype.type(val) - - jit_conv = jit(nopython=True)(conv) - - def assert_matches(arr, val, exact): - expect = conv(arr, val) - got = jit_conv(arr, val) - self.assertPreciseEqual(expect, exact) - self.assertPreciseEqual(typeof(expect), typeof(got)) - self.assertPreciseEqual(expect, got) - - arr = np.zeros(5) - assert_matches(arr.astype(np.intp), 1.2, 1) - assert_matches(arr.astype(np.float64), 1.2, 1.2) - assert_matches(arr.astype(np.complex128), 1.2, (1.2 + 0j)) - assert_matches(arr.astype(np.complex128), 1.2j, 1.2j) - - def test_kind(self): - def tkind(A): - return A.dtype.kind=='f' - jit_tkind = jit(nopython=True)(tkind) - self.assertEqual(tkind(np.ones(3)), jit_tkind(np.ones(3))) - self.assertEqual(tkind(np.ones(3, dtype=np.intp)), - jit_tkind(np.ones(3, dtype=np.intp))) - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_typingerror.py b/numba/numba/tests/test_typingerror.py deleted file mode 100644 index 95e776007..000000000 --- a/numba/numba/tests/test_typingerror.py +++ /dev/null @@ -1,207 +0,0 @@ -from __future__ import print_function - -import math -import re -import textwrap - -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated -from numba import jit, types -from numba.errors import TypingError -from .support import TestCase - - -def what(): - pass - -def foo(): - return what() - -def bar(x): - return x.a - -def issue_868(a): - return a.shape * 2 - -def impossible_return_type(x): - if x > 0: - return () - else: - return 1j - -def bad_hypot_usage(): - return 
math.hypot(1) - -def imprecise_list(): - l = [] - return len(l) - -def using_imprecise_list(): - a = np.array([]) - return a.astype(np.int32) - -def unknown_module(): - return numpyz.int32(0) - -def nop(x, y, z): - pass - -def array_setitem_invalid_cast(): - arr = np.empty(1, dtype=np.float64) - arr[0] = 1j # invalid cast from complex to float - return arr - - -class Foo(object): - def __repr__(self): - return "" - - -class TestTypingError(unittest.TestCase): - - def test_unknown_function(self): - try: - compile_isolated(foo, ()) - except TypingError as e: - self.assertIn("Untyped global name 'what'", str(e)) - else: - self.fail("Should raise error") - - def test_unknown_attrs(self): - try: - compile_isolated(bar, (types.int32,)) - except TypingError as e: - self.assertIn("Unknown attribute 'a' of type int32", str(e)) - else: - self.fail("Should raise error") - - def test_unknown_module(self): - # This used to print "'object' object has no attribute 'int32'" - with self.assertRaises(TypingError) as raises: - compile_isolated(unknown_module, ()) - self.assertIn("Untyped global name 'numpyz'", str(raises.exception)) - - def test_issue_868(self): - ''' - Summary: multiplying a scalar by a non-scalar would cause a crash in - type inference because TimeDeltaMixOp always assumed at least one of - its operands was an NPTimeDelta in its generic() method. 
- ''' - with self.assertRaises(TypingError) as raises: - compile_isolated(issue_868, (types.Array(types.int32, 1, 'C'),)) - - expected = ( - "Invalid use of * with parameters (tuple({0} x 1), {0})" - .format(str(types.intp))) - self.assertIn(expected, str(raises.exception)) - self.assertIn("[1] During: typing of", str(raises.exception)) - - def test_return_type_unification(self): - with self.assertRaises(TypingError) as raises: - compile_isolated(impossible_return_type, (types.int32,)) - self.assertIn("Can't unify return type from the following types: (), complex128", - str(raises.exception)) - - def test_bad_hypot_usage(self): - with self.assertRaises(TypingError) as raises: - compile_isolated(bad_hypot_usage, ()) - - errmsg = str(raises.exception) - # Make sure it listed the known signatures. - # This is sensitive to the formatting of the error message. - self.assertIn(" * (float64, float64) -> float64", errmsg) - - # find the context lines - ctx_lines = [x for x in errmsg.splitlines() if "] During" in x ] - - # Check contextual msg - self.assertTrue(re.search(r'\[1\] During: resolving callee type: Function.*hypot', ctx_lines[0])) - self.assertTrue(re.search(r'\[2\] During: typing of call .*test_typingerror.py', ctx_lines[1])) - - - def test_imprecise_list(self): - """ - Type inference should catch that a list type's remain imprecise, - instead of letting lowering fail. - """ - with self.assertRaises(TypingError) as raises: - compile_isolated(imprecise_list, ()) - - errmsg = str(raises.exception) - self.assertIn("Can't infer type of variable 'l': list(undefined)", - errmsg) - - def test_using_imprecise_list(self): - """ - Type inference should report informative error about untyped list. 
- TODO: #2931 - """ - with self.assertRaises(TypingError) as raises: - compile_isolated(using_imprecise_list, ()) - - errmsg = str(raises.exception) - self.assertIn("Undecided type $0.6 := ", errmsg) - - def test_array_setitem_invalid_cast(self): - with self.assertRaises(TypingError) as raises: - compile_isolated(array_setitem_invalid_cast, ()) - - errmsg = str(raises.exception) - self.assertIn("setitem: array(float64, 1d, C)[0] = complex128", errmsg) - -class TestArgumentTypingError(unittest.TestCase): - """ - Test diagnostics of typing errors caused by argument inference failure. - """ - - def test_unsupported_array_dtype(self): - # See issue #1943 - cfunc = jit(nopython=True)(nop) - a = np.ones(3) - a = a.astype(a.dtype.newbyteorder()) - with self.assertRaises(TypingError) as raises: - cfunc(1, a, a) - expected = textwrap.dedent("""\ - This error may have been caused by the following argument(s): - - argument 1: Unsupported array dtype: {0} - - argument 2: Unsupported array dtype: {0}""" - ).format(a.dtype) - self.assertIn(expected, str(raises.exception)) - - def test_unsupported_type(self): - cfunc = jit(nopython=True)(nop) - foo = Foo() - with self.assertRaises(TypingError) as raises: - cfunc(1, foo, 1) - - expected=re.compile(("This error may have been caused by the following " - "argument\(s\):\\n- argument 1:.*cannot determine " - "Numba type of " - "")) - self.assertTrue(expected.search(str(raises.exception)) is not None) - - -class TestCallError(unittest.TestCase): - def test_readonly_array(self): - @jit("(f8[:],)", nopython=True) - def inner(x): - return x - - @jit(nopython=True) - def outer(): - return inner(gvalues) - - gvalues = np.ones(10, dtype=np.float64) - - with self.assertRaises(TypingError) as raises: - outer() - - got = str(raises.exception) - pat = r"Invalid use of.*readonly array\(float64, 1d, C\)" - self.assertIsNotNone(re.search(pat, got)) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_ufuncs.py 
b/numba/numba/tests/test_ufuncs.py deleted file mode 100644 index 6e1979c28..000000000 --- a/numba/numba/tests/test_ufuncs.py +++ /dev/null @@ -1,1854 +0,0 @@ -from __future__ import print_function - -import functools -import itertools -import re -import sys -import warnings -import threading - -import numpy as np - -import numba.unittest_support as unittest -from numba import types, typing, utils, typeof, numpy_support, njit -from numba.compiler import compile_isolated, Flags, DEFAULT_FLAGS -from numba.numpy_support import from_dtype, version as numpy_version -from numba import jit, vectorize -from numba.config import PYVERSION -from numba.errors import LoweringError, TypingError -from .support import TestCase, CompilationCache, MemoryLeakMixin, tag -from numba.six import exec_ -from numba.typing.npydecl import supported_ufuncs, all_ufuncs - -is32bits = tuple.__itemsize__ == 4 -iswindows = sys.platform.startswith('win32') -after_numpy_112 = numpy_version >= (1, 12) - -# NOTE: to test the implementation of Numpy ufuncs, we disable rewriting -# of array expressions. 
- -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") -enable_pyobj_flags.set("no_rewrites") - -no_pyobj_flags = Flags() -no_pyobj_flags.set("no_rewrites") - -enable_nrt_flags = Flags() -enable_nrt_flags.set("nrt") -enable_nrt_flags.set("no_rewrites") - - -def _unimplemented(func): - """An 'expectedFailure' like decorator that only expects compilation errors - caused by unimplemented functions that fail in no-python mode""" - @functools.wraps(func) - def wrapper(*args, **kwargs): - try: - func(*args, **kwargs) - except TypingError: - raise unittest._ExpectedFailure(sys.exc_info()) - raise unittest._UnexpectedSuccess - -def _make_ufunc_usecase(ufunc): - ldict = {} - arg_str = ','.join(['a{0}'.format(i) for i in range(ufunc.nargs)]) - func_str = 'def fn({0}):\n np.{1}({0})'.format(arg_str, ufunc.__name__) - exec_(func_str, globals(), ldict) - fn = ldict['fn'] - fn.__name__ = '{0}_usecase'.format(ufunc.__name__) - return fn - - -def _make_unary_ufunc_usecase(ufunc): - ufunc_name = ufunc.__name__ - ldict = {} - exec_("def fn(x,out):\n np.{0}(x,out)".format(ufunc_name), globals(), ldict) - fn = ldict["fn"] - fn.__name__ = "{0}_usecase".format(ufunc_name) - return fn - - -def _make_unary_ufunc_op_usecase(ufunc_op): - ldict = {} - exec_("def fn(x):\n return {0}(x)".format(ufunc_op), globals(), ldict) - fn = ldict["fn"] - fn.__name__ = "usecase_{0}".format(hash(ufunc_op)) - return fn - - -def _make_binary_ufunc_usecase(ufunc): - ufunc_name = ufunc.__name__ - ldict = {} - exec_("def fn(x,y,out):\n np.{0}(x,y,out)".format(ufunc_name), globals(), ldict); - fn = ldict['fn'] - fn.__name__ = "{0}_usecase".format(ufunc_name) - return fn - - -def _make_binary_ufunc_op_usecase(ufunc_op): - ldict = {} - exec_("def fn(x,y):\n return x{0}y".format(ufunc_op), globals(), ldict) - fn = ldict["fn"] - fn.__name__ = "usecase_{0}".format(hash(ufunc_op)) - return fn - - -def _make_inplace_ufunc_op_usecase(ufunc_op): - ldict = {} - exec_("def fn(x,y):\n 
x{0}y".format(ufunc_op), globals(), ldict) - fn = ldict["fn"] - fn.__name__ = "usecase_{0}".format(hash(ufunc_op)) - return fn - - -def _as_dtype_value(tyargs, args): - """Convert python values into numpy scalar objects. - """ - return [np.dtype(str(ty)).type(val) for ty, val in zip(tyargs, args)] - - - -class BaseUFuncTest(MemoryLeakMixin): - - def setUp(self): - super(BaseUFuncTest, self).setUp() - self.inputs = [ - (np.uint32(0), types.uint32), - (np.uint32(1), types.uint32), - (np.int32(-1), types.int32), - (np.int32(0), types.int32), - (np.int32(1), types.int32), - (np.uint64(0), types.uint64), - (np.uint64(1), types.uint64), - (np.int64(-1), types.int64), - (np.int64(0), types.int64), - (np.int64(1), types.int64), - - (np.float32(-0.5), types.float32), - (np.float32(0.0), types.float32), - (np.float32(0.5), types.float32), - - (np.float64(-0.5), types.float64), - (np.float64(0.0), types.float64), - (np.float64(0.5), types.float64), - - (np.array([0,1], dtype='u4'), types.Array(types.uint32, 1, 'C')), - (np.array([0,1], dtype='u8'), types.Array(types.uint64, 1, 'C')), - (np.array([-1,0,1], dtype='i4'), types.Array(types.int32, 1, 'C')), - (np.array([-1,0,1], dtype='i8'), types.Array(types.int64, 1, 'C')), - (np.array([-0.5, 0.0, 0.5], dtype='f4'), types.Array(types.float32, 1, 'C')), - (np.array([-0.5, 0.0, 0.5], dtype='f8'), types.Array(types.float64, 1, 'C')), - ] - self.cache = CompilationCache() - - def _determine_output_type(self, input_type, int_output_type=None, - float_output_type=None): - ty = input_type - if isinstance(ty, types.Array): - ty = ty.dtype - - if ty in types.signed_domain: - if int_output_type: - output_type = types.Array(int_output_type, 1, 'C') - else: - output_type = types.Array(ty, 1, 'C') - elif ty in types.unsigned_domain: - if int_output_type: - output_type = types.Array(int_output_type, 1, 'C') - else: - output_type = types.Array(ty, 1, 'C') - else: - if float_output_type: - output_type = types.Array(float_output_type, 1, 'C') - 
else: - output_type = types.Array(ty, 1, 'C') - return output_type - - -class TestUFuncs(BaseUFuncTest, TestCase): - - def unary_ufunc_test(self, ufunc, flags=no_pyobj_flags, - skip_inputs=[], additional_inputs=[], - int_output_type=None, float_output_type=None, - kinds='ifc'): - # Necessary to avoid some Numpy warnings being silenced, despite - # the simplefilter() call below. - self.reset_module_warnings(__name__) - - ufunc = _make_unary_ufunc_usecase(ufunc) - - inputs = list(self.inputs) - inputs.extend(additional_inputs) - - pyfunc = ufunc - - for input_tuple in inputs: - input_operand = input_tuple[0] - input_type = input_tuple[1] - - if input_type in skip_inputs: - continue - # Some ufuncs don't allow all kinds of arguments, and implicit - # conversion has become stricter in 1.10. - if (numpy_support.strict_ufunc_typing and - input_operand.dtype.kind not in kinds): - continue - - output_type = self._determine_output_type( - input_type, int_output_type, float_output_type) - - cr = self.cache.compile(pyfunc, (input_type, output_type), - flags=flags) - cfunc = cr.entry_point - - if isinstance(input_operand, np.ndarray): - result = np.zeros(input_operand.size, - dtype=output_type.dtype.name) - expected = np.zeros(input_operand.size, - dtype=output_type.dtype.name) - else: - result = np.zeros(1, dtype=output_type.dtype.name) - expected = np.zeros(1, dtype=output_type.dtype.name) - - invalid_flag = False - with warnings.catch_warnings(record=True) as warnlist: - warnings.simplefilter('always') - pyfunc(input_operand, expected) - - warnmsg = "invalid value encountered" - for thiswarn in warnlist: - - if (issubclass(thiswarn.category, RuntimeWarning) - and str(thiswarn.message).startswith(warnmsg)): - invalid_flag = True - - cfunc(input_operand, result) - - msg = '\n'.join(["ufunc '{0}' failed", - "inputs ({1}):", "{2}", - "got({3})", "{4}", - "expected ({5}):", "{6}" - ]).format(ufunc.__name__, - input_type, input_operand, - output_type, result, - expected.dtype, 
expected) - - try: - np.testing.assert_array_almost_equal(expected, result, - decimal=5, - err_msg=msg) - except AssertionError: - if invalid_flag: - # Allow output to mismatch for invalid input - print("Output mismatch for invalid input", - input_tuple, result, expected) - else: - raise - - def binary_ufunc_test(self, ufunc, flags=no_pyobj_flags, - skip_inputs=[], additional_inputs=[], - int_output_type=None, float_output_type=None, - kinds='ifc', positive_only=False): - - ufunc = _make_binary_ufunc_usecase(ufunc) - - inputs = list(self.inputs) + additional_inputs - pyfunc = ufunc - - for input_tuple in inputs: - input_operand = input_tuple[0] - input_type = input_tuple[1] - - if input_type in skip_inputs: - continue - if positive_only and np.any(input_operand < 0): - continue - - # Some ufuncs don't allow all kinds of arguments, and implicit - # conversion has become stricter in 1.10. - if (numpy_support.strict_ufunc_typing and - input_operand.dtype.kind not in kinds): - continue - - output_type = self._determine_output_type( - input_type, int_output_type, float_output_type) - - cr = self.cache.compile(pyfunc, (input_type, input_type, output_type), - flags=flags) - cfunc = cr.entry_point - - if isinstance(input_operand, np.ndarray): - result = np.zeros(input_operand.size, - dtype=output_type.dtype.name) - expected = np.zeros(input_operand.size, - dtype=output_type.dtype.name) - else: - result = np.zeros(1, dtype=output_type.dtype.name) - expected = np.zeros(1, dtype=output_type.dtype.name) - cfunc(input_operand, input_operand, result) - pyfunc(input_operand, input_operand, expected) - np.testing.assert_array_almost_equal(expected, result) - - def unary_int_ufunc_test(self, name=None, flags=no_pyobj_flags): - self.unary_ufunc_test(name, flags=flags, - skip_inputs=[types.float32, types.float64, - types.Array(types.float32, 1, 'C'), - types.Array(types.float64, 1, 'C')]) - - def binary_int_ufunc_test(self, name=None, flags=no_pyobj_flags): - 
self.binary_ufunc_test(name, flags=flags, - skip_inputs=[types.float32, types.float64, - types.Array(types.float32, 1, 'C'), - types.Array(types.float64, 1, 'C')]) - - - ############################################################################ - # Math operations - - @tag('important') - def test_add_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.add, flags=flags) - - @tag('important') - def test_subtract_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.subtract, flags=flags) - - @tag('important') - def test_multiply_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.multiply, flags=flags) - - @tag('important') - def test_divide_ufunc(self, flags=no_pyobj_flags): - # Bear in mind that in python3 divide IS true_divide - # so the out type for int types will be a double - int_out_type = None - if PYVERSION >= (3, 0): - int_out_type = types.float64 - - self.binary_ufunc_test(np.divide, flags=flags, int_output_type=int_out_type) - - def test_logaddexp_ufunc(self): - self.binary_ufunc_test(np.logaddexp, kinds='f') - - def test_logaddexp2_ufunc(self): - self.binary_ufunc_test(np.logaddexp2, kinds='f') - - @tag('important') - def test_true_divide_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.true_divide, flags=flags, int_output_type=types.float64) - - @tag('important') - def test_floor_divide_ufunc(self): - self.binary_ufunc_test(np.floor_divide) - - @tag('important') - def test_negative_ufunc(self, flags=no_pyobj_flags): - # NumPy ufunc has bug with uint32 as input and int64 as output, - # so skip uint32 input. 
- self.unary_ufunc_test(np.negative, int_output_type=types.int64, - skip_inputs=[types.Array(types.uint32, 1, 'C'), types.uint32], - flags=flags) - - @tag('important') - def test_power_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.power, flags=flags, - positive_only=after_numpy_112) - - @tag('important') - def test_remainder_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.remainder, flags=flags) - - @tag('important') - def test_mod_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.mod, flags=flags) - - @tag('important') - def test_fmod_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.fmod, flags=flags) - - @tag('important') - def test_abs_ufunc(self, flags=no_pyobj_flags, ufunc=np.abs): - self.unary_ufunc_test(ufunc, flags=flags, - additional_inputs = [ - (np.uint32(np.iinfo(np.uint32).max), types.uint32), - (np.uint64(np.iinfo(np.uint64).max), types.uint64), - (np.float32(np.finfo(np.float32).min), types.float32), - (np.float64(np.finfo(np.float64).min), types.float64) - ]) - - @tag('important') - def test_absolute_ufunc(self, flags=no_pyobj_flags): - self.test_abs_ufunc(flags=flags, ufunc=np.absolute) - - @tag('important') - def test_fabs_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.fabs, flags=flags, kinds='f') - - @tag('important') - def test_rint_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.rint, flags=flags, kinds='cf') - - @tag('important') - def test_sign_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.sign, flags=flags) - - @tag('important') - def test_conj_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.conj, flags=flags) - - @tag('important') - def test_exp_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.exp, flags=flags, kinds='cf') - - @tag('important') - def test_exp2_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.exp2, flags=flags, kinds='cf') - - @tag('important') - def test_log_ufunc(self, 
flags=no_pyobj_flags): - self.unary_ufunc_test(np.log, flags=flags, kinds='cf') - - @tag('important') - def test_log2_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.log2, flags=flags, kinds='cf') - - @tag('important') - def test_log10_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.log10, flags=flags, kinds='cf') - - @tag('important') - def test_expm1_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.expm1, flags=flags, kinds='cf') - - @tag('important') - def test_log1p_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.log1p, flags=flags, kinds='cf') - - @tag('important') - def test_sqrt_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.sqrt, flags=flags, kinds='cf') - - @tag('important') - def test_square_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.square, flags=flags) - - @tag('important') - def test_reciprocal_ufunc(self, flags=no_pyobj_flags): - # reciprocal for integers doesn't make much sense and is problematic - # in the case of division by zero, as an inf will overflow float to - # int conversions, which is undefined behavior. 
- to_skip = [types.Array(types.uint32, 1, 'C'), types.uint32, - types.Array(types.int32, 1, 'C'), types.int32, - types.Array(types.uint64, 1, 'C'), types.uint64, - types.Array(types.int64, 1, 'C'), types.int64] - self.unary_ufunc_test(np.reciprocal, skip_inputs=to_skip, flags=flags) - - @tag('important') - def test_conjugate_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.conjugate, flags=flags) - - - ############################################################################ - # Trigonometric Functions - - @tag('important') - def test_sin_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.sin, flags=flags, kinds='cf') - - @tag('important') - def test_cos_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.cos, flags=flags, kinds='cf') - - @tag('important') - def test_tan_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.tan, flags=flags, kinds='cf') - - def test_arcsin_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.arcsin, flags=flags, kinds='cf') - - def test_arccos_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.arccos, flags=flags, kinds='cf') - - def test_arctan_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.arctan, flags=flags, kinds='cf') - - def test_arctan2_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.arctan2, flags=flags, kinds='cf') - - def test_hypot_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.hypot, kinds='f') - - @tag('important') - def test_sinh_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.sinh, flags=flags, kinds='cf') - - @tag('important') - def test_cosh_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.cosh, flags=flags, kinds='cf') - - @tag('important') - def test_tanh_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.tanh, flags=flags, kinds='cf') - - def test_arcsinh_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.arcsinh, flags=flags, kinds='cf') - - def 
test_arccosh_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.arccosh, flags=flags, kinds='cf') - - def test_arctanh_ufunc(self, flags=no_pyobj_flags): - # arctanh is only valid is only finite in the range ]-1, 1[ - # This means that for any of the integer types it will produce - # conversion from infinity/-infinity to integer. That's undefined - # behavior in C, so the results may vary from implementation to - # implementation. This means that the result from the compiler - # used to compile NumPy may differ from the result generated by - # llvm. Skipping the integer types in this test avoids failed - # tests because of this. - to_skip = [types.Array(types.uint32, 1, 'C'), types.uint32, - types.Array(types.int32, 1, 'C'), types.int32, - types.Array(types.uint64, 1, 'C'), types.uint64, - types.Array(types.int64, 1, 'C'), types.int64] - - self.unary_ufunc_test(np.arctanh, skip_inputs=to_skip, flags=flags, - kinds='cf') - - def test_deg2rad_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.deg2rad, flags=flags, kinds='f') - - def test_rad2deg_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.rad2deg, flags=flags, kinds='f') - - def test_degrees_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.degrees, flags=flags, kinds='f') - - def test_radians_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.radians, flags=flags, kinds='f') - - ############################################################################ - # Bit-twiddling Functions - - def test_bitwise_and_ufunc(self, flags=no_pyobj_flags): - self.binary_int_ufunc_test(np.bitwise_and, flags=flags) - - def test_bitwise_or_ufunc(self, flags=no_pyobj_flags): - self.binary_int_ufunc_test(np.bitwise_or, flags=flags) - - def test_bitwise_xor_ufunc(self, flags=no_pyobj_flags): - self.binary_int_ufunc_test(np.bitwise_xor, flags=flags) - - def test_invert_ufunc(self, flags=no_pyobj_flags): - self.unary_int_ufunc_test(np.invert, flags=flags) - - def 
test_bitwise_not_ufunc(self, flags=no_pyobj_flags): - self.unary_int_ufunc_test(np.bitwise_not, flags=flags) - - # Note: there is no entry for left_shift and right_shift as this harness - # is not valid for them. This is so because left_shift and right - # shift implementation in NumPy has undefined behavior (in C-parlance) - # when the second argument is a negative (or bigger than the number - # of bits) value. - # Also, right_shift for negative first arguments also relies on - # implementation defined behavior, although numba warantees "sane" - # behavior (arithmetic shifts on signed integers, logic shifts on - # unsigned integers). - - ############################################################################ - # Comparison functions - @tag('important') - def test_greater_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.greater, flags=flags) - - @tag('important') - def test_greater_equal_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.greater_equal, flags=flags) - - @tag('important') - def test_less_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.less, flags=flags) - - @tag('important') - def test_less_equal_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.less_equal, flags=flags) - - @tag('important') - def test_not_equal_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.not_equal, flags=flags) - - @tag('important') - def test_equal_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.equal, flags=flags) - - def test_logical_and_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.logical_and, flags=flags) - - def test_logical_or_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.logical_or, flags=flags) - - def test_logical_xor_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.logical_xor, flags=flags) - - def test_logical_not_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.logical_not, flags=flags) - - @tag('important') - def 
test_maximum_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.maximum, flags=flags) - - @tag('important') - def test_minimum_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.minimum, flags=flags) - - def test_fmax_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.fmax, flags=flags) - - def test_fmin_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.fmin, flags=flags) - - - ############################################################################ - # Floating functions - @tag('important') - def test_isfinite_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.isfinite, flags=flags) - - @tag('important') - def test_isinf_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.isinf, flags=flags) - - @tag('important') - def test_isnan_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.isnan, flags=flags) - - @tag('important') - def test_signbit_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.signbit, flags=flags) - - @tag('important') - def test_copysign_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.copysign, flags=flags, kinds='f') - - def test_nextafter_ufunc(self, flags=no_pyobj_flags): - self.binary_ufunc_test(np.nextafter, flags=flags, kinds='f') - - @_unimplemented - def test_modf_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.modf, flags=flags, kinds='f') - - # Note: there is no entry for ldexp as this harness isn't valid for this - # ufunc. this is so because ldexp requires heterogeneous inputs. - # However, this ufunc is tested by the TestLoopTypes test classes. 
- - @_unimplemented - def test_frexp_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.frexp, flags=flags, kinds='f') - - @tag('important') - def test_floor_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.floor, flags=flags, kinds='f') - - @tag('important') - def test_ceil_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.ceil, flags=flags, kinds='f') - - @tag('important') - def test_trunc_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.trunc, flags=flags, kinds='f') - - def test_spacing_ufunc(self, flags=no_pyobj_flags): - self.unary_ufunc_test(np.spacing, flags=flags, kinds='f') - - ############################################################################ - # Other tests - - def binary_ufunc_mixed_types_test(self, ufunc, flags=no_pyobj_flags): - ufunc_name = ufunc.__name__ - ufunc = _make_binary_ufunc_usecase(ufunc) - inputs1 = [ - (1, types.uint64), - (-1, types.int64), - (0.5, types.float64), - - (np.array([0, 1], dtype='u8'), types.Array(types.uint64, 1, 'C')), - (np.array([-1, 1], dtype='i8'), types.Array(types.int64, 1, 'C')), - (np.array([-0.5, 0.5], dtype='f8'), types.Array(types.float64, 1, 'C'))] - - inputs2 = inputs1 - - output_types = [types.Array(types.int64, 1, 'C'), - types.Array(types.float64, 1, 'C')] - - pyfunc = ufunc - - for input1, input2, output_type in itertools.product(inputs1, inputs2, output_types): - - input1_operand = input1[0] - input1_type = input1[1] - - input2_operand = input2[0] - input2_type = input2[1] - - # Skip division by unsigned int because of NumPy bugs - if ufunc_name == 'divide' and (input2_type == types.Array(types.uint32, 1, 'C') or - input2_type == types.Array(types.uint64, 1, 'C')): - continue - - # Skip some subtraction tests because of NumPy bugs - if ufunc_name == 'subtract' and input1_type == types.Array(types.uint32, 1, 'C') and \ - input2_type == types.uint32 and types.Array(types.int64, 1, 'C'): - continue - if ufunc_name == 'subtract' and input1_type == 
types.Array(types.uint32, 1, 'C') and \ - input2_type == types.uint64 and types.Array(types.int64, 1, 'C'): - continue - - if ((isinstance(input1_type, types.Array) or - isinstance(input2_type, types.Array)) and - not isinstance(output_type, types.Array)): - continue - - cr = self.cache.compile(pyfunc, - (input1_type, input2_type, output_type), - flags=flags) - cfunc = cr.entry_point - - if isinstance(input1_operand, np.ndarray): - result = np.zeros(input1_operand.size, - dtype=output_type.dtype.name) - expected = np.zeros(input1_operand.size, - dtype=output_type.dtype.name) - elif isinstance(input2_operand, np.ndarray): - result = np.zeros(input2_operand.size, - dtype=output_type.dtype.name) - expected = np.zeros(input2_operand.size, - dtype=output_type.dtype.name) - else: - result = np.zeros(1, dtype=output_type.dtype.name) - expected = np.zeros(1, dtype=output_type.dtype.name) - - cfunc(input1_operand, input2_operand, result) - pyfunc(input1_operand, input2_operand, expected) - - scalar_type = getattr(output_type, 'dtype', output_type) - prec = ('single' - if scalar_type in (types.float32, types.complex64) - else 'double') - self.assertPreciseEqual(expected, result, prec=prec) - - def test_mixed_types(self): - if not numpy_support.strict_ufunc_typing: - self.binary_ufunc_mixed_types_test(np.divide, flags=no_pyobj_flags) - - @tag('important') - def test_broadcasting(self): - - # Test unary ufunc - pyfunc = _make_unary_ufunc_usecase(np.negative) - - input_operands = [ - np.arange(3, dtype='i8'), - np.arange(3, dtype='i8').reshape(3,1), - np.arange(3, dtype='i8').reshape(1,3), - np.arange(3, dtype='i8').reshape(3,1), - np.arange(3, dtype='i8').reshape(1,3), - np.arange(3*3, dtype='i8').reshape(3,3)] - - output_operands = [ - np.zeros(3*3, dtype='i8').reshape(3,3), - np.zeros(3*3, dtype='i8').reshape(3,3), - np.zeros(3*3, dtype='i8').reshape(3,3), - np.zeros(3*3*3, dtype='i8').reshape(3,3,3), - np.zeros(3*3*3, dtype='i8').reshape(3,3,3), - np.zeros(3*3*3, 
dtype='i8').reshape(3,3,3)] - - for x, result in zip(input_operands, output_operands): - - input_type = types.Array(types.uint64, x.ndim, 'C') - output_type = types.Array(types.int64, result.ndim, 'C') - - cr = self.cache.compile(pyfunc, (input_type, output_type), - flags=no_pyobj_flags) - cfunc = cr.entry_point - - expected = np.zeros(result.shape, dtype=result.dtype) - np.negative(x, expected) - - cfunc(x, result) - self.assertPreciseEqual(result, expected) - - # Test binary ufunc - pyfunc = _make_binary_ufunc_usecase(np.add) - - input1_operands = [ - np.arange(3, dtype='u8'), - np.arange(3*3, dtype='u8').reshape(3,3), - np.arange(3*3*3, dtype='u8').reshape(3,3,3), - np.arange(3, dtype='u8').reshape(3,1), - np.arange(3, dtype='u8').reshape(1,3), - np.arange(3, dtype='u8').reshape(3,1,1), - np.arange(3*3, dtype='u8').reshape(3,3,1), - np.arange(3*3, dtype='u8').reshape(3,1,3), - np.arange(3*3, dtype='u8').reshape(1,3,3)] - - input2_operands = input1_operands - - for x, y in itertools.product(input1_operands, input2_operands): - - input1_type = types.Array(types.uint64, x.ndim, 'C') - input2_type = types.Array(types.uint64, y.ndim, 'C') - output_type = types.Array(types.uint64, max(x.ndim, y.ndim), 'C') - - cr = self.cache.compile(pyfunc, (input1_type, input2_type, output_type), - flags=no_pyobj_flags) - cfunc = cr.entry_point - - expected = np.add(x, y) - result = np.zeros(expected.shape, dtype='u8') - - cfunc(x, y, result) - self.assertPreciseEqual(result, expected) - - def test_implicit_output_npm(self): - with self.assertRaises(TypeError): - def myadd(a0, a1): - return np.add(a0, a1) - arr_ty = types.Array(types.uint64, 1, 'C') - cr = compile_isolated(myadd, (arr_ty, arr_ty), - flags=no_pyobj_flags) - - def test_broadcast_implicit_output_npm_nrt(self): - def pyfunc(a0, a1): - return np.add(a0, a1) - - input1_operands = [ - np.arange(3, dtype='u8'), - np.arange(3*3, dtype='u8').reshape(3,3), - np.arange(3*3*3, dtype='u8').reshape(3,3,3), - np.arange(3, 
dtype='u8').reshape(3,1), - np.arange(3, dtype='u8').reshape(1,3), - np.arange(3, dtype='u8').reshape(3,1,1), - np.arange(3*3, dtype='u8').reshape(3,3,1), - np.arange(3*3, dtype='u8').reshape(3,1,3), - np.arange(3*3, dtype='u8').reshape(1,3,3)] - - input2_operands = input1_operands - - for x, y in itertools.product(input1_operands, input2_operands): - - input1_type = types.Array(types.uint64, x.ndim, 'C') - input2_type = types.Array(types.uint64, y.ndim, 'C') - - cr = self.cache.compile(pyfunc, (input1_type, input2_type), - flags=enable_nrt_flags) - cfunc = cr.entry_point - - expected = np.add(x, y) - result = cfunc(x, y) - np.testing.assert_array_equal(expected, result) - - def test_implicit_output_layout_binary(self): - def pyfunc(a0, a1): - return np.add(a0, a1) - - # C layout - X = np.linspace(0, 1, 20).reshape(4, 5) - # F layout - Y = np.array(X, order='F') - # A layout - Z = X.reshape(5, 4).T[0] - - Xty = typeof(X) - assert X.flags.c_contiguous and Xty.layout == 'C' - Yty = typeof(Y) - assert Y.flags.f_contiguous and Yty.layout == 'F' - Zty = typeof(Z) - assert Zty.layout == 'A' - assert not Z.flags.c_contiguous - assert not Z.flags.f_contiguous - - testcases = list(itertools.permutations([X, Y, Z], 2)) - testcases += [(X, X)] - testcases += [(Y, Y)] - testcases += [(Z, Z)] - - for arg0, arg1 in testcases: - cr = self.cache.compile(pyfunc, (typeof(arg0), typeof(arg1)), - flags=enable_nrt_flags) - expected = pyfunc(arg0, arg1) - result = cr.entry_point(arg0, arg1) - - self.assertEqual(expected.flags.c_contiguous, - result.flags.c_contiguous) - self.assertEqual(expected.flags.f_contiguous, - result.flags.f_contiguous) - np.testing.assert_array_equal(expected, result) - - def test_implicit_output_layout_unary(self): - def pyfunc(a0): - return np.sqrt(a0) - - # C layout - X = np.linspace(0, 1, 20).reshape(4, 5) - # F layout - Y = np.array(X, order='F') - # A layout - Z = X.reshape(5, 4).T[0] - - Xty = typeof(X) - assert X.flags.c_contiguous and Xty.layout == 'C' 
- Yty = typeof(Y) - assert Y.flags.f_contiguous and Yty.layout == 'F' - Zty = typeof(Z) - assert Zty.layout == 'A' - assert not Z.flags.c_contiguous - assert not Z.flags.f_contiguous - - for arg0 in [X, Y, Z]: - cr = self.cache.compile(pyfunc, (typeof(arg0),), - flags=enable_nrt_flags) - expected = pyfunc(arg0) - result = cr.entry_point(arg0) - - self.assertEqual(expected.flags.c_contiguous, - result.flags.c_contiguous) - self.assertEqual(expected.flags.f_contiguous, - result.flags.f_contiguous) - np.testing.assert_array_equal(expected, result) - - - -class TestArrayOperators(BaseUFuncTest, TestCase): - - def _check_results(self, expected, got): - self.assertEqual(expected.dtype.kind, got.dtype.kind) - np.testing.assert_array_almost_equal(expected, got) - - def unary_op_test(self, operator, flags=enable_nrt_flags, - skip_inputs=[], additional_inputs=[], - int_output_type=None, float_output_type=None): - operator_func = _make_unary_ufunc_op_usecase(operator) - inputs = list(self.inputs) - inputs.extend(additional_inputs) - pyfunc = operator_func - for input_tuple in inputs: - input_operand, input_type = input_tuple - - if ((input_type in skip_inputs) or - (not isinstance(input_type, types.Array))): - continue - - cr = self.cache.compile(pyfunc, (input_type,), - flags=flags) - cfunc = cr.entry_point - expected = pyfunc(input_operand) - got = cfunc(input_operand) - self._check_results(expected, got) - - def binary_op_test(self, operator, flags=enable_nrt_flags, - skip_inputs=[], additional_inputs=[], - int_output_type=None, float_output_type=None, - positive_rhs=False): - operator_func = _make_binary_ufunc_op_usecase(operator) - inputs = list(self.inputs) - inputs.extend(additional_inputs) - pyfunc = operator_func - for input_tuple in inputs: - input_operand1, input_type = input_tuple - input_dtype = numpy_support.as_dtype( - getattr(input_type, "dtype", input_type)) - input_type1 = input_type - - if input_type in skip_inputs: - continue - - if positive_rhs: - zero = 
np.zeros(1, dtype=input_dtype)[0] - # If we only use two scalars, the code generator will not - # select the ufunctionalized operator, so we mix it up. - if isinstance(input_type, types.Array): - input_operand0 = input_operand1 - input_type0 = input_type - if positive_rhs and np.any(input_operand1 < zero): - continue - else: - input_operand0 = (np.random.random(10) * 100).astype( - input_dtype) - input_type0 = typeof(input_operand0) - if positive_rhs and input_operand1 < zero: - continue - - cr = self.cache.compile(pyfunc, (input_type0, input_type1), - flags=flags) - cfunc = cr.entry_point - expected = pyfunc(input_operand0, input_operand1) - got = cfunc(input_operand0, input_operand1) - self._check_results(expected, got) - - def bitwise_additional_inputs(self): - # For bitwise operators, we want to check the results for boolean - # arrays (see #1813). - return [ - (True, types.boolean), - (False, types.boolean), - (np.array([True, False]), types.Array(types.boolean, 1, 'C')), - ] - - def binary_int_op_test(self, *args, **kws): - skip_inputs = kws.setdefault('skip_inputs', []) - skip_inputs += [ - types.float32, types.float64, - types.Array(types.float32, 1, 'C'), - types.Array(types.float64, 1, 'C'), - ] - return self.binary_op_test(*args, **kws) - - def binary_bitwise_op_test(self, *args, **kws): - additional_inputs = kws.setdefault('additional_inputs', []) - additional_inputs += self.bitwise_additional_inputs() - return self.binary_int_op_test(*args, **kws) - - def inplace_op_test(self, operator, lhs_values, rhs_values, - lhs_dtypes, rhs_dtypes): - operator_func = _make_inplace_ufunc_op_usecase(operator) - pyfunc = operator_func - - # The left operand can only be an array, while the right operand - # can be either an array or a scalar - lhs_inputs = [np.array(lhs_values, dtype=dtype) - for dtype in lhs_dtypes] - - rhs_arrays = [np.array(rhs_values, dtype=dtype) - for dtype in rhs_dtypes] - rhs_scalars = [dtype(v) for v in rhs_values for dtype in rhs_dtypes] - 
rhs_inputs = rhs_arrays + rhs_scalars - - for lhs, rhs in itertools.product(lhs_inputs, rhs_inputs): - lhs_type = typeof(lhs) - rhs_type = typeof(rhs) - cr = self.cache.compile(pyfunc, (lhs_type, rhs_type), - flags=no_pyobj_flags) - cfunc = cr.entry_point - expected = lhs.copy() - pyfunc(expected, rhs) - got = lhs.copy() - cfunc(got, rhs) - self.assertPreciseEqual(got, expected) - - def inplace_float_op_test(self, operator, lhs_values, rhs_values): - # Also accept integer inputs for the right operand (they should - # be converted to float). - return self.inplace_op_test(operator, lhs_values, rhs_values, - (np.float32, np.float64), - (np.float32, np.float64, np.int64)) - - def inplace_int_op_test(self, operator, lhs_values, rhs_values): - self.inplace_op_test(operator, lhs_values, rhs_values, - (np.int16, np.int32, np.int64), - (np.int16, np.uint32)) - - def inplace_bitwise_op_test(self, operator, lhs_values, rhs_values): - self.inplace_int_op_test(operator, lhs_values, rhs_values) - self.inplace_op_test(operator, lhs_values, rhs_values, - (np.bool_,), (np.bool_, np.bool_)) - - # ____________________________________________________________ - # Unary operators - - def test_unary_positive_array_op(self): - self.unary_op_test('+') - - @tag('important') - def test_unary_negative_array_op(self): - self.unary_op_test('-') - - def test_unary_invert_array_op(self): - self.unary_op_test('~', - skip_inputs=[types.float32, types.float64, - types.Array(types.float32, 1, 'C'), - types.Array(types.float64, 1, 'C')], - additional_inputs=self.bitwise_additional_inputs()) - - # ____________________________________________________________ - # Inplace operators - - @tag('important') - def test_inplace_add(self): - self.inplace_float_op_test('+=', [-1, 1.5, 3], [-5, 0, 2.5]) - - @tag('important') - def test_inplace_sub(self): - self.inplace_float_op_test('-=', [-1, 1.5, 3], [-5, 0, 2.5]) - - @tag('important') - def test_inplace_mul(self): - self.inplace_float_op_test('*=', [-1, 1.5, 
3], [-5, 0, 2.5]) - - def test_inplace_floordiv(self): - self.inplace_float_op_test('//=', [-1, 1.5, 3], [-5, 1.25, 2.5]) - - def test_inplace_div(self): - self.inplace_float_op_test('/=', [-1, 1.5, 3], [-5, 0, 2.5]) - - def test_inplace_remainder(self): - self.inplace_float_op_test('%=', [-1, 1.5, 3], [-5, 2, 2.5]) - - @tag('important') - def test_inplace_pow(self): - self.inplace_float_op_test('**=', [-1, 1.5, 3], [-5, 2, 2.5]) - - def test_inplace_and(self): - self.inplace_bitwise_op_test('&=', [0, 1, 2, 3, 51], [0, 13, 16, 42, 255]) - - def test_inplace_or(self): - self.inplace_bitwise_op_test('|=', [0, 1, 2, 3, 51], [0, 13, 16, 42, 255]) - - def test_inplace_xor(self): - self.inplace_bitwise_op_test('^=', [0, 1, 2, 3, 51], [0, 13, 16, 42, 255]) - - @tag('important') - def test_inplace_lshift(self): - self.inplace_int_op_test('<<=', [0, 5, -10, -51], [0, 1, 4, 14]) - - def test_inplace_rshift(self): - self.inplace_int_op_test('>>=', [0, 5, -10, -51], [0, 1, 4, 14]) - - def test_unary_positive_array_op(self): - ''' - Verify that the unary positive operator copies values, and doesn't - just alias to the input array (mirrors normal Numpy/Python - interaction behavior). 
- ''' - # Test originally from @gmarkall - def f(a1): - a2 = +a1 - a1[0] = 3 - a2[1] = 4 - return a2 - - a1 = np.zeros(10) - a2 = f(a1) - self.assertTrue(a1[0] != a2[0] and a1[1] != a2[1]) - a3 = np.zeros(10) - a4 = njit(f)(a3) - self.assertTrue(a3[0] != a4[0] and a3[1] != a4[1]) - np.testing.assert_array_equal(a1, a3) - np.testing.assert_array_equal(a2, a4) - - # ____________________________________________________________ - # Binary operators - - @tag('important') - def test_add_array_op(self): - self.binary_op_test('+') - - @tag('important') - def test_subtract_array_op(self): - self.binary_op_test('-') - - @tag('important') - def test_multiply_array_op(self): - self.binary_op_test('*') - - @tag('important') - def test_divide_array_op(self): - int_out_type = None - if PYVERSION >= (3, 0): - int_out_type = types.float64 - self.binary_op_test('/', int_output_type=int_out_type) - - @tag('important') - def test_floor_divide_array_op(self): - # Avoid floating-point zeros as x // 0.0 can have varying results - # depending on the algorithm (which changed accross Numpy versions) - self.inputs = [ - (np.uint32(1), types.uint32), - (np.int32(-2), types.int32), - (np.int32(0), types.int32), - (np.uint64(4), types.uint64), - (np.int64(-5), types.int64), - (np.int64(0), types.int64), - - (np.float32(-0.5), types.float32), - (np.float32(1.5), types.float32), - - (np.float64(-2.5), types.float64), - (np.float64(3.5), types.float64), - - (np.array([1,2], dtype='u4'), types.Array(types.uint32, 1, 'C')), - (np.array([3,4], dtype='u8'), types.Array(types.uint64, 1, 'C')), - (np.array([-1,1,5], dtype='i4'), types.Array(types.int32, 1, 'C')), - (np.array([-1,1,6], dtype='i8'), types.Array(types.int64, 1, 'C')), - (np.array([-0.5, 1.5], dtype='f4'), types.Array(types.float32, 1, 'C')), - (np.array([-2.5, 3.5], dtype='f8'), types.Array(types.float64, 1, 'C')), - ] - self.binary_op_test('//') - - @tag('important') - def test_remainder_array_op(self): - self.binary_op_test('%') - - 
@tag('important') - def test_power_array_op(self): - self.binary_op_test('**', positive_rhs=after_numpy_112) - - @tag('important') - def test_left_shift_array_op(self): - self.binary_int_op_test('<<', positive_rhs=True) - - @tag('important') - def test_right_shift_array_op(self): - self.binary_int_op_test('>>', positive_rhs=True) - - @tag('important') - def test_bitwise_and_array_op(self): - self.binary_bitwise_op_test('&') - - @tag('important') - def test_bitwise_or_array_op(self): - self.binary_bitwise_op_test('|') - - @tag('important') - def test_bitwise_xor_array_op(self): - self.binary_bitwise_op_test('^') - - @tag('important') - def test_equal_array_op(self): - self.binary_op_test('==') - - @tag('important') - def test_greater_array_op(self): - self.binary_op_test('>') - - @tag('important') - def test_greater_equal_array_op(self): - self.binary_op_test('>=') - - @tag('important') - def test_less_array_op(self): - self.binary_op_test('<') - - @tag('important') - def test_less_equal_array_op(self): - self.binary_op_test('<=') - - @tag('important') - def test_not_equal_array_op(self): - self.binary_op_test('!=') - - -class TestScalarUFuncs(TestCase): - """check the machinery of ufuncs works when the result is an scalar. - These are not exhaustive because: - - the machinery to support this case is the same for all the functions of a - given arity. - - the result of the inner function itself is already tested in TestUFuncs - - This class tests regular uses. A subclass tests the no python backend. 
- """ - - _compile_flags = enable_pyobj_flags - - def run_ufunc(self, pyfunc, arg_types, arg_values): - for tyargs, args in zip(arg_types, arg_values): - cr = compile_isolated(pyfunc, tyargs, flags=self._compile_flags) - cfunc = cr.entry_point - got = cfunc(*args) - expected = pyfunc(*_as_dtype_value(tyargs, args)) - - msg = 'for args {0} typed {1}'.format(args, tyargs) - - # note: due to semantics of ufuncs, thing like adding a int32 to a - # uint64 results in doubles (as neither int32 can be cast safely - # to uint64 nor vice-versa, falling back to using the float version. - # Modify in those cases the expected value (the numpy version does - # not use typed integers as inputs so its result is an integer) - special = set([(types.int32, types.uint64), (types.uint64, types.int32), - (types.int64, types.uint64), (types.uint64, types.int64)]) - if tyargs in special: - expected = float(expected) - else: - # The numba version of scalar ufuncs return an actual value that - # gets converted to a Python type, instead of using NumPy scalars. - # although in python 2 NumPy scalars are considered and instance of - # the appropriate python type, in python 3 that is no longer the case. - # This is why the expected result is casted to the appropriate Python - # type (which is actually the expected behavior of the ufunc translation) - if np.issubdtype(expected.dtype, np.inexact): - expected = float(expected) - elif np.issubdtype(expected.dtype, np.integer): - expected = int(expected) - elif np.issubdtype(expected.dtype, np.bool): - expected = bool(expected) - - alltypes = cr.signature.args + (cr.signature.return_type,) - - # select the appropriate precision for comparison: note that an argument - # typed at a lower precision can introduce precision problems. For this - # reason the argument types must be taken into account. 
- if any([t==types.float32 for t in alltypes]): - prec='single' - elif any([t==types.float64 for t in alltypes]): - prec='double' - else: - prec='exact' - - self.assertPreciseEqual(got, expected, msg=msg, prec=prec) - - - def test_scalar_unary_ufunc(self): - def _func(x): - return np.sqrt(x) - - vals = [(2,), (2,), (1,), (2,), (.1,), (.2,)] - tys = [(types.int32,), (types.uint32,), - (types.int64,), (types.uint64,), - (types.float32,), (types.float64,)] - self.run_ufunc(_func, tys, vals) - - - def test_scalar_binary_uniform_ufunc(self): - def _func(x,y): - return np.add(x,y) - - vals = [2, 2, 1, 2, .1, .2] - tys = [types.int32, types.uint32, - types.int64, types.uint64, types.float32, types.float64] - self.run_ufunc(_func, zip(tys, tys), zip(vals, vals)) - - - def test_scalar_binary_mixed_ufunc(self, flags=enable_pyobj_flags): - def _func(x,y): - return np.add(x,y) - - vals = [2, 2, 1, 2, .1, .2] - tys = [types.int32, types.uint32, - types.int64, types.uint64, - types.float32, types.float64] - self.run_ufunc(_func, itertools.product(tys, tys), - itertools.product(vals, vals)) - - -class TestScalarUFuncsNoPython(TestScalarUFuncs): - """Same tests as TestScalarUFuncs, but forcing no python mode""" - _compile_flags = no_pyobj_flags - - -class TestUfuncIssues(TestCase): - - def test_issue_651(self): - # Exercise the code path to make sure this does not fail - @vectorize(["(float64,float64)"]) - def foo(x1, x2): - return np.add(x1, x2) + np.add(x1, x2) - - a = np.arange(10, dtype='f8') - b = np.arange(10, dtype='f8') - self.assertPreciseEqual(foo(a, b), (a + b) + (a + b)) - - def test_issue_713(self): - def foo(x,y): - return np.floor_divide(x,y) - - cr = compile_isolated(foo, [types.complex128, types.complex128]) - self.assertEqual(foo(1j, 1j), cr.entry_point(1j, 1j)) - - def test_issue_2006(self): - """ - should return float32, not float64. 
- """ - def foo(x, y): - return np.power(x, y) - pyfunc = foo - cfunc = jit(nopython=True)(pyfunc) - - def check(x, y): - got = cfunc(x, y) - np.testing.assert_array_almost_equal(got, pyfunc(x, y)) - # Check the power operation conserved the input's dtype - # (this is different from Numpy, whose behaviour depends on - # the *values* of the arguments -- see PyArray_CanCastArrayTo). - self.assertEqual(got.dtype, x.dtype) - - xs = [np.float32([1, 2, 3]), np.complex64([1j, 2, 3-3j])] - for x in xs: - check(x, 3) - check(x, np.uint64(3)) - check(x, np.int64([2, 2, 3])) - - -class _LoopTypesTester(TestCase): - """Test code generation for the different loop types defined by ufunc. - - This test relies on class variables to configure the test. Subclasses - of this class can just override some of these variables to check other - ufuncs in a different compilation context. The variables supported are: - - _funcs: the ufuncs to test - _compile_flags: compilation flags to use (to force nopython mode) - _skip_types: letter types that force skipping the loop when testing - if present in the NumPy ufunc signature. - _supported_types: only test loops where all the types in the loop - signature are in this collection. If unset, all. - - Note that both, _skip_types and _supported_types must be met for a loop - to be tested. - - The NumPy ufunc signature has a form like 'ff->f' (for a binary ufunc - loop taking 2 floats and resulting in a float). In a NumPy ufunc object - you can get a list of supported signatures by accessing the attribute - 'types'. - """ - _skip_types = 'OegG' - - # Allowed deviation between Numpy and Numba results - _ulps = {('arccos', 'F'): 2, - ('arcsin', 'D'): 4, - ('arcsin', 'F'): 4, - ('log10', 'D'): 5, - ('tanh', 'F'): 2, - } - - def _arg_for_type(self, a_letter_type, index=0): - """return a suitable array argument for testing the letter type""" - # Note all possible arrays must have the same size, since they - # may be used as inputs to the same func. 
- if a_letter_type in 'bhilq': - # an integral - return np.array([1, 4, 0, -2], dtype=a_letter_type) - if a_letter_type in 'BHILQ': - return np.array([1, 2, 4, 0], dtype=a_letter_type) - elif a_letter_type in '?': - # a boolean - return np.array([True, False, False, True], dtype=a_letter_type) - elif a_letter_type[0] == 'm': - # timedelta64 - if len(a_letter_type) == 1: - a_letter_type = 'm8[D]' - return np.array([2, -3, 'NaT', 0], dtype=a_letter_type) - elif a_letter_type[0] == 'M': - # datetime64 - if len(a_letter_type) == 1: - a_letter_type = 'M8[D]' - return np.array(['Nat', 1, 25, 0], dtype=a_letter_type) - elif a_letter_type in 'fd': - # floating point - return np.array([1.5, -3.5, 0.0, float('nan')], - dtype=a_letter_type) - elif a_letter_type in 'FD': - # complex - # Note `-1j` is different on 2.x and 3.x, hence the explicit spelling - if sys.platform != 'win32': - # Other platforms have better handling of negative zeros, - # test them - negzero = -(0.0 + 1.0j) - else: - negzero = 0.0 - 1.0j - return np.array([negzero, 1.5 + 1.5j, 1j * float('nan'), 0j], - dtype=a_letter_type) - else: - raise RuntimeError("type %r not understood" % (a_letter_type,)) - - def _check_loop(self, fn, ufunc, loop): - # the letter types for the args - letter_types = loop[:ufunc.nin] + loop[-ufunc.nout:] - - # ignore the loops containing an object argument. They will always - # fail in no python mode. Usually the last loop in ufuncs is an all - # object fallback - supported_types = getattr(self, '_supported_types', []) - if (supported_types and - any(l not in supported_types for l in letter_types)): - return - skip_types = getattr(self, '_skip_types', []) - if any(l in skip_types for l in letter_types): - return - # if the test case requires some types to be present, skip loops - # not involving any of those types. 
- required_types = getattr(self, '_required_types', []) - if required_types and not any(l in letter_types - for l in required_types): - return - - self._check_ufunc_with_dtypes(fn, ufunc, letter_types) - - def _check_ufunc_with_dtypes(self, fn, ufunc, dtypes): - arg_dty = [np.dtype(t) for t in dtypes] - arg_nbty = [types.Array(from_dtype(t), 1, 'C') for t in arg_dty] - cr = compile_isolated(fn, arg_nbty, flags=self._compile_flags) - - # Ensure a good mix of input values - c_args = [self._arg_for_type(t, index=index).repeat(2) - for index, t in enumerate(dtypes)] - for arr in c_args: - self.random.shuffle(arr) - py_args = [a.copy() for a in c_args] - - cr.entry_point(*c_args) - fn(*py_args) - - # Check each array (including inputs, to ensure they weren't - # mutated). - for dtype, py_arg, c_arg in zip(arg_dty, py_args, c_args): - py_arg, c_arg = self._fixup_results(dtype, py_arg, c_arg) - typechar = c_arg.dtype.char - ulps = self._ulps.get((ufunc.__name__, typechar), 1) - prec = 'single' if typechar in 'fF' else 'exact' - prec = 'double' if typechar in 'dD' else prec - msg = '\n'.join(["ufunc '{0}' arrays differ ({1}):", - "args: {2}", "expected {3}", "got {4}"]) - msg = msg.format(ufunc.__name__, c_args, prec, py_arg, c_arg) - self.assertPreciseEqual(py_arg, c_arg, prec=prec, msg=msg, - ulps=ulps) - - def _fixup_results(self, dtype, py_arg, c_arg): - return py_arg, c_arg - - @classmethod - def _check_ufunc_loops(cls, ufunc): - for loop in ufunc.types: - cls._inject_test(ufunc, loop) - - @classmethod - def _inject_test(cls, ufunc, loop): - def test_template(self): - fn = _make_ufunc_usecase(ufunc) - self._check_loop(fn, ufunc, loop) - setattr(cls, "test_{0}_{1}".format(ufunc.__name__, - loop.replace('->', '_')), - test_template) - - @classmethod - def autogenerate(cls): - for ufunc in cls._ufuncs: - cls._check_ufunc_loops(ufunc) - - -class TestLoopTypesIntNoPython(_LoopTypesTester): - _compile_flags = no_pyobj_flags - _ufuncs = supported_ufuncs[:] - # reciprocal and 
power need a special test due to issue #757 - _ufuncs.remove(np.power) - _ufuncs.remove(np.reciprocal) - _ufuncs.remove(np.left_shift) # has its own test class - _ufuncs.remove(np.right_shift) # has its own test class - # special test for bool subtract/negative, np1.13 deprecated/not supported - _ufuncs.remove(np.subtract) - _ufuncs.remove(np.negative) - _required_types = '?bBhHiIlLqQ' - _skip_types = 'fdFDmMO' + _LoopTypesTester._skip_types - -TestLoopTypesIntNoPython.autogenerate() - - -class TestLoopTypesSubtractAndNegativeNoPython(_LoopTypesTester): - _compile_flags = no_pyobj_flags - _ufuncs = [np.subtract, np.negative] - _required_types = '?bBhHiIlLqQfdFD' - _skip_types = 'mMO' + _LoopTypesTester._skip_types - if after_numpy_112: # np1.13 deprecated/not supported - _skip_types += '?' - -TestLoopTypesSubtractAndNegativeNoPython.autogenerate() - - -class TestLoopTypesReciprocalNoPython(_LoopTypesTester): - _compile_flags = no_pyobj_flags - _ufuncs = [np.reciprocal] # issue #757 - _required_types = 'bBhHiIlLqQfdFD' - _skip_types = 'mMO' + _LoopTypesTester._skip_types - - def _arg_for_type(self, a_letter_type, index=0): - res = super(self.__class__, self)._arg_for_type(a_letter_type, - index=index) - if a_letter_type in 'bBhHiIlLqQ': - # For integer reciprocal, avoid 0 as argument, as it triggers - # undefined behavior that may differ in results from Numba - # to the compiler used to compile NumPy. 
- res[res == 0] = 42 - return res - -TestLoopTypesReciprocalNoPython.autogenerate() - - -class TestLoopTypesPowerNoPython(_LoopTypesTester): - _compile_flags = no_pyobj_flags - _ufuncs = [np.power] # issue #757 - _required_types = 'bBhHiIlLqQfdFD' - _skip_types = 'mMO' + _LoopTypesTester._skip_types - - def _arg_for_type(self, a_letter_type, index=0): - res = super(self.__class__, self)._arg_for_type(a_letter_type, - index=index) - if a_letter_type in 'bBhHiIlLqQ' and index == 1: - # For integer power, avoid a negative exponent, as it triggers - # undefined behavior that may differ in results from Numba - # to the compiler used to compile NumPy - res[res < 0] = 3 - return res - -TestLoopTypesPowerNoPython.autogenerate() - - -class TestLoopTypesIntLeftShiftNoPython(_LoopTypesTester): - _compile_flags = no_pyobj_flags - _ufuncs = [np.left_shift] - _required_types = 'bBhHiIlLqQ' - _skip_types = 'fdFDmMO' + _LoopTypesTester._skip_types - - def _arg_for_type(self, a_letter_type, index=0): - res = super(self.__class__, self)._arg_for_type(a_letter_type, - index=index) - # Shifting by a negative amount (argument with index 1) is undefined - # behavior in C. It is also undefined behavior in numba. In the same - # sense, it is also undefined behavior when the shift amount is larger - # than the number of bits in the shifted integer. 
- # To avoid problems in the test, the values are clamped (clipped) so - # that 0 <= shift_amount < bitcount(shifted_integer) - if index == 1: - bit_count = res.dtype.itemsize * 8 - res = np.clip(res, 0, bit_count-1) - return res - -TestLoopTypesIntLeftShiftNoPython.autogenerate() - - -class TestLoopTypesIntRightShiftNoPython(_LoopTypesTester): - _compile_flags = no_pyobj_flags - _ufuncs = [np.right_shift] - _required_types = 'bBhHiIlLqQ' - _skip_types = 'fdFDmMO' + _LoopTypesTester._skip_types - - def _arg_for_type(self, a_letter_type, index=0): - res = super(self.__class__, self)._arg_for_type(a_letter_type, - index=index) - # Shifting by a negative amount (argument with index 1) is undefined - # behavior in C. It is also undefined behavior in numba. In the same - # sense, it is also undefined behavior when the shift amount is larger - # than the number of bits in the shifted integer. - # To avoid problems in the test, the values are clamped (clipped) so - # that 0 <= shift_amount < bitcount(shifted_integer) - if index == 1: - bit_count = res.dtype.itemsize * 8 - res = np.clip(res, 0, bit_count-1) - - # Right shift has "implementation defined behavior" when the number - # shifted is negative (in C). In numba, right shift for signed integers - # is "arithmetic" while for unsigned integers is "logical". - # This test compares against the NumPy implementation, that relies - # on "implementation defined behavior", so the test could be a false - # failure if the compiler used to compile NumPy doesn't follow the same - # policy. - # Hint: do not rely on right shifting negative numbers in NumPy. 
- if index == 0: - res = np.abs(res) - return res - -TestLoopTypesIntRightShiftNoPython.autogenerate() - - -class TestLoopTypesFloorDivideNoPython(_LoopTypesTester): - _compile_flags = no_pyobj_flags - _ufuncs = [np.floor_divide, np.remainder] - _required_types = 'bBhHiIlLqQfdFD' - _skip_types = 'mMO' + _LoopTypesTester._skip_types - - def _fixup_results(self, dtype, py_arg, c_arg): - if dtype.kind == 'f': - # Discrepancies on floating-point floor division and remainder: - # Numpy may return nan where Numba returns inf, e.g. 1. // 0. - pred = (np.isinf(c_arg) & np.isnan(py_arg)) - # Numpy and Numba may differ in signed zeros, e.g. -0. // -1. - pred |= (py_arg == 0.0) & (c_arg == 0.0) - c_arg[pred] = py_arg[pred] - return py_arg, c_arg - -TestLoopTypesFloorDivideNoPython.autogenerate() - - -class TestLoopTypesFloatNoPython(_LoopTypesTester): - _compile_flags = no_pyobj_flags - _ufuncs = supported_ufuncs[:] - if iswindows: - _ufuncs.remove(np.signbit) # TODO: fix issue #758 - _ufuncs.remove(np.floor_divide) # has its own test class - _ufuncs.remove(np.remainder) # has its own test class - _ufuncs.remove(np.mod) # same as np.remainder - _required_types = 'fd' - _skip_types = 'FDmMO' + _LoopTypesTester._skip_types - -TestLoopTypesFloatNoPython.autogenerate() - - -class TestLoopTypesComplexNoPython(_LoopTypesTester): - _compile_flags = no_pyobj_flags - _ufuncs = supported_ufuncs[:] - - # Test complex types - # Every loop containing a complex argument must be tested - _required_types = 'FD' - _skip_types = 'mMO' + _LoopTypesTester._skip_types - -TestLoopTypesComplexNoPython.autogenerate() - - -class TestLoopTypesDatetimeNoPython(_LoopTypesTester): - _compile_flags = no_pyobj_flags - _ufuncs = supported_ufuncs[:] - - # NOTE: the full list of ufuncs supporting datetime64 and timedelta64 - # types in Numpy is: - # ['absolute', 'add', 'divide', 'equal', 'floor_divide', 'fmax', 'fmin', - # 'greater', 'greater_equal', 'less', 'less_equal', 'maximum', - # 'minimum', 'multiply', 
'negative', 'not_equal', 'sign', 'subtract', - # 'true_divide'] - - # Test datetime64 and timedelta64 types. - _required_types = 'mM' - - # Test various units combinations (TestLoopTypes is only able to test - # homogeneous units). - - def test_add(self): - ufunc = np.add - fn = _make_ufunc_usecase(ufunc) - # heterogeneous inputs - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[s]', 'm8[m]', 'm8[s]']) - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[m]', 'm8[s]', 'm8[s]']) - if not numpy_support.strict_ufunc_typing: - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[m]', 'm8', 'm8[m]']) - self._check_ufunc_with_dtypes(fn, ufunc, ['m8', 'm8[m]', 'm8[m]']) - # heterogeneous inputs, scaled output - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[s]', 'm8[m]', 'm8[ms]']) - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[m]', 'm8[s]', 'm8[ms]']) - # Cannot upscale result (Numpy would accept this) - with self.assertRaises(LoweringError): - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[m]', 'm8[s]', 'm8[m]']) - - def test_subtract(self): - ufunc = np.subtract - fn = _make_ufunc_usecase(ufunc) - # heterogeneous inputs - self._check_ufunc_with_dtypes(fn, ufunc, ['M8[s]', 'M8[m]', 'm8[s]']) - self._check_ufunc_with_dtypes(fn, ufunc, ['M8[m]', 'M8[s]', 'm8[s]']) - # heterogeneous inputs, scaled output - self._check_ufunc_with_dtypes(fn, ufunc, ['M8[s]', 'M8[m]', 'm8[ms]']) - self._check_ufunc_with_dtypes(fn, ufunc, ['M8[m]', 'M8[s]', 'm8[ms]']) - # Cannot upscale result (Numpy would accept this) - with self.assertRaises(LoweringError): - self._check_ufunc_with_dtypes(fn, ufunc, ['M8[m]', 'M8[s]', 'm8[m]']) - - def test_multiply(self): - ufunc = np.multiply - fn = _make_ufunc_usecase(ufunc) - # scaled output - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[s]', 'q', 'm8[us]']) - self._check_ufunc_with_dtypes(fn, ufunc, ['q', 'm8[s]', 'm8[us]']) - # Cannot upscale result (Numpy would accept this) - with self.assertRaises(LoweringError): - self._check_ufunc_with_dtypes(fn, ufunc, 
['m8[s]', 'q', 'm8[m]']) - - def test_true_divide(self): - ufunc = np.true_divide - fn = _make_ufunc_usecase(ufunc) - # heterogeneous inputs - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[m]', 'm8[s]', 'd']) - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[s]', 'm8[m]', 'd']) - # scaled output - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[m]', 'q', 'm8[s]']) - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[m]', 'd', 'm8[s]']) - # Cannot upscale result (Numpy would accept this) - with self.assertRaises(LoweringError): - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[s]', 'q', 'm8[m]']) - - def test_floor_divide(self): - ufunc = np.floor_divide - fn = _make_ufunc_usecase(ufunc) - # scaled output - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[m]', 'q', 'm8[s]']) - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[m]', 'd', 'm8[s]']) - # Cannot upscale result (Numpy would accept this) - with self.assertRaises(LoweringError): - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[s]', 'q', 'm8[m]']) - - def _check_comparison(self, ufunc): - fn = _make_ufunc_usecase(ufunc) - # timedelta - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[m]', 'm8[s]', '?']) - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[s]', 'm8[m]', '?']) - if not numpy_support.strict_ufunc_typing: - self._check_ufunc_with_dtypes(fn, ufunc, ['m8[m]', 'm8', '?']) - self._check_ufunc_with_dtypes(fn, ufunc, ['m8', 'm8[m]', '?']) - # datetime - self._check_ufunc_with_dtypes(fn, ufunc, ['M8[m]', 'M8[s]', '?']) - self._check_ufunc_with_dtypes(fn, ufunc, ['M8[s]', 'M8[m]', '?']) - - def test_comparisons(self): - for ufunc in [np.equal, np.not_equal, np.less, np.less_equal, - np.greater, np.greater_equal]: - self._check_comparison(ufunc) - -TestLoopTypesDatetimeNoPython.autogenerate() - - -class TestUFuncBadArgsNoPython(TestCase): - _compile_flags = no_pyobj_flags - - def test_missing_args(self): - def func(x): - """error: np.add requires two args""" - result = np.add(x) - return result - - self.assertRaises(TypingError, 
compile_isolated, func, [types.float64], - return_type=types.float64, flags=self._compile_flags) - - - def test_too_many_args(self): - def func(x, out, out2): - """error: too many args""" - result = np.add(x, x, out, out2) - return result - - array_type = types.Array(types.float64, 1, 'C') - self.assertRaises(TypingError, compile_isolated, func, [array_type] *3, - return_type=array_type, flags=self._compile_flags) - - def test_no_scalar_result_by_reference(self): - def func(x): - """error: scalar as a return value is not supported""" - y = 0 - np.add(x, x, y) - self.assertRaises(TypingError, compile_isolated, func, [types.float64], - return_type=types.float64, flags=self._compile_flags) - -class TestUFuncCompilationThreadSafety(TestCase): - - def test_lock(self): - """ - Test that (lazy) compiling from several threads at once doesn't - produce errors (see issue #2403). - """ - errors = [] - - @vectorize - def foo(x): - return x + 1 - - def wrapper(): - try: - a = np.ones((10,), dtype = np.float64) - expected = np.ones((10,), dtype = np.float64) + 1. 
- np.testing.assert_array_equal(foo(a), expected) - except BaseException as e: - errors.append(e) - - threads = [threading.Thread(target=wrapper) for i in range(16)] - for t in threads: - t.start() - for t in threads: - t.join() - self.assertFalse(errors) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_unicode_literals.py b/numba/numba/tests/test_unicode_literals.py deleted file mode 100644 index b7b5200e3..000000000 --- a/numba/numba/tests/test_unicode_literals.py +++ /dev/null @@ -1,32 +0,0 @@ -from __future__ import print_function, unicode_literals - -import sys - -import numpy as np - -import numba.unittest_support as unittest -from numba import utils, jit -from .support import TestCase - - -def docstring_usecase(): - """\u00e9""" - return 1 - - -@unittest.skipIf(sys.version_info >= (3,), "Python 2-specific test") -class TestFutureUnicodeLiterals(TestCase): - """ - Test issues with unicode_literals on Python 2. - """ - - def test_docstring(self): - """ - Test non-ASCII docstring (issue #1908). 
- """ - cfunc = jit(nopython=True)(docstring_usecase) - self.assertPreciseEqual(cfunc(), 1) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_unicode_names.py b/numba/numba/tests/test_unicode_names.py deleted file mode 100644 index 1c36b1faf..000000000 --- a/numba/numba/tests/test_unicode_names.py +++ /dev/null @@ -1,67 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function, absolute_import - - -from numba import njit, cfunc, cgutils -from numba.six import exec_ -from numba.utils import PY2 - -from .support import TestCase, unittest - -unicode_name1 = u""" -def unicode_name1(ಠ_ರೃ, ಠਊಠ): - return (ಠ_ರೃ) + (ಠਊಠ) -""" - -unicode_name2 = u""" -def Ծ_Ծ(ಠ_ರೃ, ಠਊಠ): - return (ಠ_ರೃ) + (ಠਊಠ) -""" - - -@unittest.skipIf(PY2, "unicode identifier not supported in python2") -class TestUnicodeNames(TestCase): - def make_testcase(self, src, fname): - glb = {} - exec_(src, glb) - fn = glb[fname] - return fn - - def test_unicode_name1(self): - fn = self.make_testcase(unicode_name1, 'unicode_name1') - cfn = njit(fn) - self.assertEqual(cfn(1, 2), 3) - - def test_unicode_name2(self): - fn = self.make_testcase(unicode_name2, 'Ծ_Ծ') - cfn = njit(fn) - self.assertEqual(cfn(1, 2), 3) - - def test_cfunc(self): - fn = self.make_testcase(unicode_name2, 'Ծ_Ծ') - cfn = cfunc("int32(int32, int32)")(fn) - self.assertEqual(cfn.ctypes(1, 2), 3) - - -class TestUnicodeUtils(TestCase): - def test_normalize_ir_text(self): - # non-unicode input - out = cgutils.normalize_ir_text('abc') - # str returned - self.assertIsInstance(out, str) - # try encoding to latin - out.encode('latin1') - - @unittest.skipIf(PY2, "unicode identifier not supported in python2") - def test_normalize_ir_text_py3(self): - # unicode input - out = cgutils.normalize_ir_text(unicode_name2) - # str returned - self.assertIsInstance(out, str) - # try encoding to latin - out.encode('latin1') - - -if __name__ == '__main__': - unittest.main() - diff --git 
a/numba/numba/tests/test_unpack_sequence.py b/numba/numba/tests/test_unpack_sequence.py deleted file mode 100644 index e7eeae72f..000000000 --- a/numba/numba/tests/test_unpack_sequence.py +++ /dev/null @@ -1,244 +0,0 @@ -from __future__ import print_function - -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import errors, types, typeof -from .support import TestCase, MemoryLeakMixin, tag - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -force_pyobj_flags = Flags() -force_pyobj_flags.set("force_pyobject") - -no_pyobj_flags = Flags() -no_pyobj_flags.set("nrt") - - -def unpack_list(l): - a, b, c = l - return (a, b, c) - - -def unpack_shape(a): - x, y, z = a.shape - return x + y + z - - -def unpack_range(): - a, b, c = range(3) - return a + b + c - - -def unpack_range_too_small(): - a, b, c = range(2) - return a + b + c - - -def unpack_range_too_large(): - a, b, c = range(4) - return a + b + c - - -def unpack_tuple(): - a, b, c = (1, 2, 3) - return a + b + c - - -def unpack_tuple_too_small(): - a, b, c = (1, 2) - return a + b + c - - -def unpack_tuple_too_large(): - a, b, c = (1, 2, 3, 4) - return a + b + c - - -def unpack_heterogeneous_tuple_too_small(): - a, b, c = (1, 2.5j) - return a + b + c - - -def unpack_heterogeneous_tuple_too_large(): - a, b, c = (1, 2.5, 3j, 4) - return a + b + c - - -def unpack_heterogeneous_tuple(): - a, b, c = (1, 2.5, 3j) - return a + b + c - - -def unpack_nested_heterogeneous_tuple(): - a, (b, c) = (1, (2.5, 3j)) - return a + b + c - - -def unpack_arbitrary(seq): - a, b = seq - return b, a - - -def unpack_nrt(): - a = np.zeros(1) - b = np.zeros(2) - tup = b, a - alpha, beta = tup - return alpha, beta - - -def chained_unpack_assign1(x, y): - # Used to fail in object mode (issue #580) - a = (b, c) = (x, y) - (d, e) = a - return d + e + b + c - - -def conditional_swap(x, y): - # Used to produce invalid code (issue #977) - if x > 0: - 
x, y = y, x - return x, y - - -class TestUnpack(MemoryLeakMixin, TestCase): - - def test_unpack_list(self): - pyfunc = unpack_list - cr = compile_isolated(pyfunc, (), flags=force_pyobj_flags) - cfunc = cr.entry_point - l = [1, 2, 3] - self.assertEqual(cfunc(l), pyfunc(l)) - - def test_unpack_shape(self, flags=force_pyobj_flags): - pyfunc = unpack_shape - cr = compile_isolated(pyfunc, [types.Array(dtype=types.int32, - ndim=3, - layout='C')], - flags=flags) - cfunc = cr.entry_point - a = np.zeros(shape=(1, 2, 3)) - self.assertPreciseEqual(cfunc(a), pyfunc(a)) - - @tag('important') - def test_unpack_shape_npm(self): - self.test_unpack_shape(flags=no_pyobj_flags) - - def test_unpack_range(self, flags=force_pyobj_flags): - self.run_nullary_func(unpack_range, flags) - - @tag('important') - def test_unpack_range_npm(self): - self.test_unpack_range(flags=no_pyobj_flags) - - def test_unpack_tuple(self, flags=force_pyobj_flags): - self.run_nullary_func(unpack_tuple, flags) - - @tag('important') - def test_unpack_tuple_npm(self): - self.test_unpack_tuple(flags=no_pyobj_flags) - - def test_unpack_heterogeneous_tuple(self, flags=force_pyobj_flags): - self.run_nullary_func(unpack_heterogeneous_tuple, flags) - - def test_unpack_heterogeneous_tuple_npm(self): - self.test_unpack_heterogeneous_tuple(flags=no_pyobj_flags) - - def test_unpack_nested_heterogeneous_tuple(self, flags=force_pyobj_flags): - self.run_nullary_func(unpack_nested_heterogeneous_tuple, flags) - - @tag('important') - def test_unpack_nested_heterogeneous_tuple_npm(self): - self.test_unpack_nested_heterogeneous_tuple(flags=no_pyobj_flags) - - def test_chained_unpack_assign(self, flags=force_pyobj_flags): - pyfunc = chained_unpack_assign1 - cr = compile_isolated(pyfunc, [types.int32, types.int32], - flags=flags) - cfunc = cr.entry_point - args = (4, 5) - self.assertPreciseEqual(cfunc(*args), pyfunc(*args)) - - def test_chained_unpack_assign_npm(self): - self.test_chained_unpack_assign(flags=no_pyobj_flags) - - def 
check_unpack_error(self, pyfunc, flags=force_pyobj_flags, exc=ValueError): - with self.assertRaises(exc): - cr = compile_isolated(pyfunc, (), flags=flags) - cfunc = cr.entry_point - cfunc() - - def test_unpack_tuple_too_small(self): - self.check_unpack_error(unpack_tuple_too_small) - self.check_unpack_error(unpack_heterogeneous_tuple_too_small) - - def test_unpack_tuple_too_small_npm(self): - self.check_unpack_error(unpack_tuple_too_small, no_pyobj_flags, - errors.TypingError) - self.check_unpack_error(unpack_heterogeneous_tuple_too_small, - no_pyobj_flags, errors.TypingError) - - def test_unpack_tuple_too_large(self): - self.check_unpack_error(unpack_tuple_too_large) - self.check_unpack_error(unpack_heterogeneous_tuple_too_large) - - def test_unpack_tuple_too_large_npm(self): - self.check_unpack_error(unpack_tuple_too_large, no_pyobj_flags, - errors.TypingError) - self.check_unpack_error(unpack_heterogeneous_tuple_too_large, - no_pyobj_flags, errors.TypingError) - - def test_unpack_range_too_small(self): - self.check_unpack_error(unpack_range_too_small) - - def test_unpack_range_too_small_npm(self): - self.check_unpack_error(unpack_range_too_small, no_pyobj_flags) - - def test_unpack_range_too_large(self): - self.check_unpack_error(unpack_range_too_large) - - def test_unpack_range_too_large_npm(self): - self.check_unpack_error(unpack_range_too_large, no_pyobj_flags) - - def check_conditional_swap(self, flags=force_pyobj_flags): - cr = compile_isolated(conditional_swap, (types.int32, types.int32), - flags=flags) - cfunc = cr.entry_point - self.assertPreciseEqual(cfunc(4, 5), (5, 4)) - self.assertPreciseEqual(cfunc(0, 5), (0, 5)) - - def test_conditional_swap(self): - self.check_conditional_swap() - - @tag('important') - def test_conditional_swap_npm(self): - self.check_conditional_swap(no_pyobj_flags) - - def test_unpack_tuple_of_arrays(self): - tup = tuple(np.zeros(i + 1) for i in range(2)) - tupty = typeof(tup) - pyfunc = unpack_arbitrary - cr = 
compile_isolated(pyfunc, (tupty,), - flags=no_pyobj_flags) - cfunc = cr.entry_point - self.assertPreciseEqual(cfunc(tup), pyfunc(tup)) - - def test_unpack_nrt(self): - pyfunc = unpack_nrt - cr = compile_isolated(pyfunc, (), flags=no_pyobj_flags) - cfunc = cr.entry_point - self.assertPreciseEqual(cfunc(), pyfunc()) - - def test_invalid_unpack(self): - pyfunc = unpack_arbitrary - with self.assertRaises(errors.TypingError) as raises: - compile_isolated(pyfunc, (types.int32,), flags=no_pyobj_flags) - self.assertIn("failed to unpack int32", str(raises.exception)) - - -if __name__ == '__main__': - unittest.main() - diff --git a/numba/numba/tests/test_unsafe_intrinsics.py b/numba/numba/tests/test_unsafe_intrinsics.py deleted file mode 100644 index 403487e0e..000000000 --- a/numba/numba/tests/test_unsafe_intrinsics.py +++ /dev/null @@ -1,85 +0,0 @@ -from __future__ import print_function - -import random - -import numpy as np - -from .support import TestCase -from numba import njit -from numba.unsafe.tuple import tuple_setitem -from numba.unsafe.ndarray import to_fixed_tuple -from numba.errors import TypingError - - -class TestTupleIntrinsic(TestCase): - """Tests for numba.unsafe.tuple - """ - def test_tuple_setitem(self): - @njit - def foo(tup, idxs, vals): - out_tup = tup - for i, v in zip(idxs, vals): - out_tup = tuple_setitem(out_tup, i, v) - return tup, out_tup - - random.seed(123) - for _ in range(20): - # Random data - n = random.randint(1, 10) - tup = tuple([random.randint(0, n) for i in range(n)]) - vals = tuple([random.randint(10, 20) for i in range(n)]) - idxs = list(range(len(vals))) - random.shuffle(idxs) - idxs = tuple(idxs) - # Expect - expect_tup = tuple(tup) - expect_out = np.asarray(expect_tup) - expect_out[np.asarray(idxs)] = vals - # Got - got_tup, got_out = foo(tup, idxs, vals) - # Check - self.assertEqual(got_tup, expect_tup) - self.assertEqual(got_out, tuple(expect_out)) - - -class TestNdarrayIntrinsic(TestCase): - """Tests for numba.unsafe.ndarray - 
""" - def test_to_fixed_tuple(self): - const = 3 - - @njit - def foo(array): - a = to_fixed_tuple(array, length=1) - b = to_fixed_tuple(array, 2) - c = to_fixed_tuple(array, const) - d = to_fixed_tuple(array, 0) - return a, b, c, d - - np.random.seed(123) - for _ in range(10): - # Random data - arr = np.random.random(3) - # Run - a, b, c, d = foo(arr) - # Check - self.assertEqual(a, tuple(arr[:1])) - self.assertEqual(b, tuple(arr[:2])) - self.assertEqual(c, tuple(arr[:3])) - self.assertEqual(d, ()) - - # Check error with ndim!=1 - with self.assertRaises(TypingError) as raises: - foo(np.random.random((1, 2))) - self.assertIn("Not supported on array.ndim=2", - str(raises.exception)) - - # Check error with non-constant length - @njit - def tuple_with_length(array, length): - return to_fixed_tuple(array, length) - - with self.assertRaises(TypingError) as raises: - tuple_with_length(np.random.random(3), 1) - expectmsg = "*length* argument must be a constant" - self.assertIn(expectmsg, str(raises.exception)) diff --git a/numba/numba/tests/test_usecases.py b/numba/numba/tests/test_usecases.py deleted file mode 100644 index c64e6afda..000000000 --- a/numba/numba/tests/test_usecases.py +++ /dev/null @@ -1,232 +0,0 @@ -from __future__ import print_function - -import itertools -import numpy as np - -import numba.unittest_support as unittest -from numba.compiler import compile_isolated, Flags -from numba import types, utils -from numba.tests import usecases -from .support import TestCase, tag - -enable_pyobj_flags = Flags() -enable_pyobj_flags.set("enable_pyobject") - -force_pyobj_flags = Flags() -force_pyobj_flags.set("force_pyobject") - - -class TestUsecases(TestCase): - - @tag('important') - def test_andor(self): - pyfunc = usecases.andor - cr = compile_isolated(pyfunc, (types.int32, types.int32)) - cfunc = cr.entry_point - - # Argument boundaries - xs = -1, 0, 1, 9, 10, 11 - ys = -1, 0, 1, 9, 10, 11 - - for args in itertools.product(xs, ys): - 
self.assertEqual(pyfunc(*args), cfunc(*args), "args %s" % (args,)) - - @tag('important') - def test_sum1d(self): - pyfunc = usecases.sum1d - cr = compile_isolated(pyfunc, (types.int32, types.int32)) - cfunc = cr.entry_point - - ss = -1, 0, 1, 100, 200 - es = -1, 0, 1, 100, 200 - - for args in itertools.product(ss, es): - self.assertEqual(pyfunc(*args), cfunc(*args), args) - - def test_sum1d_pyobj(self): - pyfunc = usecases.sum1d - cr = compile_isolated(pyfunc, (types.int32, types.int32), - flags=force_pyobj_flags) - cfunc = cr.entry_point - - ss = -1, 0, 1, 100, 200 - es = -1, 0, 1, 100, 200 - - for args in itertools.product(ss, es): - self.assertEqual(pyfunc(*args), cfunc(*args), args) - - args = 0, 500 - - def bm_python(): - pyfunc(*args) - - def bm_numba(): - cfunc(*args) - - print(utils.benchmark(bm_python, maxsec=.1)) - print(utils.benchmark(bm_numba, maxsec=.1)) - - @tag('important') - def test_sum2d(self): - pyfunc = usecases.sum2d - cr = compile_isolated(pyfunc, (types.int32, types.int32)) - cfunc = cr.entry_point - - ss = -1, 0, 1, 100, 200 - es = -1, 0, 1, 100, 200 - - for args in itertools.product(ss, es): - self.assertEqual(pyfunc(*args), cfunc(*args), args) - - @tag('important') - def test_while_count(self): - pyfunc = usecases.while_count - cr = compile_isolated(pyfunc, (types.int32, types.int32)) - cfunc = cr.entry_point - - ss = -1, 0, 1, 100, 200 - es = -1, 0, 1, 100, 200 - - for args in itertools.product(ss, es): - self.assertEqual(pyfunc(*args), cfunc(*args), args) - - def test_copy_arrays(self): - pyfunc = usecases.copy_arrays - arraytype = types.Array(types.int32, 1, 'A') - cr = compile_isolated(pyfunc, (arraytype, arraytype)) - cfunc = cr.entry_point - - nda = 0, 1, 10, 100 - - for nd in nda: - a = np.arange(nd, dtype='int32') - b = np.empty_like(a) - args = a, b - - cfunc(*args) - self.assertPreciseEqual(a, b, msg=str(args)) - - @tag('important') - def test_copy_arrays2d(self): - pyfunc = usecases.copy_arrays2d - arraytype = 
types.Array(types.int32, 2, 'A') - cr = compile_isolated(pyfunc, (arraytype, arraytype)) - cfunc = cr.entry_point - - nda = (0, 0), (1, 1), (2, 5), (4, 25) - - for nd in nda: - d1, d2 = nd - a = np.arange(d1 * d2, dtype='int32').reshape(d1, d2) - b = np.empty_like(a) - args = a, b - - cfunc(*args) - self.assertPreciseEqual(a, b, msg=str(args)) - - def run_ifelse(self, pyfunc): - cr = compile_isolated(pyfunc, (types.int32, types.int32)) - cfunc = cr.entry_point - - xs = -1, 0, 1 - ys = -1, 0, 1 - - for x, y in itertools.product(xs, ys): - args = x, y - self.assertEqual(pyfunc(*args), cfunc(*args), args) - - def test_string_concat(self): - pyfunc = usecases.string_concat - cr = compile_isolated(pyfunc, (types.int32, types.int32), - flags=enable_pyobj_flags) - cfunc = cr.entry_point - - xs = -1, 0, 1 - ys = -1, 0, 1 - - for x, y in itertools.product(xs, ys): - args = x, y - self.assertEqual(pyfunc(*args), cfunc(*args), args) - - def test_string_len(self): - pyfunc = usecases.string_len - cr = compile_isolated(pyfunc, (types.pyobject,), - flags=enable_pyobj_flags) - cfunc = cr.entry_point - - test_str = '123456' - self.assertEqual(pyfunc(test_str), cfunc(test_str)) - test_str = '1' - self.assertEqual(pyfunc(test_str), cfunc(test_str)) - test_str = '' - self.assertEqual(pyfunc(test_str), cfunc(test_str)) - - def test_string_slicing(self): - pyfunc = usecases.string_slicing - cr = compile_isolated(pyfunc, (types.pyobject,) * 3, - flags=enable_pyobj_flags) - cfunc = cr.entry_point - - test_str = '123456' - self.assertEqual(pyfunc(test_str, 0, 3), cfunc(test_str, 0, 3)) - self.assertEqual(pyfunc(test_str, 1, 5), cfunc(test_str, 1, 5)) - self.assertEqual(pyfunc(test_str, 2, 3), cfunc(test_str, 2, 3)) - - def test_string_conversion(self): - pyfunc = usecases.string_conversion - - cr = compile_isolated(pyfunc, (types.int32,), - flags=enable_pyobj_flags) - cfunc = cr.entry_point - self.assertEqual(pyfunc(1), cfunc(1)) - - cr = compile_isolated(pyfunc, (types.float32,), - 
flags=enable_pyobj_flags) - cfunc = cr.entry_point - self.assertEqual(pyfunc(1.1), cfunc(1.1)) - - def test_string_comparisons(self): - import operator - pyfunc = usecases.string_comparison - cr = compile_isolated(pyfunc, (types.pyobject, types.pyobject, types.pyobject), - flags=enable_pyobj_flags) - cfunc = cr.entry_point - - test_str1 = '123' - test_str2 = '123' - op = operator.eq - self.assertEqual(pyfunc(test_str1, test_str2, op), - cfunc(test_str1, test_str2, op)) - - test_str1 = '123' - test_str2 = '456' - op = operator.eq - self.assertEqual(pyfunc(test_str1, test_str2, op), - cfunc(test_str1, test_str2, op)) - - test_str1 = '123' - test_str2 = '123' - op = operator.ne - self.assertEqual(pyfunc(test_str1, test_str2, op), - cfunc(test_str1, test_str2, op)) - - test_str1 = '123' - test_str2 = '456' - op = operator.ne - self.assertEqual(pyfunc(test_str1, test_str2, op), - cfunc(test_str1, test_str2, op)) - - def test_blackscholes_cnd(self): - pyfunc = usecases.blackscholes_cnd - cr = compile_isolated(pyfunc, (types.float32,)) - cfunc = cr.entry_point - - ds = -0.5, 0, 0.5 - - for d in ds: - args = (d,) - self.assertEqual(pyfunc(*args), cfunc(*args), args) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_utils.py b/numba/numba/tests/test_utils.py deleted file mode 100644 index 0500401a8..000000000 --- a/numba/numba/tests/test_utils.py +++ /dev/null @@ -1,85 +0,0 @@ -""" -Tests for numba.utils. 
-""" - -from __future__ import print_function, absolute_import - -import threading -import time - -from numba import utils -from numba import unittest_support as unittest - - -class C(object): - def __init__(self, value): - self.value = value - - def __eq__(self, o): - return self.value == o.value - - def __ne__(self, o): - return self.value != o.value - - def __gt__(self, o): - return self.value > o.value - -class D(C): - pass - - -class TestTotalOrdering(unittest.TestCase): - - def test_is_inherited(self): - f = utils._is_inherited_from_object - for cls in (C, D): - self.assertFalse(f(cls, '__eq__')) - self.assertFalse(f(cls, '__gt__')) - self.assertFalse(f(cls, '__ne__')) - self.assertTrue(f(cls, '__ge__')) - self.assertTrue(f(cls, '__le__')) - self.assertTrue(f(cls, '__lt__')) - - def check_total_ordering(self, cls): - # Duplicate the class-under-test, to avoid mutating the original - cls = type(cls.__name__, cls.__bases__, dict(cls.__dict__)) - cls = utils.total_ordering(cls) - - a, b, c, d = cls(10), cls(5), cls(15), cls(10) - self.assertFalse(a < b) - self.assertTrue(a < c) - self.assertFalse(a < d) - self.assertTrue(b < c) - self.assertTrue(b < d) - self.assertFalse(c < d) - - self.assertFalse(a <= b) - self.assertTrue(a <= c) - self.assertTrue(a <= d) - self.assertTrue(b <= c) - self.assertTrue(b <= d) - self.assertFalse(c <= d) - - self.assertTrue(a > b) - self.assertFalse(a > c) - self.assertFalse(a > d) - self.assertFalse(b > c) - self.assertFalse(b > d) - self.assertTrue(c > d) - - self.assertTrue(a >= b) - self.assertFalse(a >= c) - self.assertTrue(a >= d) - self.assertFalse(b >= c) - self.assertFalse(b >= d) - self.assertTrue(c >= d) - - def test_total_ordering(self): - self.check_total_ordering(C) - - def test_total_ordering_derived(self): - self.check_total_ordering(D) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_vectorization_type_inference.py b/numba/numba/tests/test_vectorization_type_inference.py deleted 
file mode 100644 index b07429bff..000000000 --- a/numba/numba/tests/test_vectorization_type_inference.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import print_function -from numba import vectorize, jit, bool_, double, int_, float_, typeof, int8 -import numba.unittest_support as unittest -import numpy as np - - -def add(a, b): - return a + b - - -def func(dtypeA, dtypeB): - A = np.arange(10, dtype=dtypeA) - B = np.arange(10, dtype=dtypeB) - return typeof(vector_add(A, B)) - - -class TestVectTypeInfer(unittest.TestCase): - - def test_type_inference(self): - """This is testing numpy ufunc dispatch machinery - """ - global vector_add - vector_add = vectorize([ - bool_(double, int_), - double(double, double), - float_(double, float_), - ])(add) - - cfunc = jit(func) - - def numba_type_equal(a, b): - self.assertEqual(a.dtype, b.dtype) - self.assertEqual(a.ndim, b.ndim) - - numba_type_equal(cfunc(np.dtype(np.float64), np.dtype('i')), bool_[:]) - numba_type_equal(cfunc(np.dtype(np.float64), np.dtype(np.float64)), - double[:]) - # This is because the double(double, double) matches first - numba_type_equal(cfunc(np.dtype(np.float64), np.dtype(np.float32)), - double[:]) - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_warnings.py b/numba/numba/tests/test_warnings.py deleted file mode 100644 index 8603f9674..000000000 --- a/numba/numba/tests/test_warnings.py +++ /dev/null @@ -1,123 +0,0 @@ -from __future__ import print_function -import warnings -import numpy as np - -import numba.unittest_support as unittest -from numba import jit -from numba.errors import NumbaWarning, deprecated - -class TestBuiltins(unittest.TestCase): - def test_type_infer_warning(self): - def add(x, y): - a = {} - return x + y - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', NumbaWarning) - - cfunc = jit(add) - cfunc(1, 2) - - self.assertEqual(len(w), 2) - # Type inference failure - self.assertEqual(w[0].category, 
NumbaWarning) - self.assertIn('type inference', str(w[0].message)) - - # Object mode - self.assertEqual(w[1].category, NumbaWarning) - self.assertIn('object mode', str(w[1].message)) - - def test_return_type_warning(self): - y = np.ones(4, dtype=np.float32) - def return_external_array(): - return y - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', NumbaWarning) - - cfunc = jit(_nrt=False)(return_external_array) - cfunc() - - self.assertEqual(len(w), 2) - # Legal return value failure - self.assertEqual(w[0].category, NumbaWarning) - self.assertIn('return type', str(w[0].message)) - - # Object mode - self.assertEqual(w[1].category, NumbaWarning) - self.assertIn('object mode', str(w[1].message)) - - def test_return_type_warning_with_nrt(self): - """ - Rerun test_return_type_warning with nrt - """ - y = np.ones(4, dtype=np.float32) - - def return_external_array(): - return y - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', NumbaWarning) - - cfunc = jit(return_external_array) - cfunc() - # No more warning - self.assertEqual(len(w), 0) - - - def test_no_warning_with_forceobj(self): - def add(x, y): - a = [] - return x + y - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', NumbaWarning) - - cfunc = jit(add, forceobj=True) - cfunc(1, 2) - - self.assertEqual(len(w), 0) - - def test_loop_lift_warn(self): - def do_loop(x): - a = {} - for i in range(x.shape[0]): - x[i] *= 2 - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', NumbaWarning) - - x = np.ones(4, dtype=np.float32) - cfunc = jit(do_loop) - cfunc(x) - - self.assertEqual(len(w), 3) - # Type inference failure (1st pass) - self.assertEqual(w[0].category, NumbaWarning) - self.assertIn('type inference', str(w[0].message)) - - # Type inference failure (2nd pass, with lifted loops) - self.assertEqual(w[1].category, NumbaWarning) - self.assertIn('type inference', str(w[1].message)) - - 
# Object mode - self.assertEqual(w[2].category, NumbaWarning) - self.assertIn('object mode', str(w[2].message)) - self.assertIn('lifted loops', str(w[2].message)) - - - def test_deprecated(self): - @deprecated('foo') - def bar(): pass - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always') - bar() - - self.assertEqual(len(w), 1) - self.assertEqual(w[0].category, DeprecationWarning) - self.assertIn('bar', str(w[0].message)) - self.assertIn('foo', str(w[0].message)) - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/test_wrapper.py b/numba/numba/tests/test_wrapper.py deleted file mode 100644 index 479603255..000000000 --- a/numba/numba/tests/test_wrapper.py +++ /dev/null @@ -1,107 +0,0 @@ -from __future__ import print_function - -import numpy as np - -import numba.unittest_support as unittest -from numba import compiler, types, utils -from numba.targets import registry - - -def overhead(x): - return x - - -def array_overhead(x): - x[0] = 1 - x[1] = 2 - - -def add(x): - return x + x + x + x + x - - -class TestWrapper(unittest.TestCase): - def test_overhead(self): - """ - This will show higher overhead due to unboxing in the native version. - """ - cr = compiler.compile_isolated(overhead, [types.int32]) - cfunc = cr.entry_point - disp = registry.CPUDispatcher(overhead) - disp.add_overload(cr) - - x = 321 - - def python(): - overhead(x) - - def pycfunc(): - cfunc(x) - - def overloaded(): - disp(x) - - print(overhead) - print(utils.benchmark(python, maxsec=.5)) - print(utils.benchmark(pycfunc, maxsec=.5)) - print(utils.benchmark(overloaded, maxsec=.5)) - - def test_array_overhead(self): - """ - The time to set two array element seems to be more expensive than - the overhead of the overloaded call. 
- """ - cr = compiler.compile_isolated(array_overhead, [types.int32[::1]]) - cfunc = cr.entry_point - disp = registry.CPUDispatcher(array_overhead) - disp.add_overload(cr) - - self.assertEqual(cr.signature.args[0].layout, 'C') - - x = np.zeros(shape=2, dtype='int32') - - def python(): - array_overhead(x) - - def pycfunc(): - cfunc(x) - - def overloaded(): - disp(x) - - print(array_overhead) - print(utils.benchmark(python, maxsec=.5)) - print(utils.benchmark(pycfunc, maxsec=.5)) - print(utils.benchmark(overloaded, maxsec=.5)) - - - def test_add(self): - """ - This seems to be about the amount of work to balance out the overhead - by the overloaded one - """ - cr = compiler.compile_isolated(add, [types.int32]) - cfunc = cr.entry_point - disp = registry.CPUDispatcher(add) - disp.add_overload(cr) - - x = 321 - - def python(): - add(x) - - def pycfunc(): - cfunc(x) - - def overloaded(): - disp(x) - - print(add) - print(utils.benchmark(python, maxsec=.5)) - print(utils.benchmark(pycfunc, maxsec=.5)) - print(utils.benchmark(overloaded, maxsec=.5)) - - - -if __name__ == '__main__': - unittest.main() diff --git a/numba/numba/tests/timsort.py b/numba/numba/tests/timsort.py deleted file mode 100644 index a23e19bdb..000000000 --- a/numba/numba/tests/timsort.py +++ /dev/null @@ -1,944 +0,0 @@ -""" -Timsort implementation. Mostly adapted from CPython's listobject.c. - -For more information, see listsort.txt in CPython's source tree. 
-""" - -from __future__ import print_function, absolute_import, division - -import collections - -from numba import types - - -TimsortImplementation = collections.namedtuple( - 'TimsortImplementation', - (# The compile function itself - 'compile', - # All subroutines exercised by test_sort - 'count_run', 'binarysort', 'gallop_left', 'gallop_right', - 'merge_init', 'merge_append', 'merge_pop', - 'merge_compute_minrun', 'merge_lo', 'merge_hi', 'merge_at', - 'merge_force_collapse', 'merge_collapse', - # The top-level functions - 'run_timsort', 'run_timsort_with_values' - )) - - -# The maximum number of entries in a MergeState's pending-runs stack. -# This is enough to sort arrays of size up to about -# 32 * phi ** MAX_MERGE_PENDING -# where phi ~= 1.618. 85 is ridiculously large enough, good for an array -# with 2**64 elements. -# NOTE this implementation doesn't depend on it (the stack is dynamically -# allocated), but it's still good to check as an invariant. -MAX_MERGE_PENDING = 85 - -# When we get into galloping mode, we stay there until both runs win less -# often than MIN_GALLOP consecutive times. See listsort.txt for more info. -MIN_GALLOP = 7 - -# Start size for temp arrays. 
-MERGESTATE_TEMP_SIZE = 256 - -# A mergestate is a named tuple with the following members: -# - *min_gallop* is an integer controlling when we get into galloping mode -# - *keys* is a temp list for merging keys -# - *values* is a temp list for merging values, if needed -# - *pending* is a stack of pending runs to be merged -# - *n* is the current stack length of *pending* - -MergeState = collections.namedtuple( - 'MergeState', ('min_gallop', 'keys', 'values', 'pending', 'n')) - - -MergeRun = collections.namedtuple('MergeRun', ('start', 'size')) - - -def make_timsort_impl(wrap, make_temp_area): - - make_temp_area = wrap(make_temp_area) - intp = types.intp - zero = intp(0) - - @wrap - def has_values(keys, values): - return values is not keys - - @wrap - def merge_init(keys): - """ - Initialize a MergeState for a non-keyed sort. - """ - temp_size = min(len(keys) // 2 + 1, MERGESTATE_TEMP_SIZE) - temp_keys = make_temp_area(keys, temp_size) - temp_values = temp_keys - pending = [MergeRun(zero, zero)] * MAX_MERGE_PENDING - return MergeState(intp(MIN_GALLOP), temp_keys, temp_values, pending, zero) - - @wrap - def merge_init_with_values(keys, values): - """ - Initialize a MergeState for a keyed sort. - """ - temp_size = min(len(keys) // 2 + 1, MERGESTATE_TEMP_SIZE) - temp_keys = make_temp_area(keys, temp_size) - temp_values = make_temp_area(values, temp_size) - pending = [MergeRun(zero, zero)] * MAX_MERGE_PENDING - return MergeState(intp(MIN_GALLOP), temp_keys, temp_values, pending, zero) - - @wrap - def merge_append(ms, run): - """ - Append a run on the merge stack. - """ - n = ms.n - assert n < MAX_MERGE_PENDING - ms.pending[n] = run - return MergeState(ms.min_gallop, ms.keys, ms.values, ms.pending, n + 1) - - @wrap - def merge_pop(ms): - """ - Pop the top run from the merge stack. - """ - return MergeState(ms.min_gallop, ms.keys, ms.values, ms.pending, ms.n - 1) - - @wrap - def merge_getmem(ms, need): - """ - Ensure enough temp memory for 'need' items is available. 
- """ - alloced = len(ms.keys) - if need <= alloced: - return ms - # Over-allocate - while alloced < need: - alloced = alloced << 1 - # Don't realloc! That can cost cycles to copy the old data, but - # we don't care what's in the block. - temp_keys = make_temp_area(ms.keys, alloced) - if has_values(ms.keys, ms.values): - temp_values = make_temp_area(ms.values, alloced) - else: - temp_values = temp_keys - return MergeState(ms.min_gallop, temp_keys, temp_values, ms.pending, ms.n) - - @wrap - def merge_adjust_gallop(ms, new_gallop): - """ - Modify the MergeState's min_gallop. - """ - return MergeState(intp(new_gallop), ms.keys, ms.values, ms.pending, ms.n) - - - @wrap - def LT(a, b): - """ - Trivial comparison function between two keys. This is factored out to - make it clear where comparisons occur. - """ - return a < b - - @wrap - def binarysort(keys, values, lo, hi, start): - """ - binarysort is the best method for sorting small arrays: it does - few compares, but can do data movement quadratic in the number of - elements. - [lo, hi) is a contiguous slice of a list, and is sorted via - binary insertion. This sort is stable. - On entry, must have lo <= start <= hi, and that [lo, start) is already - sorted (pass start == lo if you don't know!). - """ - assert lo <= start and start <= hi - _has_values = has_values(keys, values) - if lo == start: - start += 1 - while start < hi: - pivot = keys[start] - # Bisect to find where to insert `pivot` - # NOTE: bisection only wins over linear search if the comparison - # function is much more expensive than simply moving data. - l = lo - r = start - # Invariants: - # pivot >= all in [lo, l). - # pivot < all in [r, start). - # The second is vacuously true at the start. - while l < r: - p = l + ((r - l) >> 1) - if LT(pivot, keys[p]): - r = p - else: - l = p+1 - - # The invariants still hold, so pivot >= all in [lo, l) and - # pivot < all in [l, start), so pivot belongs at l. 
Note - # that if there are elements equal to pivot, l points to the - # first slot after them -- that's why this sort is stable. - # Slide over to make room (aka memmove()). - for p in range(start, l, -1): - keys[p] = keys[p - 1] - keys[l] = pivot - if _has_values: - pivot_val = values[start] - for p in range(start, l, -1): - values[p] = values[p - 1] - values[l] = pivot_val - - start += 1 - - - @wrap - def count_run(keys, lo, hi): - """ - Return the length of the run beginning at lo, in the slice [lo, hi). - lo < hi is required on entry. "A run" is the longest ascending sequence, with - - lo[0] <= lo[1] <= lo[2] <= ... - - or the longest descending sequence, with - - lo[0] > lo[1] > lo[2] > ... - - A tuple (length, descending) is returned, where boolean *descending* - is set to 0 in the former case, or to 1 in the latter. - For its intended use in a stable mergesort, the strictness of the defn of - "descending" is needed so that the caller can safely reverse a descending - sequence without violating stability (strict > ensures there are no equal - elements to get out of order). - """ - assert lo < hi - if lo + 1 == hi: - # Trivial 1-long run - return 1, False - if LT(keys[lo + 1], keys[lo]): - # Descending run - for k in range(lo + 2, hi): - if not LT(keys[k], keys[k - 1]): - return k - lo, True - return hi - lo, True - else: - # Ascending run - for k in range(lo + 2, hi): - if LT(keys[k], keys[k - 1]): - return k - lo, False - return hi - lo, False - - - @wrap - def gallop_left(key, a, start, stop, hint): - """ - Locate the proper position of key in a sorted vector; if the vector contains - an element equal to key, return the position immediately to the left of - the leftmost equal element. [gallop_right() does the same except returns - the position to the right of the rightmost equal element (if any).] - - "a" is a sorted vector with stop elements, starting at a[start]. - stop must be > start. 
- - "hint" is an index at which to begin the search, start <= hint < stop. - The closer hint is to the final result, the faster this runs. - - The return value is the int k in start..stop such that - - a[k-1] < key <= a[k] - - pretending that a[start-1] is minus infinity and a[stop] is plus infinity. - IOW, key belongs at index k; or, IOW, the first k elements of a should - precede key, and the last stop-start-k should follow key. - - See listsort.txt for info on the method. - """ - assert stop > start - assert hint >= start and hint < stop - n = stop - start - - # First, gallop from the hint to find a "good" subinterval for bisecting - lastofs = 0 - ofs = 1 - if LT(a[hint], key): - # a[hint] < key => gallop right, until - # a[hint + lastofs] < key <= a[hint + ofs] - maxofs = stop - hint - while ofs < maxofs: - if LT(a[hint + ofs], key): - lastofs = ofs - ofs = (ofs << 1) + 1 - if ofs <= 0: - # Int overflow - ofs = maxofs - else: - # key <= a[hint + ofs] - break - if ofs > maxofs: - ofs = maxofs - # Translate back to offsets relative to a[0] - lastofs += hint - ofs += hint - else: - # key <= a[hint] => gallop left, until - # a[hint - ofs] < key <= a[hint - lastofs] - maxofs = hint - start + 1 - while ofs < maxofs: - if LT(a[hint - ofs], key): - break - else: - # key <= a[hint - ofs] - lastofs = ofs - ofs = (ofs << 1) + 1 - if ofs <= 0: - # Int overflow - ofs = maxofs - if ofs > maxofs: - ofs = maxofs - # Translate back to positive offsets relative to a[0] - lastofs, ofs = hint - ofs, hint - lastofs - - assert start - 1 <= lastofs and lastofs < ofs and ofs <= stop - # Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the - # right of lastofs but no farther right than ofs. Do a binary - # search, with invariant a[lastofs-1] < key <= a[ofs]. 
- lastofs += 1 - while lastofs < ofs: - m = lastofs + ((ofs - lastofs) >> 1) - if LT(a[m], key): - # a[m] < key - lastofs = m + 1 - else: - # key <= a[m] - ofs = m - # Now lastofs == ofs, so a[ofs - 1] < key <= a[ofs] - return ofs - - - @wrap - def gallop_right(key, a, start, stop, hint): - """ - Exactly like gallop_left(), except that if key already exists in a[start:stop], - finds the position immediately to the right of the rightmost equal value. - - The return value is the int k in start..stop such that - - a[k-1] <= key < a[k] - - The code duplication is massive, but this is enough different given that - we're sticking to "<" comparisons that it's much harder to follow if - written as one routine with yet another "left or right?" flag. - """ - assert stop > start - assert hint >= start and hint < stop - n = stop - start - - # First, gallop from the hint to find a "good" subinterval for bisecting - lastofs = 0 - ofs = 1 - if LT(key, a[hint]): - # key < a[hint] => gallop left, until - # a[hint - ofs] <= key < a[hint - lastofs] - maxofs = hint - start + 1 - while ofs < maxofs: - if LT(key, a[hint - ofs]): - lastofs = ofs - ofs = (ofs << 1) + 1 - if ofs <= 0: - # Int overflow - ofs = maxofs - else: - # a[hint - ofs] <= key - break - if ofs > maxofs: - ofs = maxofs - # Translate back to positive offsets relative to a[0] - lastofs, ofs = hint - ofs, hint - lastofs - else: - # a[hint] <= key -- gallop right, until - # a[hint + lastofs] <= key < a[hint + ofs] - maxofs = stop - hint - while ofs < maxofs: - if LT(key, a[hint + ofs]): - break - else: - # a[hint + ofs] <= key - lastofs = ofs - ofs = (ofs << 1) + 1 - if ofs <= 0: - # Int overflow - ofs = maxofs - if ofs > maxofs: - ofs = maxofs - # Translate back to offsets relative to a[0] - lastofs += hint - ofs += hint - - assert start - 1 <= lastofs and lastofs < ofs and ofs <= stop - # Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the - # right of lastofs but no farther right than ofs. 
Do a binary - # search, with invariant a[lastofs-1] <= key < a[ofs]. - lastofs += 1 - while lastofs < ofs: - m = lastofs + ((ofs - lastofs) >> 1) - if LT(key, a[m]): - # key < a[m] - ofs = m - else: - # a[m] <= key - lastofs = m + 1 - # Now lastofs == ofs, so a[ofs - 1] <= key < a[ofs] - return ofs - - - @wrap - def merge_compute_minrun(n): - """ - Compute a good value for the minimum run length; natural runs shorter - than this are boosted artificially via binary insertion. - - If n < 64, return n (it's too small to bother with fancy stuff). - Else if n is an exact power of 2, return 32. - Else return an int k, 32 <= k <= 64, such that n/k is close to, but - strictly less than, an exact power of 2. - - See listsort.txt for more info. - """ - r = 0 - assert n >= 0 - while n >= 64: - r |= n & 1 - n >>= 1 - return n + r - - - @wrap - def sortslice_copy(dest_keys, dest_values, dest_start, - src_keys, src_values, src_start, - nitems): - """ - Upwards memcpy(). - """ - assert src_start >= 0 - assert dest_start >= 0 - for i in range(nitems): - dest_keys[dest_start + i] = src_keys[src_start + i] - if has_values(src_keys, src_values): - for i in range(nitems): - dest_values[dest_start + i] = src_values[src_start + i] - - @wrap - def sortslice_copy_down(dest_keys, dest_values, dest_start, - src_keys, src_values, src_start, - nitems): - """ - Downwards memcpy(). - """ - assert src_start >= 0 - assert dest_start >= 0 - for i in range(nitems): - dest_keys[dest_start - i] = src_keys[src_start - i] - if has_values(src_keys, src_values): - for i in range(nitems): - dest_values[dest_start - i] = src_values[src_start - i] - - - # Disable this for debug or perf comparison - DO_GALLOP = 1 - - @wrap - def merge_lo(ms, keys, values, ssa, na, ssb, nb): - """ - Merge the na elements starting at ssa with the nb elements starting at - ssb = ssa + na in a stable way, in-place. na and nb must be > 0, - and should have na <= nb. See listsort.txt for more info. 
- - An updated MergeState is returned (with possibly a different min_gallop - or larger temp arrays). - - NOTE: compared to CPython's timsort, the requirement that - "Must also have that keys[ssa + na - 1] belongs at the end of the merge" - - is removed. This makes the code a bit simpler and easier to reason about. - """ - assert na > 0 and nb > 0 and na <= nb - assert ssb == ssa + na - # First copy [ssa, ssa + na) into the temp space - ms = merge_getmem(ms, na) - sortslice_copy(ms.keys, ms.values, 0, - keys, values, ssa, - na) - a_keys = ms.keys - a_values = ms.values - b_keys = keys - b_values = values - dest = ssa - ssa = 0 - - _has_values = has_values(a_keys, a_values) - min_gallop = ms.min_gallop - - # Now start merging into the space left from [ssa, ...) - - while nb > 0 and na > 0: - # Do the straightforward thing until (if ever) one run - # appears to win consistently. - acount = 0 - bcount = 0 - - while True: - if LT(b_keys[ssb], a_keys[ssa]): - keys[dest] = b_keys[ssb] - if _has_values: - values[dest] = b_values[ssb] - dest += 1 - ssb += 1 - nb -= 1 - if nb == 0: - break - # It's a B run - bcount += 1 - acount = 0 - if bcount >= min_gallop: - break - else: - keys[dest] = a_keys[ssa] - if _has_values: - values[dest] = a_values[ssa] - dest += 1 - ssa += 1 - na -= 1 - if na == 0: - break - # It's a A run - acount += 1 - bcount = 0 - if acount >= min_gallop: - break - - # One run is winning so consistently that galloping may - # be a huge win. So try that, and continue galloping until - # (if ever) neither run appears to be winning consistently - # anymore. 
- if DO_GALLOP and na > 0 and nb > 0: - min_gallop += 1 - - while acount >= MIN_GALLOP or bcount >= MIN_GALLOP: - # As long as we gallop without leaving this loop, make - # the heuristic more likely - min_gallop -= min_gallop > 1 - - # Gallop in A to find where keys[ssb] should end up - k = gallop_right(b_keys[ssb], a_keys, ssa, ssa + na, ssa) - # k is an index, make it a size - k -= ssa - acount = k - if k > 0: - # Copy everything from A before k - sortslice_copy(keys, values, dest, - a_keys, a_values, ssa, - k) - dest += k - ssa += k - na -= k - if na == 0: - # Finished merging - break - # Copy keys[ssb] - keys[dest] = b_keys[ssb] - if _has_values: - values[dest] = b_values[ssb] - dest += 1 - ssb += 1 - nb -= 1 - if nb == 0: - # Finished merging - break - - # Gallop in B to find where keys[ssa] should end up - k = gallop_left(a_keys[ssa], b_keys, ssb, ssb + nb, ssb) - # k is an index, make it a size - k -= ssb - bcount = k - if k > 0: - # Copy everything from B before k - # NOTE: source and dest are the same buffer, but the - # destination index is below the source index - sortslice_copy(keys, values, dest, - b_keys, b_values, ssb, - k) - dest += k - ssb += k - nb -= k - if nb == 0: - # Finished merging - break - # Copy keys[ssa] - keys[dest] = a_keys[ssa] - if _has_values: - values[dest] = a_values[ssa] - dest += 1 - ssa += 1 - na -= 1 - if na == 0: - # Finished merging - break - - # Penalize it for leaving galloping mode - min_gallop += 1 - - # Merge finished, now handle the remaining areas - if nb == 0: - # Only A remaining to copy at the end of the destination area - sortslice_copy(keys, values, dest, - a_keys, a_values, ssa, - na) - else: - assert na == 0 - assert dest == ssb - # B's tail is already at the right place, do nothing - - return merge_adjust_gallop(ms, min_gallop) - - - @wrap - def merge_hi(ms, keys, values, ssa, na, ssb, nb): - """ - Merge the na elements starting at ssa with the nb elements starting at - ssb = ssa + na in a stable way, 
in-place. na and nb must be > 0, - and should have na >= nb. See listsort.txt for more info. - - An updated MergeState is returned (with possibly a different min_gallop - or larger temp arrays). - - NOTE: compared to CPython's timsort, the requirement that - "Must also have that keys[ssa + na - 1] belongs at the end of the merge" - - is removed. This makes the code a bit simpler and easier to reason about. - """ - assert na > 0 and nb > 0 and na >= nb - assert ssb == ssa + na - # First copy [ssb, ssb + nb) into the temp space - ms = merge_getmem(ms, nb) - sortslice_copy(ms.keys, ms.values, 0, - keys, values, ssb, - nb) - a_keys = keys - a_values = values - b_keys = ms.keys - b_values = ms.values - - # Now start merging *in descending order* into the space left - # from [..., ssb + nb). - dest = ssb + nb - 1 - ssb = nb - 1 - ssa = ssa + na - 1 - - _has_values = has_values(b_keys, b_values) - min_gallop = ms.min_gallop - - while nb > 0 and na > 0: - # Do the straightforward thing until (if ever) one run - # appears to win consistently. - acount = 0 - bcount = 0 - - while True: - if LT(b_keys[ssb], a_keys[ssa]): - # We merge in descending order, so copy the larger value - keys[dest] = a_keys[ssa] - if _has_values: - values[dest] = a_values[ssa] - dest -= 1 - ssa -= 1 - na -= 1 - if na == 0: - break - # It's a A run - acount += 1 - bcount = 0 - if acount >= min_gallop: - break - else: - keys[dest] = b_keys[ssb] - if _has_values: - values[dest] = b_values[ssb] - dest -= 1 - ssb -= 1 - nb -= 1 - if nb == 0: - break - # It's a B run - bcount += 1 - acount = 0 - if bcount >= min_gallop: - break - - # One run is winning so consistently that galloping may - # be a huge win. So try that, and continue galloping until - # (if ever) neither run appears to be winning consistently - # anymore. 
- if DO_GALLOP and na > 0 and nb > 0: - min_gallop += 1 - - while acount >= MIN_GALLOP or bcount >= MIN_GALLOP: - # As long as we gallop without leaving this loop, make - # the heuristic more likely - min_gallop -= min_gallop > 1 - - # Gallop in A to find where keys[ssb] should end up - k = gallop_right(b_keys[ssb], a_keys, ssa - na + 1, ssa + 1, ssa) - # k is an index, make it a size from the end - k = ssa + 1 - k - acount = k - if k > 0: - # Copy everything from A after k. - # Destination and source are the same buffer, and destination - # index is greater, so copy from the end to the start. - sortslice_copy_down(keys, values, dest, - a_keys, a_values, ssa, - k) - dest -= k - ssa -= k - na -= k - if na == 0: - # Finished merging - break - # Copy keys[ssb] - keys[dest] = b_keys[ssb] - if _has_values: - values[dest] = b_values[ssb] - dest -= 1 - ssb -= 1 - nb -= 1 - if nb == 0: - # Finished merging - break - - # Gallop in B to find where keys[ssa] should end up - k = gallop_left(a_keys[ssa], b_keys, ssb - nb + 1, ssb + 1, ssb) - # k is an index, make it a size from the end - k = ssb + 1 - k - bcount = k - if k > 0: - # Copy everything from B before k - sortslice_copy_down(keys, values, dest, - b_keys, b_values, ssb, - k) - dest -= k - ssb -= k - nb -= k - if nb == 0: - # Finished merging - break - # Copy keys[ssa] - keys[dest] = a_keys[ssa] - if _has_values: - values[dest] = a_values[ssa] - dest -= 1 - ssa -= 1 - na -= 1 - if na == 0: - # Finished merging - break - - # Penalize it for leaving galloping mode - min_gallop += 1 - - # Merge finished, now handle the remaining areas - if na == 0: - # Only B remaining to copy at the front of the destination area - sortslice_copy(keys, values, dest - nb + 1, - b_keys, b_values, ssb - nb + 1, - nb) - else: - assert nb == 0 - assert dest == ssa - # A's front is already at the right place, do nothing - - return merge_adjust_gallop(ms, min_gallop) - - - @wrap - def merge_at(ms, keys, values, i): - """ - Merge the two runs at 
stack indices i and i+1. - - An updated MergeState is returned. - """ - n = ms.n - assert n >= 2 - assert i >= 0 - assert i == n - 2 or i == n - 3 - - ssa, na = ms.pending[i] - ssb, nb = ms.pending[i + 1] - assert na > 0 and nb > 0 - assert ssa + na == ssb - - # Record the length of the combined runs; if i is the 3rd-last - # run now, also slide over the last run (which isn't involved - # in this merge). The current run i+1 goes away in any case. - ms.pending[i] = MergeRun(ssa, na + nb) - if i == n - 3: - ms.pending[i + 1] = ms.pending[i + 2] - ms = merge_pop(ms) - - # Where does b start in a? Elements in a before that can be - # ignored (already in place). - k = gallop_right(keys[ssb], keys, ssa, ssa + na, ssa) - # [k, ssa + na) remains to be merged - na -= k - ssa - ssa = k - if na == 0: - return ms - - # Where does a end in b? Elements in b after that can be - # ignored (already in place). - k = gallop_left(keys[ssa + na - 1], keys, ssb, ssb + nb, ssb + nb - 1) - # [ssb, k) remains to be merged - nb = k - ssb - - # Merge what remains of the runs, using a temp array with - # min(na, nb) elements. - if na <= nb: - return merge_lo(ms, keys, values, ssa, na, ssb, nb) - else: - return merge_hi(ms, keys, values, ssa, na, ssb, nb) - - - @wrap - def merge_collapse(ms, keys, values): - """ - Examine the stack of runs waiting to be merged, merging adjacent runs - until the stack invariants are re-established: - - 1. len[-3] > len[-2] + len[-1] - 2. len[-2] > len[-1] - - An updated MergeState is returned. - - See listsort.txt for more info. 
- """ - while ms.n > 1: - pending = ms.pending - n = ms.n - 2 - if ((n > 0 and pending[n-1].size <= pending[n].size + pending[n+1].size) or - (n > 1 and pending[n-2].size <= pending[n-1].size + pending[n].size)): - if pending[n - 1].size < pending[n + 1].size: - # Merge smaller one first - n -= 1 - ms = merge_at(ms, keys, values, n) - elif pending[n].size < pending[n + 1].size: - ms = merge_at(ms, keys, values, n) - else: - break - return ms - - @wrap - def merge_force_collapse(ms, keys, values): - """ - Regardless of invariants, merge all runs on the stack until only one - remains. This is used at the end of the mergesort. - - An updated MergeState is returned. - """ - while ms.n > 1: - pending = ms.pending - n = ms.n - 2 - if n > 0: - if pending[n - 1].size < pending[n + 1].size: - # Merge the smaller one first - n -= 1 - ms = merge_at(ms, keys, values, n) - return ms - - - @wrap - def reverse_slice(keys, values, start, stop): - """ - Reverse a slice, in-place. - """ - i = start - j = stop - 1 - while i < j: - keys[i], keys[j] = keys[j], keys[i] - i += 1 - j -= 1 - if has_values(keys, values): - i = start - j = stop - 1 - while i < j: - values[i], values[j] = values[j], values[i] - i += 1 - j -= 1 - - - @wrap - def run_timsort_with_mergestate(ms, keys, values): - """ - Run timsort with the mergestate. - """ - nremaining = len(keys) - if nremaining < 2: - return - - # March over the array once, left to right, finding natural runs, - # and extending short natural runs to minrun elements. - minrun = merge_compute_minrun(nremaining) - - lo = zero - while nremaining > 0: - n, desc = count_run(keys, lo, lo + nremaining) - if desc: - # Descending run => reverse - reverse_slice(keys, values, lo, lo + n) - # If short, extend to min(minrun, nremaining) - if n < minrun: - force = min(minrun, nremaining) - binarysort(keys, values, lo, lo + force, lo + n) - n = force - # Push run onto stack, and maybe merge. 
- ms = merge_append(ms, MergeRun(lo, n)) - ms = merge_collapse(ms, keys, values) - # Advance to find next run. - lo += n - nremaining -= n - - # All initial runs have been discovered, now finish merging. - ms = merge_force_collapse(ms, keys, values) - assert ms.n == 1 - assert ms.pending[0] == (0, len(keys)) - - - @wrap - def run_timsort(keys): - """ - Run timsort over the given keys. - """ - values = keys - run_timsort_with_mergestate(merge_init(keys), keys, values) - - - @wrap - def run_timsort_with_values(keys, values): - """ - Run timsort over the given keys and values. - """ - run_timsort_with_mergestate(merge_init_with_values(keys, values), - keys, values) - - return TimsortImplementation( - wrap, - count_run, binarysort, gallop_left, gallop_right, - merge_init, merge_append, merge_pop, - merge_compute_minrun, merge_lo, merge_hi, merge_at, - merge_force_collapse, merge_collapse, - run_timsort, run_timsort_with_values) - - -def make_py_timsort(*args): - return make_timsort_impl((lambda f: f), *args) - -def make_jit_timsort(*args): - from numba import jit - return make_timsort_impl((lambda f: jit(nopython=True)(f)), - *args) diff --git a/numba/numba/tests/true_div_usecase.py b/numba/numba/tests/true_div_usecase.py deleted file mode 100644 index ba5257170..000000000 --- a/numba/numba/tests/true_div_usecase.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import division - - -# These functions have their own module in order to be compiled with the right -# __future__ flag (and be tested alongside the 2.x legacy division operator). 
- -def truediv_usecase(x, y): - return x / y - -def itruediv_usecase(x, y): - x /= y - return x diff --git a/numba/numba/tests/usecases.py b/numba/numba/tests/usecases.py deleted file mode 100644 index f2ea3edab..000000000 --- a/numba/numba/tests/usecases.py +++ /dev/null @@ -1,89 +0,0 @@ -import math -import numpy as np -from numba import jit - -def sum1d(s, e): - c = 0 - for i in range(s, e): - c += i - return c - - -def sum2d(s, e): - c = 0 - for i in range(s, e): - for j in range(s, e): - c += i * j - return c - - -def while_count(s, e): - i = s - c = 0 - while i < e: - c += i - i += 1 - return c - - -def copy_arrays(a, b): - for i in range(a.shape[0]): - b[i] = a[i] - - -def copy_arrays2d(a, b): - for i in range(a.shape[0]): - for j in range(a.shape[1]): - b[i, j] = a[i, j] - - -def redefine1(): - x = 0 - for i in range(5): - x += 1 - x = 0. + x - for i in range(5): - x += 1 - return x - - -def andor(x, y): - return (x > 0 and x < 10) or (y > 0 and y < 10) - -andornopython = jit(nopython=True)(andor) - - -def string_concat(x, y): - a = "whatzup" - return a + str(x + y) - - -def string_len(s): - return len(s) - - -def string_slicing(s, start, stop): - return s[start:stop] - - -def string_conversion(x): - return str(x) - - -def string_comparison(s1, s2, op): - return op(s1, s2) - - -def blackscholes_cnd(d): - A1 = 0.31938153 - A2 = -0.356563782 - A3 = 1.781477937 - A4 = -1.821255978 - A5 = 1.330274429 - RSQRT2PI = 0.39894228040143267793994605993438 - K = 1.0 / (1.0 + 0.2316419 * math.fabs(d)) - ret_val = (RSQRT2PI * math.exp(-0.5 * d * d) * - (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))))) - if d > 0: - ret_val = 1.0 - ret_val - return ret_val diff --git a/numba/numba/tracing.py b/numba/numba/tracing.py deleted file mode 100644 index 64d600bd6..000000000 --- a/numba/numba/tracing.py +++ /dev/null @@ -1,218 +0,0 @@ -from __future__ import absolute_import - -import logging -import sys -import types -import threading -import inspect -from functools import 
wraps -from itertools import chain -from . import config - -class TLS(threading.local): - """Use a subclass to properly initialize the TLS variables in all threads.""" - def __init__(self): - self.tracing = False - self.indent = 0 - -tls = TLS() - -def find_function_info(func, spec, args): - """Return function meta-data in a tuple. - - (name, type)""" - - module = getattr(func, '__module__', None) - name = getattr(func, '__name__', None) - self = getattr(func, '__self__', None) - cname = None - if self: - cname = self.__name__ - #cname = self.__class__.__name__ - # Try to deduce the class' name even for unbound methods from their - # first argument, which we assume to be a class instance if named 'self'... - elif len(spec.args) and spec.args[0] == 'self': - cname = args[0].__class__.__name__ - # ...or a class object if named 'cls' - elif len(spec.args) and spec.args[0] == 'cls': - cname = args[0].__name__ - if name: - qname = [] - if module and module != '__main__': - qname.append(module) - qname.append('.') - if cname: - qname.append(cname) - qname.append('.') - qname.append(name) - name = ''.join(qname) - return name, None - -def chop(value): - MAX_SIZE = 320 - s = repr(value) - if len(s) > MAX_SIZE: - return s[:MAX_SIZE] + '...' 
+ s[-1] - else: - return s - -def create_events(fname, spec, args, kwds): - - values = dict() - if spec.defaults: - values = dict(zip(spec.args[-len(spec.defaults):],spec.defaults)) - values.update(kwds) - values.update(list(zip(spec.args[:len(args)], args))) - positional = ['%s=%r'%(a, values.pop(a)) for a in spec.args] - anonymous = [str(a) for a in args[len(positional):]] - keywords = ['%s=%r'%(k, values[k]) for k in sorted(values.keys())] - params = ', '.join([f for f in chain(positional, anonymous, keywords) if f]) - - enter = ['>> ', tls.indent * ' ', fname, '(', params, ')'] - leave = ['<< ', tls.indent * ' ', fname] - return enter, leave - - -def dotrace(*args, **kwds): - """Function decorator to trace a function's entry and exit. - - *args: categories in which to trace this function. Example usage: - - @trace - def function(...):... - - @trace('mycategory') - def function(...):... - - - """ - - recursive = kwds.get('recursive', False) - def decorator(func): - - spec = None - logger = logging.getLogger('trace') - def wrapper(*args, **kwds): - if not logger.isEnabledFor(logging.INFO) or tls.tracing: - return func(*args, **kwds) - - fname, ftype = find_function_info(func, spec, args) - - try: - tls.tracing = True - enter, leave = create_events(fname, spec, args, kwds) - - try: - logger.info(''.join(enter)) - tls.indent += 1 - try: - try: - tls.tracing = False - result = func(*args, **kwds) - finally: - tls.tracing = True - except: - type, value, traceback = sys.exc_info() - leave.append(' => exception thrown\n\traise ') - mname = type.__module__ - if mname != '__main__': - leave.append(mname) - leave.append('.') - leave.append(type.__name__) - if value.args: - leave.append('(') - leave.append(', '.join(chop(v) for v in value.args)) - leave.append(')') - else: - leave.append('()') - raise - else: - if result is not None: - leave.append(' -> ') - leave.append(chop(result)) - finally: - tls.indent -= 1 - logger.info(''.join(leave)) - finally: - tls.tracing = 
False - return result - # wrapper end - - result = None - rewrap = lambda x: x - # Unwrap already wrapped functions - # (to be rewrapped again later) - if type(func) == classmethod: - rewrap = type(func) - # Note: 'func.__func__' only works in Python 3 - func = func.__get__(True).__func__ - elif type(func) == staticmethod: - rewrap = type(func) - # Note: 'func.__func__' only works in Python 3 - func = func.__get__(True) - elif type(func) == property: - raise NotImplementedError - - spec = inspect.getargspec(func) - return rewrap(wraps(func)(wrapper)) - - arg0 = len(args) and args[0] or None - # not supported yet... - if recursive: - raise NotImplementedError - if inspect.ismodule(arg0): - for n, f in inspect.getmembers(arg0, inspect.isfunction): - setattr(arg0, n, decorator(f)) - for n, c in inspect.getmembers(arg0, inspect.isclass): - dotrace(c, *args, recursive=recursive) - elif inspect.isclass(arg0): - for n, f in inspect.getmembers(arg0, lambda x: (inspect.isfunction(x) or - inspect.ismethod(x))): - setattr(arg0, n, decorator(f)) - - - if callable(arg0) or type(arg0) in (classmethod, staticmethod): - return decorator(arg0) - elif type(arg0) == property: - # properties combine up to three functions: 'get', 'set', 'del', - # so let's wrap them all. 
- pget, pset, pdel = None, None, None - if arg0.fget: - pget = decorator(arg0.fget) - if arg0.fset: - pset = decorator(arg0.fset) - if arg0.fdel: - pdel = decorator(arg0.fdel) - return property(pget, pset, pdel) - - else: - return decorator - -def notrace(*args, **kwds): - """Just a no-op in case tracing is disabled.""" - def decorator(func): - return func - arg0 = len(args) and args[0] or None - - if callable(arg0) or type(arg0) in (classmethod, staticmethod): - return decorator(arg0) - else: - return decorator - -def doevent(msg): - msg = ['== ', tls.indent * ' ', msg] - logger = logging.getLogger('trace') - logger.info(''.join(msg)) - -def noevent(msg): - pass - -if config.TRACE: - logger = logging.getLogger('trace') - logger.setLevel(logging.INFO) - logger.handlers = [logging.StreamHandler()] - trace = dotrace - event = doevent -else: - trace = notrace - event = noevent diff --git a/numba/numba/transforms.py b/numba/numba/transforms.py deleted file mode 100644 index 894a8c0e6..000000000 --- a/numba/numba/transforms.py +++ /dev/null @@ -1,301 +0,0 @@ -""" -Implement transformation on Numba IR -""" - -from __future__ import absolute_import, print_function - -from collections import namedtuple - -from numba.analysis import compute_cfg_from_blocks, find_top_level_loops -from numba import ir -from numba.interpreter import Interpreter -from numba.analysis import compute_use_defs - - -def _extract_loop_lifting_candidates(cfg, blocks): - """ - Returns a list of loops that are candidate for loop lifting - """ - # check well-formed-ness of the loop - def same_exit_point(loop): - "all exits must point to the same location" - outedges = set() - for k in loop.exits: - succs = set(x for x, _ in cfg.successors(k)) - if not succs: - # If the exit point has no successor, it contains an return - # statement, which is not handled by the looplifting code. - # Thus, this loop is not a candidate. 
- return False - outedges |= succs - return len(outedges) == 1 - - def one_entry(loop): - "there is one entry" - return len(loop.entries) == 1 - - def cannot_yield(loop): - "cannot have yield inside the loop" - insiders = set(loop.body) | set(loop.entries) | set(loop.exits) - for blk in map(blocks.__getitem__, insiders): - for inst in blk.body: - if isinstance(inst, ir.Assign): - if isinstance(inst.value, ir.Yield): - return False - return True - - return [loop for loop in find_top_level_loops(cfg) - if same_exit_point(loop) and one_entry(loop) and cannot_yield(loop)] - - -_loop_lift_info = namedtuple('loop_lift_info', - 'loop,inputs,outputs,callfrom,returnto') - - -def _loop_lift_get_candidate_infos(cfg, blocks, livemap): - """ - Returns information on looplifting candidates. - """ - loops = _extract_loop_lifting_candidates(cfg, blocks) - loopinfos = [] - for loop in loops: - [callfrom] = loop.entries # requirement checked earlier - an_exit = next(iter(loop.exits)) # anyone of the exit block - [(returnto, _)] = cfg.successors(an_exit) # requirement checked earlier - inputs = livemap[callfrom] - outputs = livemap[returnto] - - # ensure live variables are actually used in the blocks, else remove, - # saves having to create something valid to run through postproc - # to achieve similar - local_block_ids = set(loop.body) | set(loop.entries) | set(loop.exits) - loopblocks = {} - for k in local_block_ids: - loopblocks[k] = blocks[k] - - used_vars = set() - for vs in compute_use_defs(loopblocks).usemap.values(): - used_vars |= vs - - # note: sorted for stable ordering - inputs = sorted(set(inputs) & used_vars) - outputs = sorted(set(outputs) & used_vars) - - lli = _loop_lift_info(loop=loop, inputs=inputs, outputs=outputs, - callfrom=callfrom, returnto=returnto) - loopinfos.append(lli) - - return loopinfos - - -def _loop_lift_modify_call_block(liftedloop, block, inputs, outputs, returnto): - """ - Transform calling block from top-level function to call the lifted loop. 
- """ - scope = block.scope - loc = block.loc - blk = ir.Block(scope=scope, loc=loc) - # load loop - fn = ir.Const(value=liftedloop, loc=loc) - fnvar = scope.make_temp(loc=loc) - blk.append(ir.Assign(target=fnvar, value=fn, loc=loc)) - # call loop - args = [scope.get_exact(name) for name in inputs] - callexpr = ir.Expr.call(func=fnvar, args=args, kws=(), loc=loc) - # temp variable for the return value - callres = scope.make_temp(loc=loc) - blk.append(ir.Assign(target=callres, value=callexpr, loc=loc)) - # unpack return value - for i, out in enumerate(outputs): - target = scope.get_exact(out) - getitem = ir.Expr.static_getitem(value=callres, index=i, - index_var=None, loc=loc) - blk.append(ir.Assign(target=target, value=getitem, loc=loc)) - # jump to next block - blk.append(ir.Jump(target=returnto, loc=loc)) - return blk - - -def _loop_lift_prepare_loop_func(loopinfo, blocks): - """ - Inplace transform loop blocks for use as lifted loop. - """ - def make_prologue(): - """ - Make a new block that unwraps the argument and jump to the loop entry. - This block is the entry block of the function. - """ - entry_block = blocks[loopinfo.callfrom] - scope = entry_block.scope - loc = entry_block.loc - - block = ir.Block(scope=scope, loc=loc) - # load args - args = [ir.Arg(name=k, index=i, loc=loc) - for i, k in enumerate(loopinfo.inputs)] - for aname, aval in zip(loopinfo.inputs, args): - tmp = ir.Var(scope=scope, name=aname, loc=loc) - block.append(ir.Assign(target=tmp, value=aval, loc=loc)) - # jump to loop entry - block.append(ir.Jump(target=loopinfo.callfrom, loc=loc)) - return block - - def make_epilogue(): - """ - Make a new block to prepare the return values. - This block is the last block of the function. 
- """ - entry_block = blocks[loopinfo.callfrom] - scope = entry_block.scope - loc = entry_block.loc - - block = ir.Block(scope=scope, loc=loc) - # prepare tuples to return - vals = [scope.get_exact(name=name) for name in loopinfo.outputs] - tupexpr = ir.Expr.build_tuple(items=vals, loc=loc) - tup = scope.make_temp(loc=loc) - block.append(ir.Assign(target=tup, value=tupexpr, loc=loc)) - # return - block.append(ir.Return(value=tup, loc=loc)) - return block - - # Lowering assumes the first block to be the one with the smallest offset - firstblk = min(blocks) - 1 - blocks[firstblk] = make_prologue() - blocks[loopinfo.returnto] = make_epilogue() - - -def _loop_lift_modify_blocks(func_ir, loopinfo, blocks, - typingctx, targetctx, flags, locals): - """ - Modify the block inplace to call to the lifted-loop. - Returns a dictionary of blocks of the lifted-loop. - """ - from numba.dispatcher import LiftedLoop - - # Copy loop blocks - loop = loopinfo.loop - loopblockkeys = set(loop.body) | set(loop.entries) | set(loop.exits) - loopblocks = dict((k, blocks[k].copy()) for k in loopblockkeys) - # Modify the loop blocks - _loop_lift_prepare_loop_func(loopinfo, loopblocks) - - # Create a new IR for the lifted loop - lifted_ir = func_ir.derive(blocks=loopblocks, - arg_names=tuple(loopinfo.inputs), - arg_count=len(loopinfo.inputs), - force_non_generator=True) - liftedloop = LiftedLoop(lifted_ir, - typingctx, targetctx, flags, locals) - - # modify for calling into liftedloop - callblock = _loop_lift_modify_call_block(liftedloop, blocks[loopinfo.callfrom], - loopinfo.inputs, loopinfo.outputs, - loopinfo.returnto) - # remove blocks - for k in loopblockkeys: - del blocks[k] - # update main interpreter callsite into the liftedloop - blocks[loopinfo.callfrom] = callblock - return liftedloop - - -def loop_lifting(func_ir, typingctx, targetctx, flags, locals): - """ - Loop lifting transformation. 
- - Given a interpreter `func_ir` returns a 2 tuple of - `(toplevel_interp, [loop0_interp, loop1_interp, ....])` - """ - blocks = func_ir.blocks.copy() - cfg = compute_cfg_from_blocks(blocks) - loopinfos = _loop_lift_get_candidate_infos(cfg, blocks, - func_ir.variable_lifetime.livemap) - loops = [] - for loopinfo in loopinfos: - lifted = _loop_lift_modify_blocks(func_ir, loopinfo, blocks, - typingctx, targetctx, flags, locals) - loops.append(lifted) - - # Make main IR - main = func_ir.derive(blocks=blocks) - - return main, loops - - -def canonicalize_cfg_single_backedge(blocks): - """ - Rewrite loops that have multiple backedges. - """ - cfg = compute_cfg_from_blocks(blocks) - newblocks = blocks.copy() - - def new_block_id(): - return max(newblocks.keys()) + 1 - - def has_multiple_backedges(loop): - count = 0 - for k in loop.body: - blk = blocks[k] - edges = blk.terminator.get_targets() - # is a backedge? - if loop.header in edges: - count += 1 - if count > 1: - # early exit - return True - return False - - def yield_loops_with_multiple_backedges(): - for lp in cfg.loops().values(): - if has_multiple_backedges(lp): - yield lp - - def replace_target(term, src, dst): - def replace(target): - return (dst if target == src else target) - - if isinstance(term, ir.Branch): - return ir.Branch(cond=term.cond, - truebr=replace(term.truebr), - falsebr=replace(term.falsebr), - loc=term.loc) - elif isinstance(term, ir.Jump): - return ir.Jump(target=replace(term.target), loc=term.loc) - else: - assert not term.get_targets() - return term - - def rewrite_single_backedge(loop): - """ - Add new tail block that gathers all the backedges - """ - header = loop.header - tailkey = new_block_id() - for blkkey in loop.body: - blk = newblocks[blkkey] - if header in blk.terminator.get_targets(): - newblk = blk.copy() - # rewrite backedge into jumps to new tail block - newblk.body[-1] = replace_target(blk.terminator, header, - tailkey) - newblocks[blkkey] = newblk - # create new tail block - 
entryblk = newblocks[header] - tailblk = ir.Block(scope=entryblk.scope, loc=entryblk.loc) - # add backedge - tailblk.append(ir.Jump(target=header, loc=tailblk.loc)) - newblocks[tailkey] = tailblk - - for loop in yield_loops_with_multiple_backedges(): - rewrite_single_backedge(loop) - - return newblocks - - -def canonicalize_cfg(blocks): - """ - Rewrite the given blocks to canonicalize the CFG. - Returns a new dictionary of blocks. - """ - return canonicalize_cfg_single_backedge(blocks) diff --git a/numba/numba/typeconv/__init__.py b/numba/numba/typeconv/__init__.py deleted file mode 100644 index 4ae3acabf..000000000 --- a/numba/numba/typeconv/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .castgraph import Conversion diff --git a/numba/numba/typeconv/_typeconv.cpp b/numba/numba/typeconv/_typeconv.cpp deleted file mode 100644 index 386a77e78..000000000 --- a/numba/numba/typeconv/_typeconv.cpp +++ /dev/null @@ -1,203 +0,0 @@ -#include "../_pymodule.h" -#include "../capsulethunk.h" -#include "typeconv.hpp" - -extern "C" { - - -static PyObject* -new_type_manager(PyObject* self, PyObject* args); - -static void -del_type_manager(PyObject *); - -static PyObject* -select_overload(PyObject* self, PyObject* args); - -static PyObject* -check_compatible(PyObject* self, PyObject* args); - -static PyObject* -set_compatible(PyObject* self, PyObject* args); - -static PyObject* -get_pointer(PyObject* self, PyObject* args); - - -static PyMethodDef ext_methods[] = { -#define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL } - declmethod(new_type_manager), - declmethod(select_overload), - declmethod(check_compatible), - declmethod(set_compatible), - declmethod(get_pointer), - { NULL }, -#undef declmethod -}; - - -MOD_INIT(_typeconv) { - PyObject *m; - MOD_DEF(m, "_typeconv", "No docs", ext_methods) - if (m == NULL) - return MOD_ERROR_VAL; - - return MOD_SUCCESS_VAL(m); -} - -} // end extern C - 
-/////////////////////////////////////////////////////////////////////////////// - -const char PY_CAPSULE_TM_NAME[] = "*tm"; -#define BAD_TM_ARGUMENT PyErr_SetString(PyExc_TypeError, \ - "1st argument not TypeManager") - -static -TypeManager* unwrap_TypeManager(PyObject *tm) { - void* p = PyCapsule_GetPointer(tm, PY_CAPSULE_TM_NAME); - return reinterpret_cast(p); -} - -PyObject* -new_type_manager(PyObject* self, PyObject* args) -{ - TypeManager* tm = new TypeManager(); - return PyCapsule_New(tm, PY_CAPSULE_TM_NAME, &del_type_manager); -} - -void -del_type_manager(PyObject *tm) -{ - delete unwrap_TypeManager(tm); -} - -PyObject* -select_overload(PyObject* self, PyObject* args) -{ - PyObject *tmcap, *sigtup, *ovsigstup; - int allow_unsafe; - - if (!PyArg_ParseTuple(args, "OOOi", &tmcap, &sigtup, &ovsigstup, - &allow_unsafe)) { - return NULL; - } - - TypeManager *tm = unwrap_TypeManager(tmcap); - if (!tm) { - BAD_TM_ARGUMENT; - } - - Py_ssize_t sigsz = PySequence_Size(sigtup); - Py_ssize_t ovsz = PySequence_Size(ovsigstup); - - Type *sig = new Type[sigsz]; - Type *ovsigs = new Type[ovsz * sigsz]; - - for (int i = 0; i < sigsz; ++i) { - sig[i] = Type(PyNumber_AsSsize_t(PySequence_Fast_GET_ITEM(sigtup, - i), NULL)); - } - - for (int i = 0; i < ovsz; ++i) { - PyObject *cursig = PySequence_Fast_GET_ITEM(ovsigstup, i); - for (int j = 0; j < sigsz; ++j) { - long tid = PyNumber_AsSsize_t(PySequence_Fast_GET_ITEM(cursig, - j), NULL); - ovsigs[i * sigsz + j] = Type(tid); - } - } - - int selected = -42; - int matches = tm->selectOverload(sig, ovsigs, selected, sigsz, ovsz, - (bool) allow_unsafe); - - delete [] sig; - delete [] ovsigs; - - if (matches > 1) { - PyErr_SetString(PyExc_TypeError, "Ambigous overloading"); - return NULL; - } else if (matches == 0) { - PyErr_SetString(PyExc_TypeError, "No compatible overload"); - return NULL; - } - - return PyLong_FromLong(selected); -} - -PyObject* -check_compatible(PyObject* self, PyObject* args) -{ - PyObject *tmcap; - int from, to; 
- if (!PyArg_ParseTuple(args, "Oii", &tmcap, &from, &to)) { - return NULL; - } - - TypeManager *tm = unwrap_TypeManager(tmcap); - if(!tm) { - BAD_TM_ARGUMENT; - return NULL; - } - - switch(tm->isCompatible(Type(from), Type(to))){ - case TCC_EXACT: - return PyString_FromString("exact"); - case TCC_PROMOTE: - return PyString_FromString("promote"); - case TCC_CONVERT_SAFE: - return PyString_FromString("safe"); - case TCC_CONVERT_UNSAFE: - return PyString_FromString("unsafe"); - default: - Py_RETURN_NONE; - } -} - -PyObject* -set_compatible(PyObject* self, PyObject* args) -{ - PyObject *tmcap; - int from, to, by; - if (!PyArg_ParseTuple(args, "Oiii", &tmcap, &from, &to, &by)) { - return NULL; - } - - TypeManager *tm = unwrap_TypeManager(tmcap); - if (!tm) { - BAD_TM_ARGUMENT; - return NULL; - } - TypeCompatibleCode tcc; - switch (by) { - case 'p': // promote - tcc = TCC_PROMOTE; - break; - case 's': // safe convert - tcc = TCC_CONVERT_SAFE; - break; - case 'u': // unsafe convert - tcc = TCC_CONVERT_UNSAFE; - break; - default: - PyErr_SetString(PyExc_ValueError, "Unknown TCC"); - return NULL; - } - - tm->addCompatibility(Type(from), Type(to), tcc); - Py_RETURN_NONE; -} - - -PyObject* -get_pointer(PyObject* self, PyObject* args) -{ - PyObject *tmcap; - if (!PyArg_ParseTuple(args, "O", &tmcap)) { - return NULL; - } - return PyLong_FromVoidPtr(unwrap_TypeManager(tmcap)); -} - - diff --git a/numba/numba/typeconv/castgraph.py b/numba/numba/typeconv/castgraph.py deleted file mode 100644 index 6d3b7637d..000000000 --- a/numba/numba/typeconv/castgraph.py +++ /dev/null @@ -1,136 +0,0 @@ -from __future__ import print_function, absolute_import - -from collections import defaultdict -import enum - -from numba.utils import total_ordering - - -class Conversion(enum.IntEnum): - """ - A conversion kind from one type to the other. The enum members - are ordered from stricter to looser. 
- """ - # The two types are identical - exact = 1 - # The two types are of the same kind, the destination type has more - # extension or precision than the source type (e.g. float32 -> float64, - # or int32 -> int64) - promote = 2 - # The source type can be converted to the destination type without loss - # of information (e.g. int32 -> int64). Note that the conversion may - # still fail explicitly at runtime (e.g. Optional(int32) -> int32) - safe = 3 - # The conversion may appear to succeed at runtime while losing information - # or precision (e.g. int32 -> uint32, float64 -> float32, int64 -> int32, - # etc.) - unsafe = 4 - - # This value is only used internally - nil = 99 - - -class CastSet(object): - """A set of casting rules. - - There is at most one rule per target type. - """ - - def __init__(self): - self._rels = {} - - def insert(self, to, rel): - old = self.get(to) - setrel = min(rel, old) - self._rels[to] = setrel - return old != setrel - - def items(self): - return self._rels.items() - - def get(self, item): - return self._rels.get(item, Conversion.nil) - - def __len__(self): - return len(self._rels) - - def __repr__(self): - body = ["{rel}({ty})".format(rel=rel, ty=ty) - for ty, rel in self._rels.items()] - return "{" + ', '.join(body) + "}" - - def __contains__(self, item): - return item in self._rels - - def __iter__(self): - return iter(self._rels.keys()) - - def __getitem__(self, item): - return self._rels[item] - - -class TypeGraph(object): - """A graph that maintains the casting relationship of all types. - - This simplifies the definition of casting rules by automatically - propagating the rules. - """ - - def __init__(self, callback=None): - """ - Args - ---- - - callback: callable or None - It is called for each new casting rule with - (from_type, to_type, castrel). 
- """ - assert callback is None or callable(callback) - self._forwards = defaultdict(CastSet) - self._backwards = defaultdict(set) - self._callback = callback - - def get(self, ty): - return self._forwards[ty] - - def propagate(self, a, b, baserel): - backset = self._backwards[a] - - # Forward propagate the relationship to all nodes that b leads to - for child in self._forwards[b]: - rel = max(baserel, self._forwards[b][child]) - if a != child: - if self._forwards[a].insert(child, rel): - self._callback(a, child, rel) - self._backwards[child].add(a) - - # Propagate the relationship from nodes that connects to a - for backnode in backset: - if backnode != child: - backrel = max(rel, self._forwards[backnode][a]) - if self._forwards[backnode].insert(child, backrel): - self._callback(backnode, child, backrel) - self._backwards[child].add(backnode) - - # Every node that leads to a connects to b - for child in self._backwards[a]: - rel = max(baserel, self._forwards[child][a]) - if b != child: - if self._forwards[child].insert(b, rel): - self._callback(child, b, rel) - self._backwards[b].add(child) - - def insert_rule(self, a, b, rel): - self._forwards[a].insert(b, rel) - self._callback(a, b, rel) - self._backwards[b].add(a) - self.propagate(a, b, rel) - - def promote(self, a, b): - self.insert_rule(a, b, Conversion.promote) - - def safe(self, a, b): - self.insert_rule(a, b, Conversion.safe) - - def unsafe(self, a, b): - self.insert_rule(a, b, Conversion.unsafe) - diff --git a/numba/numba/typeconv/rules.py b/numba/numba/typeconv/rules.py deleted file mode 100644 index 499cc592d..000000000 --- a/numba/numba/typeconv/rules.py +++ /dev/null @@ -1,59 +0,0 @@ -from __future__ import print_function, absolute_import -import itertools -from .typeconv import TypeManager, TypeCastingRules -from numba import types - - -default_type_manager = TypeManager() - - -def dump_number_rules(): - tm = default_type_manager - for a, b in itertools.product(types.number_domain, 
types.number_domain): - print(a, '->', b, tm.check_compatible(a, b)) - - -def _init_casting_rules(tm): - tcr = TypeCastingRules(tm) - tcr.safe_unsafe(types.boolean, types.int8) - tcr.safe_unsafe(types.boolean, types.uint8) - - tcr.promote_unsafe(types.int8, types.int16) - tcr.promote_unsafe(types.uint8, types.uint16) - - tcr.promote_unsafe(types.int16, types.int32) - tcr.promote_unsafe(types.uint16, types.uint32) - - tcr.promote_unsafe(types.int32, types.int64) - tcr.promote_unsafe(types.uint32, types.uint64) - - tcr.safe_unsafe(types.uint8, types.int16) - tcr.safe_unsafe(types.uint16, types.int32) - tcr.safe_unsafe(types.uint32, types.int64) - - tcr.safe_unsafe(types.int16, types.float32) - tcr.safe_unsafe(types.int32, types.float64) - - tcr.unsafe_unsafe(types.int32, types.float32) - # XXX this is inconsistent with the above; but we want to prefer - # float64 over int64 when typing a heterogeneous operation, - # e.g. `float64 + int64`. Perhaps we need more granularity in the - # conversion kinds. 
- tcr.safe_unsafe(types.int64, types.float64) - tcr.safe_unsafe(types.uint64, types.float64) - - tcr.promote_unsafe(types.float32, types.float64) - - tcr.safe(types.float32, types.complex64) - tcr.safe(types.float64, types.complex128) - - tcr.promote_unsafe(types.complex64, types.complex128) - - # Allow integers to cast ot void* - tcr.unsafe_unsafe(types.uintp, types.voidptr) - - return tcr - - -default_casting_rules = _init_casting_rules(default_type_manager) - diff --git a/numba/numba/typeconv/test.cpp b/numba/numba/typeconv/test.cpp deleted file mode 100644 index 6ffe4e5b4..000000000 --- a/numba/numba/typeconv/test.cpp +++ /dev/null @@ -1,39 +0,0 @@ -#include -#include "typeconv.hpp" - -using std::cout; -const char EOL = '\n'; - -int main() { - TypeManager tm; - Type t_int32 = tm.get("int32"); - Type t_float = tm.get("float"); - Type t_int64 = tm.get("int64"); - - tm.addConversion(t_int32, t_float); - tm.addConversion(t_float, t_int32); - tm.addConversion(t_float, t_int64); - tm.addPromotion(t_int32, t_int64); - - cout << "int32 -> float " - << TCCString(tm.isCompatible(tm.get("int32"), tm.get("float"))) - << EOL; - cout << "int32 -> int64 " - << TCCString(tm.isCompatible(tm.get("int32"), tm.get("int64"))) - << EOL; - - Type sig[] = {t_int32, t_float}; - Type ovsigs[] = { - t_float, t_float, - t_int64, t_int64, - t_int32, t_float, - }; - - int sel = tm.selectOverload(sig, ovsigs, 2, 3); - - cout << "Selected " << sel << '\n'; - - - - return 0; -} diff --git a/numba/numba/typeconv/typeconv.cpp b/numba/numba/typeconv/typeconv.cpp deleted file mode 100644 index e103b2acb..000000000 --- a/numba/numba/typeconv/typeconv.cpp +++ /dev/null @@ -1,211 +0,0 @@ -#include -#include -#include -#include - -#include "typeconv.hpp" - - -// ------ TypeManager ------ - -TCCMap::TCCMap() - : nb_records(0) -{ -} - -unsigned int TCCMap::hash(const TypePair &key) const { - const int mult = 1000003; - int x = 0x345678; - x = (x ^ key.first) * mult; - x = (x ^ key.second); - return x; 
-} - -void TCCMap::insert(const TypePair &key, TypeCompatibleCode val) { - unsigned int i = hash(key) & (TCCMAP_SIZE - 1); - TCCMapBin &bin = records[i]; - TCCRecord data; - data.key = key; - data.val = val; - for (unsigned int j = 0; j < bin.size(); ++j) { - if (bin[j].key == key) { - bin[j].val = val; - return; - } - } - bin.push_back(data); - nb_records++; -} - -TypeCompatibleCode TCCMap::find(const TypePair &key) const { - unsigned int i = hash(key) & (TCCMAP_SIZE - 1); - const TCCMapBin &bin = records[i]; - for (unsigned int j = 0; j < bin.size(); ++j) { - if (bin[j].key == key) { - return bin[j].val; - } - } - return TCC_FALSE; -} - -// ----- Ratings ----- -Rating::Rating() : promote(0), safe_convert(0), unsafe_convert(0) { } - -inline bool Rating::operator < (const Rating &other) const { - if (unsafe_convert < other.unsafe_convert) - return true; - else if (unsafe_convert > other.unsafe_convert) - return false; - if (safe_convert < other.safe_convert) - return true; - else if (safe_convert > other.safe_convert) - return false; - return (promote < other.promote); -} - -inline bool Rating::operator == (const Rating &other) const { - return promote == other.promote && safe_convert == other.safe_convert && - unsafe_convert == other.unsafe_convert; -} - -// ------ TypeManager ------ - -bool TypeManager::canPromote(Type from, Type to) const { - return isCompatible(from, to) == TCC_PROMOTE; -} - -bool TypeManager::canSafeConvert(Type from, Type to) const { - return isCompatible(from, to) == TCC_CONVERT_SAFE; -} - -bool TypeManager::canUnsafeConvert(Type from, Type to) const { - return isCompatible(from, to) == TCC_CONVERT_UNSAFE; -} - -void TypeManager::addPromotion(Type from, Type to) { - return addCompatibility(from, to, TCC_PROMOTE); -} - -void TypeManager::addUnsafeConversion(Type from, Type to) { - return addCompatibility(from, to, TCC_CONVERT_UNSAFE); -} - -void TypeManager::addSafeConversion(Type from, Type to) { - return addCompatibility(from, to, 
TCC_CONVERT_SAFE); -} - -void TypeManager::addCompatibility(Type from, Type to, TypeCompatibleCode tcc) { - TypePair pair(from, to); - tccmap.insert(pair, tcc); -} - -TypeCompatibleCode TypeManager::isCompatible(Type from, Type to) const { - if (from == to) - return TCC_EXACT; - TypePair pair(from, to); - return tccmap.find(pair); -} - - -int TypeManager::selectOverload(const Type sig[], const Type ovsigs[], - int &selected, - int sigsz, int ovct, bool allow_unsafe) const { - int count; - if (ovct <= 16) { - Rating ratings[16]; - int candidates[16]; - count = _selectOverload(sig, ovsigs, selected, sigsz, ovct, - allow_unsafe, ratings, candidates); - } - else { - Rating *ratings = new Rating[ovct]; - int *candidates = new int[ovct]; - count = _selectOverload(sig, ovsigs, selected, sigsz, ovct, - allow_unsafe, ratings, candidates); - delete [] ratings; - delete [] candidates; - } - return count; -} - -int TypeManager::_selectOverload(const Type sig[], const Type ovsigs[], - int &selected, int sigsz, int ovct, - bool allow_unsafe, Rating ratings[], - int candidates[]) const { - // Generate rating table - // Use a penalize scheme. 
- int nb_candidates = 0; - - for (int i = 0; i < ovct; ++i) { - const Type *entry = &ovsigs[i * sigsz]; - Rating rate; - - for (int j = 0; j < sigsz; ++j) { - TypeCompatibleCode tcc = isCompatible(sig[j], entry[j]); - if (tcc == TCC_FALSE || - (tcc == TCC_CONVERT_UNSAFE && !allow_unsafe)) { - // stop the loop early - goto _incompatible; - } - switch(tcc) { - case TCC_PROMOTE: - rate.promote += 1; - break; - case TCC_CONVERT_SAFE: - rate.safe_convert += 1; - break; - case TCC_CONVERT_UNSAFE: - rate.unsafe_convert += 1; - break; - default: - break; - } - } - ratings[nb_candidates] = rate; - candidates[nb_candidates] = i; - nb_candidates++; - _incompatible: - ; - } - - // Bail if no match - if (nb_candidates == 0) - return 0; - - // Find lowest rating - Rating best = ratings[0]; - selected = candidates[0]; - - int matchcount = 1; - for (int i = 1; i < nb_candidates; ++i) { - if (ratings[i] < best) { - best = ratings[i]; - selected = candidates[i]; - matchcount = 1; - } - else if (ratings[i] == best) { - matchcount += 1; - } - } - return matchcount; -} - -// ----- utils ----- - -const char* TCCString(TypeCompatibleCode tcc) { - switch(tcc) { - case TCC_EXACT: - return "exact"; - case TCC_SUBTYPE: - return "subtype"; - case TCC_PROMOTE: - return "promote"; - case TCC_CONVERT_SAFE: - return "safe_convert"; - case TCC_CONVERT_UNSAFE: - return "unsafe_convert"; - default: - return "false"; - } -} - diff --git a/numba/numba/typeconv/typeconv.hpp b/numba/numba/typeconv/typeconv.hpp deleted file mode 100644 index d86106472..000000000 --- a/numba/numba/typeconv/typeconv.hpp +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef NUMBA_TYPECONV_HPP_ -#define NUMBA_TYPECONV_HPP_ -#include -#include - - -typedef int Type; - -enum TypeCompatibleCode{ - // No match - TCC_FALSE = 0, - // Exact match - TCC_EXACT, - // Subtype is UNUSED - TCC_SUBTYPE, - // Promotion with no precision loss - TCC_PROMOTE, - // Conversion with no precision loss - // e.g. 
int32 to double - TCC_CONVERT_SAFE, - // Conversion with precision loss - // e.g. int64 to double (53 bits precision) - TCC_CONVERT_UNSAFE, -}; - -typedef std::pair TypePair; - -struct TCCRecord { - TypePair key; - TypeCompatibleCode val; -}; - -typedef std::vector TCCMapBin; - -class TCCMap { -public: - TCCMap(); - - unsigned int hash(const TypePair &key) const; - void insert(const TypePair &key, TypeCompatibleCode val); - TypeCompatibleCode find(const TypePair &key) const; -private: - /* Must be a power of two */ - static const int TCCMAP_SIZE = 512; - TCCMapBin records[TCCMAP_SIZE]; - int nb_records; -}; - -struct Rating { - unsigned int promote; - unsigned int safe_convert; - unsigned int unsafe_convert; - - Rating(); - - bool operator < (const Rating &other) const; - bool operator == (const Rating &other) const; -}; - - -class TypeManager{ -public: - bool canPromote(Type from, Type to) const; - bool canUnsafeConvert(Type from, Type to) const; - bool canSafeConvert(Type from, Type to) const; - - void addPromotion(Type from, Type to); - void addUnsafeConversion(Type from, Type to); - void addSafeConversion(Type from, Type to); - void addCompatibility(Type from, Type to, TypeCompatibleCode by); - - TypeCompatibleCode isCompatible(Type from, Type to) const; - - /** - Output stored in selected. 
- Returns - Number of matches - */ - int selectOverload(const Type sig[], const Type ovsigs[], int &selected, - int sigsz, int ovct, bool allow_unsafe) const; - -private: - int _selectOverload(const Type sig[], const Type ovsigs[], int &selected, - int sigsz, int ovct, bool allow_unsafe, - Rating ratings[], int candidates[]) const; - - TCCMap tccmap; -}; - - -const char* TCCString(TypeCompatibleCode tcc); - - -#endif // NUMBA_TYPECONV_HPP_ diff --git a/numba/numba/typeconv/typeconv.py b/numba/numba/typeconv/typeconv.py deleted file mode 100644 index b22bcb505..000000000 --- a/numba/numba/typeconv/typeconv.py +++ /dev/null @@ -1,115 +0,0 @@ -from __future__ import print_function, absolute_import - -from . import _typeconv, castgraph, Conversion -from .. import types - - -class TypeManager(object): - - # The character codes used by the C/C++ API (_typeconv.cpp) - _conversion_codes = { - Conversion.safe: ord("s"), - Conversion.unsafe: ord("u"), - Conversion.promote: ord("p"), - } - - def __init__(self): - self._ptr = _typeconv.new_type_manager() - self._types = set() - - def select_overload(self, sig, overloads, allow_unsafe): - sig = [t._code for t in sig] - overloads = [[t._code for t in s] for s in overloads] - return _typeconv.select_overload(self._ptr, sig, overloads, - allow_unsafe) - - def check_compatible(self, fromty, toty): - if not isinstance(toty, types.Type): - raise ValueError("Specified type '%s' (%s) is not a Numba type" % - (toty, type(toty))) - name = _typeconv.check_compatible(self._ptr, fromty._code, toty._code) - conv = Conversion[name] if name is not None else None - assert conv is not Conversion.nil - return conv - - def set_compatible(self, fromty, toty, by): - code = self._conversion_codes[by] - _typeconv.set_compatible(self._ptr, fromty._code, toty._code, code) - # Ensure the types don't die, otherwise they may be recreated with - # other type codes and pollute the hash table. 
- self._types.add(fromty) - self._types.add(toty) - - def set_promote(self, fromty, toty): - self.set_compatible(fromty, toty, Conversion.promote) - - def set_unsafe_convert(self, fromty, toty): - self.set_compatible(fromty, toty, Conversion.unsafe) - - def set_safe_convert(self, fromty, toty): - self.set_compatible(fromty, toty, Conversion.safe) - - def get_pointer(self): - return _typeconv.get_pointer(self._ptr) - - -class TypeCastingRules(object): - """ - A helper for establishing type casting rules. - """ - def __init__(self, tm): - self._tm = tm - self._tg = castgraph.TypeGraph(self._cb_update) - - def promote(self, a, b): - """ - Set `a` can promote to `b` - """ - self._tg.promote(a, b) - - def unsafe(self, a, b): - """ - Set `a` can unsafe convert to `b` - """ - self._tg.unsafe(a, b) - - def safe(self, a, b): - """ - Set `a` can safe convert to `b` - """ - self._tg.safe(a, b) - - def promote_unsafe(self, a, b): - """ - Set `a` can promote to `b` and `b` can unsafe convert to `a` - """ - self.promote(a, b) - self.unsafe(b, a) - - def safe_unsafe(self, a, b): - """ - Set `a` can safe convert to `b` and `b` can unsafe convert to `a` - """ - self._tg.safe(a, b) - self._tg.unsafe(b, a) - - def unsafe_unsafe(self, a, b): - """ - Set `a` can unsafe convert to `b` and `b` can unsafe convert to `a` - """ - self._tg.unsafe(a, b) - self._tg.unsafe(b, a) - - def _cb_update(self, a, b, rel): - """ - Callback for updating. - """ - if rel == Conversion.promote: - self._tm.set_promote(a, b) - elif rel == Conversion.safe: - self._tm.set_safe_convert(a, b) - elif rel == Conversion.unsafe: - self._tm.set_unsafe_convert(a, b) - else: - raise AssertionError(rel) - diff --git a/numba/numba/typeinfer.py b/numba/numba/typeinfer.py deleted file mode 100644 index 9c2664d39..000000000 --- a/numba/numba/typeinfer.py +++ /dev/null @@ -1,1325 +0,0 @@ -""" -Type inference base on CPA. -The algorithm guarantees monotonic growth of type-sets for each variable. - -Steps: - 1. 
seed initial types - 2. build constraints - 3. propagate constraints - 4. unify types - -Constraint propagation is precise and does not regret (no backtracing). -Constraints push types forward following the dataflow. -""" - -from __future__ import print_function, division, absolute_import - -import contextlib -import itertools -from pprint import pprint -import traceback -from collections import OrderedDict - -from numba import ir, types, utils, config, typing -from .errors import (TypingError, UntypedAttributeError, new_error_context, - termcolor, UnsupportedError) -from .funcdesc import qualifying_prefix - - -class NOTSET: pass - -# terminal color markup -_termcolor = termcolor() - -class TypeVar(object): - def __init__(self, context, var): - self.context = context - self.var = var - self.type = None - self.locked = False - # Stores source location of first definition - self.define_loc = None - # Qualifiers - self.literal_value = NOTSET - - def add_type(self, tp, loc): - assert isinstance(tp, types.Type), type(tp) - - if self.locked: - if tp != self.type: - if self.context.can_convert(tp, self.type) is None: - msg = ("No conversion from %s to %s for '%s', " - "defined at %s") - raise TypingError(msg % (tp, self.type, self.var, - self.define_loc), - loc=loc) - else: - if self.type is not None: - unified = self.context.unify_pairs(self.type, tp) - if unified is None: - msg = "Cannot unify %s and %s for '%s', defined at %s" - raise TypingError(msg % (self.type, tp, self.var, - self.define_loc), - loc=self.define_loc) - else: - # First time definition - unified = tp - self.define_loc = loc - - self.type = unified - - return self.type - - def lock(self, tp, loc, literal_value=NOTSET): - assert isinstance(tp, types.Type), type(tp) - assert not self.locked - - # If there is already a type, ensure we can convert it to the - # locked type. 
- if (self.type is not None and - self.context.can_convert(self.type, tp) is None): - raise TypingError("No conversion from %s to %s for " - "'%s'" % (tp, self.type, self.var), loc=loc) - - self.type = tp - self.locked = True - if self.define_loc is None: - self.define_loc = loc - self.literal_value = literal_value - - def union(self, other, loc): - if other.type is not None: - self.add_type(other.type, loc=loc) - - return self.type - - def __repr__(self): - return '%s := %s' % (self.var, self.type or "") - - @property - def defined(self): - return self.type is not None - - def get(self): - return (self.type,) if self.type is not None else () - - def getone(self, get_literals=False): - if self.type is None: - raise TypingError("Undecided type {}".format(self)) - if self.literal_value is not NOTSET and get_literals: - return types.Const(self.literal_value) - return self.type - - def __len__(self): - return 1 if self.type is not None else 0 - - -class ConstraintNetwork(object): - """ - TODO: It is possible to optimize constraint propagation to consider only - dirty type variables. - """ - - def __init__(self): - self.constraints = [] - - def append(self, constraint): - self.constraints.append(constraint) - - def propagate(self, typeinfer): - """ - Execute all constraints. Errors are caught and returned as a list. - This allows progressing even though some constraints may fail - due to lack of information - (e.g. imprecise types such as List(undefined)). 
- """ - errors = [] - for constraint in self.constraints: - loc = constraint.loc - with typeinfer.warnings.catch_warnings(filename=loc.filename, - lineno=loc.line): - try: - constraint(typeinfer) - except TypingError as e: - e = TypingError(str(e), - loc=constraint.loc, - highlighting=False) - errors.append(e) - except Exception: - msg = "Internal error at {con}:\n{sep}\n{err}{sep}\n" - e = TypingError(msg.format(con=constraint, - err=traceback.format_exc(), - sep='--%<' + '-' * 76), - loc=constraint.loc, - highlighting=False) - errors.append(e) - return errors - - -class Propagate(object): - """ - A simple constraint for direct propagation of types for assignments. - """ - - def __init__(self, dst, src, loc): - self.dst = dst - self.src = src - self.loc = loc - - def __call__(self, typeinfer): - with new_error_context("typing of assignment at {0}", self.loc, - loc=self.loc): - typeinfer.copy_type(self.src, self.dst, loc=self.loc) - # If `dst` is refined, notify us - typeinfer.refine_map[self.dst] = self - - def refine(self, typeinfer, target_type): - # Do not back-propagate to locked variables (e.g. 
constants) - assert target_type.is_precise() - typeinfer.add_type(self.src, target_type, unless_locked=True, - loc=self.loc) - - -class ArgConstraint(object): - - def __init__(self, dst, src, loc): - self.dst = dst - self.src = src - self.loc = loc - - def __call__(self, typeinfer): - with new_error_context("typing of argument at {0}", self.loc): - typevars = typeinfer.typevars - src = typevars[self.src] - if not src.defined: - return - ty = src.getone() - if isinstance(ty, types.Omitted): - ty = typeinfer.context.resolve_value_type(ty.value) - assert ty.is_precise() - typeinfer.add_type(self.dst, ty, loc=self.loc) - - -class BuildTupleConstraint(object): - def __init__(self, target, items, loc): - self.target = target - self.items = items - self.loc = loc - - def __call__(self, typeinfer): - with new_error_context("typing of tuple at {0}", self.loc): - typevars = typeinfer.typevars - tsets = [typevars[i.name].get() for i in self.items] - for vals in itertools.product(*tsets): - if vals and all(vals[0] == v for v in vals): - tup = types.UniTuple(dtype=vals[0], count=len(vals)) - else: - # empty tuples fall here as well - tup = types.Tuple(vals) - assert tup.is_precise() - typeinfer.add_type(self.target, tup, loc=self.loc) - - -class _BuildContainerConstraint(object): - - def __init__(self, target, items, loc): - self.target = target - self.items = items - self.loc = loc - - def __call__(self, typeinfer): - with new_error_context("typing of list at {0}", self.loc): - typevars = typeinfer.typevars - tsets = [typevars[i.name].get() for i in self.items] - if not tsets: - typeinfer.add_type(self.target, - self.container_type(types.undefined), - loc=self.loc) - else: - for typs in itertools.product(*tsets): - unified = typeinfer.context.unify_types(*typs) - if unified is not None: - typeinfer.add_type(self.target, - self.container_type(unified), - loc=self.loc) - - -class BuildListConstraint(_BuildContainerConstraint): - container_type = types.List - - -class 
BuildSetConstraint(_BuildContainerConstraint): - container_type = types.Set - - -class ExhaustIterConstraint(object): - def __init__(self, target, count, iterator, loc): - self.target = target - self.count = count - self.iterator = iterator - self.loc = loc - - def __call__(self, typeinfer): - with new_error_context("typing of exhaust iter at {0}", self.loc): - typevars = typeinfer.typevars - for tp in typevars[self.iterator.name].get(): - # unpack optional - tp = tp.type if isinstance(tp, types.Optional) else tp - if isinstance(tp, types.BaseTuple): - if len(tp) == self.count: - assert tp.is_precise() - typeinfer.add_type(self.target, tp, loc=self.loc) - break - else: - raise ValueError("wrong tuple length for %r: " - "expected %d, got %d" - % (self.iterator.name, self.count, - len(tp))) - elif isinstance(tp, types.IterableType): - tup = types.UniTuple(dtype=tp.iterator_type.yield_type, - count=self.count) - assert tup.is_precise() - typeinfer.add_type(self.target, tup, loc=self.loc) - break - else: - raise TypingError("failed to unpack {}".format(tp), - loc=self.loc) - - -class PairFirstConstraint(object): - def __init__(self, target, pair, loc): - self.target = target - self.pair = pair - self.loc = loc - - def __call__(self, typeinfer): - with new_error_context("typing of pair-first at {0}", self.loc): - typevars = typeinfer.typevars - for tp in typevars[self.pair.name].get(): - if not isinstance(tp, types.Pair): - # XXX is this an error? - continue - assert tp.first_type.is_precise() - typeinfer.add_type(self.target, tp.first_type, loc=self.loc) - - -class PairSecondConstraint(object): - def __init__(self, target, pair, loc): - self.target = target - self.pair = pair - self.loc = loc - - def __call__(self, typeinfer): - with new_error_context("typing of pair-second at {0}", self.loc): - typevars = typeinfer.typevars - for tp in typevars[self.pair.name].get(): - if not isinstance(tp, types.Pair): - # XXX is this an error? 
- continue - assert tp.second_type.is_precise() - typeinfer.add_type(self.target, tp.second_type, loc=self.loc) - - -class StaticGetItemConstraint(object): - def __init__(self, target, value, index, index_var, loc): - self.target = target - self.value = value - self.index = index - if index_var is not None: - self.fallback = IntrinsicCallConstraint(target, 'getitem', - (value, index_var), {}, - None, loc) - else: - self.fallback = None - self.loc = loc - - def __call__(self, typeinfer): - with new_error_context("typing of static-get-item at {0}", self.loc): - typevars = typeinfer.typevars - for ty in typevars[self.value.name].get(): - itemty = typeinfer.context.resolve_static_getitem( - value=ty, index=self.index) - if itemty is not None: - assert itemty.is_precise() - typeinfer.add_type(self.target, itemty, loc=self.loc) - elif self.fallback is not None: - self.fallback(typeinfer) - - def get_call_signature(self): - # The signature is only needed for the fallback case in lowering - return self.fallback and self.fallback.get_call_signature() - - -def fold_arg_vars(typevars, args, vararg, kws, get_literals=False): - """ - Fold and resolve the argument variables of a function call. - """ - # Fetch all argument types, bail if any is unknown - n_pos_args = len(args) - kwds = [kw for (kw, var) in kws] - argtypes = [typevars[a.name] for a in args] - argtypes += [typevars[var.name] for (kw, var) in kws] - if vararg is not None: - argtypes.append(typevars[vararg.name]) - - if not all(a.defined for a in argtypes): - return - - args = tuple(a.getone(get_literals=get_literals) for a in argtypes) - - pos_args = args[:n_pos_args] - if vararg is not None: - errmsg = "*args in function call should be a tuple, got %s" - # Handle constant literal used for `*args` - if isinstance(args[-1], types.Const): - const_val = args[-1].value - # Is the constant value a tuple? 
- if not isinstance(const_val, tuple): - raise TypeError(errmsg % (args[-1],)) - # Append the elements in the const tuple to the positional args - pos_args += args[-1].value - # Handle non-constant - elif not isinstance(args[-1], types.BaseTuple): - # Unsuitable for *args - # (Python is more lenient and accepts all iterables) - raise TypeError(errmsg % (args[-1],)) - else: - # Append the elements in the tuple to the positional args - pos_args += args[-1].types - # Drop the last arg - args = args[:-1] - kw_args = dict(zip(kwds, args[n_pos_args:])) - return pos_args, kw_args - - -def _is_array_not_precise(arrty): - """Check type is array and it is not precise - """ - return isinstance(arrty, types.Array) and not arrty.is_precise() - - -class CallConstraint(object): - """Constraint for calling functions. - Perform case analysis foreach combinations of argument types. - """ - signature = None - - def __init__(self, target, func, args, kws, vararg, loc): - self.target = target - self.func = func - self.args = args - self.kws = kws or {} - self.vararg = vararg - self.loc = loc - - def __call__(self, typeinfer): - msg = "typing of call at {0}\n".format(self.loc) - with new_error_context(msg): - typevars = typeinfer.typevars - with new_error_context( - "resolving caller type: {}".format(self.func)): - fnty = typevars[self.func].getone() - with new_error_context("resolving callee type: {0}", fnty): - self.resolve(typeinfer, typevars, fnty) - - def resolve(self, typeinfer, typevars, fnty): - assert fnty - context = typeinfer.context - - r = fold_arg_vars(typevars, self.args, self.vararg, self.kws) - if r is None: - # Cannot resolve call type until all argument types are known - return - pos_args, kw_args = r - - # Check argument to be precise - for a in itertools.chain(pos_args, kw_args.values()): - if not a.is_precise(): - # Getitem on non-precise array is allowed to - # support array-comprehension - if fnty == 'getitem' and isinstance(pos_args[0], types.Array): - pass - # 
Otherwise, don't compute type yet - else: - return - - literals = fold_arg_vars(typevars, self.args, self.vararg, self.kws, - get_literals=True) - # Resolve call type - sig = typeinfer.resolve_call(fnty, pos_args, kw_args, - literals=literals) - if sig is None: - # Note: duplicated error checking. - # See types.BaseFunction.get_call_type - # Arguments are invalid => explain why - headtemp = "Invalid use of {0} with parameters ({1})" - args = [str(a) for a in pos_args] - args += ["%s=%s" % (k, v) for k, v in sorted(kw_args.items())] - head = headtemp.format(fnty, ', '.join(map(str, args))) - desc = context.explain_function_type(fnty) - msg = '\n'.join([head, desc]) - raise TypingError(msg) - - typeinfer.add_type(self.target, sig.return_type, loc=self.loc) - - # If the function is a bound function and its receiver type - # was refined, propagate it. - if (isinstance(fnty, types.BoundFunction) - and sig.recvr is not None - and sig.recvr != fnty.this): - refined_this = context.unify_pairs(sig.recvr, fnty.this) - if refined_this is not None and refined_this.is_precise(): - refined_fnty = fnty.copy(this=refined_this) - typeinfer.propagate_refined_type(self.func, refined_fnty) - - # If the return type is imprecise but can be unified with the - # target variable's inferred type, use the latter. - # Useful for code such as:: - # s = set() - # s.add(1) - # (the set() call must be typed as int64(), not undefined()) - if not sig.return_type.is_precise(): - target = typevars[self.target] - if target.defined: - targetty = target.getone() - if context.unify_pairs(targetty, sig.return_type) == targetty: - sig = sig.replace(return_type=targetty) - - self.signature = sig - - target_type = typevars[self.target].getone() - if (isinstance(target_type, types.Array) - and isinstance(sig.return_type.dtype, types.Undefined)): - typeinfer.refine_map[self.target] = self - - def refine(self, typeinfer, updated_type): - # Is getitem? 
- if self.func == 'getitem': - aryty = typeinfer.typevars[self.args[0].name].getone() - # is array not precise? - if _is_array_not_precise(aryty): - # allow refinement of dtype - assert updated_type.is_precise() - newtype = aryty.copy(dtype=updated_type.dtype) - typeinfer.add_type(self.args[0].name, newtype, loc=self.loc) - - def get_call_signature(self): - return self.signature - - -class IntrinsicCallConstraint(CallConstraint): - def __call__(self, typeinfer): - with new_error_context("typing of intrinsic-call at {0}", self.loc): - self.resolve(typeinfer, typeinfer.typevars, fnty=self.func) - - -class GetAttrConstraint(object): - def __init__(self, target, attr, value, loc, inst): - self.target = target - self.attr = attr - self.value = value - self.loc = loc - self.inst = inst - - def __call__(self, typeinfer): - with new_error_context("typing of get attribute at {0}", self.loc): - typevars = typeinfer.typevars - valtys = typevars[self.value.name].get() - for ty in valtys: - attrty = typeinfer.context.resolve_getattr(ty, self.attr) - if attrty is None: - raise UntypedAttributeError(ty, self.attr, - loc=self.inst.loc) - else: - assert attrty.is_precise() - typeinfer.add_type(self.target, attrty, loc=self.loc) - typeinfer.refine_map[self.target] = self - - def refine(self, typeinfer, target_type): - if isinstance(target_type, types.BoundFunction): - recvr = target_type.this - assert recvr.is_precise() - typeinfer.add_type(self.value.name, recvr, loc=self.loc) - source_constraint = typeinfer.refine_map.get(self.value.name) - if source_constraint is not None: - source_constraint.refine(typeinfer, recvr) - - def __repr__(self): - return 'resolving type of attribute "{attr}" of "{value}"'.format( - value=self.value, attr=self.attr) - - -class SetItemConstraint(object): - def __init__(self, target, index, value, loc): - self.target = target - self.index = index - self.value = value - self.loc = loc - - def __call__(self, typeinfer): - with new_error_context("typing of 
setitem at {0}", self.loc): - typevars = typeinfer.typevars - if not all(typevars[var.name].defined - for var in (self.target, self.index, self.value)): - return - targetty = typevars[self.target.name].getone() - idxty = typevars[self.index.name].getone() - valty = typevars[self.value.name].getone() - - sig = typeinfer.context.resolve_setitem(targetty, idxty, valty) - if sig is None: - raise TypingError("Cannot resolve setitem: %s[%s] = %s" % - (targetty, idxty, valty), loc=self.loc) - - # For array setitem, refine imprecise array dtype - if _is_array_not_precise(targetty): - assert sig.args[0].is_precise() - typeinfer.add_type(self.target.name, sig.args[0], loc=self.loc) - - self.signature = sig - - def get_call_signature(self): - return self.signature - - -class StaticSetItemConstraint(object): - def __init__(self, target, index, index_var, value, loc): - self.target = target - self.index = index - self.index_var = index_var - self.value = value - self.loc = loc - - def __call__(self, typeinfer): - with new_error_context("typing of staticsetitem at {0}", self.loc): - typevars = typeinfer.typevars - if not all(typevars[var.name].defined - for var in (self.target, self.index_var, self.value)): - return - targetty = typevars[self.target.name].getone() - idxty = typevars[self.index_var.name].getone() - valty = typevars[self.value.name].getone() - - sig = typeinfer.context.resolve_static_setitem(targetty, - self.index, valty) - if sig is None: - sig = typeinfer.context.resolve_setitem(targetty, idxty, valty) - if sig is None: - raise TypingError("Cannot resolve setitem: %s[%r] = %s" % - (targetty, self.index, valty), loc=self.loc) - self.signature = sig - - def get_call_signature(self): - return self.signature - - -class DelItemConstraint(object): - def __init__(self, target, index, loc): - self.target = target - self.index = index - self.loc = loc - - def __call__(self, typeinfer): - with new_error_context("typing of delitem at {0}", self.loc): - typevars = 
typeinfer.typevars - if not all(typevars[var.name].defined - for var in (self.target, self.index)): - return - targetty = typevars[self.target.name].getone() - idxty = typevars[self.index.name].getone() - - sig = typeinfer.context.resolve_delitem(targetty, idxty) - if sig is None: - raise TypingError("Cannot resolve delitem: %s[%s]" % - (targetty, idxty), loc=self.loc) - self.signature = sig - - def get_call_signature(self): - return self.signature - - -class SetAttrConstraint(object): - def __init__(self, target, attr, value, loc): - self.target = target - self.attr = attr - self.value = value - self.loc = loc - - def __call__(self, typeinfer): - with new_error_context("typing of set attribute {0!r} at {1}", - self.attr, self.loc): - typevars = typeinfer.typevars - if not all(typevars[var.name].defined - for var in (self.target, self.value)): - return - targetty = typevars[self.target.name].getone() - valty = typevars[self.value.name].getone() - sig = typeinfer.context.resolve_setattr(targetty, self.attr, - valty) - if sig is None: - raise TypingError("Cannot resolve setattr: (%s).%s = %s" % - (targetty, self.attr, valty), - loc=self.loc) - self.signature = sig - - def get_call_signature(self): - return self.signature - - -class PrintConstraint(object): - def __init__(self, args, vararg, loc): - self.args = args - self.vararg = vararg - self.loc = loc - - def __call__(self, typeinfer): - typevars = typeinfer.typevars - - r = fold_arg_vars(typevars, self.args, self.vararg, {}) - if r is None: - # Cannot resolve call type until all argument types are known - return - pos_args, kw_args = r - - fnty = typeinfer.context.resolve_value_type(print) - assert fnty is not None - sig = typeinfer.resolve_call(fnty, pos_args, kw_args) - self.signature = sig - - def get_call_signature(self): - return self.signature - - -class TypeVarMap(dict): - def set_context(self, context): - self.context = context - - def __getitem__(self, name): - if name not in self: - self[name] = 
TypeVar(self.context, name) - return super(TypeVarMap, self).__getitem__(name) - - def __setitem__(self, name, value): - assert isinstance(name, str) - if name in self: - raise KeyError("Cannot redefine typevar %s" % name) - else: - super(TypeVarMap, self).__setitem__(name, value) - - -# A temporary mapping of {function name: dispatcher object} -_temporary_dispatcher_map = {} - - -@contextlib.contextmanager -def register_dispatcher(disp): - """ - Register a Dispatcher for inference while it is not yet stored - as global or closure variable (e.g. during execution of the @jit() - call). This allows resolution of recursive calls with eager - compilation. - """ - assert callable(disp) - assert callable(disp.py_func) - name = disp.py_func.__name__ - _temporary_dispatcher_map[name] = disp - try: - yield - finally: - del _temporary_dispatcher_map[name] - - -typeinfer_extensions = {} - - -class TypeInferer(object): - """ - Operates on block that shares the same ir.Scope. - """ - - def __init__(self, context, func_ir, warnings): - self.context = context - # sort based on label, ensure iteration order! 
- self.blocks = OrderedDict() - for k in sorted(func_ir.blocks.keys()): - self.blocks[k] = func_ir.blocks[k] - self.generator_info = func_ir.generator_info - self.func_id = func_ir.func_id - self.func_ir = func_ir - - self.typevars = TypeVarMap() - self.typevars.set_context(context) - self.constraints = ConstraintNetwork() - self.warnings = warnings - - # { index: mangled name } - self.arg_names = {} - # self.return_type = None - # Set of assumed immutable globals - self.assumed_immutables = set() - # Track all calls and associated constraints - self.calls = [] - # The inference result of the above calls - self.calltypes = utils.UniqueDict() - # Target var -> constraint with refine hook - self.refine_map = {} - - if config.DEBUG or config.DEBUG_TYPEINFER: - self.debug = TypeInferDebug(self) - else: - self.debug = NullDebug() - - self._skip_recursion = False - - def copy(self, skip_recursion=False): - clone = TypeInferer(self.context, self.func_ir, self.warnings) - clone.arg_names = self.arg_names.copy() - clone._skip_recursion = skip_recursion - - for k, v in self.typevars.items(): - if not v.locked and v.defined: - clone.typevars[k].add_type(v.getone(), loc=v.define_loc) - - return clone - - def _mangle_arg_name(self, name): - # Disambiguise argument name - return "arg.%s" % (name,) - - def _get_return_vars(self): - rets = [] - for blk in utils.itervalues(self.blocks): - inst = blk.terminator - if isinstance(inst, ir.Return): - rets.append(inst.value) - return rets - - def seed_argument(self, name, index, typ): - name = self._mangle_arg_name(name) - self.seed_type(name, typ) - self.arg_names[index] = name - - def seed_type(self, name, typ): - """All arguments should be seeded. - """ - self.lock_type(name, typ, loc=None) - - def seed_return(self, typ): - """Seeding of return value is optional. 
- """ - for var in self._get_return_vars(): - self.lock_type(var.name, typ, loc=None) - - def build_constraint(self): - for blk in utils.itervalues(self.blocks): - for inst in blk.body: - self.constrain_statement(inst) - - def return_types_from_partial(self): - """ - Resume type inference partially to deduce the return type. - Note: No side-effect to `self`. - - Returns the inferred return type or None if it cannot deduce the return - type. - """ - # Clone the typeinferer and disable typing recursive calls - cloned = self.copy(skip_recursion=True) - # rebuild constraint network - cloned.build_constraint() - # propagate without raising - cloned.propagate(raise_errors=False) - # get return types - rettypes = set() - for retvar in cloned._get_return_vars(): - if retvar.name in cloned.typevars: - typevar = cloned.typevars[retvar.name] - if typevar and typevar.defined: - rettypes.add(typevar.getone()) - if not rettypes: - return - # unify return types - return cloned._unify_return_types(rettypes) - - def propagate(self, raise_errors=True): - newtoken = self.get_state_token() - oldtoken = None - # Since the number of types are finite, the typesets will eventually - # stop growing. - - while newtoken != oldtoken: - self.debug.propagate_started() - oldtoken = newtoken - # Errors can appear when the type set is incomplete; only - # raise them when there is no progress anymore. 
- errors = self.constraints.propagate(self) - newtoken = self.get_state_token() - self.debug.propagate_finished() - if errors: - if raise_errors: - raise errors[0] - else: - return errors - - def add_type(self, var, tp, loc, unless_locked=False): - assert isinstance(var, str), type(var) - tv = self.typevars[var] - if unless_locked and tv.locked: - return - oldty = tv.type - unified = tv.add_type(tp, loc=loc) - if unified != oldty: - self.propagate_refined_type(var, unified) - - def add_calltype(self, inst, signature): - self.calltypes[inst] = signature - - def copy_type(self, src_var, dest_var, loc): - self.typevars[dest_var].union(self.typevars[src_var], loc=loc) - - def lock_type(self, var, tp, loc, literal_value=NOTSET): - tv = self.typevars[var] - tv.lock(tp, loc=loc, literal_value=literal_value) - - def propagate_refined_type(self, updated_var, updated_type): - source_constraint = self.refine_map.get(updated_var) - if source_constraint is not None: - source_constraint.refine(self, updated_type) - - def unify(self): - """ - Run the final unification pass over all inferred types, and - catch imprecise types. 
- """ - typdict = utils.UniqueDict() - - def check_var(name): - tv = self.typevars[name] - if not tv.defined: - offender = None - for block in self.func_ir.blocks.values(): - offender = block.find_variable_assignment(name) - if offender is not None: - break - val = getattr(offender, 'value', 'unknown operation') - loc = getattr(offender, 'loc', 'unknown location') - msg = "Undefined variable '%s', operation: %s, location: %s" - raise TypingError(msg % (var, val, loc), loc) - tp = tv.getone() - if not tp.is_precise(): - raise TypingError("Can't infer type of variable '%s': %s" % - (var, tp)) - typdict[var] = tp - - # For better error display, check first user-visible vars, then - # temporaries - temps = set(k for k in self.typevars if not k[0].isalpha()) - others = set(self.typevars) - temps - for var in sorted(others): - check_var(var) - for var in sorted(temps): - check_var(var) - - retty = self.get_return_type(typdict) - fntys = self.get_function_types(typdict) - if self.generator_info: - retty = self.get_generator_type(typdict, retty) - - self.debug.unify_finished(typdict, retty, fntys) - - return typdict, retty, fntys - - def get_generator_type(self, typdict, retty): - gi = self.generator_info - arg_types = [None] * len(self.arg_names) - for index, name in self.arg_names.items(): - arg_types[index] = typdict[name] - state_types = [typdict[var_name] for var_name in gi.state_vars] - yield_types = [typdict[y.inst.value.name] - for y in gi.get_yield_points()] - if not yield_types: - msg = "Cannot type generator: it does not yield any value" - raise TypingError(msg) - yield_type = self.context.unify_types(*yield_types) - if yield_type is None: - msg = "Cannot type generator: cannot unify yielded types %s" - raise TypingError(msg % (yield_types,)) - return types.Generator(self.func_id.func, yield_type, arg_types, - state_types, has_finalizer=True) - - def get_function_types(self, typemap): - """ - Fill and return the calltypes map. 
- """ - # XXX why can't this be done on the fly? - calltypes = self.calltypes - for call, constraint in self.calls: - calltypes[call] = constraint.get_call_signature() - return calltypes - - def _unify_return_types(self, rettypes): - if rettypes: - unified = self.context.unify_types(*rettypes) - if unified is None or not unified.is_precise(): - def check_type(atype): - lst = [] - for k, v in self.typevars.items(): - if atype == v.type: - lst.append(k) - returns = {} - for x in reversed(lst): - for block in self.func_ir.blocks.values(): - for instr in block.find_insts(ir.Return): - value = instr.value - if isinstance(value, ir.Var): - name = value.name - else: - pass - if x == name: - returns[x] = instr - break - - for name, offender in returns.items(): - loc = getattr(offender, 'loc', 'unknown location') - msg = ("Return of: IR name '%s', type '%s', " - "location: %s") - interped = msg % (name, atype, loc.strformat()) - return interped - - problem_str = [] - for xtype in rettypes: - problem_str.append(_termcolor.errmsg(check_type(xtype))) - - raise TypingError("Can't unify return type from the " - "following types: %s" - % ", ".join(sorted(map(str, rettypes))) + - "\n" + "\n".join(problem_str)) - return unified - else: - # Function without a successful return path - return types.none - - def get_return_type(self, typemap): - rettypes = set() - for var in self._get_return_vars(): - rettypes.add(typemap[var.name]) - return self._unify_return_types(rettypes) - - def get_state_token(self): - """The algorithm is monotonic. It can only grow or "refine" the - typevar map. 
- """ - return [tv.type for name, tv in sorted(self.typevars.items())] - - def constrain_statement(self, inst): - if isinstance(inst, ir.Assign): - self.typeof_assign(inst) - elif isinstance(inst, ir.SetItem): - self.typeof_setitem(inst) - elif isinstance(inst, ir.StaticSetItem): - self.typeof_static_setitem(inst) - elif isinstance(inst, ir.DelItem): - self.typeof_delitem(inst) - elif isinstance(inst, ir.SetAttr): - self.typeof_setattr(inst) - elif isinstance(inst, ir.Print): - self.typeof_print(inst) - elif isinstance(inst, (ir.Jump, ir.Branch, ir.Return, ir.Del)): - pass - elif isinstance(inst, ir.StaticRaise): - pass - elif type(inst) in typeinfer_extensions: - # let external calls handle stmt if type matches - f = typeinfer_extensions[type(inst)] - f(inst, self) - else: - msg = "Unsupported constraint encountered: %s" % inst - raise UnsupportedError(msg, loc=inst.loc) - - def typeof_setitem(self, inst): - constraint = SetItemConstraint(target=inst.target, index=inst.index, - value=inst.value, loc=inst.loc) - self.constraints.append(constraint) - self.calls.append((inst, constraint)) - - def typeof_static_setitem(self, inst): - constraint = StaticSetItemConstraint(target=inst.target, - index=inst.index, - index_var=inst.index_var, - value=inst.value, loc=inst.loc) - self.constraints.append(constraint) - self.calls.append((inst, constraint)) - - def typeof_delitem(self, inst): - constraint = DelItemConstraint(target=inst.target, index=inst.index, - loc=inst.loc) - self.constraints.append(constraint) - self.calls.append((inst, constraint)) - - def typeof_setattr(self, inst): - constraint = SetAttrConstraint(target=inst.target, attr=inst.attr, - value=inst.value, loc=inst.loc) - self.constraints.append(constraint) - self.calls.append((inst, constraint)) - - def typeof_print(self, inst): - constraint = PrintConstraint(args=inst.args, vararg=inst.vararg, - loc=inst.loc) - self.constraints.append(constraint) - self.calls.append((inst, constraint)) - - def 
typeof_assign(self, inst): - value = inst.value - if isinstance(value, ir.Const): - self.typeof_const(inst, inst.target, value.value) - elif isinstance(value, ir.Var): - self.constraints.append(Propagate(dst=inst.target.name, - src=value.name, loc=inst.loc)) - elif isinstance(value, (ir.Global, ir.FreeVar)): - self.typeof_global(inst, inst.target, value) - elif isinstance(value, ir.Arg): - self.typeof_arg(inst, inst.target, value) - elif isinstance(value, ir.Expr): - self.typeof_expr(inst, inst.target, value) - elif isinstance(value, ir.Yield): - self.typeof_yield(inst, inst.target, value) - else: - msg = ("Unsupported assignment encountered: %s %s" % - (type(value), str(value))) - raise UnsupportedError(msg, loc=inst.loc) - - def resolve_value_type(self, inst, val): - """ - Resolve the type of a simple Python value, such as can be - represented by literals. - """ - try: - return self.context.resolve_value_type(val) - except ValueError as e: - msg = str(e) - raise TypingError(msg, loc=inst.loc) - - def typeof_arg(self, inst, target, arg): - src_name = self._mangle_arg_name(arg.name) - self.constraints.append(ArgConstraint(dst=target.name, - src=src_name, - loc=inst.loc)) - - def typeof_const(self, inst, target, const): - ty = self.resolve_value_type(inst, const) - # Special case string constant as Const type - if ty == types.string: - ty = types.Const(value=const) - self.lock_type(target.name, ty, loc=inst.loc, - literal_value=const) - - def typeof_yield(self, inst, target, yield_): - # Sending values into generators isn't supported. - self.add_type(target.name, types.none, loc=inst.loc) - - def sentry_modified_builtin(self, inst, gvar): - """ - Ensure that builtins are not modified. 
- """ - if (gvar.name in ('range', 'xrange') and - gvar.value not in utils.RANGE_ITER_OBJECTS): - bad = True - elif gvar.name == 'slice' and gvar.value is not slice: - bad = True - elif gvar.name == 'len' and gvar.value is not len: - bad = True - else: - bad = False - - if bad: - raise TypingError("Modified builtin '%s'" % gvar.name, - loc=inst.loc) - - def resolve_call(self, fnty, pos_args, kw_args, literals=None): - """ - Resolve a call to a given function type. A signature is returned. - """ - if isinstance(fnty, types.RecursiveCall) and not self._skip_recursion: - # Recursive call - disp = fnty.dispatcher_type.dispatcher - pysig, args = disp.fold_argument_types(pos_args, kw_args) - - frame = self.context.callstack.match(disp.py_func, args) - - # If the signature is not being compiled - if frame is None: - sig = self.context.resolve_function_type(fnty.dispatcher_type, - pos_args, kw_args) - fndesc = disp.overloads[args].fndesc - fnty.overloads[args] = qualifying_prefix(fndesc.modname, - fndesc.unique_name) - return sig - - fnid = frame.func_id - fnty.overloads[args] = qualifying_prefix(fnid.modname, - fnid.unique_name) - # Resume propagation in parent frame - return_type = frame.typeinfer.return_types_from_partial() - # No known return type - if return_type is None: - raise TypingError("cannot type infer runaway recursion") - - sig = typing.signature(return_type, *args) - sig.pysig = pysig - return sig - else: - # Normal non-recursive call - return self.context.resolve_function_type(fnty, pos_args, kw_args, - literals=literals) - - def typeof_global(self, inst, target, gvar): - try: - typ = self.resolve_value_type(inst, gvar.value) - except TypingError as e: - if (gvar.name == self.func_id.func_name - and gvar.name in _temporary_dispatcher_map): - # Self-recursion case where the dispatcher is not (yet?) 
known - # as a global variable - typ = types.Dispatcher(_temporary_dispatcher_map[gvar.name]) - else: - msg = _termcolor.errmsg("Untyped global name '%s':") + " %s" - e.patch_message(msg % (gvar.name, e)) - raise - - if isinstance(typ, types.Dispatcher) and typ.dispatcher.is_compiling: - # Recursive call - callstack = self.context.callstack - callframe = callstack.findfirst(typ.dispatcher.py_func) - if callframe is not None: - typ = types.RecursiveCall(typ) - else: - raise NotImplementedError( - "call to %s: unsupported recursion" - % typ.dispatcher) - - if isinstance(typ, types.Array): - # Global array in nopython mode is constant - typ = typ.copy(readonly=True) - - self.sentry_modified_builtin(inst, gvar) - # Setting literal_value for globals because they are handled - # like const value in numba - self.lock_type(target.name, typ, loc=inst.loc, - literal_value=gvar.value) - self.assumed_immutables.add(inst) - - def typeof_expr(self, inst, target, expr): - if expr.op == 'call': - if isinstance(expr.func, ir.Intrinsic): - sig = expr.func.type - self.add_type(target.name, sig.return_type, loc=inst.loc) - self.add_calltype(expr, sig) - else: - self.typeof_call(inst, target, expr) - elif expr.op in ('getiter', 'iternext'): - self.typeof_intrinsic_call(inst, target, expr.op, expr.value) - elif expr.op == 'exhaust_iter': - constraint = ExhaustIterConstraint(target.name, count=expr.count, - iterator=expr.value, - loc=expr.loc) - self.constraints.append(constraint) - elif expr.op == 'pair_first': - constraint = PairFirstConstraint(target.name, pair=expr.value, - loc=expr.loc) - self.constraints.append(constraint) - elif expr.op == 'pair_second': - constraint = PairSecondConstraint(target.name, pair=expr.value, - loc=expr.loc) - self.constraints.append(constraint) - elif expr.op == 'binop': - self.typeof_intrinsic_call(inst, target, expr.fn, expr.lhs, - expr.rhs) - elif expr.op == 'inplace_binop': - self.typeof_intrinsic_call(inst, target, expr.fn, - expr.lhs, expr.rhs) - 
elif expr.op == 'unary': - self.typeof_intrinsic_call(inst, target, expr.fn, expr.value) - elif expr.op == 'static_getitem': - constraint = StaticGetItemConstraint(target.name, value=expr.value, - index=expr.index, - index_var=expr.index_var, - loc=expr.loc) - self.constraints.append(constraint) - self.calls.append((inst.value, constraint)) - elif expr.op == 'getitem': - self.typeof_intrinsic_call(inst, target, 'getitem', expr.value, - expr.index) - elif expr.op == 'getattr': - constraint = GetAttrConstraint(target.name, attr=expr.attr, - value=expr.value, loc=inst.loc, - inst=inst) - self.constraints.append(constraint) - elif expr.op == 'build_tuple': - constraint = BuildTupleConstraint(target.name, items=expr.items, - loc=inst.loc) - self.constraints.append(constraint) - elif expr.op == 'build_list': - constraint = BuildListConstraint(target.name, items=expr.items, - loc=inst.loc) - self.constraints.append(constraint) - elif expr.op == 'build_set': - constraint = BuildSetConstraint(target.name, items=expr.items, - loc=inst.loc) - self.constraints.append(constraint) - elif expr.op == 'cast': - self.constraints.append(Propagate(dst=target.name, - src=expr.value.name, - loc=inst.loc)) - elif expr.op == 'make_function': - self.lock_type(target.name, types.pyfunc_type, loc=inst.loc) - else: - msg = "Unsupported op-code encountered: %s" % expr - raise UnsupportedError(msg, loc=inst.loc) - - def typeof_call(self, inst, target, call): - constraint = CallConstraint(target.name, call.func.name, call.args, - call.kws, call.vararg, loc=inst.loc) - self.constraints.append(constraint) - self.calls.append((inst.value, constraint)) - - def typeof_intrinsic_call(self, inst, target, func, *args): - constraint = IntrinsicCallConstraint(target.name, func, args, - kws=(), vararg=None, loc=inst.loc) - self.constraints.append(constraint) - self.calls.append((inst.value, constraint)) - - -class NullDebug(object): - - def propagate_started(self): - pass - - def propagate_finished(self): 
- pass - - def unify_finished(self, typdict, retty, fntys): - pass - - -class TypeInferDebug(object): - - def __init__(self, typeinfer): - self.typeinfer = typeinfer - - def _dump_state(self): - print('---- type variables ----') - pprint([v for k, v in sorted(self.typeinfer.typevars.items())]) - - def propagate_started(self): - print("propagate".center(80, '-')) - - def propagate_finished(self): - self._dump_state() - - def unify_finished(self, typdict, retty, fntys): - print("Variable types".center(80, "-")) - pprint(typdict) - print("Return type".center(80, "-")) - pprint(retty) - print("Call types".center(80, "-")) - pprint(fntys) diff --git a/numba/numba/types/__init__.py b/numba/numba/types/__init__.py deleted file mode 100644 index 9fa699eee..000000000 --- a/numba/numba/types/__init__.py +++ /dev/null @@ -1,167 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import struct - -import numpy as np - -from .abstract import * -from .containers import * -from .functions import * -from .iterators import * -from .misc import * -from .npytypes import * -from .scalars import * - - -# Short names - -pyobject = PyObject('pyobject') -ffi_forced_object = Opaque('ffi_forced_object') -ffi = Opaque('ffi') -none = NoneType('none') -ellipsis = EllipsisType('...') -Any = Phantom('any') -undefined = Undefined('undefined') -string = Opaque('str') -code_type = Opaque('code') -pyfunc_type = Opaque('pyfunc') - -# No operation is defined on voidptr -# Can only pass it around -voidptr = RawPointer('void*') - -boolean = bool_ = Boolean('bool') - -byte = uint8 = Integer('uint8') -uint16 = Integer('uint16') -uint32 = Integer('uint32') -uint64 = Integer('uint64') - -int8 = Integer('int8') -int16 = Integer('int16') -int32 = Integer('int32') -int64 = Integer('int64') -intp = int32 if utils.MACHINE_BITS == 32 else int64 -uintp = uint32 if utils.MACHINE_BITS == 32 else uint64 -intc = int32 if struct.calcsize('i') == 4 else int64 -uintc = uint32 if 
struct.calcsize('i') == 4 else uint64 - -float32 = Float('float32') -float64 = Float('float64') - -complex64 = Complex('complex64', float32) -complex128 = Complex('complex128', float64) - -range_iter32_type = RangeIteratorType(int32) -range_iter64_type = RangeIteratorType(int64) -unsigned_range_iter64_type = RangeIteratorType(uint64) -range_state32_type = RangeType(int32) -range_state64_type = RangeType(int64) -unsigned_range_state64_type = RangeType(uint64) - -slice2_type = SliceType('slice', 2) -slice3_type = SliceType('slice', 3) - -signed_domain = frozenset([int8, int16, int32, int64]) -unsigned_domain = frozenset([uint8, uint16, uint32, uint64]) -integer_domain = signed_domain | unsigned_domain -real_domain = frozenset([float32, float64]) -complex_domain = frozenset([complex64, complex128]) -number_domain = real_domain | integer_domain | complex_domain - -# Aliases to Numpy type names - -b1 = bool_ -i1 = int8 -i2 = int16 -i4 = int32 -i8 = int64 -u1 = uint8 -u2 = uint16 -u4 = uint32 -u8 = uint64 - -f4 = float32 -f8 = float64 - -c8 = complex64 -c16 = complex128 - -float_ = float32 -double = float64 -void = none - -_make_signed = lambda x: globals()["int%d" % (np.dtype(x).itemsize * 8)] -_make_unsigned = lambda x: globals()["uint%d" % (np.dtype(x).itemsize * 8)] - -char = _make_signed(np.byte) -uchar = byte = _make_unsigned(np.byte) -short = _make_signed(np.short) -ushort = _make_unsigned(np.short) -int_ = _make_signed(np.int_) -uint = _make_unsigned(np.int_) -intc = _make_signed(np.intc) # C-compat int -uintc = _make_unsigned(np.uintc) # C-compat uint -long_ = _make_signed(np.long) -ulong = _make_unsigned(np.long) -longlong = _make_signed(np.longlong) -ulonglong = _make_unsigned(np.longlong) - -# optional types -optional = Optional - - -deferred_type = DeferredType - -__all__ = ''' -int8 -int16 -int32 -int64 -uint8 -uint16 -uint32 -uint64 -intp -uintp -intc -uintc -boolean -float32 -float64 -complex64 -complex128 -bool_ -byte -char -uchar -short -ushort -int_ 
-uint -long_ -ulong -longlong -ulonglong -float_ -double -void -none -b1 -i1 -i2 -i4 -i8 -u1 -u2 -u4 -u8 -f4 -f8 -c8 -c16 -optional -ffi_forced_object -ffi -deferred_type -'''.split() diff --git a/numba/numba/types/abstract.py b/numba/numba/types/abstract.py deleted file mode 100644 index afcb261d8..000000000 --- a/numba/numba/types/abstract.py +++ /dev/null @@ -1,382 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from abc import ABCMeta, abstractmethod, abstractproperty -import itertools -import weakref - -import numpy as np - -from ..six import add_metaclass -from ..utils import cached_property - - -# Types are added to a global registry (_typecache) in order to assign -# them unique integer codes for fast matching in _dispatcher.c. -# However, we also want types to be disposable, therefore we ensure -# each type is interned as a weak reference, so that it lives only as -# long as necessary to keep a stable type code. -# NOTE: some types can still be made immortal elsewhere (for example -# in _dispatcher.c's internal caches). -_typecodes = itertools.count() - -def _autoincr(): - n = next(_typecodes) - # 4 billion types should be enough, right? - assert n < 2 ** 32, "Limited to 4 billion types" - return n - -_typecache = {} - -def _on_type_disposal(wr, _pop=_typecache.pop): - _pop(wr, None) - - -class _TypeMetaclass(ABCMeta): - """ - A metaclass that will intern instances after they are created. - This is done by first creating a new instance (including calling - __init__, which sets up the required attributes for equality - and hashing), then looking it up in the _typecache registry. 
- """ - - def _intern(cls, inst): - # Try to intern the created instance - wr = weakref.ref(inst, _on_type_disposal) - orig = _typecache.get(wr) - orig = orig and orig() - if orig is not None: - return orig - else: - inst._code = _autoincr() - _typecache[wr] = wr - return inst - - def __call__(cls, *args, **kwargs): - """ - Instantiate *cls* (a Type subclass, presumably) and intern it. - If an interned instance already exists, it is returned, otherwise - the new instance is returned. - """ - inst = type.__call__(cls, *args, **kwargs) - return cls._intern(inst) - - -def _type_reconstructor(reconstructor, reconstructor_args, state): - """ - Rebuild function for unpickling types. - """ - obj = reconstructor(*reconstructor_args) - if state: - obj.__dict__.update(state) - return type(obj)._intern(obj) - - -@add_metaclass(_TypeMetaclass) -class Type(object): - """ - The base class for all Numba types. - It is essential that proper equality comparison is implemented. The - default implementation uses the "key" property (overridable in subclasses) - for both comparison and hashing, to ensure sane behaviour. - """ - - mutable = False - # Rather the type is reflected at the python<->nopython boundary - reflected = False - - def __init__(self, name): - self.name = name - - @property - def key(self): - """ - A property used for __eq__, __ne__ and __hash__. Can be overriden - in subclasses. - """ - return self.name - - @property - def mangling_args(self): - """ - Returns `(basename, args)` where `basename` is the name of the type - and `args` is a sequence of parameters of the type. - - Subclass should override to specialize the behavior. - By default, this returns `(self.name, ())`. 
- """ - return self.name, () - - def __repr__(self): - return self.name - - def __hash__(self): - return hash(self.key) - - def __eq__(self, other): - return self.__class__ is other.__class__ and self.key == other.key - - def __ne__(self, other): - return not (self == other) - - def __reduce__(self): - reconstructor, args, state = super(Type, self).__reduce__() - return (_type_reconstructor, (reconstructor, args, state)) - - def unify(self, typingctx, other): - """ - Try to unify this type with the *other*. A third type must - be returned, or None if unification is not possible. - Only override this if the coercion logic cannot be expressed - as simple casting rules. - """ - return None - - def can_convert_to(self, typingctx, other): - """ - Check whether this type can be converted to the *other*. - If successful, must return a string describing the conversion, e.g. - "exact", "promote", "unsafe", "safe"; otherwise None is returned. - """ - return None - - def can_convert_from(self, typingctx, other): - """ - Similar to *can_convert_to*, but in reverse. Only needed if - the type provides conversion from other types. - """ - return None - - def is_precise(self): - """ - Whether this type is precise, i.e. can be part of a successful - type inference. Default implementation returns True. - """ - return True - - def augment(self, other): - """ - Augment this type with the *other*. Return the augmented type, - or None if not supported. - """ - return None - - # User-facing helpers. These are not part of the core Type API but - # are provided so that users can write e.g. `numba.boolean(1.5)` - # (returns True) or `types.int32(types.int32[:])` (returns something - # usable as a function signature). - - def __call__(self, *args): - from ..typing import signature - if len(args) == 1 and not isinstance(args[0], Type): - return self.cast_python_value(args[0]) - return signature(self, # return_type - *args) - - def __getitem__(self, args): - """ - Return an array of this type. 
- """ - from . import Array - ndim, layout = self._determine_array_spec(args) - return Array(dtype=self, ndim=ndim, layout=layout) - - def _determine_array_spec(self, args): - # XXX non-contiguous by default, even for 1d arrays, - # doesn't sound very intuitive - if isinstance(args, (tuple, list)): - ndim = len(args) - if args[0].step == 1: - layout = 'F' - elif args[-1].step == 1: - layout = 'C' - else: - layout = 'A' - elif isinstance(args, slice): - ndim = 1 - if args.step == 1: - layout = 'C' - else: - layout = 'A' - else: - ndim = 1 - layout = 'A' - - return ndim, layout - - def cast_python_value(self, args): - raise NotImplementedError - - -# XXX we should distinguish between Dummy (no meaningful -# representation, e.g. None or a builtin function) and Opaque (has a -# meaningful representation, e.g. ExternalFunctionPointer) - -class Dummy(Type): - """ - Base class for types that do not really have a representation and are - compatible with a void*. - """ - - -class Hashable(Type): - """ - Base class for hashable types. - """ - - -class Number(Hashable): - """ - Base class for number types. - """ - - def unify(self, typingctx, other): - """ - Unify the two number types using Numpy's rules. - """ - from .. import numpy_support - if isinstance(other, Number): - # XXX: this can produce unsafe conversions, - # e.g. would unify {int64, uint64} to float64 - a = numpy_support.as_dtype(self) - b = numpy_support.as_dtype(other) - sel = np.promote_types(a, b) - return numpy_support.from_dtype(sel) - - -class Callable(Type): - """ - Base class for callables. - """ - - @abstractmethod - def get_call_type(self, context, args, kws): - """ - Using the typing *context*, resolve the callable's signature for - the given arguments. A signature object is returned, or None. 
- """ - - @abstractmethod - def get_call_signatures(self): - """ - Returns a tuple of (list of signatures, parameterized) - """ - - def get_call_type_with_literals(self, context, args, kws, literals): - """Simliar to .get_call_type() but with extra argument for literals. - Default implementation ignores literals and forwards to .get_call_type(). - """ - return self.get_call_type(context, args, kws) - - -class DTypeSpec(Type): - """ - Base class for types usable as "dtype" arguments to various Numpy APIs - (e.g. np.empty()). - """ - - @abstractproperty - def dtype(self): - """ - The actual dtype denoted by this dtype spec (a Type instance). - """ - - -class IterableType(Type): - """ - Base class for iterable types. - """ - - @abstractproperty - def iterator_type(self): - """ - The iterator type obtained when calling iter() (explicitly or implicitly). - """ - - -class Sized(Type): - """ - Base class for objects that support len() - """ - - -class ConstSized(Sized): - """ - For types that have a constant size - """ - @abstractmethod - def __len__(self): - pass - - -class IteratorType(IterableType): - """ - Base class for all iterator types. - Derived classes should implement the *yield_type* attribute. - """ - - def __init__(self, name, **kwargs): - super(IteratorType, self).__init__(name, **kwargs) - - @abstractproperty - def yield_type(self): - """ - The type of values yielded by the iterator. - """ - - # This is a property to avoid recursivity (for pickling) - - @property - def iterator_type(self): - return self - - -class Container(Sized, IterableType): - """ - Base class for container types. - """ - - -class Sequence(Container): - """ - Base class for 1d sequence types. Instances should have the *dtype* - attribute. - """ - - -class MutableSequence(Sequence): - """ - Base class for 1d mutable sequence types. Instances should have the - *dtype* attribute. 
- """ - - -class ArrayCompatible(Type): - """ - Type class for Numpy array-compatible objects (typically, objects - exposing an __array__ method). - Derived classes should implement the *as_array* attribute. - """ - # If overriden by a subclass, it should also implement typing - # for '__array_wrap__' with arguments (input, formal result). - array_priority = 0.0 - - @abstractproperty - def as_array(self): - """ - The equivalent array type, for operations supporting array-compatible - objects (such as ufuncs). - """ - - # For compatibility with types.Array - - @cached_property - def ndim(self): - return self.as_array.ndim - - @cached_property - def layout(self): - return self.as_array.layout - - @cached_property - def dtype(self): - return self.as_array.dtype diff --git a/numba/numba/types/common.py b/numba/numba/types/common.py deleted file mode 100644 index 20307b69e..000000000 --- a/numba/numba/types/common.py +++ /dev/null @@ -1,99 +0,0 @@ -""" -Helper classes / mixins for defining types. -""" -from __future__ import print_function, division, absolute_import - -from .abstract import * - - -class Opaque(Dummy): - """ - A type that is a opaque pointer. - """ - - -class SimpleIterableType(IterableType): - - def __init__(self, name, iterator_type): - self._iterator_type = iterator_type - super(SimpleIterableType, self).__init__(name) - - @property - def iterator_type(self): - return self._iterator_type - - -class SimpleIteratorType(IteratorType): - - def __init__(self, name, yield_type): - self._yield_type = yield_type - super(SimpleIteratorType, self).__init__(name) - - @property - def yield_type(self): - return self._yield_type - - -class Buffer(IterableType, ArrayCompatible): - """ - Type class for objects providing the buffer protocol. - Derived classes exist for more specific cases. 
- """ - mutable = True - slice_is_copy = False - aligned = True - - # CS and FS are not reserved for inner contig but strided - LAYOUTS = frozenset(['C', 'F', 'CS', 'FS', 'A']) - - def __init__(self, dtype, ndim, layout, readonly=False, name=None): - if isinstance(dtype, Buffer): - raise TypeError("Buffer dtype cannot be buffer") - if layout not in self.LAYOUTS: - raise ValueError("Invalid layout '%s'" % layout) - self.dtype = dtype - self.ndim = ndim - self.layout = layout - if readonly: - self.mutable = False - if name is None: - type_name = self.__class__.__name__.lower() - if readonly: - type_name = "readonly %s" % type_name - name = "%s(%s, %sd, %s)" % (type_name, dtype, ndim, layout) - super(Buffer, self).__init__(name) - - @property - def iterator_type(self): - from .iterators import ArrayIterator - return ArrayIterator(self) - - @property - def as_array(self): - return self - - def copy(self, dtype=None, ndim=None, layout=None): - if dtype is None: - dtype = self.dtype - if ndim is None: - ndim = self.ndim - if layout is None: - layout = self.layout - return self.__class__(dtype=dtype, ndim=ndim, layout=layout, - readonly=not self.mutable) - - @property - def key(self): - return self.dtype, self.ndim, self.layout, self.mutable - - @property - def is_c_contig(self): - return self.layout == 'C' or (self.ndim <= 1 and self.layout in 'CF') - - @property - def is_f_contig(self): - return self.layout == 'F' or (self.ndim <= 1 and self.layout in 'CF') - - @property - def is_contig(self): - return self.layout in 'CF' diff --git a/numba/numba/types/containers.py b/numba/numba/types/containers.py deleted file mode 100644 index 93570cd5f..000000000 --- a/numba/numba/types/containers.py +++ /dev/null @@ -1,430 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from .abstract import * -from .common import * -from .misc import Undefined -from ..typeconv import Conversion - - -class Pair(Type): - """ - A heterogeneous pair. 
- """ - - def __init__(self, first_type, second_type): - self.first_type = first_type - self.second_type = second_type - name = "pair<%s, %s>" % (first_type, second_type) - super(Pair, self).__init__(name=name) - - @property - def key(self): - return self.first_type, self.second_type - - def unify(self, typingctx, other): - if isinstance(other, Pair): - first = typingctx.unify_pairs(self.first_type, other.first_type) - second = typingctx.unify_pairs(self.second_type, other.second_type) - if first is not None and second is not None: - return Pair(first, second) - - -class BaseContainerIterator(SimpleIteratorType): - """ - Convenience base class for some container iterators. - - Derived classes must implement the *container_class* attribute. - """ - - def __init__(self, container): - assert isinstance(container, self.container_class), container - self.container = container - yield_type = container.dtype - name = 'iter(%s)' % container - super(BaseContainerIterator, self).__init__(name, yield_type) - - def unify(self, typingctx, other): - cls = type(self) - if isinstance(other, cls): - container = typingctx.unify_pairs(self.container, other.container) - if container is not None: - return cls(container) - - @property - def key(self): - return self.container - - -class BaseContainerPayload(Type): - """ - Convenience base class for some container payloads. - - Derived classes must implement the *container_class* attribute. - """ - - def __init__(self, container): - assert isinstance(container, self.container_class) - self.container = container - name = 'payload(%s)' % container - super(BaseContainerPayload, self).__init__(name) - - @property - def key(self): - return self.container - - -class Bytes(Buffer): - """ - Type class for Python 3.x bytes objects. - """ - mutable = False - # Actually true but doesn't matter since bytes is immutable - slice_is_copy = False - - -class ByteArray(Buffer): - """ - Type class for bytearray objects. 
- """ - slice_is_copy = True - - -class PyArray(Buffer): - """ - Type class for array.array objects. - """ - slice_is_copy = True - - -class MemoryView(Buffer): - """ - Type class for memoryview objects. - """ - - -class BaseTuple(ConstSized, Hashable): - """ - The base class for all tuple types (with a known size). - """ - - @classmethod - def from_types(cls, tys, pyclass=None): - """ - Instantiate the right tuple type for the given element types. - """ - homogeneous = False - if tys: - first = tys[0] - for ty in tys[1:]: - if ty != first: - break - else: - homogeneous = True - - if pyclass is not None and pyclass is not tuple: - # A subclass => is it a namedtuple? - assert issubclass(pyclass, tuple) - if hasattr(pyclass, "_asdict"): - if homogeneous: - return NamedUniTuple(first, len(tys), pyclass) - else: - return NamedTuple(tys, pyclass) - if homogeneous: - return UniTuple(first, len(tys)) - else: - return Tuple(tys) - - -class BaseAnonymousTuple(BaseTuple): - """ - Mixin for non-named tuples. - """ - - def can_convert_to(self, typingctx, other): - """ - Convert this tuple to another one. Note named tuples are rejected. 
- """ - if not isinstance(other, BaseAnonymousTuple): - return - if len(self) != len(other): - return - if len(self) == 0: - return Conversion.safe - if isinstance(other, BaseTuple): - kinds = [typingctx.can_convert(ta, tb) - for ta, tb in zip(self, other)] - if any(kind is None for kind in kinds): - return - return max(kinds) - - -class _HomogeneousTuple(Sequence, BaseTuple): - - @property - def iterator_type(self): - return UniTupleIter(self) - - def getitem(self, ind): - return self.dtype, intp - - def __getitem__(self, i): - """ - Return element at position i - """ - return self.dtype - - def __iter__(self): - return iter([self.dtype] * self.count) - - def __len__(self): - return self.count - - @property - def types(self): - return (self.dtype,) * self.count - - -class UniTuple(BaseAnonymousTuple, _HomogeneousTuple, Sequence): - """ - Type class for homogeneous tuples. - """ - - def __init__(self, dtype, count): - self.dtype = dtype - self.count = count - name = "tuple(%s x %d)" % (dtype, count) - super(UniTuple, self).__init__(name) - - @property - def mangling_args(self): - return self.__class__.__name__, (self.dtype, self.count) - - @property - def key(self): - return self.dtype, self.count - - def unify(self, typingctx, other): - """ - Unify UniTuples with their dtype - """ - if isinstance(other, UniTuple) and len(self) == len(other): - dtype = typingctx.unify_pairs(self.dtype, other.dtype) - if dtype is not None: - return UniTuple(dtype=dtype, count=self.count) - - -class UniTupleIter(BaseContainerIterator): - """ - Type class for homogeneous tuple iterators. 
- """ - container_class = _HomogeneousTuple - - -class _HeterogeneousTuple(BaseTuple): - - def __getitem__(self, i): - """ - Return element at position i - """ - return self.types[i] - - def __len__(self): - # Beware: this makes Tuple(()) false-ish - return len(self.types) - - def __iter__(self): - return iter(self.types) - - -class Tuple(BaseAnonymousTuple, _HeterogeneousTuple): - - def __new__(cls, types): - if types and all(t == types[0] for t in types[1:]): - return UniTuple(dtype=types[0], count=len(types)) - else: - return object.__new__(Tuple) - - def __init__(self, types): - self.types = tuple(types) - self.count = len(self.types) - name = "(%s)" % ', '.join(str(i) for i in self.types) - super(Tuple, self).__init__(name) - - @property - def mangling_args(self): - return self.__class__.__name__, tuple(t for t in self.types) - - @property - def key(self): - return self.types - - def unify(self, typingctx, other): - """ - Unify elements of Tuples/UniTuples - """ - # Other is UniTuple or Tuple - if isinstance(other, BaseTuple) and len(self) == len(other): - unified = [typingctx.unify_pairs(ta, tb) - for ta, tb in zip(self, other)] - - if all(t is not None for t in unified): - return Tuple(unified) - - -class BaseNamedTuple(BaseTuple): - pass - - -class NamedUniTuple(_HomogeneousTuple, BaseNamedTuple): - - def __init__(self, dtype, count, cls): - self.dtype = dtype - self.count = count - self.fields = tuple(cls._fields) - self.instance_class = cls - name = "%s(%s x %d)" % (cls.__name__, dtype, count) - super(NamedUniTuple, self).__init__(name) - - @property - def iterator_type(self): - return UniTupleIter(self) - - @property - def key(self): - return self.instance_class, self.dtype, self.count - - -class NamedTuple(_HeterogeneousTuple, BaseNamedTuple): - - def __init__(self, types, cls): - self.types = tuple(types) - self.count = len(self.types) - self.fields = tuple(cls._fields) - self.instance_class = cls - name = "%s(%s)" % (cls.__name__, ', '.join(str(i) for 
i in self.types)) - super(NamedTuple, self).__init__(name) - - @property - def key(self): - return self.instance_class, self.types - - -class List(MutableSequence): - """ - Type class for (arbitrary-sized) homogeneous lists. - """ - mutable = True - - def __init__(self, dtype, reflected=False): - self.dtype = dtype - self.reflected = reflected - cls_name = "reflected list" if reflected else "list" - name = "%s(%s)" % (cls_name, self.dtype) - super(List, self).__init__(name=name) - - def copy(self, dtype=None, reflected=None): - if dtype is None: - dtype = self.dtype - if reflected is None: - reflected = self.reflected - return List(dtype, reflected) - - def unify(self, typingctx, other): - if isinstance(other, List): - dtype = typingctx.unify_pairs(self.dtype, other.dtype) - reflected = self.reflected or other.reflected - if dtype is not None: - return List(dtype, reflected) - - @property - def key(self): - return self.dtype, self.reflected - - @property - def iterator_type(self): - return ListIter(self) - - def is_precise(self): - return self.dtype.is_precise() - - -class ListIter(BaseContainerIterator): - """ - Type class for list iterators. - """ - container_class = List - - -class ListPayload(BaseContainerPayload): - """ - Internal type class for the dynamically-allocated payload of a list. - """ - container_class = List - - -class Set(Container): - """ - Type class for homogeneous sets. 
- """ - mutable = True - - def __init__(self, dtype, reflected=False): - assert isinstance(dtype, (Hashable, Undefined)) - self.dtype = dtype - self.reflected = reflected - cls_name = "reflected set" if reflected else "set" - name = "%s(%s)" % (cls_name, self.dtype) - super(Set, self).__init__(name=name) - - @property - def key(self): - return self.dtype, self.reflected - - @property - def iterator_type(self): - return SetIter(self) - - def is_precise(self): - return self.dtype.is_precise() - - def copy(self, dtype=None, reflected=None): - if dtype is None: - dtype = self.dtype - if reflected is None: - reflected = self.reflected - return Set(dtype, reflected) - - def unify(self, typingctx, other): - if isinstance(other, Set): - dtype = typingctx.unify_pairs(self.dtype, other.dtype) - reflected = self.reflected or other.reflected - if dtype is not None: - return Set(dtype, reflected) - - -class SetIter(BaseContainerIterator): - """ - Type class for set iterators. - """ - container_class = Set - - -class SetPayload(BaseContainerPayload): - """ - Internal type class for the dynamically-allocated payload of a set. - """ - container_class = Set - - -class SetEntry(Type): - """ - Internal type class for the entries of a Set's hash table. 
- """ - def __init__(self, set_type): - self.set_type = set_type - name = 'entry(%s)' % set_type - super(SetEntry, self).__init__(name) - - @property - def key(self): - return self.set_type diff --git a/numba/numba/types/functions.py b/numba/numba/types/functions.py deleted file mode 100644 index 35b8208a8..000000000 --- a/numba/numba/types/functions.py +++ /dev/null @@ -1,435 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import traceback -import inspect -import sys - -from .abstract import * -from .common import * -from numba.ir import Loc -from numba import errors - -# terminal color markup -_termcolor = errors.termcolor() - -class _ResolutionFailures(object): - """Collect and format function resolution failures. - """ - def __init__(self, context, function_type, args, kwargs): - self._context = context - self._function_type = function_type - self._args = args - self._kwargs = kwargs - self._failures = [] - - def __len__(self): - return len(self._failures) - - def add_error(self, calltemplate, error): - """ - Args - ---- - calltemplate : CallTemplate - error : Exception or str - Error message - """ - self._failures.append((calltemplate, error)) - - def format(self): - """Return a formatted error message from all the gathered errors. 
- """ - indent = ' ' * 4 - args = [str(a) for a in self._args] - args += ["%s=%s" % (k, v) for k, v in sorted(self._kwargs.items())] - headtmp = 'Invalid use of {} with argument(s) of type(s): ({})' - msgbuf = [headtmp.format(self._function_type, ', '.join(args))] - explain = self._context.explain_function_type(self._function_type) - msgbuf.append(explain) - for i, (temp, error) in enumerate(self._failures): - msgbuf.append("In definition {}:".format(i)) - msgbuf.append(_termcolor.highlight('{}{}'.format( - indent, self.format_error(error)))) - loc = self.get_loc(temp, error) - if loc: - msgbuf.append('{}raised from {}'.format(indent, loc)) - - likely_cause = ("This error is usually caused by passing an argument " - "of a type that is unsupported by the named function.") - msgbuf.append(_termcolor.errmsg(likely_cause)) - return '\n'.join(msgbuf) - - def format_error(self, error): - """Format error message or exception - """ - if isinstance(error, Exception): - return '{}: {}'.format(type(error).__name__, error) - else: - return '{}'.format(error) - - def get_loc(self, classtemplate, error): - """Get source location information from the error message. - """ - if isinstance(error, Exception) and hasattr(error, '__traceback__'): - # traceback is unavailable in py2 - frame = traceback.extract_tb(error.__traceback__)[-1] - return "{}:{}".format(frame[0], frame[1]) - - -class BaseFunction(Callable): - """ - Base type class for some function types. 
- """ - - def __init__(self, template): - if isinstance(template, (list, tuple)): - self.templates = tuple(template) - keys = set(temp.key for temp in self.templates) - if len(keys) != 1: - raise ValueError("incompatible templates: keys = %s" - % (this,)) - self.typing_key, = keys - else: - self.templates = (template,) - self.typing_key = template.key - self._impl_keys = {} - name = "%s(%s)" % (self.__class__.__name__, self.typing_key) - super(BaseFunction, self).__init__(name) - - @property - def key(self): - return self.typing_key, self.templates - - def augment(self, other): - """ - Augment this function type with the other function types' templates, - so as to support more input types. - """ - if type(other) is type(self) and other.typing_key == self.typing_key: - return type(self)(self.templates + other.templates) - - def get_impl_key(self, sig): - """ - Get the implementation key (used by the target context) for the - given signature. - """ - return self._impl_keys[sig.args] - - def get_call_type(self, context, args, kws): - return self.get_call_type_with_literals(context, args, kws, - literals=None) - - def get_call_type_with_literals(self, context, args, kws, literals): - failures = _ResolutionFailures(context, self, args, kws) - for temp_cls in self.templates: - temp = temp_cls(context) - try: - if literals is not None and temp.support_literals: - sig = temp.apply(*literals) - else: - sig = temp.apply(args, kws) - except Exception as e: - sig = None - failures.add_error(temp_cls, e) - else: - if sig is not None: - self._impl_keys[sig.args] = temp.get_impl_key(sig) - return sig - else: - failures.add_error(temp_cls, "All templates rejected") - - if len(failures) == 0: - raise AssertionError("Internal Error. 
" - "Function resolution ended with no failures " - "or successfull signature") - - raise errors.TypingError(failures.format()) - - def get_call_signatures(self): - sigs = [] - is_param = False - for temp in self.templates: - sigs += getattr(temp, 'cases', []) - is_param = is_param or hasattr(temp, 'generic') - return sigs, is_param - - -class Function(BaseFunction, Opaque): - """ - Type class for builtin functions implemented by Numba. - """ - - -class BoundFunction(Callable, Opaque): - """ - A function with an implicit first argument (denoted as *this* below). - """ - - def __init__(self, template, this): - # Create a derived template with an attribute *this* - newcls = type(template.__name__ + '.' + str(this), (template,), - dict(this=this)) - self.template = newcls - self.typing_key = self.template.key - self.this = this - name = "%s(%s for %s)" % (self.__class__.__name__, - self.typing_key, self.this) - super(BoundFunction, self).__init__(name) - - def unify(self, typingctx, other): - if (isinstance(other, BoundFunction) and - self.typing_key == other.typing_key): - this = typingctx.unify_pairs(self.this, other.this) - if this is not None: - # XXX is it right that both template instances are distinct? - return self.copy(this=this) - - def copy(self, this): - return type(self)(self.template, this) - - @property - def key(self): - return self.typing_key, self.this - - def get_impl_key(self, sig): - """ - Get the implementation key (used by the target context) for the - given signature. 
- """ - return self.typing_key - - def get_call_type(self, context, args, kws): - return self.template(context).apply(args, kws) - - def get_call_type_with_literals(self, context, args, kws, literals): - if literals is not None and self.template.support_literals: - return self.template(context).apply(*literals) - else: - return self.get_call_type(context, args, kws) - - def get_call_signatures(self): - sigs = getattr(self.template, 'cases', []) - is_param = hasattr(self.template, 'generic') - return sigs, is_param - - -class WeakType(Type): - """ - Base class for types parametered by a mortal object, to which only - a weak reference is kept. - """ - - def _store_object(self, obj): - self._wr = weakref.ref(obj) - - def _get_object(self): - obj = self._wr() - if obj is None: - raise ReferenceError("underlying object has vanished") - return obj - - @property - def key(self): - return self._wr - - def __eq__(self, other): - if type(self) is type(other): - obj = self._wr() - return obj is not None and obj is other._wr() - - def __hash__(self): - return Type.__hash__(self) - - -class Dispatcher(WeakType, Callable, Dummy): - """ - Type class for @jit-compiled functions. - """ - - def __init__(self, dispatcher): - self._store_object(dispatcher) - super(Dispatcher, self).__init__("type(%s)" % dispatcher) - - def get_call_type(self, context, args, kws): - """ - Resolve a call to this dispatcher using the given argument types. - A signature returned and it is ensured that a compiled specialization - is available for it. - """ - template, pysig, args, kws = self.dispatcher.get_call_template(args, kws) - sig = template(context).apply(args, kws) - if sig: - sig.pysig = pysig - return sig - - def get_call_signatures(self): - sigs = self.dispatcher.nopython_signatures - return sigs, True - - @property - def dispatcher(self): - """ - A strong reference to the underlying numba.dispatcher.Dispatcher instance. 
- """ - return self._get_object() - - def get_overload(self, sig): - """ - Get the compiled overload for the given signature. - """ - return self.dispatcher.get_overload(sig.args) - - def get_impl_key(self, sig): - """ - Get the implementation key for the given signature. - """ - return self.get_overload(sig) - - -class ExternalFunctionPointer(BaseFunction): - """ - A pointer to a native function (e.g. exported via ctypes or cffi). - *get_pointer* is a Python function taking an object - and returning the raw pointer value as an int. - """ - def __init__(self, sig, get_pointer, cconv=None): - from ..typing.templates import (AbstractTemplate, make_concrete_template, - signature) - from . import ffi_forced_object - if sig.return_type == ffi_forced_object: - raise TypeError("Cannot return a pyobject from a external function") - self.sig = sig - self.requires_gil = any(a == ffi_forced_object for a in self.sig.args) - self.get_pointer = get_pointer - self.cconv = cconv - if self.requires_gil: - class GilRequiringDefn(AbstractTemplate): - key = self.sig - - def generic(self, args, kws): - if kws: - raise TypeError("does not support keyword arguments") - # Make ffi_forced_object a bottom type to allow any type to be - # casted to it. This is the only place that support - # ffi_forced_object. - coerced = [actual if formal == ffi_forced_object else formal - for actual, formal - in zip(args, self.key.args)] - return signature(self.key.return_type, *coerced) - template = GilRequiringDefn - else: - template = make_concrete_template("CFuncPtr", sig, [sig]) - super(ExternalFunctionPointer, self).__init__(template) - - @property - def key(self): - return self.sig, self.cconv, self.get_pointer - - -class ExternalFunction(Function): - """ - A named native function (resolvable by LLVM) accepting an explicit signature. - For internal use only. - """ - - def __init__(self, symbol, sig): - from .. 
import typing - self.symbol = symbol - self.sig = sig - template = typing.make_concrete_template(symbol, symbol, [sig]) - super(ExternalFunction, self).__init__(template) - - @property - def key(self): - return self.symbol, self.sig - - -class NumbaFunction(Function): - """ - A named native function with the Numba calling convention - (resolvable by LLVM). - For internal use only. - """ - - def __init__(self, fndesc, sig): - from .. import typing - self.fndesc = fndesc - self.sig = sig - template = typing.make_concrete_template(fndesc.qualname, - fndesc.qualname, [sig]) - super(NumbaFunction, self).__init__(template) - - @property - def key(self): - return self.fndesc.unique_name, self.sig - - -class NamedTupleClass(Callable, Opaque): - """ - Type class for namedtuple classes. - """ - - def __init__(self, instance_class): - self.instance_class = instance_class - name = "class(%s)" % (instance_class) - super(NamedTupleClass, self).__init__(name) - - def get_call_type(self, context, args, kws): - # Overriden by the __call__ constructor resolution in typing.collections - return None - - def get_call_signatures(self): - return (), True - - @property - def key(self): - return self.instance_class - - -class NumberClass(Callable, DTypeSpec, Opaque): - """ - Type class for number classes (e.g. "np.float64"). - """ - - def __init__(self, instance_type): - self.instance_type = instance_type - name = "class(%s)" % (instance_type,) - super(NumberClass, self).__init__(name) - - def get_call_type(self, context, args, kws): - # Overriden by the __call__ constructor resolution in typing.builtins - return None - - def get_call_signatures(self): - return (), True - - @property - def key(self): - return self.instance_type - - @property - def dtype(self): - return self.instance_type - - -class RecursiveCall(Opaque): - """ - Recursive call to a Dispatcher. 
- """ - _overloads = None - - def __init__(self, dispatcher_type): - assert isinstance(dispatcher_type, Dispatcher) - self.dispatcher_type = dispatcher_type - name = "recursive(%s)" % (dispatcher_type,) - super(RecursiveCall, self).__init__(name) - # Initializing for the first time - if self._overloads is None: - self._overloads = {} - - @property - def overloads(self): - return self._overloads - - @property - def key(self): - return self.dispatcher_type diff --git a/numba/numba/types/iterators.py b/numba/numba/types/iterators.py deleted file mode 100644 index 5aeb99f2a..000000000 --- a/numba/numba/types/iterators.py +++ /dev/null @@ -1,106 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from .abstract import * -from .common import * - - -class RangeType(SimpleIterableType): - - def __init__(self, dtype): - self.dtype = dtype - name = "range_state_%s" % (dtype,) - super(SimpleIterableType, self).__init__(name) - self._iterator_type = RangeIteratorType(self.dtype) - - def unify(self, typingctx, other): - if isinstance(other, RangeType): - dtype = typingctx.unify_pairs(self.dtype, other.dtype) - if dtype is not None: - return RangeType(dtype) - - -class RangeIteratorType(SimpleIteratorType): - - def __init__(self, dtype): - name = "range_iter_%s" % (dtype,) - super(SimpleIteratorType, self).__init__(name) - self._yield_type = dtype - - def unify(self, typingctx, other): - if isinstance(other, RangeIteratorType): - dtype = typingctx.unify_pairs(self.yield_type, other.yield_type) - if dtype is not None: - return RangeIteratorType(dtype) - - -class Generator(SimpleIteratorType): - """ - Type class for Numba-compiled generator objects. 
- """ - - def __init__(self, gen_func, yield_type, arg_types, state_types, - has_finalizer): - self.gen_func = gen_func - self.arg_types = tuple(arg_types) - self.state_types = tuple(state_types) - self.has_finalizer = has_finalizer - name = "%s generator(func=%s, args=%s, has_finalizer=%s)" % ( - yield_type, self.gen_func, self.arg_types, - self.has_finalizer) - super(Generator, self).__init__(name, yield_type) - - @property - def key(self): - return self.gen_func, self.arg_types, self.yield_type, self.has_finalizer - - -class EnumerateType(SimpleIteratorType): - """ - Type class for `enumerate` objects. - Type instances are parametered with the underlying source type. - """ - - def __init__(self, iterable_type): - from . import Tuple, intp - self.source_type = iterable_type.iterator_type - yield_type = Tuple([intp, self.source_type.yield_type]) - name = 'enumerate(%s)' % (self.source_type) - super(EnumerateType, self).__init__(name, yield_type) - - @property - def key(self): - return self.source_type - - -class ZipType(SimpleIteratorType): - """ - Type class for `zip` objects. - Type instances are parametered with the underlying source types. - """ - - def __init__(self, iterable_types): - from . import Tuple - self.source_types = tuple(tp.iterator_type for tp in iterable_types) - yield_type = Tuple([tp.yield_type for tp in self.source_types]) - name = 'zip(%s)' % ', '.join(str(tp) for tp in self.source_types) - super(ZipType, self).__init__(name, yield_type) - - @property - def key(self): - return self.source_types - - -class ArrayIterator(SimpleIteratorType): - """ - Type class for iterators of array and buffer objects. 
- """ - - def __init__(self, array_type): - self.array_type = array_type - name = "iter(%s)" % (self.array_type,) - nd = array_type.ndim - if nd == 0 or nd == 1: - yield_type = array_type.dtype - else: - yield_type = array_type.copy(ndim=array_type.ndim - 1) - super(ArrayIterator, self).__init__(name, yield_type) diff --git a/numba/numba/types/misc.py b/numba/numba/types/misc.py deleted file mode 100644 index bba637067..000000000 --- a/numba/numba/types/misc.py +++ /dev/null @@ -1,420 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from .abstract import * -from .common import * -from ..typeconv import Conversion - - -class PyObject(Dummy): - """ - A generic CPython object. - """ - - def is_precise(self): - return False - - -class Phantom(Dummy): - """ - A type that cannot be materialized. A Phantom cannot be used as - argument or return type. - """ - - -class Undefined(Dummy): - """ - A type that is left imprecise. This is used as a temporaray placeholder - during type inference in the hope that the type can be later refined. - """ - - def is_precise(self): - return False - - -class RawPointer(Opaque): - """ - A raw pointer without any specific meaning. - """ - - -class Const(Dummy): - """ - A compile-time constant, for (internal) use when a type is needed for - lookup. - """ - - def __init__(self, value): - self.value = value - # We want to support constants of non-hashable values, therefore - # fall back on the value's id() if necessary. - try: - hash(value) - except TypeError: - self._key = id(value) - else: - self._key = value - super(Const, self).__init__("const(%r)" % (value,)) - - @property - def key(self): - return type(self.value), self._key - - -class Omitted(Opaque): - """ - An omitted function argument with a default value. 
- """ - - def __init__(self, value): - self.value = value - super(Omitted, self).__init__("omitted(default=%r)" % (value,)) - - @property - def key(self): - return type(self.value), id(self.value) - - -class VarArg(Type): - """ - Special type representing a variable number of arguments at the - end of a function's signature. Only used for signature matching, - not for actual values. - """ - - def __init__(self, dtype): - self.dtype = dtype - super(VarArg, self).__init__("*%s" % dtype) - - @property - def key(self): - return self.dtype - - -class Module(Dummy): - def __init__(self, pymod): - self.pymod = pymod - super(Module, self).__init__("Module(%s)" % pymod) - - @property - def key(self): - return self.pymod - - -class Macro(Type): - def __init__(self, template): - self.template = template - cls = type(self) - super(Macro, self).__init__("%s(%s)" % (cls.__name__, template)) - - @property - def key(self): - return self.template - - -class MemInfoPointer(Type): - """ - Pointer to a Numba "meminfo" (i.e. the information for a managed - piece of memory). - """ - mutable = True - - def __init__(self, dtype): - self.dtype = dtype - name = "memory-managed *%s" % dtype - super(MemInfoPointer, self).__init__(name) - - @property - def key(self): - return self.dtype - - -class CPointer(Type): - """ - Type class for pointers to other types. - """ - mutable = True - - def __init__(self, dtype): - self.dtype = dtype - name = "%s*" % dtype - super(CPointer, self).__init__(name) - - @property - def key(self): - return self.dtype - - -class EphemeralPointer(CPointer): - """ - Type class for pointers which aren't guaranteed to last long - e.g. - stack-allocated slots. The data model serializes such pointers - by copying the data pointed to. - """ - - -class EphemeralArray(Type): - """ - Similar to EphemeralPointer, but pointing to an array of elements, - rather than a single one. The array size must be known at compile-time. 
- """ - - def __init__(self, dtype, count): - self.dtype = dtype - self.count = count - name = "*%s[%d]" % (dtype, count) - super(EphemeralArray, self).__init__(name) - - @property - def key(self): - return self.dtype, self.count - - -class Object(Type): - # XXX unused? - mutable = True - - def __init__(self, clsobj): - self.cls = clsobj - name = "Object(%s)" % clsobj.__name__ - super(Object, self).__init__(name) - - @property - def key(self): - return self.cls - - -class Optional(Type): - """ - Type class for optional types, i.e. union { some type, None } - """ - - def __init__(self, typ): - assert not isinstance(typ, (Optional, NoneType)) - self.type = typ - name = "?%s" % typ - super(Optional, self).__init__(name) - - @property - def key(self): - return self.type - - def can_convert_to(self, typingctx, other): - if isinstance(other, Optional): - return typingctx.can_convert(self.type, other.type) - else: - conv = typingctx.can_convert(self.type, other) - if conv is not None: - return max(conv, Conversion.safe) - - def can_convert_from(self, typingctx, other): - if isinstance(other, NoneType): - return Conversion.promote - elif isinstance(other, Optional): - return typingctx.can_convert(other.type, self.type) - else: - conv = typingctx.can_convert(other, self.type) - if conv is not None: - return max(conv, Conversion.promote) - - def unify(self, typingctx, other): - if isinstance(other, Optional): - unified = typingctx.unify_pairs(self.type, other.type) - else: - unified = typingctx.unify_pairs(self.type, other) - - if unified is not None: - if isinstance(unified, Optional): - return unified - else: - return Optional(unified) - - -class NoneType(Opaque): - """ - The type for None. - """ - - def unify(self, typingctx, other): - """ - Turn anything to a Optional type; - """ - if isinstance(other, (Optional, NoneType)): - return other - return Optional(other) - - -class EllipsisType(Opaque): - """ - The type for the Ellipsis singleton. 
- """ - - -class ExceptionClass(Callable, Phantom): - """ - The type of exception classes (not instances). - """ - - def __init__(self, exc_class): - assert issubclass(exc_class, BaseException) - name = "%s" % (exc_class.__name__) - self.exc_class = exc_class - super(ExceptionClass, self).__init__(name) - - def get_call_type(self, context, args, kws): - return self.get_call_signatures()[0][0] - - def get_call_signatures(self): - from .. import typing - return_type = ExceptionInstance(self.exc_class) - return [typing.signature(return_type)], False - - @property - def key(self): - return self.exc_class - - -class ExceptionInstance(Phantom): - """ - The type of exception instances. *exc_class* should be the - exception class. - """ - - def __init__(self, exc_class): - assert issubclass(exc_class, BaseException) - name = "%s(...)" % (exc_class.__name__,) - self.exc_class = exc_class - super(ExceptionInstance, self).__init__(name) - - @property - def key(self): - return self.exc_class - - -class SliceType(Type): - - def __init__(self, name, members): - assert members in (2, 3) - self.members = members - self.has_step = members >= 3 - super(SliceType, self).__init__(name) - - @property - def key(self): - return self.members - - -class ClassInstanceType(Type): - """ - The type of a jitted class *instance*. It will be the return-type - of the constructor of the class. 
- """ - mutable = True - name_prefix = "instance" - - def __init__(self, class_type): - self.class_type = class_type - name = "{0}.{1}".format(self.name_prefix, self.class_type.name) - super(ClassInstanceType, self).__init__(name) - - def get_data_type(self): - return ClassDataType(self) - - def get_reference_type(self): - return self - - @property - def key(self): - return self.class_type.key - - @property - def classname(self): - return self.class_type.class_def.__name__ - - @property - def jitprops(self): - return self.class_type.jitprops - - @property - def jitmethods(self): - return self.class_type.jitmethods - - @property - def struct(self): - return self.class_type.struct - - @property - def methods(self): - return self.class_type.methods - - -class ClassType(Callable, Opaque): - """ - The type of the jitted class (not instance). When the type of a class - is called, its constructor is invoked. - """ - mutable = True - name_prefix = "jitclass" - instance_type_class = ClassInstanceType - - def __init__(self, class_def, ctor_template_cls, struct, jitmethods, - jitprops): - self.class_def = class_def - self.ctor_template = self._specialize_template(ctor_template_cls) - self.jitmethods = jitmethods - self.jitprops = jitprops - self.struct = struct - self.methods = dict((k, v.py_func) for k, v in self.jitmethods.items()) - fielddesc = ','.join("{0}:{1}".format(k, v) for k, v in struct.items()) - name = "{0}.{1}#{2:x}<{3}>".format(self.name_prefix, class_def.__name__, - id(class_def), fielddesc) - super(ClassType, self).__init__(name) - self.instance_type = self.instance_type_class(self) - - def get_call_type(self, context, args, kws): - return self.ctor_template(context).apply(args, kws) - - def get_call_signatures(self): - return (), True - - def _specialize_template(self, basecls): - return type(basecls.__name__, (basecls,), dict(key=self)) - - -class DeferredType(Type): - """ - Represents a type that will be defined later. 
It must be defined - before it is materialized (used in the compiler). Once defined, it - behaves exactly as the type it is defining. - """ - def __init__(self): - self._define = None - name = "{0}#{1}".format(type(self).__name__, id(self)) - super(DeferredType, self).__init__(name) - - def get(self): - if self._define is None: - raise RuntimeError("deferred type not defined") - return self._define - - def define(self, typ): - if self._define is not None: - raise TypeError("deferred type already defined") - if not isinstance(typ, Type): - raise TypeError("arg is not a Type; got: {0}".format(type(typ))) - self._define = typ - - def unify(self, typingctx, other): - return typingctx.unify_pairs(self.get(), other) - - -class ClassDataType(Type): - """ - Internal only. - Represents the data of the instance. The representation of - ClassInstanceType contains a pointer to a ClassDataType which represents - a C structure that contains all the data fields of the class instance. - """ - def __init__(self, classtyp): - self.class_type = classtyp - name = "data.{0}".format(self.class_type.name) - super(ClassDataType, self).__init__(name) diff --git a/numba/numba/types/npytypes.py b/numba/numba/types/npytypes.py deleted file mode 100644 index 60fd3f408..000000000 --- a/numba/numba/types/npytypes.py +++ /dev/null @@ -1,454 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import collections - -import numpy as np - -from .abstract import * -from .common import * -from ..typeconv import Conversion -from .. import utils - - -class CharSeq(Type): - """ - A fixed-length 8-bit character sequence. - """ - mutable = True - - def __init__(self, count): - self.count = count - name = "[char x %d]" % count - super(CharSeq, self).__init__(name) - - @property - def key(self): - return self.count - - -class UnicodeCharSeq(Type): - """ - A fixed-length unicode character sequence. 
- """ - mutable = True - - def __init__(self, count): - self.count = count - name = "[unichr x %d]" % count - super(UnicodeCharSeq, self).__init__(name) - - @property - def key(self): - return self.count - - -class Record(Type): - """ - A Numpy structured scalar. *descr* is the string representation - of the Numpy dtype; *fields* of mapping of field names to - (type, offset) tuples; *size* the bytesize of a record; - *aligned* whether the fields are aligned; *dtype* the Numpy dtype - instance. - """ - mutable = True - - def __init__(self, descr, fields, size, aligned, dtype): - self.descr = descr - self.fields = fields.copy() - self.size = size - self.aligned = aligned - self.dtype = dtype - name = 'Record(%s)' % descr - super(Record, self).__init__(name) - - @property - def key(self): - # Numpy dtype equality doesn't always succeed, use the descr instead - # (https://github.com/numpy/numpy/issues/5715) - return (self.descr, self.size, self.aligned) - - @property - def mangling_args(self): - return self.__class__.__name__, (self._code,) - - def __len__(self): - return len(self.fields) - - def offset(self, key): - return self.fields[key][1] - - def typeof(self, key): - return self.fields[key][0] - - @property - def members(self): - return [(f, t) for f, (t, _) in self.fields.items()] - - -class DType(DTypeSpec, Opaque): - """ - Type class for Numpy dtypes. - """ - - def __init__(self, dtype): - assert isinstance(dtype, Type) - self._dtype = dtype - name = "dtype(%s)" % (dtype,) - super(DTypeSpec, self).__init__(name) - - @property - def key(self): - return self.dtype - - @property - def dtype(self): - return self._dtype - - def __getitem__(self, arg): - res = super(DType, self).__getitem__(arg) - return res.copy(dtype=self.dtype) - - -class NumpyFlatType(SimpleIteratorType, MutableSequence): - """ - Type class for `ndarray.flat()` objects. 
- """ - - def __init__(self, arrty): - self.array_type = arrty - yield_type = arrty.dtype - self.dtype = yield_type - name = "array.flat({arrayty})".format(arrayty=arrty) - super(NumpyFlatType, self).__init__(name, yield_type) - - @property - def key(self): - return self.array_type - - -class NumpyNdEnumerateType(SimpleIteratorType): - """ - Type class for `np.ndenumerate()` objects. - """ - - def __init__(self, arrty): - from . import Tuple, UniTuple, intp - self.array_type = arrty - yield_type = Tuple((UniTuple(intp, arrty.ndim), arrty.dtype)) - name = "ndenumerate({arrayty})".format(arrayty=arrty) - super(NumpyNdEnumerateType, self).__init__(name, yield_type) - - @property - def key(self): - return self.array_type - - -class NumpyNdIterType(IteratorType): - """ - Type class for `np.nditer()` objects. - - The layout denotes in which order the logical shape is iterated on. - "C" means logical order (corresponding to in-memory order in C arrays), - "F" means reverse logical order (corresponding to in-memory order in - F arrays). - """ - - def __init__(self, arrays): - # Note inputs arrays can also be scalars, in which case they are - # broadcast. - self.arrays = tuple(arrays) - self.layout = self._compute_layout(self.arrays) - self.dtypes = tuple(getattr(a, 'dtype', a) for a in self.arrays) - self.ndim = max(getattr(a, 'ndim', 0) for a in self.arrays) - name = "nditer(ndim={ndim}, layout={layout}, inputs={arrays})".format( - ndim=self.ndim, layout=self.layout, arrays=self.arrays) - super(NumpyNdIterType, self).__init__(name) - - @classmethod - def _compute_layout(cls, arrays): - c = collections.Counter() - for a in arrays: - if not isinstance(a, Array): - continue - if a.layout in 'CF' and a.ndim == 1: - c['C'] += 1 - c['F'] += 1 - elif a.ndim >= 1: - c[a.layout] += 1 - return 'F' if c['F'] > c['C'] else 'C' - - @property - def key(self): - return self.arrays - - @property - def views(self): - """ - The views yielded by the iterator. 
- """ - return [Array(dtype, 0, 'C') for dtype in self.dtypes] - - @property - def yield_type(self): - from . import BaseTuple - views = self.views - if len(views) > 1: - return BaseTuple.from_types(views) - else: - return views[0] - - @utils.cached_property - def indexers(self): - """ - A list of (kind, start_dim, end_dim, indices) where: - - `kind` is either "flat", "indexed", "0d" or "scalar" - - `start_dim` and `end_dim` are the dimension numbers at which - this indexing takes place - - `indices` is the indices of the indexed arrays in self.arrays - """ - d = collections.OrderedDict() - layout = self.layout - ndim = self.ndim - assert layout in 'CF' - for i, a in enumerate(self.arrays): - if not isinstance(a, Array): - indexer = ('scalar', 0, 0) - elif a.ndim == 0: - indexer = ('0d', 0, 0) - else: - if a.layout == layout or (a.ndim == 1 and a.layout in 'CF'): - kind = 'flat' - else: - kind = 'indexed' - if layout == 'C': - # If iterating in C order, broadcasting is done on the outer indices - indexer = (kind, ndim - a.ndim, ndim) - else: - indexer = (kind, 0, a.ndim) - d.setdefault(indexer, []).append(i) - return list(k + (v,) for k, v in d.items()) - - @utils.cached_property - def need_shaped_indexing(self): - """ - Whether iterating on this iterator requires keeping track of - individual indices inside the shape. If False, only a single index - over the equivalent flat shape is required, which can make the - iterator more efficient. - """ - for kind, start_dim, end_dim, _ in self.indexers: - if kind in ('0d', 'scalar'): - pass - elif kind == 'flat': - if (start_dim, end_dim) != (0, self.ndim): - # Broadcast flat iteration needs shaped indexing - # to know when to restart iteration. - return True - else: - return True - return False - - -class NumpyNdIndexType(SimpleIteratorType): - """ - Type class for `np.ndindex()` objects. - """ - - def __init__(self, ndim): - from . 
import UniTuple, intp - self.ndim = ndim - yield_type = UniTuple(intp, self.ndim) - name = "ndindex(ndim={ndim})".format(ndim=ndim) - super(NumpyNdIndexType, self).__init__(name, yield_type) - - @property - def key(self): - return self.ndim - - -class Array(Buffer): - """ - Type class for Numpy arrays. - """ - - def __init__(self, dtype, ndim, layout, readonly=False, name=None, - aligned=True): - if readonly: - self.mutable = False - if (not aligned or - (isinstance(dtype, Record) and not dtype.aligned)): - self.aligned = False - if name is None: - type_name = "array" - if not self.mutable: - type_name = "readonly " + type_name - if not self.aligned: - type_name = "unaligned " + type_name - name = "%s(%s, %sd, %s)" % (type_name, dtype, ndim, layout) - super(Array, self).__init__(dtype, ndim, layout, name=name) - - @property - def mangling_args(self): - args = [self.dtype, self.ndim, self.layout, - 'mutable' if self.mutable else 'readonly', - 'aligned' if self.aligned else 'unaligned'] - return self.__class__.__name__, args - - def copy(self, dtype=None, ndim=None, layout=None, readonly=None): - if dtype is None: - dtype = self.dtype - if ndim is None: - ndim = self.ndim - if layout is None: - layout = self.layout - if readonly is None: - readonly = not self.mutable - return Array(dtype=dtype, ndim=ndim, layout=layout, readonly=readonly, - aligned=self.aligned) - - @property - def key(self): - return self.dtype, self.ndim, self.layout, self.mutable, self.aligned - - def unify(self, typingctx, other): - """ - Unify this with the *other* Array. 
- """ - # If other is array and the ndim matches - if isinstance(other, Array) and other.ndim == self.ndim: - # If dtype matches or other.dtype is undefined (inferred) - if other.dtype == self.dtype or not other.dtype.is_precise(): - if self.layout == other.layout: - layout = self.layout - else: - layout = 'A' - readonly = not (self.mutable and other.mutable) - aligned = self.aligned and other.aligned - return Array(dtype=self.dtype, ndim=self.ndim, layout=layout, - readonly=readonly, aligned=aligned) - - def can_convert_to(self, typingctx, other): - """ - Convert this Array to the *other*. - """ - if (isinstance(other, Array) and other.ndim == self.ndim - and other.dtype == self.dtype): - if (other.layout in ('A', self.layout) - and (self.mutable or not other.mutable) - and (self.aligned or not other.aligned)): - return Conversion.safe - - def is_precise(self): - return self.dtype.is_precise() - - -class SmartArrayType(Array): - - def __init__(self, dtype, ndim, layout, pyclass): - self.pyclass = pyclass - super(SmartArrayType, self).__init__(dtype, ndim, layout, name='numba_array') - - @property - def as_array(self): - return Array(self.dtype, self.ndim, self.layout) - - def copy(self, dtype=None, ndim=None, layout=None): - if dtype is None: - dtype = self.dtype - if ndim is None: - ndim = self.ndim - if layout is None: - layout = self.layout - return type(self)(dtype, ndim, layout, self.pyclass) - - -class ArrayCTypes(Type): - """ - This is the type for `np.ndarray.ctypes`. - """ - def __init__(self, arytype): - # This depends on the ndim for the shape and strides attributes, - # even though they are not implemented, yet. - self.dtype = arytype.dtype - self.ndim = arytype.ndim - name = "ArrayCTypes(dtype={0}, ndim={1})".format(self.dtype, self.ndim) - super(ArrayCTypes, self).__init__(name) - - @property - def key(self): - return self.dtype, self.ndim - - def can_convert_to(self, typingctx, other): - """ - Convert this type to the corresponding pointer type. 
- This allows passing a array.ctypes object to a C function taking - a raw pointer. - - Note that in pure Python, the array.ctypes object can only be - passed to a ctypes function accepting a c_void_p, not a typed - pointer. - """ - from . import CPointer, voidptr - # XXX what about readonly - if isinstance(other, CPointer) and other.dtype == self.dtype: - return Conversion.safe - elif other == voidptr: - return Conversion.safe - - -class ArrayFlags(Type): - """ - This is the type for `np.ndarray.flags`. - """ - def __init__(self, arytype): - self.array_type = arytype - name = "ArrayFlags({0})".format(self.array_type) - super(ArrayFlags, self).__init__(name) - - @property - def key(self): - return self.array_type - - -class NestedArray(Array): - """ - A NestedArray is an array nested within a structured type (which are "void" - type in NumPy parlance). Unlike an Array, the shape, and not just the number - of dimenions is part of the type of a NestedArray. - """ - - def __init__(self, dtype, shape): - assert dtype.bitwidth % 8 == 0, \ - "Dtype bitwidth must be a multiple of bytes" - self._shape = shape - name = "nestedarray(%s, %s)" % (dtype, shape) - ndim = len(shape) - super(NestedArray, self).__init__(dtype, ndim, 'C', name=name) - - @property - def shape(self): - return self._shape - - @property - def nitems(self): - l = 1 - for s in self.shape: - l = l * s - return l - - @property - def size(self): - return self.dtype.bitwidth // 8 - - @property - def strides(self): - stride = self.size - strides = [] - for i in reversed(self._shape): - strides.append(stride) - stride *= i - return tuple(reversed(strides)) - - @property - def key(self): - return self.dtype, self.shape diff --git a/numba/numba/types/scalars.py b/numba/numba/types/scalars.py deleted file mode 100644 index 0070fbb57..000000000 --- a/numba/numba/types/scalars.py +++ /dev/null @@ -1,218 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import enum - -import numpy as np - 
-from .abstract import * -from .. import npdatetime, utils -from ..typeconv import Conversion - - -class Boolean(Hashable): - - def cast_python_value(self, value): - return bool(value) - - -@utils.total_ordering -class Integer(Number): - def __init__(self, *args, **kws): - super(Integer, self).__init__(*args, **kws) - # Determine bitwidth - for prefix in ('int', 'uint'): - if self.name.startswith(prefix): - bitwidth = int(self.name[len(prefix):]) - self.bitwidth = bitwidth - self.signed = self.name.startswith('int') - - @classmethod - def from_bitwidth(cls, bitwidth, signed=True): - name = ('int%d' if signed else 'uint%d') % bitwidth - return cls(name) - - def cast_python_value(self, value): - return getattr(np, self.name)(value) - - def __lt__(self, other): - if self.__class__ is not other.__class__: - return NotImplemented - if self.signed != other.signed: - return NotImplemented - return self.bitwidth < other.bitwidth - - @property - def maxval(self): - """ - The maximum value representable by this type. - """ - if self.signed: - return (1 << (self.bitwidth - 1)) - 1 - else: - return (1 << self.bitwidth) - 1 - - @property - def minval(self): - """ - The minimal value representable by this type. 
- """ - if self.signed: - return -(1 << (self.bitwidth - 1)) - else: - return 0 - - -@utils.total_ordering -class Float(Number): - def __init__(self, *args, **kws): - super(Float, self).__init__(*args, **kws) - # Determine bitwidth - assert self.name.startswith('float') - bitwidth = int(self.name[5:]) - self.bitwidth = bitwidth - - def cast_python_value(self, value): - return getattr(np, self.name)(value) - - def __lt__(self, other): - if self.__class__ is not other.__class__: - return NotImplemented - return self.bitwidth < other.bitwidth - - -@utils.total_ordering -class Complex(Number): - def __init__(self, name, underlying_float, **kwargs): - super(Complex, self).__init__(name, **kwargs) - self.underlying_float = underlying_float - # Determine bitwidth - assert self.name.startswith('complex') - bitwidth = int(self.name[7:]) - self.bitwidth = bitwidth - - def cast_python_value(self, value): - return getattr(np, self.name)(value) - - def __lt__(self, other): - if self.__class__ is not other.__class__: - return NotImplemented - return self.bitwidth < other.bitwidth - - -class _NPDatetimeBase(Type): - """ - Common base class for np.datetime64 and np.timedelta64. - """ - - def __init__(self, unit, *args, **kws): - name = '%s(%s)' % (self.type_name, unit) - self.unit = unit - self.unit_code = npdatetime.DATETIME_UNITS[self.unit] - super(_NPDatetimeBase, self).__init__(name, *args, **kws) - - def __lt__(self, other): - if self.__class__ is not other.__class__: - return NotImplemented - # A coarser-grained unit is "smaller", i.e. less precise values - # can be represented (but the magnitude of representable values is - # also greater...). 
- return self.unit_code < other.unit_code - - def cast_python_value(self, value): - cls = getattr(np, self.type_name) - if self.unit: - return cls(value, self.unit) - else: - return cls(value) - - -@utils.total_ordering -class NPTimedelta(_NPDatetimeBase): - type_name = 'timedelta64' - -@utils.total_ordering -class NPDatetime(_NPDatetimeBase): - type_name = 'datetime64' - - -class EnumClass(Dummy): - """ - Type class for Enum classes. - """ - basename = "Enum class" - - def __init__(self, cls, dtype): - assert isinstance(cls, type) - assert isinstance(dtype, Type) - self.instance_class = cls - self.dtype = dtype - name = "%s<%s>(%s)" % (self.basename, self.dtype, self.instance_class.__name__) - super(EnumClass, self).__init__(name) - - @property - def key(self): - return self.instance_class, self.dtype - - @utils.cached_property - def member_type(self): - """ - The type of this class' members. - """ - return EnumMember(self.instance_class, self.dtype) - - -class IntEnumClass(EnumClass): - """ - Type class for IntEnum classes. - """ - basename = "IntEnum class" - - @utils.cached_property - def member_type(self): - """ - The type of this class' members. - """ - return IntEnumMember(self.instance_class, self.dtype) - - -class EnumMember(Type): - """ - Type class for Enum members. - """ - basename = "Enum" - class_type_class = EnumClass - - def __init__(self, cls, dtype): - assert isinstance(cls, type) - assert isinstance(dtype, Type) - self.instance_class = cls - self.dtype = dtype - name = "%s<%s>(%s)" % (self.basename, self.dtype, self.instance_class.__name__) - super(EnumMember, self).__init__(name) - - @property - def key(self): - return self.instance_class, self.dtype - - @property - def class_type(self): - """ - The type of this member's class. - """ - return self.class_type_class(self.instance_class, self.dtype) - - -class IntEnumMember(EnumMember): - """ - Type class for IntEnum members. 
- """ - basename = "IntEnum" - class_type_class = IntEnumClass - - def can_convert_to(self, typingctx, other): - """ - Convert IntEnum members to plain integers. - """ - if issubclass(self.instance_class, enum.IntEnum): - conv = typingctx.can_convert(self.dtype, other) - return max(conv, Conversion.safe) diff --git a/numba/numba/typing/__init__.py b/numba/numba/typing/__init__.py deleted file mode 100644 index dc910c12e..000000000 --- a/numba/numba/typing/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from __future__ import absolute_import -from .context import BaseContext, Context -from .templates import (signature, make_concrete_template, Signature, - fold_arguments) diff --git a/numba/numba/typing/arraydecl.py b/numba/numba/typing/arraydecl.py deleted file mode 100644 index ddf605624..000000000 --- a/numba/numba/typing/arraydecl.py +++ /dev/null @@ -1,708 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import numpy as np - -from collections import namedtuple - -from numba import types, utils -from numba.typing.templates import (AttributeTemplate, AbstractTemplate, - infer, infer_getattr, signature, - bound_function) -# import time side effect: array operations requires typing support of sequence -# defined in collections: e.g. array.shape[i] -from numba.typing import collections -from numba.errors import TypingError - -Indexing = namedtuple("Indexing", ("index", "result", "advanced")) - - -def get_array_index_type(ary, idx): - """ - Returns None or a tuple-3 for the types of the input array, index, and - resulting type of ``array[index]``. - - Note: This is shared logic for ndarray getitem and setitem. 
- """ - if not isinstance(ary, types.Buffer): - return - - ndim = ary.ndim - - left_indices = [] - right_indices = [] - ellipsis_met = False - advanced = False - has_integer = False - - if not isinstance(idx, types.BaseTuple): - idx = [idx] - - # Walk indices - for ty in idx: - if ty is types.ellipsis: - if ellipsis_met: - raise TypeError("only one ellipsis allowed in array index " - "(got %s)" % (idx,)) - ellipsis_met = True - elif isinstance(ty, types.SliceType): - pass - elif isinstance(ty, types.Integer): - # Normalize integer index - ty = types.intp if ty.signed else types.uintp - # Integer indexing removes the given dimension - ndim -= 1 - has_integer = True - elif (isinstance(ty, types.Array) and ty.ndim == 0 - and isinstance(ty.dtype, types.Integer)): - # 0-d array used as integer index - ndim -= 1 - has_integer = True - elif (isinstance(ty, types.Array) - and ty.ndim == 1 - and isinstance(ty.dtype, (types.Integer, types.Boolean))): - if advanced or has_integer: - # We don't support the complicated combination of - # advanced indices (and integers are considered part - # of them by Numpy). 
- raise NotImplementedError("only one advanced index supported") - advanced = True - else: - raise TypeError("unsupported array index type %s in %s" - % (ty, idx)) - (right_indices if ellipsis_met else left_indices).append(ty) - - # Only Numpy arrays support advanced indexing - if advanced and not isinstance(ary, types.Array): - return - - # Check indices and result dimensionality - all_indices = left_indices + right_indices - if ellipsis_met: - assert right_indices[0] is types.ellipsis - del right_indices[0] - - n_indices = len(all_indices) - ellipsis_met - if n_indices > ary.ndim: - raise TypeError("cannot index %s with %d indices: %s" - % (ary, n_indices, idx)) - if n_indices == ary.ndim and ndim == 0 and not ellipsis_met: - # Full integer indexing => scalar result - # (note if ellipsis is present, a 0-d view is returned instead) - res = ary.dtype - - elif advanced: - # Result is a copy - res = ary.copy(ndim=ndim, layout='C', readonly=False) - - else: - # Result is a view - if ary.slice_is_copy: - # Avoid view semantics when the original type creates a copy - # when slicing. - return - - # Infer layout - layout = ary.layout - - def keeps_contiguity(ty, is_innermost): - # A slice can only keep an array contiguous if it is the - # innermost index and it is not strided - return (ty is types.ellipsis or isinstance(ty, types.Integer) - or (is_innermost and isinstance(ty, types.SliceType) - and not ty.has_step)) - - def check_contiguity(outer_indices): - """ - Whether indexing with the given indices (from outer to inner in - physical layout order) can keep an array contiguous. 
- """ - for ty in outer_indices[:-1]: - if not keeps_contiguity(ty, False): - return False - if outer_indices and not keeps_contiguity(outer_indices[-1], True): - return False - return True - - if layout == 'C': - # Integer indexing on the left keeps the array C-contiguous - if n_indices == ary.ndim: - # If all indices are there, ellipsis's place is indifferent - left_indices = left_indices + right_indices - right_indices = [] - if right_indices: - layout = 'A' - elif not check_contiguity(left_indices): - layout = 'A' - elif layout == 'F': - # Integer indexing on the right keeps the array F-contiguous - if n_indices == ary.ndim: - # If all indices are there, ellipsis's place is indifferent - right_indices = left_indices + right_indices - left_indices = [] - if left_indices: - layout = 'A' - elif not check_contiguity(right_indices[::-1]): - layout = 'A' - - res = ary.copy(ndim=ndim, layout=layout) - - # Re-wrap indices - if isinstance(idx, types.BaseTuple): - idx = types.BaseTuple.from_types(all_indices) - else: - idx, = all_indices - - return Indexing(idx, res, advanced) - - -@infer -class GetItemBuffer(AbstractTemplate): - key = "getitem" - - def generic(self, args, kws): - assert not kws - [ary, idx] = args - out = get_array_index_type(ary, idx) - if out is not None: - return signature(out.result, ary, out.index) - -@infer -class SetItemBuffer(AbstractTemplate): - key = "setitem" - - def generic(self, args, kws): - assert not kws - ary, idx, val = args - if not isinstance(ary, types.Buffer): - return - if not ary.mutable: - raise TypeError("Cannot modify value of type %s" %(ary,)) - out = get_array_index_type(ary, idx) - if out is None: - return - - idx = out.index - res = out.result - if isinstance(res, types.Array): - # Indexing produces an array - if isinstance(val, types.Array): - if not self.context.can_convert(val.dtype, res.dtype): - # DType conversion not possible - return - else: - res = val - elif isinstance(val, types.Sequence): - if (res.ndim == 1 and 
- self.context.can_convert(val.dtype, res.dtype)): - # Allow assignement of sequence to 1d array - res = val - else: - # NOTE: sequence-to-array broadcasting is unsupported - return - else: - # Allow scalar broadcasting - if self.context.can_convert(val, res.dtype): - res = res.dtype - else: - # Incompatible scalar type - return - elif not isinstance(val, types.Array): - # Single item assignment - if not self.context.can_convert(val, res): - # if the array dtype is not yet defined - if not res.is_precise(): - # set the array type to use the dtype of value (RHS) - newary = ary.copy(dtype=val) - return signature(types.none, newary, idx, res) - else: - return - res = val - else: - return - return signature(types.none, ary, idx, res) - - -def normalize_shape(shape): - if isinstance(shape, types.UniTuple): - if isinstance(shape.dtype, types.Integer): - dimtype = types.intp if shape.dtype.signed else types.uintp - return types.UniTuple(dimtype, len(shape)) - - elif isinstance(shape, types.Tuple) and shape.count == 0: - # Force (0 x intp) for consistency with other shapes - return types.UniTuple(types.intp, 0) - - -@infer_getattr -class ArrayAttribute(AttributeTemplate): - key = types.Array - - def resolve_dtype(self, ary): - return types.DType(ary.dtype) - - def resolve_itemsize(self, ary): - return types.intp - - def resolve_shape(self, ary): - return types.UniTuple(types.intp, ary.ndim) - - def resolve_strides(self, ary): - return types.UniTuple(types.intp, ary.ndim) - - def resolve_ndim(self, ary): - return types.intp - - def resolve_size(self, ary): - return types.intp - - def resolve_flat(self, ary): - return types.NumpyFlatType(ary) - - def resolve_ctypes(self, ary): - return types.ArrayCTypes(ary) - - def resolve_flags(self, ary): - return types.ArrayFlags(ary) - - def resolve_T(self, ary): - if ary.ndim <= 1: - retty = ary - else: - layout = {"C": "F", "F": "C"}.get(ary.layout, "A") - retty = ary.copy(layout=layout) - return retty - - def resolve_real(self, ary): 
- return self._resolve_real_imag(ary, attr='real') - - def resolve_imag(self, ary): - return self._resolve_real_imag(ary, attr='imag') - - def _resolve_real_imag(self, ary, attr): - if ary.dtype in types.complex_domain: - return ary.copy(dtype=ary.dtype.underlying_float, layout='A') - elif ary.dtype in types.number_domain: - res = ary.copy(dtype=ary.dtype) - if attr == 'imag': - res = res.copy(readonly=True) - return res - else: - msg = "cannot access .{} of array of {}" - raise TypingError(msg.format(attr, ary.dtype)) - - @bound_function("array.transpose") - def resolve_transpose(self, ary, args, kws): - def sentry_shape_scalar(ty): - if ty in types.number_domain: - # Guard against non integer type - if not isinstance(ty, types.Integer): - raise TypeError("transpose() arg cannot be {0}".format(ty)) - return True - else: - return False - - assert not kws - if len(args) == 0: - return signature(self.resolve_T(ary)) - - if len(args) == 1: - shape, = args - - if sentry_shape_scalar(shape): - assert ary.ndim == 1 - return signature(ary, *args) - - shape = normalize_shape(shape) - if shape is None: - return - - assert ary.ndim == shape.count - return signature(self.resolve_T(ary), shape) - - else: - if any(not sentry_shape_scalar(a) for a in args): - raise TypeError("transpose({0}) is not supported".format( - ', '.join(args))) - assert ary.ndim == len(args) - return signature(self.resolve_T(ary), *args) - - @bound_function("array.copy") - def resolve_copy(self, ary, args, kws): - assert not args - assert not kws - retty = ary.copy(layout="C", readonly=False) - return signature(retty) - - @bound_function("array.item") - def resolve_item(self, ary, args, kws): - assert not kws - # We don't support explicit arguments as that's exactly equivalent - # to regular indexing. The no-argument form is interesting to - # allow some degree of genericity when writing functions. 
- if not args: - return signature(ary.dtype) - - @bound_function("array.itemset") - def resolve_itemset(self, ary, args, kws): - assert not kws - # We don't support explicit arguments as that's exactly equivalent - # to regular indexing. The no-argument form is interesting to - # allow some degree of genericity when writing functions. - if len(args) == 1: - return signature(types.none, ary.dtype) - - @bound_function("array.nonzero") - def resolve_nonzero(self, ary, args, kws): - assert not args - assert not kws - # 0-dim arrays return one result array - ndim = max(ary.ndim, 1) - retty = types.UniTuple(types.Array(types.intp, 1, 'C'), ndim) - return signature(retty) - - @bound_function("array.reshape") - def resolve_reshape(self, ary, args, kws): - def sentry_shape_scalar(ty): - if ty in types.number_domain: - # Guard against non integer type - if not isinstance(ty, types.Integer): - raise TypeError("reshape() arg cannot be {0}".format(ty)) - return True - else: - return False - - assert not kws - if ary.layout not in 'CF': - # only work for contiguous array - raise TypeError("reshape() supports contiguous array only") - - if len(args) == 1: - # single arg - shape, = args - - if sentry_shape_scalar(shape): - ndim = 1 - else: - shape = normalize_shape(shape) - if shape is None: - return - ndim = shape.count - retty = ary.copy(ndim=ndim) - return signature(retty, shape) - - elif len(args) == 0: - # no arg - raise TypeError("reshape() take at least one arg") - - else: - # vararg case - if any(not sentry_shape_scalar(a) for a in args): - raise TypeError("reshape({0}) is not supported".format( - ', '.join(args))) - - retty = ary.copy(ndim=len(args)) - return signature(retty, *args) - - @bound_function("array.sort") - def resolve_sort(self, ary, args, kws): - assert not args - assert not kws - if ary.ndim == 1: - return signature(types.none) - - @bound_function("array.argsort") - def resolve_argsort(self, ary, args, kws): - assert not args - kwargs = dict(kws) - kind = 
kwargs.pop('kind', types.Const('quicksort')) - if kwargs: - msg = "Unsupported keywords: {!r}" - raise TypingError(msg.format([k for k in kwargs.keys()])) - if ary.ndim == 1: - def argsort_stub(kind='quicksort'): - pass - pysig = utils.pysignature(argsort_stub) - sig = signature(types.Array(types.intp, 1, 'C'), kind).replace(pysig=pysig) - return sig - - @bound_function("array.view") - def resolve_view(self, ary, args, kws): - from .npydecl import _parse_dtype - assert not kws - dtype, = args - dtype = _parse_dtype(dtype) - if dtype is None: - return - retty = ary.copy(dtype=dtype) - return signature(retty, *args) - - @bound_function("array.astype") - def resolve_astype(self, ary, args, kws): - from .npydecl import _parse_dtype - assert not kws - dtype, = args - dtype = _parse_dtype(dtype) - if dtype is None: - return - if not self.context.can_convert(ary.dtype, dtype): - raise TypeError("astype(%s) not supported on %s: " - "cannot convert from %s to %s" - % (dtype, ary, ary.dtype, dtype)) - layout = ary.layout if ary.layout in 'CF' else 'C' - retty = ary.copy(dtype=dtype, layout=layout) - return signature(retty, *args) - - @bound_function("array.ravel") - def resolve_ravel(self, ary, args, kws): - # Only support no argument version (default order='C') - assert not kws - assert not args - return signature(ary.copy(ndim=1, layout='C')) - - @bound_function("array.flatten") - def resolve_flatten(self, ary, args, kws): - # Only support no argument version (default order='C') - assert not kws - assert not args - return signature(ary.copy(ndim=1, layout='C')) - - @bound_function("array.take") - def resolve_take(self, ary, args, kws): - assert not kws - argty, = args - if isinstance(argty, types.Integer): - sig = signature(ary.dtype, *args) - elif isinstance(argty, types.Array): - sig = signature(argty.copy(layout='C', dtype=ary.dtype), *args) - elif isinstance(argty, types.List): # 1d lists only - sig = signature(types.Array(ary.dtype, 1, 'C'), *args) - elif 
isinstance(argty, types.BaseTuple): - sig = signature(types.Array(ary.dtype, np.ndim(argty), 'C'), *args) - else: - raise TypeError("take(%s) not supported for %s" % argty) - return sig - - def generic_resolve(self, ary, attr): - # Resolution of other attributes, for record arrays - if isinstance(ary.dtype, types.Record): - if attr in ary.dtype.fields: - return ary.copy(dtype=ary.dtype.typeof(attr), layout='A') - - -@infer_getattr -class DTypeAttr(AttributeTemplate): - key = types.DType - - def resolve_type(self, ary): - # Wrap the numeric type in NumberClass - return types.NumberClass(ary.dtype) - - def resolve_kind(self, ary): - if isinstance(ary.key, types.scalars.Float): - val = 'f' - elif isinstance(ary.key, types.scalars.Integer): - val = 'i' - else: - return None # other types not supported yet - return types.Const(val) - -@infer -class StaticGetItemArray(AbstractTemplate): - key = "static_getitem" - - def generic(self, args, kws): - # Resolution of members for record and structured arrays - ary, idx = args - if (isinstance(ary, types.Array) and isinstance(idx, str) and - isinstance(ary.dtype, types.Record)): - if idx in ary.dtype.fields: - return ary.copy(dtype=ary.dtype.typeof(idx), layout='A') - - -@infer_getattr -class RecordAttribute(AttributeTemplate): - key = types.Record - - def generic_resolve(self, record, attr): - ret = record.typeof(attr) - assert ret - return ret - -@infer -class StaticGetItemRecord(AbstractTemplate): - key = "static_getitem" - - def generic(self, args, kws): - # Resolution of members for records - record, idx = args - if isinstance(record, types.Record) and isinstance(idx, str): - ret = record.typeof(idx) - assert ret - return ret - -@infer -class StaticSetItemRecord(AbstractTemplate): - key = "static_setitem" - - def generic(self, args, kws): - # Resolution of members for record and structured arrays - record, idx, value = args - if isinstance(record, types.Record) and isinstance(idx, str): - expectedty = record.typeof(idx) - 
if self.context.can_convert(value, expectedty) is not None: - return signature(types.void, record, types.Const(idx), value) - - -@infer_getattr -class ArrayCTypesAttribute(AttributeTemplate): - key = types.ArrayCTypes - - def resolve_data(self, ctinfo): - return types.uintp - - -@infer_getattr -class ArrayFlagsAttribute(AttributeTemplate): - key = types.ArrayFlags - - def resolve_contiguous(self, ctflags): - return types.boolean - - def resolve_c_contiguous(self, ctflags): - return types.boolean - - def resolve_f_contiguous(self, ctflags): - return types.boolean - - -@infer_getattr -class NestedArrayAttribute(ArrayAttribute): - key = types.NestedArray - - -def _expand_integer(ty): - """ - If *ty* is an integer, expand it to a machine int (like Numpy). - """ - if isinstance(ty, types.Integer): - if ty.signed: - return max(types.intp, ty) - else: - return max(types.uintp, ty) - elif isinstance(ty, types.Boolean): - return types.intp - else: - return ty - -def generic_homog(self, args, kws): - assert not args - assert not kws - return signature(self.this.dtype, recvr=self.this) - -def generic_expand(self, args, kws): - assert not args - assert not kws - return signature(_expand_integer(self.this.dtype), recvr=self.this) - -def sum_expand(self, args, kws): - """ - sum can be called with or without an axis parameter. - """ - pysig = None - if kws: - def sum_stub(axis): - pass - pysig = utils.pysignature(sum_stub) - # rewrite args - args = list(args) + [kws['axis']] - kws = None - args_len = len(args) - assert args_len <= 1 - if args_len == 0: - # No axis parameter so the return type of the summation is a scalar - # of the type of the array. - out = signature(_expand_integer(self.this.dtype), *args, - recvr=self.this) - else: - # There is an axis paramter so the return type of this summation is - # an array of dimension one less than the input array. 
- return_type = types.Array(dtype=_expand_integer(self.this.dtype), - ndim=self.this.ndim-1, layout='C') - out = signature(return_type, *args, recvr=self.this) - return out.replace(pysig=pysig) - -def generic_expand_cumulative(self, args, kws): - assert not args - assert not kws - assert isinstance(self.this, types.Array) - return_type = types.Array(dtype=_expand_integer(self.this.dtype), - ndim=1, layout='C') - return signature(return_type, recvr=self.this) - -def generic_hetero_real(self, args, kws): - assert not args - assert not kws - if isinstance(self.this.dtype, (types.Integer, types.Boolean)): - return signature(types.float64, recvr=self.this) - return signature(self.this.dtype, recvr=self.this) - -def generic_hetero_always_real(self, args, kws): - assert not args - assert not kws - if isinstance(self.this.dtype, (types.Integer, types.Boolean)): - return signature(types.float64, recvr=self.this) - if isinstance(self.this.dtype, types.Complex): - return signature(self.this.dtype.underlying_float, recvr=self.this) - return signature(self.this.dtype, recvr=self.this) - -def generic_index(self, args, kws): - assert not args - assert not kws - return signature(types.intp, recvr=self.this) - -def install_array_method(name, generic, support_literals=False): - my_attr = {"key": "array." 
+ name, "generic": generic} - temp_class = type("Array_" + name, (AbstractTemplate,), my_attr) - if support_literals: - temp_class.support_literals = support_literals - def array_attribute_attachment(self, ary): - return types.BoundFunction(temp_class, ary) - - setattr(ArrayAttribute, "resolve_" + name, array_attribute_attachment) - -# Functions that return the same type as the array -for fname in ["min", "max"]: - install_array_method(fname, generic_homog) - -# Functions that return a machine-width type, to avoid overflows -install_array_method("prod", generic_expand) -install_array_method("sum", sum_expand, support_literals=True) - -# Functions that return a machine-width type, to avoid overflows -for fname in ["cumsum", "cumprod"]: - install_array_method(fname, generic_expand_cumulative) - -# Functions that require integer arrays get promoted to float64 return -for fName in ["mean"]: - install_array_method(fName, generic_hetero_real) - -# var and std by definition return in real space and int arrays -# get promoted to float64 return -for fName in ["var", "std"]: - install_array_method(fName, generic_hetero_always_real) - - -# Functions that return an index (intp) -install_array_method("argmin", generic_index) -install_array_method("argmax", generic_index) - - -@infer -class CmpOpEqArray(AbstractTemplate): - key = '==' - - def generic(self, args, kws): - assert not kws - [va, vb] = args - if isinstance(va, types.Array) and va == vb: - return signature(va.copy(dtype=types.boolean), va, vb) diff --git a/numba/numba/typing/bufproto.py b/numba/numba/typing/bufproto.py deleted file mode 100644 index 09ddbb50e..000000000 --- a/numba/numba/typing/bufproto.py +++ /dev/null @@ -1,74 +0,0 @@ -""" -Typing support for the buffer protocol (PEP 3118). 
-""" - -import array -import sys - -from numba import types - - -_pep3118_int_types = set('bBhHiIlLqQnN') - -_pep3118_scalar_map = { - 'f': types.float32, - 'd': types.float64, - 'Zf': types.complex64, - 'Zd': types.complex128, - } - -_type_map = { - bytearray: types.ByteArray, - array.array: types.PyArray, - } - -_type_map[memoryview] = types.MemoryView -if sys.version_info >= (3,): - _type_map[bytes] = types.Bytes - - -def decode_pep3118_format(fmt, itemsize): - """ - Return the Numba type for an item with format string *fmt* and size - *itemsize* (in bytes). - """ - # XXX reuse _dtype_from_pep3118() from np.core._internal? - if fmt in _pep3118_int_types: - # Determine int width and signedness - name = 'int%d' % (itemsize * 8,) - if fmt.isupper(): - name = 'u' + name - return types.Integer(name) - try: - # For the hard-coded types above, consider "=" the same as "@" - # (the default). This is because Numpy sometimes adds "=" - # in front of the PEP 3118 format string. - return _pep3118_scalar_map[fmt.lstrip('=')] - except KeyError: - raise ValueError("unsupported PEP 3118 format %r" % (fmt,)) - - -def get_type_class(typ): - """ - Get the Numba type class for buffer-compatible Python *typ*. - """ - try: - # Look up special case. - return _type_map[typ] - except KeyError: - # Fall back on generic one. - return types.Buffer - - -def infer_layout(val): - """ - Infer layout of the given memoryview *val*. 
- """ - if sys.version_info >= (3,): - return ('C' if val.c_contiguous else - 'F' if val.f_contiguous else - 'A') - # Python 2: best effort heuristic for 1d arrays - if val.ndim == 1 and val.strides[0] == val.itemsize: - return 'C' - return 'A' diff --git a/numba/numba/typing/builtins.py b/numba/numba/typing/builtins.py deleted file mode 100644 index f163144b5..000000000 --- a/numba/numba/typing/builtins.py +++ /dev/null @@ -1,971 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import itertools - -import numpy as np - -from numba import types, prange -from numba.parfor import internal_prange - -from numba.utils import PYVERSION, RANGE_ITER_OBJECTS, operator_map -from numba.typing.templates import (AttributeTemplate, ConcreteTemplate, - AbstractTemplate, infer_global, infer, - infer_getattr, signature, bound_function, - make_callable_template) - - -@infer_global(print) -class Print(AbstractTemplate): - - def generic(self, args, kws): - for a in args: - sig = self.context.resolve_function_type("print_item", (a,), {}) - if sig is None: - raise TypeError("Type %s is not printable." 
% a) - assert sig.return_type is types.none - return signature(types.none, *args) - -@infer -class PrintItem(AbstractTemplate): - key = "print_item" - - def generic(self, args, kws): - arg, = args - return signature(types.none, *args) - - -@infer_global(abs) -class Abs(ConcreteTemplate): - int_cases = [signature(ty, ty) for ty in types.signed_domain] - real_cases = [signature(ty, ty) for ty in types.real_domain] - complex_cases = [signature(ty.underlying_float, ty) - for ty in types.complex_domain] - cases = int_cases + real_cases + complex_cases - - -@infer_global(slice) -class Slice(ConcreteTemplate): - key = slice - cases = [ - signature(types.slice2_type), - signature(types.slice2_type, types.none, types.none), - signature(types.slice2_type, types.none, types.intp), - signature(types.slice2_type, types.intp, types.none), - signature(types.slice2_type, types.intp, types.intp), - signature(types.slice3_type, types.intp, types.intp, types.intp), - signature(types.slice3_type, types.none, types.intp, types.intp), - signature(types.slice3_type, types.intp, types.none, types.intp), - signature(types.slice3_type, types.none, types.none, types.intp), - ] - - -class Range(ConcreteTemplate): - cases = [ - signature(types.range_state32_type, types.int32), - signature(types.range_state32_type, types.int32, types.int32), - signature(types.range_state32_type, types.int32, types.int32, - types.int32), - signature(types.range_state64_type, types.int64), - signature(types.range_state64_type, types.int64, types.int64), - signature(types.range_state64_type, types.int64, types.int64, - types.int64), - signature(types.unsigned_range_state64_type, types.uint64), - signature(types.unsigned_range_state64_type, types.uint64, types.uint64), - signature(types.unsigned_range_state64_type, types.uint64, types.uint64, - types.uint64), - ] - -for func in RANGE_ITER_OBJECTS: - infer_global(func, typing_key=range)(Range) - -infer_global(prange, typing_key=prange)(Range) 
-infer_global(internal_prange, typing_key=internal_prange)(Range) - -@infer -class GetIter(AbstractTemplate): - key = "getiter" - - def generic(self, args, kws): - assert not kws - [obj] = args - if isinstance(obj, types.IterableType): - return signature(obj.iterator_type, obj) - - -@infer -class IterNext(AbstractTemplate): - key = "iternext" - - def generic(self, args, kws): - assert not kws - [it] = args - if isinstance(it, types.IteratorType): - return signature(types.Pair(it.yield_type, types.boolean), it) - - -@infer -class PairFirst(AbstractTemplate): - """ - Given a heterogeneous pair, return the first element. - """ - key = "pair_first" - - def generic(self, args, kws): - assert not kws - [pair] = args - if isinstance(pair, types.Pair): - return signature(pair.first_type, pair) - - -@infer -class PairSecond(AbstractTemplate): - """ - Given a heterogeneous pair, return the second element. - """ - key = "pair_second" - - def generic(self, args, kws): - assert not kws - [pair] = args - if isinstance(pair, types.Pair): - return signature(pair.second_type, pair) - - -def choose_result_bitwidth(*inputs): - return max(types.intp.bitwidth, *(tp.bitwidth for tp in inputs)) - -def choose_result_int(*inputs): - """ - Choose the integer result type for an operation on integer inputs, - according to the integer typing NBEP. - """ - bitwidth = choose_result_bitwidth(*inputs) - signed = any(tp.signed for tp in inputs) - return types.Integer.from_bitwidth(bitwidth, signed) - - -# The "machine" integer types to take into consideration for operator typing -# (according to the integer typing NBEP) -machine_ints = ( - sorted(set((types.intp, types.int64))) + - sorted(set((types.uintp, types.uint64))) - ) - -# Explicit integer rules for binary operators; smaller ints will be -# automatically upcast. 
-integer_binop_cases = tuple( - signature(choose_result_int(op1, op2), op1, op2) - for op1, op2 in itertools.product(machine_ints, machine_ints) - ) - - -class BinOp(ConcreteTemplate): - cases = list(integer_binop_cases) - cases += [signature(op, op, op) for op in sorted(types.real_domain)] - cases += [signature(op, op, op) for op in sorted(types.complex_domain)] - - -@infer -class BinOpAdd(BinOp): - key = "+" - - -@infer -class BinOpSub(BinOp): - key = "-" - - -@infer -class BinOpMul(BinOp): - key = "*" - - -@infer -class BinOpDiv(BinOp): - key = "/?" - - -@infer -class BinOpMod(ConcreteTemplate): - key = "%" - cases = list(integer_binop_cases) - cases += [signature(op, op, op) for op in sorted(types.real_domain)] - - -@infer -class BinOpTrueDiv(ConcreteTemplate): - key = "/" - cases = [signature(types.float64, op1, op2) - for op1, op2 in itertools.product(machine_ints, machine_ints)] - cases += [signature(op, op, op) for op in sorted(types.real_domain)] - cases += [signature(op, op, op) for op in sorted(types.complex_domain)] - - -@infer -class BinOpFloorDiv(ConcreteTemplate): - key = "//" - cases = list(integer_binop_cases) - cases += [signature(op, op, op) for op in sorted(types.real_domain)] - - -@infer_global(divmod) -class DivMod(ConcreteTemplate): - _tys = machine_ints + sorted(types.real_domain) - cases = [signature(types.UniTuple(ty, 2), ty, ty) for ty in _tys] - - -@infer -class BinOpPower(ConcreteTemplate): - key = "**" - cases = list(integer_binop_cases) - # Ensure that float32 ** int doesn't go through DP computations - cases += [signature(types.float32, types.float32, op) - for op in (types.int32, types.int64, types.uint64)] - cases += [signature(types.float64, types.float64, op) - for op in (types.int32, types.int64, types.uint64)] - cases += [signature(op, op, op) - for op in sorted(types.real_domain)] - cases += [signature(op, op, op) - for op in sorted(types.complex_domain)] - - -@infer_global(pow) -class PowerBuiltin(BinOpPower): - key = pow - # 
TODO add 3 operand version - - -class BitwiseShiftOperation(ConcreteTemplate): - # For bitshifts, only the first operand's signedness matters - # to choose the operation's signedness (the second operand - # should always be positive but will generally be considered - # signed anyway, since it's often a constant integer). - # (also, see issue #1995 for right-shifts) - - # The RHS type is fixed to 64-bit signed/unsigned ints. - # The implementation will always cast the operands to the width of the - # result type, which is the widest between the LHS type and (u)intp. - cases = [signature(max(op, types.intp), op, op2) - for op in sorted(types.signed_domain) - for op2 in [types.uint64, types.int64]] - cases += [signature(max(op, types.uintp), op, op2) - for op in sorted(types.unsigned_domain) - for op2 in [types.uint64, types.int64]] - unsafe_casting = False - - -@infer -class BitwiseLeftShift(BitwiseShiftOperation): - key = "<<" - - -@infer -class BitwiseRightShift(BitwiseShiftOperation): - key = ">>" - - -class BitwiseLogicOperation(BinOp): - cases = [signature(types.boolean, types.boolean, types.boolean)] - cases += list(integer_binop_cases) - unsafe_casting = False - - -@infer -class BitwiseAnd(BitwiseLogicOperation): - key = "&" - - -@infer -class BitwiseOr(BitwiseLogicOperation): - key = "|" - - -@infer -class BitwiseXor(BitwiseLogicOperation): - key = "^" - - -# Bitwise invert and negate are special: we must not upcast the operand -# for unsigned numbers, as that would change the result. -# (i.e. ~np.int8(0) == 255 but ~np.int32(0) == 4294967295). - -@infer -class BitwiseInvert(ConcreteTemplate): - key = "~" - - # Note Numba follows the Numpy semantics of returning a bool, - # while Python returns an int. This makes it consistent with - # np.invert() and makes array expressions correct. 
- cases = [signature(types.boolean, types.boolean)] - cases += [signature(choose_result_int(op), op) for op in sorted(types.unsigned_domain)] - cases += [signature(choose_result_int(op), op) for op in sorted(types.signed_domain)] - - unsafe_casting = False - -class UnaryOp(ConcreteTemplate): - cases = [signature(choose_result_int(op), op) for op in sorted(types.unsigned_domain)] - cases += [signature(choose_result_int(op), op) for op in sorted(types.signed_domain)] - cases += [signature(op, op) for op in sorted(types.real_domain)] - cases += [signature(op, op) for op in sorted(types.complex_domain)] - cases += [signature(types.intp, types.boolean)] - - -@infer -class UnaryNegate(UnaryOp): - key = "-" - - -@infer -class UnaryPositive(UnaryOp): - key = "+" - - -@infer -class UnaryNot(ConcreteTemplate): - key = "not" - cases = [signature(types.boolean, types.boolean)] - cases += [signature(types.boolean, op) for op in sorted(types.signed_domain)] - cases += [signature(types.boolean, op) for op in sorted(types.unsigned_domain)] - cases += [signature(types.boolean, op) for op in sorted(types.real_domain)] - cases += [signature(types.boolean, op) for op in sorted(types.complex_domain)] - - -class OrderedCmpOp(ConcreteTemplate): - cases = [signature(types.boolean, types.boolean, types.boolean)] - cases += [signature(types.boolean, op, op) for op in sorted(types.signed_domain)] - cases += [signature(types.boolean, op, op) for op in sorted(types.unsigned_domain)] - cases += [signature(types.boolean, op, op) for op in sorted(types.real_domain)] - - -class UnorderedCmpOp(ConcreteTemplate): - cases = OrderedCmpOp.cases + [ - signature(types.boolean, op, op) for op in sorted(types.complex_domain)] - - -@infer -class CmpOpLt(OrderedCmpOp): - key = '<' - -@infer -class CmpOpLe(OrderedCmpOp): - key = '<=' - -@infer -class CmpOpGt(OrderedCmpOp): - key = '>' - -@infer -class CmpOpGe(OrderedCmpOp): - key = '>=' - -@infer -class CmpOpEq(UnorderedCmpOp): - key = '==' - -@infer -class 
ConstOpEq(AbstractTemplate): - key = '==' - def generic(self, args, kws): - assert not kws - (arg1, arg2) = args - if isinstance(arg1, types.Const) and isinstance(arg2, types.Const): - return signature(types.boolean, arg1, arg2) - -@infer -class ConstOpNotEq(ConstOpEq): - key = '!=' - -@infer -class CmpOpNe(UnorderedCmpOp): - key = '!=' - - -class TupleCompare(AbstractTemplate): - def generic(self, args, kws): - [lhs, rhs] = args - if isinstance(lhs, types.BaseTuple) and isinstance(rhs, types.BaseTuple): - for u, v in zip(lhs, rhs): - # Check element-wise comparability - res = self.context.resolve_function_type(self.key, (u, v), {}) - if res is None: - break - else: - return signature(types.boolean, lhs, rhs) - -@infer -class TupleEq(TupleCompare): - key = '==' - -@infer -class TupleNe(TupleCompare): - key = '!=' - -@infer -class TupleGe(TupleCompare): - key = '>=' - -@infer -class TupleGt(TupleCompare): - key = '>' - -@infer -class TupleLe(TupleCompare): - key = '<=' - -@infer -class TupleLt(TupleCompare): - key = '<' - -@infer -class TupleAdd(AbstractTemplate): - key = '+' - - def generic(self, args, kws): - if len(args) == 2: - a, b = args - if (isinstance(a, types.BaseTuple) and isinstance(b, types.BaseTuple) - and not isinstance(a, types.BaseNamedTuple) - and not isinstance(b, types.BaseNamedTuple)): - res = types.BaseTuple.from_types(tuple(a) + tuple(b)) - return signature(res, a, b) - - -# Register default implementations of binary inplace operators for -# immutable types. 
- -class InplaceImmutable(AbstractTemplate): - def generic(self, args, kws): - lhs, rhs = args - if not lhs.mutable: - return self.context.resolve_function_type(self.key[:-1], args, kws) - # Inplace ops on mutable arguments must be typed explicitly - -for _binop, _inp, op in operator_map: - if _inp: - template = type('InplaceImmutable_%s' % _binop, - (InplaceImmutable,), - dict(key=op + '=')) - infer(template) - - -class CmpOpIdentity(AbstractTemplate): - def generic(self, args, kws): - [lhs, rhs] = args - return signature(types.boolean, lhs, rhs) - - -@infer -class CmpOpIs(CmpOpIdentity): - key = 'is' - - -@infer -class CmpOpIsNot(CmpOpIdentity): - key = 'is not' - - -def normalize_1d_index(index): - """ - Normalize the *index* type (an integer or slice) for indexing a 1D - sequence. - """ - if isinstance(index, types.SliceType): - return index - - elif isinstance(index, types.Integer): - return types.intp if index.signed else types.uintp - - -@infer -class GetItemCPointer(AbstractTemplate): - key = "getitem" - - def generic(self, args, kws): - assert not kws - ptr, idx = args - if isinstance(ptr, types.CPointer) and isinstance(idx, types.Integer): - return signature(ptr.dtype, ptr, normalize_1d_index(idx)) - - -@infer -class SetItemCPointer(AbstractTemplate): - key = "setitem" - - def generic(self, args, kws): - assert not kws - ptr, idx, val = args - if isinstance(ptr, types.CPointer) and isinstance(idx, types.Integer): - return signature(types.none, ptr, normalize_1d_index(idx), ptr.dtype) - - -@infer_global(len) -class Len(AbstractTemplate): - key = len - - def generic(self, args, kws): - assert not kws - (val,) = args - if isinstance(val, (types.Buffer, types.BaseTuple)): - return signature(types.intp, val) - elif isinstance(val, (types.RangeType)): - return signature(val.dtype, val) - - -@infer -class TupleBool(AbstractTemplate): - key = "is_true" - - def generic(self, args, kws): - assert not kws - (val,) = args - if isinstance(val, (types.BaseTuple)): - 
return signature(types.boolean, val) - - -@infer -class StaticGetItemTuple(AbstractTemplate): - key = "static_getitem" - - def generic(self, args, kws): - tup, idx = args - if not isinstance(tup, types.BaseTuple): - return - if isinstance(idx, int): - return tup.types[idx] - elif isinstance(idx, slice): - return types.BaseTuple.from_types(tup.types[idx]) - - -# Generic implementation for "not in" - -@infer -class GenericNotIn(AbstractTemplate): - key = "not in" - - def generic(self, args, kws): - return self.context.resolve_function_type("in", args, kws) - - -#------------------------------------------------------------------------------- - -@infer_getattr -class MemoryViewAttribute(AttributeTemplate): - key = types.MemoryView - - if PYVERSION >= (3,): - def resolve_contiguous(self, buf): - return types.boolean - - def resolve_c_contiguous(self, buf): - return types.boolean - - def resolve_f_contiguous(self, buf): - return types.boolean - - def resolve_itemsize(self, buf): - return types.intp - - def resolve_nbytes(self, buf): - return types.intp - - def resolve_readonly(self, buf): - return types.boolean - - def resolve_shape(self, buf): - return types.UniTuple(types.intp, buf.ndim) - - def resolve_strides(self, buf): - return types.UniTuple(types.intp, buf.ndim) - - def resolve_ndim(self, buf): - return types.intp - - -#------------------------------------------------------------------------------- - - -@infer_getattr -class BooleanAttribute(AttributeTemplate): - key = types.Boolean - - def resolve___class__(self, ty): - return types.NumberClass(ty) - - @bound_function("number.item") - def resolve_item(self, ty, args, kws): - assert not kws - if not args: - return signature(ty) - - -@infer_getattr -class NumberAttribute(AttributeTemplate): - key = types.Number - - def resolve___class__(self, ty): - return types.NumberClass(ty) - - def resolve_real(self, ty): - return getattr(ty, "underlying_float", ty) - - def resolve_imag(self, ty): - return getattr(ty, 
"underlying_float", ty) - - @bound_function("complex.conjugate") - def resolve_conjugate(self, ty, args, kws): - assert not args - assert not kws - return signature(ty) - - @bound_function("number.item") - def resolve_item(self, ty, args, kws): - assert not kws - if not args: - return signature(ty) - - -@infer_getattr -class SliceAttribute(AttributeTemplate): - key = types.SliceType - - def resolve_start(self, ty): - return types.intp - - def resolve_stop(self, ty): - return types.intp - - def resolve_step(self, ty): - return types.intp - - -#------------------------------------------------------------------------------- - - -@infer_getattr -class NumberClassAttribute(AttributeTemplate): - key = types.NumberClass - - def resolve___call__(self, classty): - """ - Resolve a number class's constructor (e.g. calling int(...)) - """ - ty = classty.instance_type - - def typer(val): - if isinstance(val, (types.BaseTuple, types.Sequence)): - # Array constructor, e.g. np.int32([1, 2]) - sig = self.context.resolve_function_type( - np.array, (val,), {'dtype': types.DType(ty)}) - return sig.return_type - else: - # Scalar constructor, e.g. np.int32(42) - return ty - - return types.Function(make_callable_template(key=ty, typer=typer)) - - -def register_number_classes(register_global): - nb_types = set(types.number_domain) - nb_types.add(types.bool_) - - for ty in nb_types: - register_global(ty, types.NumberClass(ty)) - - -register_number_classes(infer_global) - - -#------------------------------------------------------------------------------ - - -class MinMaxBase(AbstractTemplate): - - def _unify_minmax(self, tys): - for ty in tys: - if not isinstance(ty, types.Number): - return - return self.context.unify_types(*tys) - - def generic(self, args, kws): - """ - Resolve a min() or max() call. 
- """ - assert not kws - - if not args: - return - if len(args) == 1: - # max(arg) only supported if arg is an iterable - if isinstance(args[0], types.BaseTuple): - tys = list(args[0]) - if not tys: - raise TypeError("%s() argument is an empty tuple" - % (self.key.__name__,)) - else: - return - else: - # max(*args) - tys = args - retty = self._unify_minmax(tys) - if retty is not None: - return signature(retty, *args) - - -@infer_global(max) -class Max(MinMaxBase): - pass - - -@infer_global(min) -class Min(MinMaxBase): - pass - - -@infer_global(round) -class Round(ConcreteTemplate): - if PYVERSION < (3, 0): - cases = [ - signature(types.float32, types.float32), - signature(types.float64, types.float64), - ] - else: - cases = [ - signature(types.intp, types.float32), - signature(types.int64, types.float64), - ] - cases += [ - signature(types.float32, types.float32, types.intp), - signature(types.float64, types.float64, types.intp), - ] - - -@infer_global(hash) -class Hash(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - arg, = args - if isinstance(arg, types.Hashable): - return signature(types.intp, *args) - - -#------------------------------------------------------------------------------ - - -@infer_global(bool) -class Bool(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - [arg] = args - if isinstance(arg, (types.Boolean, types.Number)): - return signature(types.boolean, arg) - # XXX typing for bool cannot be polymorphic because of the - # types.Function thing, so we redirect to the "is_true" - # intrinsic. 
- return self.context.resolve_function_type("is_true", args, kws) - - -@infer_global(int) -class Int(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - - [arg] = args - - if isinstance(arg, types.Integer): - return signature(arg, arg) - if isinstance(arg, (types.Float, types.Boolean)): - return signature(types.intp, arg) - - -@infer_global(float) -class Float(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - - [arg] = args - - if arg not in types.number_domain: - raise TypeError("float() only support for numbers") - - if arg in types.complex_domain: - raise TypeError("float() does not support complex") - - if arg in types.integer_domain: - return signature(types.float64, arg) - - elif arg in types.real_domain: - return signature(arg, arg) - - -@infer_global(complex) -class Complex(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - - if len(args) == 1: - [arg] = args - if arg not in types.number_domain: - raise TypeError("complex() only support for numbers") - if arg == types.float32: - return signature(types.complex64, arg) - else: - return signature(types.complex128, arg) - - elif len(args) == 2: - [real, imag] = args - if (real not in types.number_domain or - imag not in types.number_domain): - raise TypeError("complex() only support for numbers") - if real == imag == types.float32: - return signature(types.complex64, real, imag) - else: - return signature(types.complex128, real, imag) - - -#------------------------------------------------------------------------------ - -@infer_global(enumerate) -class Enumerate(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - it = args[0] - if len(args) > 1 and not args[1] in types.integer_domain: - raise TypeError("Only integers supported as start value in " - "enumerate") - elif len(args) > 2: - #let python raise its own error - enumerate(*args) - - if isinstance(it, types.IterableType): - enumerate_type = types.EnumerateType(it) - return 
signature(enumerate_type, *args) - - -@infer_global(zip) -class Zip(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - if all(isinstance(it, types.IterableType) for it in args): - zip_type = types.ZipType(args) - return signature(zip_type, *args) - - -@infer_global(iter) -class Iter(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - if len(args) == 1: - it = args[0] - if isinstance(it, types.IterableType): - return signature(it.iterator_type, *args) - - -@infer_global(next) -class Next(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - if len(args) == 1: - it = args[0] - if isinstance(it, types.IteratorType): - return signature(it.yield_type, *args) - - -#------------------------------------------------------------------------------ - -@infer_global(type) -class TypeBuiltin(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - if len(args) == 1: - # One-argument type() -> return the __class__ - classty = self.context.resolve_getattr(args[0], "__class__") - if classty is not None: - return signature(classty, *args) - - -#------------------------------------------------------------------------------ - -@infer_getattr -class OptionalAttribute(AttributeTemplate): - key = types.Optional - - def generic_resolve(self, optional, attr): - return self.context.resolve_getattr(optional.type, attr) - -#------------------------------------------------------------------------------ - -@infer_getattr -class DeferredAttribute(AttributeTemplate): - key = types.DeferredType - - def generic_resolve(self, deferred, attr): - return self.context.resolve_getattr(deferred.get(), attr) - -#------------------------------------------------------------------------------ - -from numba.targets.builtins import get_type_min_value, get_type_max_value - -@infer_global(get_type_min_value) -@infer_global(get_type_max_value) -class MinValInfer(AbstractTemplate): - def generic(self, args, kws): - assert not kws - assert 
len(args) == 1 - assert isinstance(args[0], (types.DType, types.NumberClass)) - return signature(args[0].dtype, *args) - -#------------------------------------------------------------------------------ - -from numba.extending import (typeof_impl, type_callable, models, register_model, - make_attribute_wrapper) - -class IndexValue(object): - """ - Index and value - """ - def __init__(self, ind, val): - self.index = ind - self.value = val - - def __repr__(self): - return 'IndexValue(%f, %f)' % (self.index, self.value) - -class IndexValueType(types.Type): - def __init__(self, val_typ): - self.val_typ = val_typ - super(IndexValueType, self).__init__( - name='IndexValueType({})'.format(val_typ)) - -@typeof_impl.register(IndexValue) -def typeof_index(val, c): - val_typ = typeof_impl(val.value, c) - return IndexValueType(val_typ) - -@type_callable(IndexValue) -def type_index_value(context): - def typer(ind, mval): - if ind == types.intp or ind == types.uintp: - return IndexValueType(mval) - return typer - -@register_model(IndexValueType) -class IndexValueModel(models.StructModel): - def __init__(self, dmm, fe_type): - members = [ - ('index', types.intp), - ('value', fe_type.val_typ), - ] - models.StructModel.__init__(self, dmm, fe_type, members) - -make_attribute_wrapper(IndexValueType, 'index', 'index') -make_attribute_wrapper(IndexValueType, 'value', 'value') diff --git a/numba/numba/typing/cffi_utils.py b/numba/numba/typing/cffi_utils.py deleted file mode 100644 index ee351a4f4..000000000 --- a/numba/numba/typing/cffi_utils.py +++ /dev/null @@ -1,181 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Support for CFFI. Allows checking whether objects are CFFI functions and -obtaining the pointer and numba signature. -""" -from __future__ import print_function, division, absolute_import - -from types import BuiltinFunctionType -import ctypes - -from numba import types -from numba.errors import TypingError -from . 
import templates - -try: - import cffi - ffi = cffi.FFI() -except ImportError: - ffi = None - -SUPPORTED = ffi is not None -_ool_func_types = {} -_ool_func_ptr = {} -_ffi_instances = set() - - -def is_ffi_instance(obj): - # Compiled FFI modules have a member, ffi, which is an instance of - # CompiledFFI, which behaves similarly to an instance of cffi.FFI. In - # order to simplify handling a CompiledFFI object, we treat them as - # if they're cffi.FFI instances for typing and lowering purposes. - try: - return obj in _ffi_instances or isinstance(obj, cffi.FFI) - except TypeError: # Unhashable type possible - return False - -def is_cffi_func(obj): - """Check whether the obj is a CFFI function""" - try: - return ffi.typeof(obj).kind == 'function' - except TypeError: - try: - return obj in _ool_func_types - except: - return False - -def get_pointer(cffi_func): - """ - Get a pointer to the underlying function for a CFFI function as an - integer. - """ - if cffi_func in _ool_func_ptr: - return _ool_func_ptr[cffi_func] - return int(ffi.cast("uintptr_t", cffi_func)) - - -_cached_type_map = None - -def _type_map(): - """ - Lazily compute type map, as calling ffi.typeof() involves costly - parsing of C code... 
- """ - global _cached_type_map - if _cached_type_map is None: - _cached_type_map = { - ffi.typeof('bool') : types.boolean, - ffi.typeof('char') : types.char, - ffi.typeof('short') : types.short, - ffi.typeof('int') : types.intc, - ffi.typeof('long') : types.long_, - ffi.typeof('long long') : types.longlong, - ffi.typeof('unsigned char') : types.uchar, - ffi.typeof('unsigned short') : types.ushort, - ffi.typeof('unsigned int') : types.uintc, - ffi.typeof('unsigned long') : types.ulong, - ffi.typeof('unsigned long long') : types.ulonglong, - ffi.typeof('int8_t') : types.char, - ffi.typeof('uint8_t') : types.uchar, - ffi.typeof('int16_t') : types.short, - ffi.typeof('uint16_t') : types.ushort, - ffi.typeof('int32_t') : types.intc, - ffi.typeof('uint32_t') : types.uintc, - ffi.typeof('int64_t') : types.longlong, - ffi.typeof('uint64_t') : types.ulonglong, - ffi.typeof('float') : types.float_, - ffi.typeof('double') : types.double, - ffi.typeof('ssize_t') : types.intp, - ffi.typeof('size_t') : types.uintp, - ffi.typeof('void') : types.void, - } - return _cached_type_map - - -def map_type(cffi_type): - """ - Map CFFI type to numba type. - """ - kind = getattr(cffi_type, 'kind', '') - if kind == 'union': - raise TypeError("No support for CFFI union") - elif kind == 'function': - if cffi_type.ellipsis: - raise TypeError("vararg function is not supported") - restype = map_type(cffi_type.result) - argtypes = [map_type(arg) for arg in cffi_type.args] - return templates.signature(restype, *argtypes) - elif kind == 'pointer': - pointee = cffi_type.item - if pointee.kind == 'void': - return types.voidptr - else: - return types.CPointer(map_type(pointee)) - elif kind == 'array': - return map_type(cffi_type.item) - else: - result = _type_map().get(cffi_type) - if result is None: - raise TypeError(cffi_type) - return result - - -def make_function_type(cffi_func): - """ - Return a Numba type for the given CFFI function pointer. 
- """ - cffi_type = _ool_func_types.get(cffi_func) or ffi.typeof(cffi_func) - if getattr(cffi_type, 'kind', '') == 'struct': - raise TypeError('No support for CFFI struct values') - sig = map_type(cffi_type) - return types.ExternalFunctionPointer(sig, get_pointer=get_pointer) - - -registry = templates.Registry() - -@registry.register -class FFI_from_buffer(templates.AbstractTemplate): - key = 'ffi.from_buffer' - - def generic(self, args, kws): - if kws or len(args) != 1: - return - [ary] = args - if not isinstance(ary, types.Buffer): - raise TypingError("from_buffer() expected a buffer object, got %s" - % (ary,)) - if ary.layout not in ('C', 'F'): - raise TypingError("from_buffer() unsupported on non-contiguous buffers (got %s)" - % (ary,)) - if ary.layout != 'C' and ary.ndim > 1: - raise TypingError("from_buffer() only supports multidimensional arrays with C layout (got %s)" - % (ary,)) - ptr = types.CPointer(ary.dtype) - return templates.signature(ptr, ary) - -@registry.register_attr -class FFIAttribute(templates.AttributeTemplate): - key = types.ffi - - def resolve_from_buffer(self, ffi): - return types.BoundFunction(FFI_from_buffer, types.ffi) - - -def register_module(mod): - """ - Add typing for all functions in an out-of-line CFFI module to the typemap - """ - for f in dir(mod.lib): - f = getattr(mod.lib, f) - if isinstance(f, BuiltinFunctionType): - _ool_func_types[f] = mod.ffi.typeof(f) - addr = mod.ffi.addressof(mod.lib, f.__name__) - _ool_func_ptr[f] = int(mod.ffi.cast("uintptr_t", addr)) - _ffi_instances.add(mod.ffi) - -def register_type(cffi_type, numba_type): - """ - Add typing for a given CFFI type to the typemap - """ - tm = _type_map() - tm[cffi_type] = numba_type diff --git a/numba/numba/typing/cmathdecl.py b/numba/numba/typing/cmathdecl.py deleted file mode 100644 index 7241a2454..000000000 --- a/numba/numba/typing/cmathdecl.py +++ /dev/null @@ -1,71 +0,0 @@ -import cmath - -from numba import types, utils -from numba.typing.templates import 
(AbstractTemplate, ConcreteTemplate, - signature, Registry, bound_function) - -registry = Registry() -infer_global = registry.register_global - -# TODO: support non-complex arguments (floats and ints) - -@infer_global(cmath.acos) -@infer_global(cmath.acosh) -@infer_global(cmath.asin) -@infer_global(cmath.asinh) -@infer_global(cmath.atan) -@infer_global(cmath.atanh) -@infer_global(cmath.cos) -@infer_global(cmath.cosh) -@infer_global(cmath.exp) -@infer_global(cmath.log10) -@infer_global(cmath.sin) -@infer_global(cmath.sinh) -@infer_global(cmath.sqrt) -@infer_global(cmath.tan) -@infer_global(cmath.tanh) -class CMath_unary(ConcreteTemplate): - cases = [signature(tp, tp) for tp in types.complex_domain] - - -@infer_global(cmath.isinf) -@infer_global(cmath.isnan) -class CMath_predicate(ConcreteTemplate): - cases = [signature(types.boolean, tp) for tp in types.complex_domain] - - -if utils.PYVERSION >= (3, 2): - @infer_global(cmath.isfinite) - class CMath_isfinite(CMath_predicate): - pass - - -@infer_global(cmath.log) -class Cmath_log(ConcreteTemplate): - # unary cmath.log() - cases = [signature(tp, tp) for tp in types.complex_domain] - # binary cmath.log() - cases += [signature(tp, tp, tp) for tp in types.complex_domain] - - -@infer_global(cmath.phase) -class Cmath_phase(ConcreteTemplate): - cases = [signature(tp, types.complex128) for tp in [types.float64]] - cases += [signature(types.float32, types.complex64)] - - -@infer_global(cmath.polar) -class Cmath_polar(AbstractTemplate): - def generic(self, args, kws): - assert not kws - [tp] = args - if tp in types.complex_domain: - float_type = tp.underlying_float - return signature(types.UniTuple(float_type, 2), tp) - - -@infer_global(cmath.rect) -class Cmath_rect(ConcreteTemplate): - cases = [signature(types.complex128, tp, tp) - for tp in [types.float64]] - cases += [signature(types.complex64, types.float32, types.float32)] diff --git a/numba/numba/typing/collections.py b/numba/numba/typing/collections.py deleted file mode 
100644 index af2d602c3..000000000 --- a/numba/numba/typing/collections.py +++ /dev/null @@ -1,125 +0,0 @@ -from __future__ import print_function, division, absolute_import - -from .. import types, utils, errors -from .templates import (AttributeTemplate, ConcreteTemplate, AbstractTemplate, - infer_global, infer, infer_getattr, - signature, bound_function, make_callable_template) -from .builtins import normalize_1d_index - - -@infer -class InContainer(AbstractTemplate): - key = "in" - - def generic(self, args, kws): - item, cont = args - if isinstance(cont, types.Container): - return signature(types.boolean, cont.dtype, cont) - -@infer_global(len) -class ContainerLen(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - (val,) = args - if isinstance(val, (types.Container)): - return signature(types.intp, val) - - -@infer -class SequenceBool(AbstractTemplate): - key = "is_true" - - def generic(self, args, kws): - assert not kws - (val,) = args - if isinstance(val, (types.Sequence)): - return signature(types.boolean, val) - -@infer -class GetItemSequence(AbstractTemplate): - key = "getitem" - - def generic(self, args, kws): - seq, idx = args - if isinstance(seq, types.Sequence): - idx = normalize_1d_index(idx) - if isinstance(idx, types.SliceType): - # Slicing a tuple only supported with static_getitem - if not isinstance(seq, types.BaseTuple): - return signature(seq, seq, idx) - elif isinstance(idx, types.Integer): - return signature(seq.dtype, seq, idx) - -@infer -class SetItemSequence(AbstractTemplate): - key = "setitem" - - def generic(self, args, kws): - seq, idx, value = args - if isinstance(seq, types.MutableSequence): - idx = normalize_1d_index(idx) - if isinstance(idx, types.SliceType): - return signature(types.none, seq, idx, seq) - elif isinstance(idx, types.Integer): - if not self.context.can_convert(value, seq.dtype): - msg = "invalid setitem with value of {} to element of {}" - raise errors.TypingError(msg.format(value, seq.dtype)) - 
return signature(types.none, seq, idx, seq.dtype) - - -@infer -class DelItemSequence(AbstractTemplate): - key = "delitem" - - def generic(self, args, kws): - seq, idx = args - if isinstance(seq, types.MutableSequence): - idx = normalize_1d_index(idx) - return signature(types.none, seq, idx) - - -# -------------------------------------------------------------------------- -# named tuples - -@infer_getattr -class NamedTupleAttribute(AttributeTemplate): - key = types.BaseNamedTuple - - def resolve___class__(self, tup): - return types.NamedTupleClass(tup.instance_class) - - def generic_resolve(self, tup, attr): - # Resolution of other attributes - try: - index = tup.fields.index(attr) - except ValueError: - return - return tup[index] - - -@infer_getattr -class NamedTupleClassAttribute(AttributeTemplate): - key = types.NamedTupleClass - - def resolve___call__(self, classty): - """ - Resolve the named tuple constructor, aka the class's __call__ method. - """ - instance_class = classty.instance_class - pysig = utils.pysignature(instance_class) - - def typer(*args, **kws): - # Fold keyword args - try: - bound = pysig.bind(*args, **kws) - except TypeError as e: - msg = "In '%s': %s" % (instance_class, e) - e.args = (msg,) - raise - assert not bound.kwargs - return types.BaseTuple.from_types(bound.args, instance_class) - - # Override the typer's pysig to match the namedtuple constructor's - typer.pysig = pysig - return types.Function(make_callable_template(self.key, typer)) diff --git a/numba/numba/typing/context.py b/numba/numba/typing/context.py deleted file mode 100644 index 418f4063f..000000000 --- a/numba/numba/typing/context.py +++ /dev/null @@ -1,625 +0,0 @@ -from __future__ import absolute_import, print_function - -import contextlib -import threading -import types as pytypes -import weakref -from collections import Sequence, defaultdict - -import numba -from numba import errors, types, utils -from numba.typeconv import Conversion, rules - -from . 
import templates -from .typeof import Purpose, typeof - - -class Rating(object): - __slots__ = "promote", "safe_convert", "unsafe_convert" - - def __init__(self): - self.promote = 0 - self.safe_convert = 0 - self.unsafe_convert = 0 - - def astuple(self): - """Returns a tuple suitable for comparing with the worse situation - start first. - """ - return (self.unsafe_convert, self.safe_convert, self.promote) - - def __add__(self, other): - if type(self) is not type(other): - return NotImplemented - rsum = Rating() - rsum.promote = self.promote + other.promote - rsum.safe_convert = self.safe_convert + other.safe_convert - rsum.unsafe_convert = self.unsafe_convert + other.unsafe_convert - return rsum - - -class CallStack(Sequence): - """ - A compile-time call stack - """ - - def __init__(self): - self._stack = [] - self._lock = threading.RLock() - - def __getitem__(self, index): - """ - Returns item in the stack where index=0 is the top and index=1 is - the second item from the top. - """ - return self._stack[len(self) - index - 1] - - def __len__(self): - return len(self._stack) - - @contextlib.contextmanager - def register(self, typeinfer, func_id, args): - # guard compiling the same function with the same signature - if self.match(func_id.func, args): - msg = "compiler re-entrant to the same function signature" - raise RuntimeError(msg) - self._lock.acquire() - self._stack.append(CallFrame(typeinfer, func_id, args)) - try: - yield - finally: - self._stack.pop() - self._lock.release() - - def finditer(self, py_func): - """ - Yields frame that matches the function object starting from the top - of stack. - """ - for frame in self: - if frame.func_id.func is py_func: - yield frame - - def findfirst(self, py_func): - """ - Returns the first result from `.finditer(py_func)`; or None if no match. 
- """ - try: - return next(self.finditer(py_func)) - except StopIteration: - return - - def match(self, py_func, args): - """ - Returns first function that matches *py_func* and the arguments types in - *args*; or, None if no match. - """ - for frame in self.finditer(py_func): - if frame.args == args: - return frame - - -class CallFrame(object): - """ - A compile-time call frame - """ - - def __init__(self, typeinfer, func_id, args): - self.typeinfer = typeinfer - self.func_id = func_id - self.args = args - - def __repr__(self): - return "CallFrame({}, {})".format(self.func_id, self.args) - - -class BaseContext(object): - """A typing context for storing function typing constrain template. - """ - - def __init__(self): - # A list of installed registries - self._registries = {} - # Typing declarations extracted from the registries or other sources - self._functions = defaultdict(list) - self._attributes = defaultdict(list) - self._globals = utils.UniqueDict() - self.tm = rules.default_type_manager - self.callstack = CallStack() - - # Initialize - self.init() - - def init(self): - """ - Initialize the typing context. Can be overriden by subclasses. - """ - - def refresh(self): - """ - Refresh context with new declarations from known registries. - Useful for third-party extensions. - """ - self.load_additional_registries() - # Some extensions may have augmented the builtin registry - self._load_builtins() - - def explain_function_type(self, func): - """ - Returns a string description of the type of a function - """ - desc = [] - defns = [] - param = False - if isinstance(func, types.Callable): - sigs, param = func.get_call_signatures() - defns.extend(sigs) - - elif func in self._functions: - for tpl in self._functions[func]: - param = param or hasattr(tpl, "generic") - defns.extend(getattr(tpl, "cases", [])) - - else: - msg = "No type info available for {func!r} as a callable." 
- desc.append(msg.format(func=func)) - - if defns: - desc = ["Known signatures:"] - for sig in defns: - desc.append(" * {0}".format(sig)) - - if param: - desc.append(" * parameterized") - - return "\n".join(desc) - - def resolve_function_type(self, func, args, kws, literals=None): - """ - Resolve function type *func* for argument types *args* and *kws*. - A signature is returned. - """ - if func not in self._functions: - # It's not a known function type, perhaps it's a global? - functy = self._lookup_global(func) - if functy is not None: - func = functy - if func in self._functions: - # Note: Duplicating code with types.Function.get_call_type(). - # *defns* are CallTemplates. - defns = self._functions[func] - for defn in defns: - res = defn.apply(args, kws) - if res is not None: - return res - - if isinstance(func, types.Type): - # If it's a type, it may support a __call__ method - func_type = self.resolve_getattr(func, "__call__") - if func_type is not None: - # The function has a __call__ method, type its call. - return self.resolve_function_type(func_type, args, kws) - - if isinstance(func, types.Callable): - # XXX fold this into the __call__ attribute logic? - return func.get_call_type_with_literals(self, args, kws, literals) - - def _get_attribute_templates(self, typ): - """ - Get matching AttributeTemplates for the Numba type. - """ - if typ in self._attributes: - for attrinfo in self._attributes[typ]: - yield attrinfo - else: - for cls in type(typ).__mro__: - if cls in self._attributes: - for attrinfo in self._attributes[cls]: - yield attrinfo - - def resolve_getattr(self, typ, attr): - """ - Resolve getting the attribute *attr* (a string) on the Numba type. - The attribute's type is returned, or None if resolution failed. 
- """ - for attrinfo in self._get_attribute_templates(typ): - ret = attrinfo.resolve(typ, attr) - if ret is not None: - return ret - - if isinstance(typ, types.Module): - attrty = self.resolve_module_constants(typ, attr) - if attrty is not None: - return attrty - - def resolve_setattr(self, target, attr, value): - """ - Resolve setting the attribute *attr* (a string) on the *target* type - to the given *value* type. - A function signature is returned, or None if resolution failed. - """ - for attrinfo in self._get_attribute_templates(target): - expectedty = attrinfo.resolve(target, attr) - # NOTE: convertibility from *value* to *expectedty* is left to - # the caller. - if expectedty is not None: - return templates.signature(types.void, target, expectedty) - - def resolve_static_getitem(self, value, index): - assert not isinstance(index, types.Type), index - args = value, index - kws = {} - return self.resolve_function_type("static_getitem", args, kws) - - def resolve_static_setitem(self, target, index, value): - assert not isinstance(index, types.Type), index - args = target, index, value - kws = {} - return self.resolve_function_type("static_setitem", args, kws) - - def resolve_setitem(self, target, index, value): - assert isinstance(index, types.Type), index - args = target, index, value - kws = {} - return self.resolve_function_type("setitem", args, kws) - - def resolve_delitem(self, target, index): - args = target, index - kws = {} - return self.resolve_function_type("delitem", args, kws) - - def resolve_module_constants(self, typ, attr): - """ - Resolve module-level global constants. - Return None or the attribute type - """ - assert isinstance(typ, types.Module) - attrval = getattr(typ.pymod, attr) - try: - return self.resolve_value_type(attrval) - except ValueError: - pass - - def resolve_argument_type(self, val): - """ - Return the numba type of a Python value that is being used - as a function argument. 
Integer types will all be considered - int64, regardless of size. - - ValueError is raised for unsupported types. - """ - try: - return typeof(val, Purpose.argument) - except ValueError: - if numba.cuda.is_cuda_array(val): - return typeof(numba.cuda.as_cuda_array(val), Purpose.argument) - else: - raise - - def resolve_value_type(self, val): - """ - Return the numba type of a Python value that is being used - as a runtime constant. - ValueError is raised for unsupported types. - """ - try: - ty = typeof(val, Purpose.constant) - except ValueError as e: - # Make sure the exception doesn't hold a reference to the user - # value. - typeof_exc = utils.erase_traceback(e) - else: - return ty - - if isinstance(val, (types.ExternalFunction, types.NumbaFunction)): - return val - - # Try to look up target specific typing information - ty = self._get_global_type(val) - if ty is not None: - return ty - - raise typeof_exc - - def _get_global_type(self, gv): - ty = self._lookup_global(gv) - if ty is not None: - return ty - if isinstance(gv, pytypes.ModuleType): - return types.Module(gv) - - def _load_builtins(self): - # Initialize declarations - from . import builtins, arraydecl, npdatetime - from . import ctypes_utils, bufproto - - self.install_registry(templates.builtin_registry) - - def load_additional_registries(self): - """ - Load target-specific registries. Can be overriden by subclasses. - """ - - def install_registry(self, registry): - """ - Install a *registry* (a templates.Registry instance) of function, - attribute and global declarations. 
- """ - try: - loader = self._registries[registry] - except KeyError: - loader = templates.RegistryLoader(registry) - self._registries[registry] = loader - for ftcls in loader.new_registrations("functions"): - self.insert_function(ftcls(self)) - for ftcls in loader.new_registrations("attributes"): - self.insert_attributes(ftcls(self)) - for gv, gty in loader.new_registrations("globals"): - existing = self._lookup_global(gv) - if existing is None: - self.insert_global(gv, gty) - else: - # A type was already inserted, see if we can add to it - newty = existing.augment(gty) - if newty is None: - raise TypeError("cannot augment %s with %s" % (existing, gty)) - self._remove_global(gv) - self._insert_global(gv, newty) - - def _lookup_global(self, gv): - """ - Look up the registered type for global value *gv*. - """ - try: - gv = weakref.ref(gv) - except TypeError: - pass - try: - return self._globals.get(gv, None) - except TypeError: - # Unhashable type - return None - - def _insert_global(self, gv, gty): - """ - Register type *gty* for value *gv*. Only a weak reference - to *gv* is kept, if possible. - """ - - def on_disposal(wr, pop=self._globals.pop): - # pop() is pre-looked up to avoid a crash late at shutdown on 3.5 - # (https://bugs.python.org/issue25217) - pop(wr) - - try: - gv = weakref.ref(gv, on_disposal) - except TypeError: - pass - self._globals[gv] = gty - - def _remove_global(self, gv): - """ - Remove the registered type for global value *gv*. - """ - try: - gv = weakref.ref(gv) - except TypeError: - pass - del self._globals[gv] - - def insert_global(self, gv, gty): - self._insert_global(gv, gty) - - def insert_attributes(self, at): - key = at.key - self._attributes[key].append(at) - - def insert_function(self, ft): - key = ft.key - self._functions[key].append(ft) - - def insert_user_function(self, fn, ft): - """Insert a user function. 
- - Args - ---- - - fn: - object used as callee - - ft: - function template - """ - self._insert_global(fn, types.Function(ft)) - - def can_convert(self, fromty, toty): - """ - Check whether conversion is possible from *fromty* to *toty*. - If successful, return a numba.typeconv.Conversion instance; - otherwise None is returned. - """ - if fromty == toty: - return Conversion.exact - else: - # First check with the type manager (some rules are registered - # at startup there, see numba.typeconv.rules) - conv = self.tm.check_compatible(fromty, toty) - if conv is not None: - return conv - - # Fall back on type-specific rules - forward = fromty.can_convert_to(self, toty) - backward = toty.can_convert_from(self, fromty) - if backward is None: - return forward - elif forward is None: - return backward - else: - return min(forward, backward) - - def _rate_arguments(self, actualargs, formalargs, unsafe_casting=True): - """ - Rate the actual arguments for compatibility against the formal - arguments. A Rating instance is returned, or None if incompatible. - """ - if len(actualargs) != len(formalargs): - return None - rate = Rating() - for actual, formal in zip(actualargs, formalargs): - conv = self.can_convert(actual, formal) - if conv is None: - return None - elif not unsafe_casting and conv >= Conversion.unsafe: - return None - - if conv == Conversion.promote: - rate.promote += 1 - elif conv == Conversion.safe: - rate.safe_convert += 1 - elif conv == Conversion.unsafe: - rate.unsafe_convert += 1 - elif conv == Conversion.exact: - pass - else: - raise Exception("unreachable", conv) - - return rate - - def install_possible_conversions(self, actualargs, formalargs): - """ - Install possible conversions from the actual argument types to - the formal argument types in the C++ type manager. - Return True if all arguments can be converted. 
- """ - if len(actualargs) != len(formalargs): - return False - for actual, formal in zip(actualargs, formalargs): - if self.tm.check_compatible(actual, formal) is not None: - # This conversion is already known - continue - conv = self.can_convert(actual, formal) - if conv is None: - return False - assert conv is not Conversion.exact - self.tm.set_compatible(actual, formal, conv) - return True - - def resolve_overload( - self, key, cases, args, kws, allow_ambiguous=True, unsafe_casting=True - ): - """ - Given actual *args* and *kws*, find the best matching - signature in *cases*, or None if none matches. - *key* is used for error reporting purposes. - If *allow_ambiguous* is False, a tie in the best matches - will raise an error. - If *unsafe_casting* is False, unsafe casting is forbidden. - """ - assert not kws, "Keyword arguments are not supported, yet" - options = {"unsafe_casting": unsafe_casting} - # Rate each case - candidates = [] - for case in cases: - if len(args) == len(case.args): - rating = self._rate_arguments(args, case.args, **options) - if rating is not None: - candidates.append((rating.astuple(), case)) - - # Find the best case - candidates.sort(key=lambda i: i[0]) - if candidates: - best_rate, best = candidates[0] - if not allow_ambiguous: - # Find whether there is a tie and if so, raise an error - tied = [] - for rate, case in candidates: - if rate != best_rate: - break - tied.append(case) - if len(tied) > 1: - args = (key, args, "\n".join(map(str, tied))) - msg = "Ambiguous overloading for %s %s:\n%s" % args - raise TypeError(msg) - # Simply return the best matching candidate in order. - # If there is a tie, since list.sort() is stable, the first case - # in the original order is returned. - # (this can happen if e.g. 
a function template exposes - # (int32, int32) -> int32 and (int64, int64) -> int64, - # and you call it with (int16, int16) arguments) - return best - - def unify_types(self, *typelist): - # Sort the type list according to bit width before doing - # pairwise unification (with thanks to aterrel). - def keyfunc(obj): - """Uses bitwidth to order numeric-types. - Fallback to stable, deterministic sort. - """ - return getattr(obj, "bitwidth", 0) - - typelist = sorted(typelist, key=keyfunc) - unified = typelist[0] - for tp in typelist[1:]: - unified = self.unify_pairs(unified, tp) - if unified is None: - break - return unified - - def unify_pairs(self, first, second): - """ - Try to unify the two given types. A third type is returned, - or None in case of failure. - """ - if first == second: - return first - - if first is types.undefined: - return second - elif second is types.undefined: - return first - - # Types with special unification rules - unified = first.unify(self, second) - if unified is not None: - return unified - - unified = second.unify(self, first) - if unified is not None: - return unified - - # Other types with simple conversion rules - conv = self.can_convert(fromty=first, toty=second) - if conv is not None and conv <= Conversion.safe: - # Can convert from first to second - return second - - conv = self.can_convert(fromty=second, toty=first) - if conv is not None and conv <= Conversion.safe: - # Can convert from second to first - return first - - # Cannot unify - return None - - -class Context(BaseContext): - def load_additional_registries(self): - from . 
import ( - cffi_utils, - cmathdecl, - enumdecl, - listdecl, - mathdecl, - npydecl, - operatordecl, - randomdecl, - setdecl, - ) - - self.install_registry(cffi_utils.registry) - self.install_registry(cmathdecl.registry) - self.install_registry(enumdecl.registry) - self.install_registry(listdecl.registry) - self.install_registry(mathdecl.registry) - self.install_registry(npydecl.registry) - self.install_registry(operatordecl.registry) - self.install_registry(randomdecl.registry) - self.install_registry(setdecl.registry) diff --git a/numba/numba/typing/ctypes_utils.py b/numba/numba/typing/ctypes_utils.py deleted file mode 100644 index accbf4eb8..000000000 --- a/numba/numba/typing/ctypes_utils.py +++ /dev/null @@ -1,125 +0,0 @@ -""" -Support for typing ctypes function pointers. -""" - -from __future__ import absolute_import - -import ctypes -import sys - -from numba import types -from . import templates -from .typeof import typeof_impl - - -_FROM_CTYPES = { - ctypes.c_bool: types.boolean, - - ctypes.c_int8: types.int8, - ctypes.c_int16: types.int16, - ctypes.c_int32: types.int32, - ctypes.c_int64: types.int64, - - ctypes.c_uint8: types.uint8, - ctypes.c_uint16: types.uint16, - ctypes.c_uint32: types.uint32, - ctypes.c_uint64: types.uint64, - - ctypes.c_float: types.float32, - ctypes.c_double: types.float64, - - ctypes.c_void_p: types.voidptr, - ctypes.py_object: types.ffi_forced_object, -} - -_TO_CTYPES = {v: k for (k, v) in _FROM_CTYPES.items()} - - -def from_ctypes(ctypeobj): - """ - Convert the given ctypes type to a Numba type. 
- """ - if ctypeobj is None: - # Special case for the restype of void-returning functions - return types.none - - assert isinstance(ctypeobj, type), ctypeobj - - def _convert_internal(ctypeobj): - # Recursive helper - if issubclass(ctypeobj, ctypes._Pointer): - valuety = _convert_internal(ctypeobj._type_) - if valuety is not None: - return types.CPointer(valuety) - else: - return _FROM_CTYPES.get(ctypeobj) - - ty = _convert_internal(ctypeobj) - if ty is None: - raise TypeError("Unsupported ctypes type: %s" % ctypeobj) - return ty - - -def to_ctypes(ty): - """ - Convert the given Numba type to a ctypes type. - """ - assert isinstance(ty, types.Type), ty - - if ty is types.none: - # Special case for the restype of void-returning functions - return None - - def _convert_internal(ty): - if isinstance(ty, types.CPointer): - return ctypes.POINTER(_convert_internal(ty.dtype)) - else: - return _TO_CTYPES.get(ty) - - ctypeobj = _convert_internal(ty) - if ctypeobj is None: - raise TypeError("Cannot convert Numba type '%s' to ctypes type" - % (ty,)) - return ctypeobj - - -def is_ctypes_funcptr(obj): - try: - # Is it something of which we can get the address - ctypes.cast(obj, ctypes.c_void_p) - except ctypes.ArgumentError: - return False - else: - # Does it define argtypes and restype - return hasattr(obj, 'argtypes') and hasattr(obj, 'restype') - - -def get_pointer(ctypes_func): - """ - Get a pointer to the underlying function for a ctypes function as an - integer. - """ - return ctypes.cast(ctypes_func, ctypes.c_void_p).value - - -def make_function_type(cfnptr): - """ - Return a Numba type for the given ctypes function pointer. 
- """ - if cfnptr.argtypes is None: - raise TypeError("ctypes function %r doesn't define its argument types; " - "consider setting the `argtypes` attribute" - % (cfnptr.__name__,)) - cargs = [from_ctypes(a) - for a in cfnptr.argtypes] - cret = from_ctypes(cfnptr.restype) - if sys.platform == 'win32' and not cfnptr._flags_ & ctypes._FUNCFLAG_CDECL: - # 'stdcall' calling convention under Windows - cconv = 'x86_stdcallcc' - else: - # Default C calling convention - cconv = None - - sig = templates.signature(cret, *cargs) - return types.ExternalFunctionPointer(sig, cconv=cconv, - get_pointer=get_pointer) diff --git a/numba/numba/typing/enumdecl.py b/numba/numba/typing/enumdecl.py deleted file mode 100644 index 450146ee4..000000000 --- a/numba/numba/typing/enumdecl.py +++ /dev/null @@ -1,63 +0,0 @@ -""" -Typing for enums. -""" - -from numba import types -from numba.typing.templates import (AbstractTemplate, AttributeTemplate, - signature, Registry) - -registry = Registry() -infer = registry.register -infer_global = registry.register_global -infer_getattr = registry.register_attr - - -@infer_getattr -class EnumAttribute(AttributeTemplate): - key = types.EnumMember - - def resolve_value(self, ty): - return ty.dtype - - -@infer_getattr -class EnumClassAttribute(AttributeTemplate): - key = types.EnumClass - - def generic_resolve(self, ty, attr): - """ - Resolve attributes of an enum class as enum members. 
- """ - if attr in ty.instance_class.__members__: - return ty.member_type - - -@infer -class EnumClassStaticGetItem(AbstractTemplate): - key = "static_getitem" - - def generic(self, args, kws): - enum, idx = args - if (isinstance(enum, types.EnumClass) - and idx in enum.instance_class.__members__): - return enum.member_type - - -class EnumCompare(AbstractTemplate): - - def generic(self, args, kws): - [lhs, rhs] = args - if (isinstance(lhs, types.EnumMember) - and isinstance(rhs, types.EnumMember) - and lhs == rhs): - return signature(types.boolean, lhs, rhs) - - -@infer -class EnumEq(EnumCompare): - key = '==' - - -@infer -class EnumNe(EnumCompare): - key = '!=' diff --git a/numba/numba/typing/listdecl.py b/numba/numba/typing/listdecl.py deleted file mode 100644 index a2f495172..000000000 --- a/numba/numba/typing/listdecl.py +++ /dev/null @@ -1,226 +0,0 @@ -from __future__ import absolute_import, print_function - -from .. import types -from .templates import (ConcreteTemplate, AbstractTemplate, AttributeTemplate, - CallableTemplate, Registry, signature, bound_function, - make_callable_template) -# Ensure list is typed as a collection as well -from . 
import collections - - -registry = Registry() -infer = registry.register -infer_global = registry.register_global -infer_getattr = registry.register_attr - - -@infer_global(list) -class ListBuiltin(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - if args: - iterable, = args - if isinstance(iterable, types.IterableType): - dtype = iterable.iterator_type.yield_type - return signature(types.List(dtype), iterable) - else: - return signature(types.List(types.undefined)) - - -@infer_global(sorted) -class SortedBuiltin(CallableTemplate): - - def generic(self): - def typer(iterable, reverse=None): - if not isinstance(iterable, types.IterableType): - return - if (reverse is not None and - not isinstance(reverse, types.Boolean)): - return - return types.List(iterable.iterator_type.yield_type) - - return typer - - -@infer_getattr -class ListAttribute(AttributeTemplate): - key = types.List - - # NOTE: some of these should be Sequence / MutableSequence methods - - @bound_function("list.append") - def resolve_append(self, list, args, kws): - item, = args - assert not kws - unified = self.context.unify_pairs(list.dtype, item) - if unified is not None: - sig = signature(types.none, unified) - sig.recvr = list.copy(dtype=unified) - return sig - - @bound_function("list.clear") - def resolve_clear(self, list, args, kws): - assert not args - assert not kws - return signature(types.none) - - @bound_function("list.copy") - def resolve_copy(self, list, args, kws): - assert not args - assert not kws - return signature(list) - - @bound_function("list.count") - def resolve_count(self, list, args, kws): - item, = args - assert not kws - return signature(types.intp, list.dtype) - - @bound_function("list.extend") - def resolve_extend(self, list, args, kws): - iterable, = args - assert not kws - if not isinstance(iterable, types.IterableType): - return - - dtype = iterable.iterator_type.yield_type - unified = self.context.unify_pairs(list.dtype, dtype) - if unified is not 
None: - sig = signature(types.none, iterable) - sig.recvr = list.copy(dtype=unified) - return sig - - @bound_function("list.index") - def resolve_index(self, list, args, kws): - assert not kws - if len(args) == 1: - return signature(types.intp, list.dtype) - elif len(args) == 2: - if isinstance(args[1], types.Integer): - return signature(types.intp, list.dtype, types.intp) - elif len(args) == 3: - if (isinstance(args[1], types.Integer) - and isinstance(args[2], types.Integer)): - return signature(types.intp, list.dtype, types.intp, types.intp) - - @bound_function("list.insert") - def resolve_insert(self, list, args, kws): - idx, item = args - assert not kws - if isinstance(idx, types.Integer): - unified = self.context.unify_pairs(list.dtype, item) - if unified is not None: - sig = signature(types.none, types.intp, unified) - sig.recvr = list.copy(dtype=unified) - return sig - - @bound_function("list.pop") - def resolve_pop(self, list, args, kws): - assert not kws - if not args: - return signature(list.dtype) - else: - idx, = args - if isinstance(idx, types.Integer): - return signature(list.dtype, types.intp) - - @bound_function("list.remove") - def resolve_remove(self, list, args, kws): - assert not kws - if len(args) == 1: - return signature(types.none, list.dtype) - - @bound_function("list.reverse") - def resolve_reverse(self, list, args, kws): - assert not args - assert not kws - return signature(types.none) - - def resolve_sort(self, list): - def typer(reverse=None): - if (reverse is not None and - not isinstance(reverse, types.Boolean)): - return - return types.none - - return types.BoundFunction(make_callable_template(key="list.sort", - typer=typer, - recvr=list), - list) - - -@infer -class AddList(AbstractTemplate): - key = "+" - - def generic(self, args, kws): - if len(args) == 2: - a, b = args - if isinstance(a, types.List) and isinstance(b, types.List): - unified = self.context.unify_pairs(a, b) - if unified is not None: - return signature(unified, a, b) 
- - -@infer -class InplaceAddList(AbstractTemplate): - key = "+=" - - def generic(self, args, kws): - if len(args) == 2: - a, b = args - if isinstance(a, types.List) and isinstance(b, types.List): - if self.context.can_convert(b.dtype, a.dtype): - return signature(a, a, b) - - -@infer -class MulList(AbstractTemplate): - key = "*" - - def generic(self, args, kws): - a, b = args - if isinstance(a, types.List) and isinstance(b, types.Integer): - return signature(a, a, types.intp) - - -@infer -class InplaceMulList(MulList): - key = "*=" - - -class ListCompare(AbstractTemplate): - - def generic(self, args, kws): - [lhs, rhs] = args - if isinstance(lhs, types.List) and isinstance(rhs, types.List): - # Check element-wise comparability - res = self.context.resolve_function_type(self.key, - (lhs.dtype, rhs.dtype), {}) - if res is not None: - return signature(types.boolean, lhs, rhs) - -@infer -class ListEq(ListCompare): - key = '==' - -@infer -class ListNe(ListCompare): - key = '!=' - -@infer -class ListLt(ListCompare): - key = '<' - -@infer -class ListLe(ListCompare): - key = '<=' - -@infer -class ListGt(ListCompare): - key = '>' - -@infer -class ListGe(ListCompare): - key = '>=' diff --git a/numba/numba/typing/mathdecl.py b/numba/numba/typing/mathdecl.py deleted file mode 100644 index a1d25274c..000000000 --- a/numba/numba/typing/mathdecl.py +++ /dev/null @@ -1,132 +0,0 @@ -import math -from numba import types, utils -from numba.typing.templates import (AttributeTemplate, ConcreteTemplate, - signature, Registry) - -registry = Registry() -infer_global = registry.register_global - - -@infer_global(math.exp) -@infer_global(math.expm1) -@infer_global(math.fabs) -@infer_global(math.sqrt) -@infer_global(math.log) -@infer_global(math.log1p) -@infer_global(math.log10) -@infer_global(math.sin) -@infer_global(math.cos) -@infer_global(math.tan) -@infer_global(math.sinh) -@infer_global(math.cosh) -@infer_global(math.tanh) -@infer_global(math.asin) -@infer_global(math.acos) 
-@infer_global(math.atan) -@infer_global(math.asinh) -@infer_global(math.acosh) -@infer_global(math.atanh) -@infer_global(math.degrees) -@infer_global(math.radians) -@infer_global(math.erf) -@infer_global(math.erfc) -@infer_global(math.gamma) -@infer_global(math.lgamma) -class Math_unary(ConcreteTemplate): - cases = [ - signature(types.float64, types.int64), - signature(types.float64, types.uint64), - signature(types.float32, types.float32), - signature(types.float64, types.float64), - ] - - -@infer_global(math.atan2) -class Math_atan2(ConcreteTemplate): - cases = [ - signature(types.float64, types.int64, types.int64), - signature(types.float64, types.uint64, types.uint64), - signature(types.float32, types.float32, types.float32), - signature(types.float64, types.float64, types.float64), - ] - - -@infer_global(math.trunc) -class Math_converter(ConcreteTemplate): - cases = [ - signature(types.intp, types.intp), - signature(types.int64, types.int64), - signature(types.uint64, types.uint64), - signature(types.int64, types.float32), - signature(types.int64, types.float64), - ] - -# math.floor and math.ceil return float on 2.x, int on 3.x -if utils.PYVERSION > (3, 0): - @infer_global(math.floor) - @infer_global(math.ceil) - class Math_floor_ceil(Math_converter): - pass -else: - @infer_global(math.floor) - @infer_global(math.ceil) - class Math_floor_ceil(Math_unary): - pass - - -@infer_global(math.copysign) -class Math_copysign(ConcreteTemplate): - cases = [ - signature(types.float32, types.float32, types.float32), - signature(types.float64, types.float64, types.float64), - ] - - -@infer_global(math.hypot) -class Math_hypot(ConcreteTemplate): - cases = [ - signature(types.float64, types.int64, types.int64), - signature(types.float64, types.uint64, types.uint64), - signature(types.float32, types.float32, types.float32), - signature(types.float64, types.float64, types.float64), - ] - - -@infer_global(math.isinf) -@infer_global(math.isnan) -class 
Math_predicate(ConcreteTemplate): - cases = [ - signature(types.boolean, types.int64), - signature(types.boolean, types.uint64), - signature(types.boolean, types.float32), - signature(types.boolean, types.float64), - ] - -if utils.PYVERSION >= (3, 2): - @infer_global(math.isfinite) - class Math_isfinite(Math_predicate): - pass - - -@infer_global(math.pow) -class Math_pow(ConcreteTemplate): - cases = [ - signature(types.float64, types.float64, types.int64), - signature(types.float64, types.float64, types.uint64), - signature(types.float32, types.float32, types.float32), - signature(types.float64, types.float64, types.float64), - ] - -@infer_global(math.frexp) -class Math_frexp(ConcreteTemplate): - cases = [ - signature(types.Tuple((types.float64, types.intc)), types.float64), - signature(types.Tuple((types.float32, types.intc)), types.float32), - ] - -@infer_global(math.ldexp) -class Math_ldexp(ConcreteTemplate): - cases = [ - signature(types.float64, types.float64, types.intc), - signature(types.float32, types.float32, types.intc), - ] diff --git a/numba/numba/typing/npdatetime.py b/numba/numba/typing/npdatetime.py deleted file mode 100644 index b5de03ec8..000000000 --- a/numba/numba/typing/npdatetime.py +++ /dev/null @@ -1,261 +0,0 @@ -""" -Typing declarations for np.timedelta64. 
-""" - -from __future__ import print_function, division, absolute_import - -from itertools import product - -from numba import npdatetime, types -from numba.utils import PYVERSION -from numba.typing.templates import (AttributeTemplate, ConcreteTemplate, - AbstractTemplate, infer_global, infer, - infer_getattr, signature) - - -# timedelta64-only operations - -class TimedeltaUnaryOp(AbstractTemplate): - - def generic(self, args, kws): - if len(args) == 2: - # Guard against binary + and - - return - op, = args - if not isinstance(op, types.NPTimedelta): - return - return signature(op, op) - - -class TimedeltaBinOp(AbstractTemplate): - - def generic(self, args, kws): - if len(args) == 1: - # Guard against unary + and - - return - left, right = args - if not all(isinstance(tp, types.NPTimedelta) for tp in args): - return - if npdatetime.can_cast_timedelta_units(left.unit, right.unit): - return signature(right, left, right) - elif npdatetime.can_cast_timedelta_units(right.unit, left.unit): - return signature(left, left, right) - - -class TimedeltaCmpOp(AbstractTemplate): - - def generic(self, args, kws): - # For equality comparisons, all units are inter-comparable - left, right = args - if not all(isinstance(tp, types.NPTimedelta) for tp in args): - return - return signature(types.boolean, left, right) - - -class TimedeltaOrderedCmpOp(AbstractTemplate): - - def generic(self, args, kws): - # For ordered comparisons, units must be compatible - left, right = args - if not all(isinstance(tp, types.NPTimedelta) for tp in args): - return - if (npdatetime.can_cast_timedelta_units(left.unit, right.unit) or - npdatetime.can_cast_timedelta_units(right.unit, left.unit)): - return signature(types.boolean, left, right) - - -class TimedeltaMixOp(AbstractTemplate): - - def generic(self, args, kws): - """ - (timedelta64, {int, float}) -> timedelta64 - ({int, float}, timedelta64) -> timedelta64 - """ - left, right = args - if isinstance(right, types.NPTimedelta): - td, other = right, 
left - sig_factory = lambda other: signature(td, other, td) - elif isinstance(left, types.NPTimedelta): - td, other = left, right - sig_factory = lambda other: signature(td, td, other) - else: - return - if not isinstance(other, (types.Float, types.Integer)): - return - # Force integer types to convert to signed because it matches - # timedelta64 semantics better. - if isinstance(other, types.Integer): - other = types.int64 - return sig_factory(other) - - -class TimedeltaDivOp(AbstractTemplate): - - def generic(self, args, kws): - """ - (timedelta64, {int, float}) -> timedelta64 - (timedelta64, timedelta64) -> float - """ - left, right = args - if not isinstance(left, types.NPTimedelta): - return - if isinstance(right, types.NPTimedelta): - if (npdatetime.can_cast_timedelta_units(left.unit, right.unit) - or npdatetime.can_cast_timedelta_units(right.unit, left.unit)): - return signature(types.float64, left, right) - elif isinstance(right, (types.Float)): - return signature(left, left, right) - elif isinstance(right, (types.Integer)): - # Force integer types to convert to signed because it matches - # timedelta64 semantics better. - return signature(left, left, types.int64) - - -@infer -class TimedeltaUnaryPos(TimedeltaUnaryOp): - key = "+" - -@infer -class TimedeltaUnaryNeg(TimedeltaUnaryOp): - key = "-" - -@infer -class TimedeltaBinAdd(TimedeltaBinOp): - key = "+" - -@infer -class TimedeltaBinSub(TimedeltaBinOp): - key = "-" - -@infer -class TimedeltaBinMult(TimedeltaMixOp): - key = "*" - -@infer -class TimedeltaTrueDiv(TimedeltaDivOp): - key = "/" - -@infer -class TimedeltaFloorDiv(TimedeltaDivOp): - key = "//" - -@infer -class TimedeltaLegacyDiv(TimedeltaDivOp): - key = "/?" 
- -@infer -class TimedeltaCmpEq(TimedeltaCmpOp): - key = '==' - -@infer -class TimedeltaCmpNe(TimedeltaCmpOp): - key = '!=' - -@infer -class TimedeltaCmpLt(TimedeltaOrderedCmpOp): - key = '<' - -@infer -class TimedeltaCmpLE(TimedeltaOrderedCmpOp): - key = '<=' - -@infer -class TimedeltaCmpGt(TimedeltaOrderedCmpOp): - key = '>' - -@infer -class TimedeltaCmpGE(TimedeltaOrderedCmpOp): - key = '>=' - - -@infer_global(abs) -class TimedeltaAbs(TimedeltaUnaryOp): - pass - - -# datetime64 operations - -@infer -class DatetimePlusTimedelta(AbstractTemplate): - key = '+' - - def generic(self, args, kws): - if len(args) == 1: - # Guard against unary + - return - left, right = args - if isinstance(right, types.NPTimedelta): - dt = left - td = right - elif isinstance(left, types.NPTimedelta): - dt = right - td = left - else: - return - if isinstance(dt, types.NPDatetime): - unit = npdatetime.combine_datetime_timedelta_units(dt.unit, td.unit) - if unit is not None: - return signature(types.NPDatetime(unit), left, right) - -@infer -class DatetimeMinusTimedelta(AbstractTemplate): - key = '-' - - def generic(self, args, kws): - if len(args) == 1: - # Guard against unary - - return - dt, td = args - if isinstance(dt, types.NPDatetime) and isinstance(td, types.NPTimedelta): - unit = npdatetime.combine_datetime_timedelta_units(dt.unit, td.unit) - if unit is not None: - return signature(types.NPDatetime(unit), dt, td) - -@infer -class DatetimeMinusDatetime(AbstractTemplate): - key = '-' - - def generic(self, args, kws): - if len(args) == 1: - # Guard against unary - - return - left, right = args - if isinstance(left, types.NPDatetime) and isinstance(right, types.NPDatetime): - # All units compatible! Yoohoo! 
- unit = npdatetime.get_best_unit(left.unit, right.unit) - return signature(types.NPTimedelta(unit), left, right) - - -class DatetimeCmpOp(AbstractTemplate): - - def generic(self, args, kws): - # For datetime64 comparisons, all units are inter-comparable - left, right = args - if not all(isinstance(tp, types.NPDatetime) for tp in args): - return - return signature(types.boolean, left, right) - - -@infer -class DatetimeCmpEq(DatetimeCmpOp): - key = '==' - -@infer -class DatetimeCmpNe(DatetimeCmpOp): - key = '!=' - -@infer -class DatetimeCmpLt(DatetimeCmpOp): - key = '<' - -@infer -class DatetimeCmpLE(DatetimeCmpOp): - key = '<=' - -@infer -class DatetimeCmpGt(DatetimeCmpOp): - key = '>' - -@infer -class DatetimeCmpGE(DatetimeCmpOp): - key = '>=' diff --git a/numba/numba/typing/npydecl.py b/numba/numba/typing/npydecl.py deleted file mode 100644 index 235e3ade4..000000000 --- a/numba/numba/typing/npydecl.py +++ /dev/null @@ -1,1272 +0,0 @@ -from __future__ import absolute_import, print_function - -import warnings - -import numpy as np - -from .. import types, utils -from .templates import (AttributeTemplate, AbstractTemplate, CallableTemplate, - Registry, signature) - -from ..numpy_support import (ufunc_find_matching_loop, - supported_ufunc_loop, as_dtype, - from_dtype, as_dtype, resolve_output_type, - carray, farray) -from ..numpy_support import version as numpy_version -from ..errors import TypingError, PerformanceWarning -from numba import pndindex - -registry = Registry() -infer = registry.register -infer_global = registry.register_global -infer_getattr = registry.register_attr - - -class Numpy_rules_ufunc(AbstractTemplate): - @classmethod - def _handle_inputs(cls, ufunc, args, kws): - """ - Process argument types to a given *ufunc*. 
- Returns a (base types, explicit outputs, ndims, layout) tuple where: - - `base types` is a tuple of scalar types for each input - - `explicit outputs` is a tuple of explicit output types (arrays) - - `ndims` is the number of dimensions of the loop and also of - any outputs, explicit or implicit - - `layout` is the layout for any implicit output to be allocated - """ - nin = ufunc.nin - nout = ufunc.nout - nargs = ufunc.nargs - - # preconditions - assert nargs == nin + nout - - if nout > 1: - msg = "ufunc '{0}': not supported in this mode (more than 1 output)" - raise TypingError(msg=msg.format(ufunc.__name__)) - - if len(args) < nin: - msg = "ufunc '{0}': not enough arguments ({1} found, {2} required)" - raise TypingError(msg=msg.format(ufunc.__name__, len(args), nin)) - - if len(args) > nargs: - msg = "ufunc '{0}': too many arguments ({1} found, {2} maximum)" - raise TypingError(msg=msg.format(ufunc.__name__, len(args), nargs)) - - args = [a.as_array if isinstance(a, types.ArrayCompatible) else a - for a in args] - arg_ndims = [a.ndim if isinstance(a, types.ArrayCompatible) else 0 - for a in args] - ndims = max(arg_ndims) - - # explicit outputs must be arrays (no explicit scalar return values supported) - explicit_outputs = args[nin:] - - # all the explicit outputs must match the number max number of dimensions - if not all(d == ndims for d in arg_ndims[nin:]): - msg = "ufunc '{0}' called with unsuitable explicit output arrays." - raise TypingError(msg=msg.format(ufunc.__name__)) - - if not all(isinstance(output, types.ArrayCompatible) - for output in explicit_outputs): - msg = "ufunc '{0}' called with an explicit output that is not an array" - raise TypingError(msg=msg.format(ufunc.__name__)) - - # find the kernel to use, based only in the input types (as does NumPy) - base_types = [x.dtype if isinstance(x, types.ArrayCompatible) else x - for x in args] - - # Figure out the output array layout, if needed. 
- layout = None - if ndims > 0 and (len(explicit_outputs) < ufunc.nout): - layout = 'C' - layouts = [x.layout if isinstance(x, types.ArrayCompatible) else '' - for x in args] - - # Prefer C contig if any array is C contig. - # Next, prefer F contig. - # Defaults to C contig if not layouts are C/F. - if 'C' not in layouts and 'F' in layouts: - layout = 'F' - - return base_types, explicit_outputs, ndims, layout - - @property - def ufunc(self): - return self.key - - def generic(self, args, kws): - ufunc = self.ufunc - base_types, explicit_outputs, ndims, layout = self._handle_inputs( - ufunc, args, kws) - ufunc_loop = ufunc_find_matching_loop(ufunc, base_types) - if ufunc_loop is None: - raise TypingError("can't resolve ufunc {0} for types {1}".format(ufunc.__name__, args)) - - # check if all the types involved in the ufunc loop are supported in this mode - if not supported_ufunc_loop(ufunc, ufunc_loop): - msg = "ufunc '{0}' using the loop '{1}' not supported in this mode" - raise TypingError(msg=msg.format(ufunc.__name__, ufunc_loop.ufunc_sig)) - - # if there is any explicit output type, check that it is valid - explicit_outputs_np = [as_dtype(tp.dtype) for tp in explicit_outputs] - - # Numpy will happily use unsafe conversions (although it will actually warn) - if not all (np.can_cast(fromty, toty, 'unsafe') for (fromty, toty) in - zip(ufunc_loop.numpy_outputs, explicit_outputs_np)): - msg = "ufunc '{0}' can't cast result to explicit result type" - raise TypingError(msg=msg.format(ufunc.__name__)) - - # A valid loop was found that is compatible. The result of type inference should - # be based on the explicit output types, and when not available with the type given - # by the selected NumPy loop - out = list(explicit_outputs) - implicit_output_count = ufunc.nout - len(explicit_outputs) - if implicit_output_count > 0: - # XXX this is currently wrong for datetime64 and timedelta64, - # as ufunc_find_matching_loop() doesn't do any type inference. 
- ret_tys = ufunc_loop.outputs[-implicit_output_count:] - if ndims > 0: - assert layout is not None - ret_tys = [types.Array(dtype=ret_ty, ndim=ndims, layout=layout) - for ret_ty in ret_tys] - ret_tys = [resolve_output_type(self.context, args, ret_ty) - for ret_ty in ret_tys] - out.extend(ret_tys) - - # note: although the previous code should support multiple return values, only one - # is supported as of now (signature may not support more than one). - # there is an check enforcing only one output - out.extend(args) - return signature(*out) - - -@infer -class UnaryPositiveArray(AbstractTemplate): - '''Typing template class for +(array) expressions. This operator is - special because there is no Numpy ufunc associated with it; we - include typing for it here (numba.typing.npydecl) because this is - where the remaining array operators are defined. - ''' - key = "+" - - def generic(self, args, kws): - assert not kws - if len(args) == 1 and isinstance(args[0], types.ArrayCompatible): - arg_ty = args[0] - return arg_ty.copy()(arg_ty) - - -class NumpyRulesArrayOperator(Numpy_rules_ufunc): - _op_map = { - '+': "add", - '-': "subtract", - '*': "multiply", - '/?': "divide", - '/': "true_divide", - '//': "floor_divide", - '%': "remainder", - '**': "power", - '<<': "left_shift", - '>>': "right_shift", - '&': "bitwise_and", - '|': "bitwise_or", - '^': "bitwise_xor", - '==': "equal", - '>': "greater", - '>=': "greater_equal", - '<': "less", - '<=': "less_equal", - '!=': "not_equal", - } - - @property - def ufunc(self): - return getattr(np, self._op_map[self.key]) - - @classmethod - def install_operations(cls): - for op, ufunc_name in cls._op_map.items(): - infer(type("NumpyRulesArrayOperator_" + ufunc_name, (cls,), - dict(key=op))) - - def generic(self, args, kws): - '''Overloads and calls base class generic() method, returning - None if a TypingError occurred. 
- - Returning None for operators is important since operators are - heavily overloaded, and by suppressing type errors, we allow - type inference to check other possibilities before giving up - (particularly user-defined operators). - ''' - try: - sig = super(NumpyRulesArrayOperator, self).generic(args, kws) - except TypingError: - return None - if sig is None: - return None - args = sig.args - # Only accept at least one array argument, otherwise the operator - # doesn't involve Numpy's ufunc machinery. - if not any(isinstance(arg, types.ArrayCompatible) - for arg in args): - return None - return sig - - -_binop_map = NumpyRulesArrayOperator._op_map - -class NumpyRulesInplaceArrayOperator(NumpyRulesArrayOperator): - _op_map = dict((inp, _binop_map[binop]) - for (inp, binop) in utils.inplace_map.items() - if binop in _binop_map) - - def generic(self, args, kws): - # Type the inplace operator as if an explicit output was passed, - # to handle type resolution correctly. - # (for example int8[:] += int16[:] should use an int8[:] output, - # not int16[:]) - lhs, rhs = args - if not isinstance(lhs, types.ArrayCompatible): - return - args = args + (lhs,) - sig = super(NumpyRulesInplaceArrayOperator, self).generic(args, kws) - # Strip off the fake explicit output - assert len(sig.args) == 3 - real_sig = signature(sig.return_type, *sig.args[:2]) - return real_sig - - -class NumpyRulesUnaryArrayOperator(NumpyRulesArrayOperator): - _op_map = { - # Positive is a special case since there is no Numpy ufunc - # corresponding to it (it's essentially an identity operator). - # See UnaryPositiveArray, above. 
- '-': "negative", - '~': "invert", - } - - def generic(self, args, kws): - assert not kws - if len(args) == 1 and isinstance(args[0], types.ArrayCompatible): - return super(NumpyRulesUnaryArrayOperator, self).generic(args, kws) - - -# list of unary ufuncs to register - -_math_operations = [ "add", "subtract", "multiply", - "logaddexp", "logaddexp2", "true_divide", - "floor_divide", "negative", "power", - "remainder", "fmod", "absolute", - "rint", "sign", "conjugate", "exp", "exp2", - "log", "log2", "log10", "expm1", "log1p", - "sqrt", "square", "reciprocal", - "divide", "mod", "abs", "fabs" ] - -_trigonometric_functions = [ "sin", "cos", "tan", "arcsin", - "arccos", "arctan", "arctan2", - "hypot", "sinh", "cosh", "tanh", - "arcsinh", "arccosh", "arctanh", - "deg2rad", "rad2deg", "degrees", - "radians" ] - -_bit_twiddling_functions = ["bitwise_and", "bitwise_or", - "bitwise_xor", "invert", - "left_shift", "right_shift", - "bitwise_not" ] - -_comparison_functions = [ "greater", "greater_equal", "less", - "less_equal", "not_equal", "equal", - "logical_and", "logical_or", - "logical_xor", "logical_not", - "maximum", "minimum", "fmax", "fmin" ] - -_floating_functions = [ "isfinite", "isinf", "isnan", "signbit", - "copysign", "nextafter", "modf", "ldexp", - "frexp", "floor", "ceil", "trunc", - "spacing" ] - - -# This is a set of the ufuncs that are not yet supported by Lowering. In order -# to trigger no-python mode we must not register them until their Lowering is -# implemented. -# -# It also works as a nice TODO list for ufunc support :) -_unsupported = set([ 'frexp', # this one is tricky, as it has 2 returns - 'modf', # this one also has 2 returns - ]) - -# A list of ufuncs that are in fact aliases of other ufuncs. 
They need to insert the -# resolve method, but not register the ufunc itself -_aliases = set(["bitwise_not", "mod", "abs"]) - -# In python3 np.divide is mapped to np.true_divide -if np.divide == np.true_divide: - _aliases.add("divide") - -def _numpy_ufunc(name): - func = getattr(np, name) - class typing_class(Numpy_rules_ufunc): - key = func - - typing_class.__name__ = "resolve_{0}".format(name) - - if not name in _aliases: - infer_global(func, types.Function(typing_class)) - -all_ufuncs = sum([_math_operations, _trigonometric_functions, - _bit_twiddling_functions, _comparison_functions, - _floating_functions], []) - -supported_ufuncs = [x for x in all_ufuncs if x not in _unsupported] - -for func in supported_ufuncs: - _numpy_ufunc(func) - -all_ufuncs = [getattr(np, name) for name in all_ufuncs] -supported_ufuncs = [getattr(np, name) for name in supported_ufuncs] - -NumpyRulesUnaryArrayOperator.install_operations() -NumpyRulesArrayOperator.install_operations() -NumpyRulesInplaceArrayOperator.install_operations() - -supported_array_operators = set( - NumpyRulesUnaryArrayOperator._op_map.keys()).union( - NumpyRulesArrayOperator._op_map.keys()) - -del _math_operations, _trigonometric_functions, _bit_twiddling_functions -del _comparison_functions, _floating_functions, _unsupported -del _aliases, _numpy_ufunc - - -# ----------------------------------------------------------------------------- -# Install global helpers for array methods. - -class Numpy_method_redirection(AbstractTemplate): - """ - A template redirecting a Numpy global function (e.g. np.sum) to an - array method of the same name (e.g. ndarray.sum). 
- """ - - def generic(self, args, kws): - pysig = None - if kws: - if self.method_name == 'sum': - def sum_stub(arr, axis): - pass - pysig = utils.pysignature(sum_stub) - elif self.method_name == 'argsort': - def argsort_stub(arr, kind='quicksort'): - pass - pysig = utils.pysignature(argsort_stub) - else: - fmt = "numba doesn't support kwarg for {}" - raise TypingError(fmt.format(self.method_name)) - - arr = args[0] - # This will return a BoundFunction - meth_ty = self.context.resolve_getattr(arr, self.method_name) - # Resolve arguments on the bound function - meth_sig = self.context.resolve_function_type(meth_ty, args[1:], kws) - if meth_sig is not None: - return meth_sig.as_function().replace(pysig=pysig) - - -# Function to glue attributes onto the numpy-esque object -def _numpy_redirect(fname): - numpy_function = getattr(np, fname) - cls = type("Numpy_redirect_{0}".format(fname), (Numpy_method_redirection,), - dict(key=numpy_function, method_name=fname)) - infer_global(numpy_function, types.Function(cls)) - # special case literal support for 'sum' - if fname in ['sum', 'argsort']: - cls.support_literals = True - -for func in ['min', 'max', 'sum', 'prod', 'mean', 'var', 'std', - 'cumsum', 'cumprod', 'argmin', 'argmax', 'argsort', - 'nonzero', 'ravel']: - _numpy_redirect(func) - - -# ----------------------------------------------------------------------------- -# Numpy scalar constructors - -# Register np.int8, etc. 
as convertors to the equivalent Numba types -np_types = set(getattr(np, str(nb_type)) for nb_type in types.number_domain) -np_types.add(np.bool_) -# Those may or may not be aliases (depending on the Numpy build / version) -np_types.add(np.intc) -np_types.add(np.intp) -np_types.add(np.uintc) -np_types.add(np.uintp) - - -def register_number_classes(register_global): - for np_type in np_types: - nb_type = getattr(types, np_type.__name__) - - register_global(np_type, types.NumberClass(nb_type)) - - -register_number_classes(infer_global) - - -# ----------------------------------------------------------------------------- -# Numpy array constructors - -def _parse_shape(shape): - ndim = None - if isinstance(shape, types.Integer): - ndim = 1 - elif isinstance(shape, (types.Tuple, types.UniTuple)): - if all(isinstance(s, types.Integer) for s in shape): - ndim = len(shape) - return ndim - -def _parse_dtype(dtype): - if isinstance(dtype, types.DTypeSpec): - return dtype.dtype - -def _parse_nested_sequence(context, typ): - """ - Parse a (possibly 0d) nested sequence type. - A (ndim, dtype) tuple is returned. Note the sequence may still be - heterogeneous, as long as it converts to the given dtype. 
- """ - if isinstance(typ, (types.Buffer,)): - raise TypingError("%r not allowed in a homogeneous sequence" % typ) - elif isinstance(typ, (types.Sequence,)): - n, dtype = _parse_nested_sequence(context, typ.dtype) - return n + 1, dtype - elif isinstance(typ, (types.BaseTuple,)): - if typ.count == 0: - # Mimick Numpy's behaviour - return 1, types.float64 - n, dtype = _parse_nested_sequence(context, typ[0]) - dtypes = [dtype] - for i in range(1, typ.count): - _n, dtype = _parse_nested_sequence(context, typ[i]) - if _n != n: - raise TypingError("type %r does not have a regular shape" - % (typ,)) - dtypes.append(dtype) - dtype = context.unify_types(*dtypes) - if dtype is None: - raise TypingError("cannot convert %r to a homogeneous type" % typ) - return n + 1, dtype - else: - # Scalar type => check it's valid as a Numpy array dtype - as_dtype(typ) - return 0, typ - - - -@infer_global(np.array) -class NpArray(CallableTemplate): - """ - Typing template for np.array(). - """ - - def generic(self): - def typer(object, dtype=None): - ndim, seq_dtype = _parse_nested_sequence(self.context, object) - if dtype is None: - dtype = seq_dtype - else: - dtype = _parse_dtype(dtype) - if dtype is None: - return - return types.Array(dtype, ndim, 'C') - - return typer - - -@infer_global(np.empty) -@infer_global(np.zeros) -@infer_global(np.ones) -class NdConstructor(CallableTemplate): - """ - Typing template for np.empty(), .zeros(), .ones(). - """ - - def generic(self): - def typer(shape, dtype=None): - if dtype is None: - nb_dtype = types.double - else: - nb_dtype = _parse_dtype(dtype) - - ndim = _parse_shape(shape) - if nb_dtype is not None and ndim is not None: - return types.Array(dtype=nb_dtype, ndim=ndim, layout='C') - - return typer - - -@infer_global(np.empty_like) -@infer_global(np.zeros_like) -class NdConstructorLike(CallableTemplate): - """ - Typing template for np.empty_like(), .zeros_like(), .ones_like(). 
- """ - - def generic(self): - """ - np.empty_like(array) -> empty array of the same shape and layout - np.empty_like(scalar) -> empty 0-d array of the scalar type - """ - def typer(arg, dtype=None): - if dtype is not None: - nb_dtype = _parse_dtype(dtype) - elif isinstance(arg, types.Array): - nb_dtype = arg.dtype - else: - nb_dtype = arg - if nb_dtype is not None: - if isinstance(arg, types.Array): - layout = arg.layout if arg.layout != 'A' else 'C' - return arg.copy(dtype=nb_dtype, layout=layout, readonly=False) - else: - return types.Array(nb_dtype, 0, 'C') - - return typer - - -infer_global(np.ones_like)(NdConstructorLike) - - -if numpy_version >= (1, 8): - @infer_global(np.full) - class NdFull(CallableTemplate): - - def generic(self): - def typer(shape, fill_value, dtype=None): - if dtype is None: - if numpy_version < (1, 12): - nb_dtype = types.float64 - else: - nb_dtype = fill_value - else: - nb_dtype = _parse_dtype(dtype) - - ndim = _parse_shape(shape) - if nb_dtype is not None and ndim is not None: - return types.Array(dtype=nb_dtype, ndim=ndim, layout='C') - - return typer - - @infer_global(np.full_like) - class NdFullLike(CallableTemplate): - - def generic(self): - """ - np.full_like(array, val) -> array of the same shape and layout - np.full_like(scalar, val) -> 0-d array of the scalar type - """ - def typer(arg, fill_value, dtype=None): - if dtype is not None: - nb_dtype = _parse_dtype(dtype) - elif isinstance(arg, types.Array): - nb_dtype = arg.dtype - else: - nb_dtype = arg - if nb_dtype is not None: - if isinstance(arg, types.Array): - return arg.copy(dtype=nb_dtype, readonly=False) - else: - return types.Array(dtype=nb_dtype, ndim=0, layout='C') - - return typer - - -@infer_global(np.identity) -class NdIdentity(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - n = args[0] - if not isinstance(n, types.Integer): - return - if len(args) >= 2: - nb_dtype = _parse_dtype(args[1]) - else: - nb_dtype = types.float64 - - if nb_dtype 
is not None: - return_type = types.Array(ndim=2, dtype=nb_dtype, layout='C') - return signature(return_type, *args) - - -def _infer_dtype_from_inputs(inputs): - return dtype - - -@infer_global(np.eye) -class NdEye(CallableTemplate): - - def generic(self): - def typer(N, M=None, k=None, dtype=None): - if dtype is None: - nb_dtype = types.float64 - else: - nb_dtype = _parse_dtype(dtype) - if nb_dtype is not None: - return types.Array(ndim=2, dtype=nb_dtype, layout='C') - - return typer - - -@infer_global(np.arange) -class NdArange(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - if len(args) >= 4: - dtype = _parse_dtype(args[3]) - bounds = args[:3] - else: - bounds = args - if any(isinstance(arg, types.Complex) for arg in bounds): - dtype = types.complex128 - elif any(isinstance(arg, types.Float) for arg in bounds): - dtype = types.float64 - else: - dtype = max(bounds) - if not all(isinstance(arg, types.Number) for arg in bounds): - return - return_type = types.Array(ndim=1, dtype=dtype, layout='C') - return signature(return_type, *args) - - -@infer_global(np.linspace) -class NdLinspace(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - bounds = args[:2] - if not all(isinstance(arg, types.Number) for arg in bounds): - return - if len(args) >= 3: - num = args[2] - if not isinstance(num, types.Integer): - return - if len(args) >= 4: - # Not supporting the other arguments as it would require - # keyword arguments for reasonable use. 
- return - if any(isinstance(arg, types.Complex) for arg in bounds): - dtype = types.complex128 - else: - dtype = types.float64 - return_type = types.Array(ndim=1, dtype=dtype, layout='C') - return signature(return_type, *args) - - -@infer_global(np.frombuffer) -class NdFromBuffer(CallableTemplate): - - def generic(self): - def typer(buffer, dtype=None): - if not isinstance(buffer, types.Buffer) or buffer.layout != 'C': - return - if dtype is None: - nb_dtype = types.float64 - else: - nb_dtype = _parse_dtype(dtype) - - if nb_dtype is not None: - return types.Array(dtype=nb_dtype, ndim=1, layout='C', - readonly=not buffer.mutable) - - return typer - - -@infer_global(np.sort) -class NdSort(CallableTemplate): - - def generic(self): - def typer(a): - if isinstance(a, types.Array) and a.ndim == 1: - return a - - return typer - - -@infer_global(np.asfortranarray) -class AsFortranArray(CallableTemplate): - - def generic(self): - def typer(a): - if isinstance(a, types.Array): - return a.copy(layout='F', ndim=max(a.ndim, 1)) - - return typer - - -@infer_global(np.ascontiguousarray) -class AsContiguousArray(CallableTemplate): - - def generic(self): - def typer(a): - if isinstance(a, types.Array): - return a.copy(layout='C', ndim=max(a.ndim, 1)) - - return typer - - -@infer_global(np.copy) -class NdCopy(CallableTemplate): - - def generic(self): - def typer(a): - if isinstance(a, types.Array): - layout = 'F' if a.layout == 'F' else 'C' - return a.copy(layout=layout, readonly=False) - - return typer - - -@infer_global(np.expand_dims) -class NdExpandDims(CallableTemplate): - - def generic(self): - def typer(a, axis): - if (not isinstance(a, types.Array) - or not isinstance(axis, types.Integer)): - return - - layout = a.layout if a.ndim <= 1 else 'A' - return a.copy(ndim=a.ndim + 1, layout=layout) - - return typer - - -class BaseAtLeastNdTemplate(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - if not args or not all(isinstance(a, types.Array) for a in 
args): - return - - rets = [self.convert_array(a) for a in args] - if len(rets) > 1: - retty = types.BaseTuple.from_types(rets) - else: - retty = rets[0] - return signature(retty, *args) - - -@infer_global(np.atleast_1d) -class NdAtLeast1d(BaseAtLeastNdTemplate): - - def convert_array(self, a): - return a.copy(ndim=max(a.ndim, 1)) - - -@infer_global(np.atleast_2d) -class NdAtLeast2d(BaseAtLeastNdTemplate): - - def convert_array(self, a): - return a.copy(ndim=max(a.ndim, 2)) - - -@infer_global(np.atleast_3d) -class NdAtLeast3d(BaseAtLeastNdTemplate): - - def convert_array(self, a): - return a.copy(ndim=max(a.ndim, 3)) - - -def _homogeneous_dims(context, func_name, arrays): - ndim = arrays[0].ndim - for a in arrays: - if a.ndim != ndim: - raise TypeError("%s(): all the input arrays " - "must have same number of dimensions" - % func_name) - return ndim - -def _sequence_of_arrays(context, func_name, arrays, - dim_chooser=_homogeneous_dims): - if (not isinstance(arrays, types.BaseTuple) - or not len(arrays) - or not all(isinstance(a, types.Array) for a in arrays)): - raise TypeError("%s(): expecting a non-empty tuple of arrays, " - "got %s" % (func_name, arrays)) - - ndim = dim_chooser(context, func_name, arrays) - - dtype = context.unify_types(*(a.dtype for a in arrays)) - if dtype is None: - raise TypeError("%s(): input arrays must have " - "compatible dtypes" % func_name) - - return dtype, ndim - -def _choose_concatenation_layout(arrays): - # Only create a F array if all input arrays have F layout. - # This is a simplified version of Numpy's behaviour, - # while Numpy's actually processes the input strides to - # decide on optimal output strides - # (see PyArray_CreateMultiSortedStridePerm()). 
- return 'F' if all(a.layout == 'F' for a in arrays) else 'C' - - -@infer_global(np.concatenate) -class NdConcatenate(CallableTemplate): - - def generic(self): - def typer(arrays, axis=None): - if axis is not None and not isinstance(axis, types.Integer): - # Note Numpy allows axis=None, but it isn't documented: - # https://github.com/numpy/numpy/issues/7968 - return - - dtype, ndim = _sequence_of_arrays(self.context, - "np.concatenate", arrays) - if ndim == 0: - raise TypeError("zero-dimensional arrays cannot be concatenated") - - layout = _choose_concatenation_layout(arrays) - - return types.Array(dtype, ndim, layout) - - return typer - - -if numpy_version >= (1, 10): - @infer_global(np.stack) - class NdStack(CallableTemplate): - - def generic(self): - def typer(arrays, axis=None): - if axis is not None and not isinstance(axis, types.Integer): - # Note Numpy allows axis=None, but it isn't documented: - # https://github.com/numpy/numpy/issues/7968 - return - - dtype, ndim = _sequence_of_arrays(self.context, - "np.stack", arrays) - - # This diverges from Numpy's behaviour, which simply inserts - # a new stride at the requested axis (therefore can return - # a 'A' array). 
- layout = 'F' if all(a.layout == 'F' for a in arrays) else 'C' - - return types.Array(dtype, ndim + 1, layout) - - return typer - - -class BaseStackTemplate(CallableTemplate): - - def generic(self): - def typer(arrays): - dtype, ndim = _sequence_of_arrays(self.context, - self.func_name, arrays) - - ndim = max(ndim, self.ndim_min) - layout = _choose_concatenation_layout(arrays) - - return types.Array(dtype, ndim, layout) - - return typer - - -@infer_global(np.hstack) -class NdStack(BaseStackTemplate): - func_name = "np.hstack" - ndim_min = 1 - -@infer_global(np.vstack) -class NdStack(BaseStackTemplate): - func_name = "np.vstack" - ndim_min = 2 - -@infer_global(np.dstack) -class NdStack(BaseStackTemplate): - func_name = "np.dstack" - ndim_min = 3 - - - -def _column_stack_dims(context, func_name, arrays): - # column_stack() allows stacking 1-d and 2-d arrays together - for a in arrays: - if a.ndim < 1 or a.ndim > 2: - raise TypeError("np.column_stack() is only defined on " - "1-d and 2-d arrays") - return 2 - - -@infer_global(np.column_stack) -class NdColumnStack(CallableTemplate): - - def generic(self): - def typer(arrays): - dtype, ndim = _sequence_of_arrays(self.context, - "np.column_stack", arrays, - dim_chooser=_column_stack_dims) - - layout = _choose_concatenation_layout(arrays) - - return types.Array(dtype, ndim, layout) - - return typer - - -# ----------------------------------------------------------------------------- -# Linear algebra - - -class MatMulTyperMixin(object): - - def matmul_typer(self, a, b, out=None): - """ - Typer function for Numpy matrix multiplication. 
- """ - if not isinstance(a, types.Array) or not isinstance(b, types.Array): - return - if not all(x.ndim in (1, 2) for x in (a, b)): - raise TypingError("%s only supported on 1-D and 2-D arrays" - % (self.func_name, )) - # Output dimensionality - ndims = set([a.ndim, b.ndim]) - if ndims == set([2]): - # M * M - out_ndim = 2 - elif ndims == set([1, 2]): - # M* V and V * M - out_ndim = 1 - elif ndims == set([1]): - # V * V - out_ndim = 0 - - if out is not None: - if out_ndim == 0: - raise TypeError("explicit output unsupported for vector * vector") - elif out.ndim != out_ndim: - raise TypeError("explicit output has incorrect dimensionality") - if not isinstance(out, types.Array) or out.layout != 'C': - raise TypeError("output must be a C-contiguous array") - all_args = (a, b, out) - else: - all_args = (a, b) - - if not all(x.layout in 'CF' for x in (a, b)): - warnings.warn("%s is faster on contiguous arrays, called on %s" - % (self.func_name, (a, b)), PerformanceWarning) - if not all(x.dtype == a.dtype for x in all_args): - raise TypingError("%s arguments must all have " - "the same dtype" % (self.func_name,)) - if not isinstance(a.dtype, (types.Float, types.Complex)): - raise TypingError("%s only supported on " - "float and complex arrays" - % (self.func_name,)) - if out: - return out - elif out_ndim > 0: - return types.Array(a.dtype, out_ndim, 'C') - else: - return a.dtype - - -@infer_global(np.dot) -class Dot(MatMulTyperMixin, CallableTemplate): - func_name = "np.dot()" - - def generic(self): - def typer(a, b, out=None): - # NOTE: np.dot() and the '@' operator have distinct semantics - # for >2-D arrays, but we don't support them. 
- return self.matmul_typer(a, b, out) - - return typer - - -@infer_global(np.vdot) -class VDot(CallableTemplate): - - def generic(self): - def typer(a, b): - if not isinstance(a, types.Array) or not isinstance(b, types.Array): - return - if not all(x.ndim == 1 for x in (a, b)): - raise TypingError("np.vdot() only supported on 1-D arrays") - if not all(x.layout in 'CF' for x in (a, b)): - warnings.warn("np.vdot() is faster on contiguous arrays, called on %s" - % ((a, b),), PerformanceWarning) - if not all(x.dtype == a.dtype for x in (a, b)): - raise TypingError("np.vdot() arguments must all have " - "the same dtype") - if not isinstance(a.dtype, (types.Float, types.Complex)): - raise TypingError("np.vdot() only supported on " - "float and complex arrays") - return a.dtype - - return typer - - -@infer -class MatMul(MatMulTyperMixin, AbstractTemplate): - key = "@" - func_name = "'@'" - - def generic(self, args, kws): - assert not kws - restype = self.matmul_typer(*args) - if restype is not None: - return signature(restype, *args) - - -def _check_linalg_matrix(a, func_name): - if not isinstance(a, types.Array): - return - if not a.ndim == 2: - raise TypingError("np.linalg.%s() only supported on 2-D arrays" - % func_name) - if not isinstance(a.dtype, (types.Float, types.Complex)): - raise TypingError("np.linalg.%s() only supported on " - "float and complex arrays" % func_name) - -# ----------------------------------------------------------------------------- -# Miscellaneous functions - -@infer_global(np.ndenumerate) -class NdEnumerate(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - arr, = args - - if isinstance(arr, types.Array): - enumerate_type = types.NumpyNdEnumerateType(arr) - return signature(enumerate_type, *args) - - -@infer_global(np.nditer) -class NdIter(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - if len(args) != 1: - return - arrays, = args - - if isinstance(arrays, types.BaseTuple): - if not arrays: - 
return - arrays = list(arrays) - else: - arrays = [arrays] - nditerty = types.NumpyNdIterType(arrays) - return signature(nditerty, *args) - - -@infer_global(pndindex) -@infer_global(np.ndindex) -class NdIndex(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - - # Either ndindex(shape) or ndindex(*shape) - if len(args) == 1 and isinstance(args[0], types.BaseTuple): - tup = args[0] - if tup.count > 0 and not isinstance(tup, types.UniTuple): - # Heterogeneous tuple - return - shape = list(tup) - else: - shape = args - - if all(isinstance(x, types.Integer) for x in shape): - iterator_type = types.NumpyNdIndexType(len(shape)) - return signature(iterator_type, *args) - - -# We use the same typing key for np.round() and np.around() to -# re-use the implementations automatically. -@infer_global(np.round) -@infer_global(np.around, typing_key=np.round) -class Round(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - assert 1 <= len(args) <= 3 - - arg = args[0] - if len(args) == 1: - decimals = types.intp - out = None - else: - decimals = args[1] - if len(args) == 2: - out = None - else: - out = args[2] - - supported_scalars = (types.Integer, types.Float, types.Complex) - if isinstance(arg, supported_scalars): - assert out is None - return signature(arg, *args) - if (isinstance(arg, types.Array) and isinstance(arg.dtype, supported_scalars) and - isinstance(out, types.Array) and isinstance(out.dtype, supported_scalars) and - out.ndim == arg.ndim): - # arg can only be complex if out is complex too - if (not isinstance(arg.dtype, types.Complex) - or isinstance(out.dtype, types.Complex)): - return signature(out, *args) - - -@infer_global(np.where) -class Where(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - - if len(args) == 1: - # 0-dim arrays return one result array - ary = args[0] - ndim = max(ary.ndim, 1) - retty = types.UniTuple(types.Array(types.intp, 1, 'C'), ndim) - return signature(retty, ary) - - elif 
len(args) == 3: - # NOTE: contrary to Numpy, we only support homogeneous arguments - cond, x, y = args - if isinstance(cond, types.Array): - # array where() - if (cond.ndim == x.ndim == y.ndim and - x.dtype == y.dtype): - retty = types.Array(x.dtype, x.ndim, x.layout) - return signature(retty, *args) - else: - # scalar where() - if not isinstance(x, types.Array) and x == y: - retty = types.Array(x, 0, 'C') - return signature(retty, *args) - - -@infer_global(np.sinc) -class Sinc(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - assert len(args) == 1 - arg = args[0] - supported_scalars = (types.Float, types.Complex) - if (isinstance(arg, supported_scalars) or - (isinstance(arg, types.Array) and - isinstance(arg.dtype, supported_scalars))): - return signature(arg, arg) - - -@infer_global(np.angle) -class Angle(CallableTemplate): - """ - Typing template for np.angle() - """ - def generic(self): - def typer(z, deg=False): - if isinstance(z, types.Array): - dtype = z.dtype - else: - dtype = z - if isinstance(dtype, types.Complex): - ret_dtype = dtype.underlying_float - elif isinstance(dtype, types.Float): - ret_dtype = dtype - else: - return - if isinstance(z, types.Array): - return z.copy(dtype=ret_dtype) - else: - return ret_dtype - return typer - - -@infer_global(np.diag) -class DiagCtor(CallableTemplate): - """ - Typing template for np.diag() - """ - def generic(self): - def typer(ref, k=0): - if isinstance(ref, types.Array): - if ref.ndim == 1: - rdim = 2 - elif ref.ndim == 2: - rdim = 1 - else: - return None - return types.Array(ndim=rdim, dtype=ref.dtype, layout='C') - return typer - - -@infer_global(np.take) -class Take(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - assert len(args) == 2 - arr, ind = args - if isinstance(ind, types.Number): - retty = arr.dtype - elif isinstance(ind, types.Array): - retty = types.Array(ndim=ind.ndim, dtype=arr.dtype, layout='C') - elif isinstance(ind, types.List): - retty = 
types.Array(ndim=1, dtype=arr.dtype, layout='C') - elif isinstance(ind, types.BaseTuple): - retty = types.Array(ndim=np.ndim(ind), dtype=arr.dtype, layout='C') - else: - return None - - return signature(retty, *args) - -# ----------------------------------------------------------------------------- -# Numba helpers - -@infer_global(carray) -class NumbaCArray(CallableTemplate): - layout = 'C' - - def generic(self): - func_name = self.key.__name__ - - def typer(ptr, shape, dtype=types.none): - if ptr is types.voidptr: - ptr_dtype = None - elif isinstance(ptr, types.CPointer): - ptr_dtype = ptr.dtype - else: - raise TypeError("%s(): pointer argument expected, got '%s'" - % (func_name, ptr)) - - if dtype is types.none: - if ptr_dtype is None: - raise TypeError("%s(): explicit dtype required for void* argument" - % (func_name,)) - dtype = ptr_dtype - elif isinstance(dtype, types.DTypeSpec): - dtype = dtype.dtype - if ptr_dtype is not None and dtype != ptr_dtype: - raise TypeError("%s(): mismatching dtype '%s' for pointer type '%s'" - % (func_name, dtype, ptr)) - else: - raise TypeError("%s(): invalid dtype spec '%s'" - % (func_name, dtype)) - - ndim = _parse_shape(shape) - if ndim is None: - raise TypeError("%s(): invalid shape '%s'" - % (func_name, shape)) - - return types.Array(dtype, ndim, self.layout) - - return typer - - -@infer_global(farray) -class NumbaFArray(NumbaCArray): - layout = 'F' diff --git a/numba/numba/typing/operatordecl.py b/numba/numba/typing/operatordecl.py deleted file mode 100644 index 953a1259e..000000000 --- a/numba/numba/typing/operatordecl.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -Typing declarations for the operator module. 
-""" - -import operator - -from numba import types -from numba import utils -from numba.typing.templates import (ConcreteTemplate, AbstractTemplate, - signature, Registry) - -registry = Registry() -infer_getattr = registry.register_attr -infer_global = registry.register_global - - -class MappedOperator(AbstractTemplate): - - # Whether the args to the operator and the operator module func are reversed - reverse_args = False - - def generic(self, args, kws): - assert not kws - args = args[::-1] if self.reverse_args else args - sig = self.context.resolve_function_type(self.op, args, kws) - if self.reverse_args and sig is not None: - sig = signature(sig.return_type, *sig.args[::-1]) - return sig - - -class MappedInplaceOperator(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - if not args: - return - first = args[0] - op = self.mutable_op if first.mutable else self.immutable_op - return self.context.resolve_function_type(op, args, kws) - - -# Redirect all functions in the operator module to the corresponding -# built-in operators. - -for name, inplace_name, op in utils.operator_map: - op_func = getattr(operator, name) - op_type = type('Operator_' + name, (MappedOperator,), - {'key': op_func, 'op': op, - 'reverse_args': op == 'in'}) - infer_global(op_func, types.Function(op_type)) - - if inplace_name: - op_func = getattr(operator, inplace_name) - op_type = type('Operator_' + inplace_name, (MappedInplaceOperator,), - {'key': op_func, - 'mutable_op': op + '=', - 'immutable_op': op}) - infer_global(op_func, types.Function(op_type)) diff --git a/numba/numba/typing/randomdecl.py b/numba/numba/typing/randomdecl.py deleted file mode 100644 index d36c48cbf..000000000 --- a/numba/numba/typing/randomdecl.py +++ /dev/null @@ -1,288 +0,0 @@ -from __future__ import absolute_import, print_function - -import random - -import numpy as np - -from .. 
import types -from .templates import (ConcreteTemplate, AbstractTemplate, AttributeTemplate, - CallableTemplate, Registry, signature) - - -registry = Registry() -infer = registry.register -infer_global = registry.register_global -infer_getattr = registry.register_attr - - -# random.random(), random.seed() etc. are not plain functions, they are bound -# methods of a private object. We have to be careful to use a well-known -# object (e.g. the string "random.seed") as a key, not the bound method itself. -# (same for np.random.random(), etc.) - -_int_types = sorted(set((types.intp, types.int64))) -# Should we support float32? -_float_types = [types.float64] - - -# -# Basics -# - -def normalize_shape(shape): - if isinstance(shape, types.Integer): - return types.intp, 1 - elif (isinstance(shape, types.BaseTuple) and - all(isinstance(v, types.Integer)) for v in shape): - ndim = len(shape) - return types.UniTuple(types.intp, ndim), ndim - else: - raise TypeError("invalid size type %s" % (shape,)) - - -class RandomTemplate(CallableTemplate): - """ - A template helper to transparently handle the typing of array-returning - np.random.* functions. - """ - - def array_typer(self, scalar_typer, size=None): - prefix = self.key.split('.')[0] - assert prefix in ('np', 'random'), self.key - - if size is None: - # Scalar variant - def typer(*args, **kwargs): - return scalar_typer(*args, **kwargs) - else: - # Array variant (only for the 'np.random.*' namespace) - def typer(*args, **kwargs): - if prefix == 'random': - raise TypeError("unexpected size parameter for %r" - % (self.key,)) - shape, ndim = normalize_shape(size) - # Type the scalar variant and wrap the result in an array - # of the appropriate dimensionality. 
- sig = scalar_typer(*args, **kwargs) - if sig is not None: - return signature( - types.Array(sig.return_type, ndim, 'C'), - *(sig.args + (shape,))) - - return typer - - -class ConcreteRandomTemplate(RandomTemplate): - """ - A RandomTemplate subclass using the `cases` attribute as a list of - allowed scalar signatures. - """ - - def array_typer(self, size=None): - key = self.key - cases = self.cases - context = self.context - - def concrete_scalar_typer(*args, **kwargs): - # Filter out omitted args - while args and args[-1] is None: - args = args[:-1] - return context.resolve_overload(key, cases, args, kwargs) - - return RandomTemplate.array_typer(self, concrete_scalar_typer, size) - - -@infer_global(random.getrandbits, typing_key="random.getrandbits") -class Random_getrandbits(ConcreteTemplate): - cases = [signature(types.uint64, types.int32)] - -@infer_global(random.random, typing_key="random.random") -@infer_global(np.random.random, typing_key="np.random.random") -class Random_random(ConcreteRandomTemplate): - cases = [signature(types.float64)] - - def generic(self): - def typer(size=None): - return self.array_typer(size)() - return typer - - -@infer_global(random.randint, typing_key="random.randint") -class Random_randint(ConcreteTemplate): - cases = [signature(tp, tp, tp) for tp in _int_types] - -@infer_global(np.random.randint, typing_key="np.random.randint") -class Random_randint(ConcreteRandomTemplate): - cases = [signature(tp, tp) for tp in _int_types] - cases += [signature(tp, tp, tp) for tp in _int_types] - - def generic(self): - def typer(low, high=None, size=None): - return self.array_typer(size)(low, high) - return typer - - -@infer_global(random.randrange, typing_key="random.randrange") -class Random_randrange(ConcreteTemplate): - cases = [signature(tp, tp) for tp in _int_types] - cases += [signature(tp, tp, tp) for tp in _int_types] - cases += [signature(tp, tp, tp, tp) for tp in _int_types] - -@infer_global(random.seed, typing_key="random.seed") 
-@infer_global(np.random.seed, typing_key="np.random.seed") -class Random_seed(ConcreteTemplate): - cases = [signature(types.void, types.uint32)] - - -# -# Distributions -# - -@infer_global(np.random.geometric, typing_key="np.random.geometric") -@infer_global(np.random.logseries, typing_key="np.random.logseries") -@infer_global(np.random.zipf, typing_key="np.random.zipf") -class Numpy_geometric(ConcreteRandomTemplate): - cases = [signature(types.int64, tp) for tp in _float_types] - - def generic(self): - def typer(a, size=None): - return self.array_typer(size)(a) - return typer - -@infer_global(np.random.binomial, typing_key="np.random.binomial") -@infer_global(np.random.negative_binomial, - typing_key="np.random.negative_binomial") -class Numpy_negative_binomial(ConcreteRandomTemplate): - cases = [signature(types.int64, types.int64, tp) for tp in _float_types] - - def generic(self): - def typer(n, p, size=None): - return self.array_typer(size)(n, p) - return typer - -@infer_global(np.random.poisson, typing_key="np.random.poisson") -class Numpy_poisson(ConcreteRandomTemplate): - cases = [signature(types.int64, tp) for tp in _float_types] - cases += [signature(types.int64)] - - def generic(self): - def typer(lam=None, size=None): - return self.array_typer(size)(lam) - return typer - -@infer_global(np.random.exponential, typing_key="np.random.exponential") -@infer_global(np.random.rayleigh, typing_key="np.random.rayleigh") -class Numpy_exponential(ConcreteRandomTemplate): - cases = [signature(tp, tp) for tp in _float_types] - cases += [signature(tp) for tp in _float_types] - - def generic(self): - def typer(scale=None, size=None): - return self.array_typer(size)(scale) - return typer - -@infer_global(np.random.hypergeometric, typing_key="np.random.hypergeometric") -class Numpy_hypergeometric(ConcreteRandomTemplate): - cases = [signature(tp, tp, tp, tp) for tp in _int_types] - - def generic(self): - def typer(ngood, nbad, nsample, size=None): - return 
self.array_typer(size)(ngood, nbad, nsample) - return typer - -@infer_global(np.random.laplace, typing_key="np.random.laplace") -@infer_global(np.random.logistic, typing_key="np.random.logistic") -@infer_global(np.random.lognormal, typing_key="np.random.lognormal") -@infer_global(np.random.normal, typing_key="np.random.normal") -class Numpy_normal(ConcreteRandomTemplate): - cases = [signature(tp, tp, tp) for tp in _float_types] - cases += [signature(tp, tp) for tp in _float_types] - cases += [signature(tp) for tp in _float_types] - - def generic(self): - def typer(loc=None, scale=None, size=None): - return self.array_typer(size)(loc, scale) - return typer - -@infer_global(np.random.gamma, typing_key="np.random.gamma") -class Numpy_gamma(ConcreteRandomTemplate): - cases = [signature(tp, tp, tp) for tp in _float_types] - cases += [signature(tp, tp) for tp in _float_types] - - def generic(self): - def typer(shape, scale=None, size=None): - return self.array_typer(size)(shape, scale) - return typer - -@infer_global(np.random.triangular, typing_key="np.random.triangular") -class Random_ternary_distribution(ConcreteRandomTemplate): - cases = [signature(tp, tp, tp, tp) for tp in _float_types] - - def generic(self): - def typer(left, mode, right, size=None): - return self.array_typer(size)(left, mode, right) - return typer - - -@infer_global(np.random.beta, typing_key="np.random.beta") -@infer_global(np.random.f, typing_key="np.random.f") -@infer_global(np.random.gumbel, typing_key="np.random.gumbel") -@infer_global(np.random.uniform, typing_key="np.random.uniform") -@infer_global(np.random.vonmises, typing_key="np.random.vonmises") -@infer_global(np.random.wald, typing_key="np.random.wald") -@infer_global(random.betavariate, typing_key="random.betavariate") -@infer_global(random.gammavariate, typing_key="random.gammavariate") -@infer_global(random.gauss, typing_key="random.gauss") -@infer_global(random.lognormvariate, typing_key="random.lognormvariate") 
-@infer_global(random.normalvariate, typing_key="random.normalvariate") -@infer_global(random.uniform, typing_key="random.uniform") -@infer_global(random.vonmisesvariate, typing_key="random.vonmisesvariate") -@infer_global(random.weibullvariate, typing_key="random.weibullvariate") -class Random_binary_distribution(ConcreteRandomTemplate): - cases = [signature(tp, tp, tp) for tp in _float_types] - - def generic(self): - def typer(a, b, size=None): - return self.array_typer(size)(a, b) - return typer - - -@infer_global(np.random.chisquare, typing_key="np.random.chisquare") -@infer_global(np.random.pareto, typing_key="np.random.pareto") -@infer_global(np.random.power, typing_key="np.random.power") -@infer_global(np.random.standard_gamma, typing_key="np.random.standard_gamma") -@infer_global(np.random.standard_t, typing_key="np.random.standard_t") -@infer_global(np.random.weibull, typing_key="np.random.weibull") -@infer_global(random.expovariate, typing_key="random.expovariate") -@infer_global(random.paretovariate, typing_key="random.paretovariate") -class Random_unary_distribution(ConcreteRandomTemplate): - cases = [signature(tp, tp) for tp in _float_types] - - def generic(self): - def typer(a, size=None): - return self.array_typer(size)(a) - return typer - - -@infer_global(np.random.standard_cauchy, - typing_key="np.random.standard_cauchy") -@infer_global(np.random.standard_normal, - typing_key="np.random.standard_normal") -@infer_global(np.random.standard_exponential, - typing_key="np.random.standard_exponential") -class Random_nullary_distribution(ConcreteRandomTemplate): - cases = [signature(tp) for tp in _float_types] - - def generic(self): - def typer(size=None): - return self.array_typer(size)() - return typer - - -@infer_global(random.triangular, typing_key="random.triangular") -class Random_triangular(ConcreteTemplate): - cases = [signature(tp, tp, tp) for tp in _float_types] - cases += [signature(tp, tp, tp, tp) for tp in _float_types] - -# NOTE: some 
functions can have @overloads in numba.targets.randomimpl, -# and therefore don't need a typing declaration here. diff --git a/numba/numba/typing/setdecl.py b/numba/numba/typing/setdecl.py deleted file mode 100644 index f3974269b..000000000 --- a/numba/numba/typing/setdecl.py +++ /dev/null @@ -1,190 +0,0 @@ -from __future__ import absolute_import, print_function - -from .. import types -from .templates import (ConcreteTemplate, AbstractTemplate, AttributeTemplate, - CallableTemplate, Registry, signature, bound_function, - make_callable_template) -# Ensure set is typed as a collection as well -from . import collections - - -registry = Registry() -infer = registry.register -infer_global = registry.register_global -infer_getattr = registry.register_attr - - -@infer_global(set) -class SetBuiltin(AbstractTemplate): - - def generic(self, args, kws): - assert not kws - if args: - # set(iterable) - iterable, = args - if isinstance(iterable, types.IterableType): - dtype = iterable.iterator_type.yield_type - if isinstance(dtype, types.Hashable): - return signature(types.Set(dtype), iterable) - else: - # set() - return signature(types.Set(types.undefined)) - - -@infer_getattr -class SetAttribute(AttributeTemplate): - key = types.Set - - @bound_function("set.add") - def resolve_add(self, set, args, kws): - item, = args - assert not kws - unified = self.context.unify_pairs(set.dtype, item) - if unified is not None: - sig = signature(types.none, unified) - sig.recvr = set.copy(dtype=unified) - return sig - - @bound_function("set.clear") - def resolve_clear(self, set, args, kws): - assert not kws - if not args: - return signature(types.none) - - @bound_function("set.copy") - def resolve_copy(self, set, args, kws): - assert not kws - if not args: - return signature(set) - - @bound_function("set.discard") - def resolve_discard(self, set, args, kws): - item, = args - assert not kws - return signature(types.none, set.dtype) - - @bound_function("set.pop") - def resolve_pop(self, set, 
args, kws): - assert not kws - if not args: - return signature(set.dtype) - - @bound_function("set.remove") - def resolve_remove(self, set, args, kws): - item, = args - assert not kws - return signature(types.none, set.dtype) - - @bound_function("set.update") - def resolve_update(self, set, args, kws): - iterable, = args - assert not kws - if not isinstance(iterable, types.IterableType): - return - - dtype = iterable.iterator_type.yield_type - unified = self.context.unify_pairs(set.dtype, dtype) - if unified is not None: - sig = signature(types.none, iterable) - sig.recvr = set.copy(dtype=unified) - return sig - - def _resolve_xxx_update(self, set, args, kws): - assert not kws - iterable, = args - # Set arguments only supported for now - # (note we can mix non-reflected and reflected arguments) - if isinstance(iterable, types.Set) and iterable.dtype == set.dtype: - return signature(types.none, iterable) - - @bound_function("set.difference_update") - def resolve_difference_update(self, set, args, kws): - return self._resolve_xxx_update(set, args, kws) - - @bound_function("set.intersection_update") - def resolve_intersection_update(self, set, args, kws): - return self._resolve_xxx_update(set, args, kws) - - @bound_function("set.symmetric_difference_update") - def resolve_symmetric_difference_update(self, set, args, kws): - return self._resolve_xxx_update(set, args, kws) - - def _resolve_operator(self, set, args, kws): - assert not kws - iterable, = args - # Set arguments only supported for now - # (note we can mix non-reflected and reflected arguments) - if isinstance(iterable, types.Set) and iterable.dtype == set.dtype: - return signature(set, iterable) - - @bound_function("set.difference") - def resolve_difference(self, set, args, kws): - return self._resolve_operator(set, args, kws) - - @bound_function("set.intersection") - def resolve_intersection(self, set, args, kws): - return self._resolve_operator(set, args, kws) - - 
@bound_function("set.symmetric_difference") - def resolve_symmetric_difference(self, set, args, kws): - return self._resolve_operator(set, args, kws) - - @bound_function("set.union") - def resolve_union(self, set, args, kws): - return self._resolve_operator(set, args, kws) - - def _resolve_comparator(self, set, args, kws): - assert not kws - arg, = args - if arg == set: - return signature(types.boolean, arg) - - @bound_function("set.isdisjoint") - def resolve_isdisjoint(self, set, args, kws): - return self._resolve_comparator(set, args, kws) - - @bound_function("set.issubset") - def resolve_issubset(self, set, args, kws): - return self._resolve_comparator(set, args, kws) - - @bound_function("set.issuperset") - def resolve_issuperset(self, set, args, kws): - return self._resolve_comparator(set, args, kws) - - -class SetOperator(AbstractTemplate): - - def generic(self, args, kws): - if len(args) != 2: - return - a, b = args - if (isinstance(a, types.Set) and isinstance(b, types.Set) - and a.dtype == b.dtype): - return signature(a, *args) - - -class SetComparison(AbstractTemplate): - - def generic(self, args, kws): - if len(args) != 2: - return - a, b = args - if isinstance(a, types.Set) and isinstance(b, types.Set) and a == b: - return signature(types.boolean, *args) - - -for op_key in '&|^-': - @infer - class ConcreteSetOperator(SetOperator): - key = op_key - - @infer - class ConcreteInplaceSetOperator(SetOperator): - key = op_key + '=' - - -for op_key in ('==', '!=', '<', '<=', '>=', '>'): - @infer - class ConcreteSetComparison(SetComparison): - key = op_key - diff --git a/numba/numba/typing/templates.py b/numba/numba/typing/templates.py deleted file mode 100644 index 27ad4f01e..000000000 --- a/numba/numba/typing/templates.py +++ /dev/null @@ -1,715 +0,0 @@ -""" -Define typing templates -""" -from __future__ import print_function, division, absolute_import - -import functools -from functools import reduce -import operator -import sys -from types import MethodType - 
-from .. import types, utils -from ..errors import TypingError, UntypedAttributeError - - -class Signature(object): - """ - The signature of a function call or operation, i.e. its argument types - and return type. - """ - - # XXX Perhaps the signature should be a BoundArguments, instead - # of separate args and pysig... - __slots__ = 'return_type', 'args', 'recvr', 'pysig' - - def __init__(self, return_type, args, recvr, pysig=None): - if isinstance(args, list): - args = tuple(args) - self.return_type = return_type - self.args = args - self.recvr = recvr - self.pysig = pysig - - def replace(self, **kwargs): - """Copy and replace the given attributes provided as keyword arguments. - Returns an updated copy. - """ - curstate = dict(return_type=self.return_type, - args=self.args, - recvr=self.recvr, - pysig=self.pysig) - curstate.update(kwargs) - return Signature(**curstate) - - def __getstate__(self): - """ - Needed because of __slots__. - """ - return self.return_type, self.args, self.recvr, self.pysig - - def __setstate__(self, state): - """ - Needed because of __slots__. - """ - self.return_type, self.args, self.recvr, self.pysig = state - - def __hash__(self): - return hash((self.args, self.return_type)) - - def __eq__(self, other): - if isinstance(other, Signature): - return (self.args == other.args and - self.return_type == other.return_type and - self.recvr == other.recvr and - self.pysig == other.pysig) - - def __ne__(self, other): - return not (self == other) - - def __repr__(self): - return "%s -> %s" % (self.args, self.return_type) - - @property - def is_method(self): - """ - Whether this signature represents a bound method or a regular - function. - """ - return self.recvr is not None - - def as_method(self): - """ - Convert this signature to a bound method signature. 
- """ - if self.recvr is not None: - return self - sig = signature(self.return_type, *self.args[1:], - recvr=self.args[0]) - return sig - - def as_function(self): - """ - Convert this signature to a regular function signature. - """ - if self.recvr is None: - return self - sig = signature(self.return_type, *((self.recvr,) + self.args)) - return sig - - -def make_concrete_template(name, key, signatures): - baseclasses = (ConcreteTemplate,) - gvars = dict(key=key, cases=list(signatures)) - return type(name, baseclasses, gvars) - - -def make_callable_template(key, typer, recvr=None): - """ - Create a callable template with the given key and typer function. - """ - def generic(self): - return typer - - name = "%s_CallableTemplate" % (key,) - bases = (CallableTemplate,) - class_dict = dict(key=key, generic=generic, recvr=recvr) - return type(name, bases, class_dict) - - -def signature(return_type, *args, **kws): - recvr = kws.pop('recvr', None) - assert not kws - return Signature(return_type, args, recvr=recvr) - - -def fold_arguments(pysig, args, kws, normal_handler, default_handler, - stararg_handler): - """ - Given the signature *pysig*, explicit *args* and *kws*, resolve - omitted arguments and keyword arguments. A tuple of positional - arguments is returned. 
- Various handlers allow to process arguments: - - normal_handler(index, param, value) is called for normal arguments - - default_handler(index, param, default) is called for omitted arguments - - stararg_handler(index, param, values) is called for a "*args" argument - """ - ba = pysig.bind(*args, **kws) - defargs = [] - for i, param in enumerate(pysig.parameters.values()): - name = param.name - default = param.default - if param.kind == param.VAR_POSITIONAL: - # stararg may be omitted, in which case its "default" value - # is simply the empty tuple - ba.arguments[name] = stararg_handler(i, param, - ba.arguments.get(name, ())) - elif name in ba.arguments: - # Non-stararg, present - ba.arguments[name] = normal_handler(i, param, ba.arguments[name]) - else: - # Non-stararg, omitted - assert default is not param.empty - ba.arguments[name] = default_handler(i, param, default) - if ba.kwargs: - # There's a remaining keyword argument, e.g. if omitting - # some argument with a default value before it. - raise NotImplementedError("unhandled keyword argument: %s" - % list(ba.kwargs)) - # Collect args in the right order - args = tuple(ba.arguments[param.name] - for param in pysig.parameters.values()) - return args - - -class FunctionTemplate(object): - # Set to true to disable unsafe cast. - # subclass overide-able - unsafe_casting = True - - # Whether the typing support literals - support_literals = False - - def __init__(self, context): - self.context = context - - def _select(self, cases, args, kws): - options = { - 'unsafe_casting': self.unsafe_casting, - } - selected = self.context.resolve_overload(self.key, cases, args, kws, - **options) - return selected - - def get_impl_key(self, sig): - """ - Return the key for looking up the implementation for the given - signature on the target context. - """ - # Lookup the key on the class, to avoid binding it with `self`. 
- key = type(self).key - # On Python 2, we must also take care about unbound methods - if isinstance(key, MethodType): - assert key.im_self is None - key = key.im_func - return key - - -class AbstractTemplate(FunctionTemplate): - """ - Defines method ``generic(self, args, kws)`` which compute a possible - signature base on input types. The signature does not have to match the - input types. It is compared against the input types afterwards. - """ - - def apply(self, args, kws): - generic = getattr(self, "generic") - sig = generic(args, kws) - - # Unpack optional type if no matching signature - if not sig and any(isinstance(x, types.Optional) for x in args): - def unpack_opt(x): - if isinstance(x, types.Optional): - return x.type - else: - return x - - args = list(map(unpack_opt, args)) - assert not kws # Not supported yet - sig = generic(args, kws) - - return sig - - -class CallableTemplate(FunctionTemplate): - """ - Base class for a template defining a ``generic(self)`` method - returning a callable to be called with the actual ``*args`` and - ``**kwargs`` representing the call signature. The callable has - to return a return type, a full signature, or None. The signature - does not have to match the input types. It is compared against the - input types afterwards. 
- """ - recvr = None - - def apply(self, args, kws): - generic = getattr(self, "generic") - typer = generic() - sig = typer(*args, **kws) - - # Unpack optional type if no matching signature - if sig is None: - if any(isinstance(x, types.Optional) for x in args): - def unpack_opt(x): - if isinstance(x, types.Optional): - return x.type - else: - return x - - args = list(map(unpack_opt, args)) - sig = typer(*args, **kws) - if sig is None: - return - - # Get the pysig - try: - pysig = typer.pysig - except AttributeError: - pysig = utils.pysignature(typer) - - # Fold any keyword arguments - bound = pysig.bind(*args, **kws) - if bound.kwargs: - raise TypingError("unsupported call signature") - if not isinstance(sig, Signature): - # If not a signature, `sig` is assumed to be the return type - if not isinstance(sig, types.Type): - raise TypeError("invalid return type for callable template: got %r" - % (sig,)) - sig = signature(sig, *bound.args) - if self.recvr is not None: - sig.recvr = self.recvr - # Hack any omitted parameters out of the typer's pysig, - # as lowering expects an exact match between formal signature - # and actual args. - if len(bound.args) < len(pysig.parameters): - parameters = list(pysig.parameters.values())[:len(bound.args)] - pysig = pysig.replace(parameters=parameters) - sig.pysig = pysig - cases = [sig] - return self._select(cases, bound.args, bound.kwargs) - - -class ConcreteTemplate(FunctionTemplate): - """ - Defines attributes "cases" as a list of signature to match against the - given input types. - """ - - def apply(self, args, kws): - cases = getattr(self, 'cases') - return self._select(cases, args, kws) - - -class _OverloadFunctionTemplate(AbstractTemplate): - """ - A base class of templates for overload functions. - """ - - def generic(self, args, kws): - """ - Type the overloaded function by compiling the appropriate - implementation for the given args. 
- """ - cache_key = self.context, args, tuple(kws.items()) - try: - disp = self._impl_cache[cache_key] - except KeyError: - # Get the overload implementation for the given types - pyfunc = self._overload_func(*args, **kws) - if pyfunc is None: - # No implementation => fail typing - self._impl_cache[cache_key] = None - return - from numba import jit - jitdecor = jit(nopython=True, **self._jit_options) - disp = self._impl_cache[cache_key] = jitdecor(pyfunc) - else: - if disp is None: - return - - # Compile and type it for the given types - disp_type = types.Dispatcher(disp) - sig = disp_type.get_call_type(self.context, args, kws) - # Store the compiled overload for use in the lowering phase - self._compiled_overloads[sig.args] = disp_type.get_overload(sig) - return sig - - def get_impl_key(self, sig): - """ - Return the key for looking up the implementation for the given - signature on the target context. - """ - return self._compiled_overloads[sig.args] - - -def make_overload_template(func, overload_func, jit_options): - """ - Make a template class for function *func* overloaded by *overload_func*. - Compiler options are passed as a dictionary to *jit_options*. - """ - func_name = getattr(func, '__name__', str(func)) - name = "OverloadTemplate_%s" % (func_name,) - base = _OverloadFunctionTemplate - dct = dict(key=func, _overload_func=staticmethod(overload_func), - _impl_cache={}, _compiled_overloads={}, _jit_options=jit_options) - return type(base)(name, (base,), dct) - - -class _IntrinsicTemplate(AbstractTemplate): - """ - A base class of templates for intrinsic definition - """ - - def generic(self, args, kws): - """ - Type the intrinsic by the arguments. 
- """ - from numba.targets.imputils import lower_builtin - - cache_key = self.context, args, tuple(kws.items()) - try: - return self._impl_cache[cache_key] - except KeyError: - result = self._definition_func(self.context, *args, **kws) - if result is None: - return - [sig, imp] = result - pysig = utils.pysignature(self._definition_func) - # omit context argument from user function - parameters = list(pysig.parameters.values())[1:] - sig.pysig = pysig.replace(parameters=parameters) - self._impl_cache[cache_key] = sig - self._overload_cache[sig.args] = imp - # register the lowering - lower_builtin(imp, *sig.args)(imp) - return sig - - def get_impl_key(self, sig): - """ - Return the key for looking up the implementation for the given - signature on the target context. - """ - return self._overload_cache[sig.args] - - -def make_intrinsic_template(handle, defn, name): - """ - Make a template class for a intrinsic handle *handle* defined by the - function *defn*. The *name* is used for naming the new template class. - """ - base = _IntrinsicTemplate - name = "_IntrinsicTemplate_%s" % (name) - dct = dict(key=handle, _definition_func=staticmethod(defn), - _impl_cache={}, _overload_cache={}) - return type(base)(name, (base,), dct) - - -class AttributeTemplate(object): - _initialized = False - - def __init__(self, context): - self._lazy_class_init() - self.context = context - - def resolve(self, value, attr): - return self._resolve(value, attr) - - @classmethod - def _lazy_class_init(cls): - if not cls._initialized: - cls.do_class_init() - cls._initialized = True - - @classmethod - def do_class_init(cls): - """ - Class-wide initialization. Can be overriden by subclasses to - register permanent typing or target hooks. 
- """ - - def _resolve(self, value, attr): - fn = getattr(self, "resolve_%s" % attr, None) - if fn is None: - fn = self.generic_resolve - if fn is NotImplemented: - if isinstance(value, types.Module): - return self.context.resolve_module_constants(value, attr) - else: - return None - else: - return fn(value, attr) - else: - return fn(value) - - generic_resolve = NotImplemented - - -class _OverloadAttributeTemplate(AttributeTemplate): - """ - A base class of templates for @overload_attribute functions. - """ - - def __init__(self, context): - super(_OverloadAttributeTemplate, self).__init__(context) - self.context = context - - @classmethod - def do_class_init(cls): - """ - Register attribute implementation. - """ - from numba.targets.imputils import lower_getattr - attr = cls._attr - - @lower_getattr(cls.key, attr) - def getattr_impl(context, builder, typ, value): - sig_args = (typ,) - sig_kws = {} - typing_context = context.typing_context - disp = cls._get_dispatcher(typing_context, typ, attr, sig_args, sig_kws) - disp_type = types.Dispatcher(disp) - sig = disp_type.get_call_type(typing_context, sig_args, sig_kws) - call = context.get_function(disp_type, sig) - return call(builder, (value,)) - - @classmethod - def _get_dispatcher(cls, context, typ, attr, sig_args, sig_kws): - """ - Get the compiled dispatcher implementing the attribute for - the given formal signature. - """ - cache_key = context, typ, attr - try: - disp = cls._impl_cache[cache_key] - except KeyError: - # Get the overload implementation for the given type - pyfunc = cls._overload_func(*sig_args, **sig_kws) - if pyfunc is None: - # No implementation => fail typing - cls._impl_cache[cache_key] = None - return - - from numba import jit - disp = cls._impl_cache[cache_key] = jit(nopython=True)(pyfunc) - return disp - - def _resolve_impl_sig(self, typ, attr, sig_args, sig_kws): - """ - Compute the actual implementation sig for the given formal argument types. 
- """ - disp = self._get_dispatcher(self.context, typ, attr, sig_args, sig_kws) - if disp is None: - return None - - # Compile and type it for the given types - disp_type = types.Dispatcher(disp) - sig = disp_type.get_call_type(self.context, sig_args, sig_kws) - return sig - - def _resolve(self, typ, attr): - if self._attr != attr: - return None - - sig = self._resolve_impl_sig(typ, attr, (typ,), {}) - return sig.return_type - - -class _OverloadMethodTemplate(_OverloadAttributeTemplate): - """ - A base class of templates for @overload_method functions. - """ - - @classmethod - def do_class_init(cls): - """ - Register generic method implementation. - """ - from numba.targets.imputils import lower_builtin - attr = cls._attr - - @lower_builtin((cls.key, attr), cls.key, types.VarArg(types.Any)) - def method_impl(context, builder, sig, args): - typ = sig.args[0] - typing_context = context.typing_context - disp = cls._get_dispatcher(typing_context, typ, attr, sig.args, {}) - disp_type = types.Dispatcher(disp) - sig = disp_type.get_call_type(typing_context, sig.args, {}) - call = context.get_function(disp_type, sig) - # Link dependent library - cg = context.codegen() - for lib in getattr(call, 'libs', ()): - cg.add_linking_library(lib) - return call(builder, args) - - def _resolve(self, typ, attr): - if self._attr != attr: - return None - - assert isinstance(typ, self.key) - - class MethodTemplate(AbstractTemplate): - key = (self.key, attr) - def generic(_, args, kws): - args = (typ,) + args - sig = self._resolve_impl_sig(typ, attr, args, kws) - if sig is not None: - return sig.as_method() - - return types.BoundFunction(MethodTemplate, typ) - - -def make_overload_attribute_template(typ, attr, overload_func, - base=_OverloadAttributeTemplate): - """ - Make a template class for attribute *attr* of *typ* overloaded by - *overload_func*. 
- """ - assert isinstance(typ, types.Type) or issubclass(typ, types.Type) - name = "OverloadTemplate_%s_%s" % (typ, attr) - # Note the implementation cache is subclass-specific - dct = dict(key=typ, _attr=attr, _impl_cache={}, - _overload_func=staticmethod(overload_func), - ) - return type(base)(name, (base,), dct) - - -def make_overload_method_template(typ, attr, overload_func): - """ - Make a template class for method *attr* of *typ* overloaded by - *overload_func*. - """ - return make_overload_attribute_template(typ, attr, overload_func, - base=_OverloadMethodTemplate) - - -def bound_function(template_key): - """ - Wrap an AttributeTemplate resolve_* method to allow it to - resolve an instance method's signature rather than a instance attribute. - The wrapped method must return the resolved method's signature - according to the given self type, args, and keywords. - - It is used thusly: - - class ComplexAttributes(AttributeTemplate): - @bound_function("complex.conjugate") - def resolve_conjugate(self, ty, args, kwds): - return ty - - *template_key* (e.g. "complex.conjugate" above) will be used by the - target to look up the method's implementation, as a regular function. - """ - def wrapper(method_resolver): - @functools.wraps(method_resolver) - def attribute_resolver(self, ty): - class MethodTemplate(AbstractTemplate): - key = template_key - def generic(_, args, kws): - sig = method_resolver(self, ty, args, kws) - if sig is not None and sig.recvr is None: - sig.recvr = ty - return sig - - return types.BoundFunction(MethodTemplate, ty) - return attribute_resolver - return wrapper - - -class MacroTemplate(object): - pass - - -# ----------------------------- - -class Registry(object): - """ - A registry of typing declarations. The registry stores such declarations - for functions, attributes and globals. 
- """ - - def __init__(self): - self.functions = [] - self.attributes = [] - self.globals = [] - - def register(self, item): - assert issubclass(item, FunctionTemplate) - self.functions.append(item) - return item - - def register_attr(self, item): - assert issubclass(item, AttributeTemplate) - self.attributes.append(item) - return item - - def register_global(self, val=None, typ=None, **kwargs): - """ - Register the typing of a global value. - Functional usage with a Numba type:: - register_global(value, typ) - - Decorator usage with a template class:: - @register_global(value, typing_key=None) - class Template: - ... - """ - if typ is not None: - # register_global(val, typ) - assert val is not None - assert not kwargs - self.globals.append((val, typ)) - else: - def decorate(cls, typing_key): - class Template(cls): - key = typing_key - if callable(val): - typ = types.Function(Template) - else: - raise TypeError("cannot infer type for global value %r") - self.globals.append((val, typ)) - return cls - - # register_global(val, typing_key=None)(