From 48c3b985f9d66d53076cf6ce87767f1d58ee88ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Louf?= Date: Mon, 23 Aug 2021 22:50:08 +0200 Subject: [PATCH] Use aeppl to build the logprob --- aehmc/trajectory.py | 9 +- examples/HMC.ipynb | 1154 ++++++------------------------------------- 2 files changed, 147 insertions(+), 1016 deletions(-) diff --git a/aehmc/trajectory.py b/aehmc/trajectory.py index f30d1b0..f28b1fd 100644 --- a/aehmc/trajectory.py +++ b/aehmc/trajectory.py @@ -35,15 +35,14 @@ def static_integration( integrator: Callable, step_size: float, num_integration_steps: int, - direction: int = 1, ) -> Callable: """Generate a trajectory by integrating several times in one direction.""" - directed_step_size = direction * step_size - def integrate(q_init, p_init, energy_init, energy_grad_init) -> IntegratorStateType: - def one_step(q, p, energy, energy_grad): - new_state = integrator(q, p, energy, energy_grad, directed_step_size) + def one_step(q, p, potential_energy, potential_energy_grad): + new_state = integrator( + q, p, potential_energy, potential_energy_grad, step_size + ) return new_state [q, p, energy, energy_grad], _ = aesara.scan( diff --git a/examples/HMC.ipynb b/examples/HMC.ipynb index f5713aa..e7d9985 100644 --- a/examples/HMC.ipynb +++ b/examples/HMC.ipynb @@ -18,20 +18,24 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n", - "WARNING (theano.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" + "WARNING (theano.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n", + "WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" ] } ], "source": [ - "import aesara\n", - "from aesara.tensor.random.utils import RandomStream\n", - "import aesara.tensor as aet\n", - "from aeppl.logpdf import logpdf\n", "import numpy as np\n", "import pymc3 as pm\n", "\n", - "import 
aehmc.hmc as hmc" + "import aesara\n", + "import aesara.tensor as aet\n", + "from aesara.tensor.random.utils import RandomStream\n", + "from aeppl.transforms import TransformValuesOpt, LogTransform\n", + "\n", + "from aeppl import joint_logprob\n", + "\n", + "import aehmc.hmc as hmc\n", + "from aehmc.utils import RaveledParamsMap" ] }, { @@ -41,9 +45,12 @@ "metadata": {}, "outputs": [], "source": [ - "NUM_INTEGRATION_STEPS = 10\n", - "STEP_SIZE = 0.0001\n", - "NUM_SAMPLES = 4_000" + "NUM_INTEGRATION_STEPS = 1024\n", + "STEP_SIZE = 5e-5\n", + "PATH_LENGTH = STEP_SIZE * NUM_INTEGRATION_STEPS\n", + "\n", + "NUM_SAMPLES = 1_000\n", + "BURNIN = 100" ] }, { @@ -53,18 +60,18 @@ "source": [ "## The model: a simple linear regression\n", "\n", - "We consider a simple one-dimensional regression problem. To make it a little challenging, we create a dataset with 100,000 points:" + "We consider a simple one-dimensional regression problem. To make it a little challenging, we create a dataset with 10,000 points:" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "eb721064", "metadata": {}, "outputs": [], "source": [ - "X = np.random.normal(0, 1, size=(100_000,))\n", - "y = 3 * X + np.random.normal(0,1)" + "X_val = np.random.normal(0, 1, size=(10_000,))\n", + "y_val = 3 * X_val + np.random.normal(0,1)" ] }, { @@ -77,10 +84,22 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "287315f9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'X' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m y_observed = pm.Normal(\n\u001b[1;32m 5\u001b[0m 
\u001b[0;34m\"y_observed\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mmu\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mX\u001b[0m \u001b[0;34m@\u001b[0m \u001b[0mweights\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0msigma\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnoise\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mobserved\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'X' is not defined" + ] + } + ], "source": [ "with pm.Model() as pymc3_model:\n", " weights = pm.Normal(\"weights\", mu=0, sigma=1)\n", @@ -100,88 +119,87 @@ "source": [ "### Aeppl implementation\n", "\n", - "We can implement the logpdf of this model directly using `aesara` and `aeppl`:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "4521e461", - "metadata": {}, - "outputs": [], - "source": [ - "def aeppl_model_logpdf(position, X, y):\n", - " weights = position[0]\n", - " noise = position[1]\n", - " l = 0\n", - " l += aet.sum(logpdf(aet.random.normal(0, 1), weights))\n", - " l += aet.sum(logpdf(aet.random.gamma(2, 1), noise))\n", - " mu = aet.dot(X, weights)\n", - " l += aet.sum(logpdf(aet.random.normal(mu, noise), y))\n", - " return l" - ] - }, - { - "cell_type": "markdown", - "id": "8694e9fd", - "metadata": {}, - "source": [ - "Let us condition the model on the data:" + "We can implement the posterior log-probability density function of this model directly using `aesara` and `aeppl`:" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "8b2e9c26", + "execution_count": 9, + "id": "c3d77c35", "metadata": {}, "outputs": [], "source": [ - "aeppl_loglikelihood = lambda position: aeppl_model_logpdf(position, aet.as_tensor(X), 
aet.as_tensor(y))" - ] - }, - { - "cell_type": "markdown", - "id": "27dfdeb3", - "metadata": {}, - "source": [ - "We can check that the loglikelihood returns the same values as the logpdf produced by PyMC3:" + "X_at = aet.as_tensor(X_val)\n", + "\n", + "N_rv = aet.random.gamma(2.0, 1.0, name=\"N\")\n", + "W_rv = aet.random.normal(0, 1.0, name=\"W\")\n", + "Y_rv = aet.random.normal(X_at.dot(W_rv), N_rv, name=\"Y\")\n", + "\n", + "\n", + "# `aehmc` kernels take a vector as an input. We use a `RaveledParamsMap` to unravel this vector\n", + "# and retrieve the values for `W_rv` and `N_rv`.\n", + "rp_map = RaveledParamsMap((W_rv, N_rv))\n", + "\n", + "\n", + "def aeppl_logprob(q):\n", + " uraveled_params = rp_map.unravel_params(q)\n", + " N_vv = uraveled_params[N_rv]\n", + " W_vv = uraveled_params[W_rv]\n", + " \n", + " # The gamma distribution's support is the positive real numbers; algorithms like HMC however prefer to work\n", + " # with parameters on the real line. We thus apply a log transform to `N_vv`. 
PyMC3 applies this \n", + " # transform automatically.\n", + " transforms_op = TransformValuesOpt({N_vv: LogTransform()})\n", + "\n", + " logprob = joint_logprob(Y_rv, {Y_rv: aet.as_tensor(y_val), N_rv: N_vv, W_rv: W_vv}, extra_rewrites=transforms_op)\n", + " return logprob" ] }, { "cell_type": "code", - "execution_count": 7, - "id": "e2b01601", + "execution_count": 10, + "id": "5f79145d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array(-323256.47056254)" + "array(-32227.18839188)" ] }, - "execution_count": 7, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "q = aet.vector(\"q\")\n", - "aeppl_loglikelihood(q).eval({q: np.array([3, 10])})" + "q = aet.vector('q')\n", + "\n", + "logprob_fn = aesara.function((q,), aeppl_logprob(q))\n", + "logprob_fn([3., np.log(10.)])" + ] + }, + { + "cell_type": "markdown", + "id": "27dfdeb3", + "metadata": {}, + "source": [ + "We can check that the loglikelihood returns the same values as the logpdf produced by PyMC3:" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "id": "97d5859b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array(-323254.16797744)" + "array(-10.81376835)" ] }, - "execution_count": 8, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -198,24 +216,6 @@ "## Sampling with aehmc" ] }, - { - "cell_type": "markdown", - "id": "8fa1bf47", - "metadata": {}, - "source": [ - "`aehmc`'s HMC algorithm takes a potential function as an input, which here can be taken as minus the loglikelihood:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "b4d42a80", - "metadata": {}, - "outputs": [], - "source": [ - "potential = lambda q: -aeppl_loglikelihood(q)" - ] - }, { "cell_type": "markdown", "id": "0b543da1", @@ -234,14 +234,14 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "id": "4d853e81", "metadata": {}, "outputs": [], "source": [ - "srng = 
RandomStream(seed=59)\n", - "inverse_mass_matrix = aet.as_tensor(np.array([1, 1]))\n", - "kernel = hmc.kernel(srng, potential, STEP_SIZE, inverse_mass_matrix, NUM_INTEGRATION_STEPS)" + "srng = RandomStream(seed=1)\n", + "inverse_mass_matrix = aet.as_tensor(np.array([1., 1.]))\n", + "kernel = hmc.kernel(srng, aeppl_logprob, STEP_SIZE, inverse_mass_matrix, NUM_INTEGRATION_STEPS)" ] }, { @@ -254,13 +254,13 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "id": "6729c9d3", "metadata": {}, "outputs": [], "source": [ "q = aet.vector(\"q\")\n", - "potential_energy = potential(q)\n", + "potential_energy = -aeppl_logprob(q)\n", "potential_energy_grad = aesara.grad(potential_energy, wrt=q)\n", "\n", "next_step = kernel(q, potential_energy, potential_energy_grad)\n", @@ -277,25 +277,25 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "id": "34be0a4a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[array([1.01084087, 2.99503283]),\n", - " array(235786.64172406),\n", - " array([-22079.25441109, 10555.51460628])]" + "[array([ 3.00280254, -1.64604608]),\n", + " array(-4196.38241395),\n", + " array([1703.66049661, 3868.43841482])]" ] }, - "execution_count": 12, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "kernel_fn(np.array([1., 3.]))" + "kernel_fn(np.array([3., np.log(.21)]))" ] }, { @@ -316,15 +316,15 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "id": "865b2355", "metadata": {}, "outputs": [], "source": [ - "def build_trajectory_generator(srng, kernel_generator, potential_fn, num_samples):\n", + "def build_trajectory_generator(srng, kernel_generator, logprob_fn, num_samples):\n", " \"\"\"Builds a function which, given an initial position, returns the full trajectory.\"\"\"\n", " q = aet.vector(\"q\")\n", - " potential_energy = potential_fn(q)\n", + " potential_energy = -logprob_fn(q)\n", " potential_energy_grad = 
aesara.grad(potential_energy, wrt=q)\n", "\n", " step_size = aet.scalar(\"step_size\")\n", @@ -332,7 +332,7 @@ " num_integration_steps = aet.scalar(\"num_integration_steps\", dtype=\"int32\")\n", "\n", " kernel = kernel_generator(\n", - " srng, potential_fn, step_size, inverse_mass_matrix, num_integration_steps\n", + " srng, logprob_fn, step_size, inverse_mass_matrix, num_integration_steps\n", " )\n", "\n", " trajectory, updates = aesara.scan(\n", @@ -348,7 +348,7 @@ " (q, step_size, inverse_mass_matrix, num_integration_steps),\n", " trajectory,\n", " updates=updates,\n", - " profile=True\n", + " mode='FAST_RUN'\n", " )\n", "\n", " return trajectory_generator" @@ -364,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "id": "bb7f49e7", "metadata": {}, "outputs": [ @@ -372,19 +372,19 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 16.7 s, sys: 92.3 ms, total: 16.8 s\n", - "Wall time: 17.3 s\n" + "CPU times: user 47.3 s, sys: 39.5 ms, total: 47.4 s\n", + "Wall time: 47.8 s\n" ] } ], "source": [ "%%time\n", "trajectory_generator = build_trajectory_generator(\n", - " srng, hmc.kernel, potential, num_samples=NUM_SAMPLES\n", + " srng, hmc.kernel, aeppl_logprob, num_samples=NUM_SAMPLES\n", ")\n", "\n", - "initial_position = np.array([1., 3.])\n", - "inverse_mass_matrix = np.array([1, 1])\n", + "initial_position = np.array([3., np.log(1.5)])\n", + "inverse_mass_matrix = np.array([1., 1.])\n", "position, *_ = trajectory_generator(initial_position, STEP_SIZE, inverse_mass_matrix, NUM_INTEGRATION_STEPS)" ] }, @@ -398,25 +398,15 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "id": "db1f3960", "metadata": {}, "outputs": [ { "data": { + "image/png": "\n", "text/plain": [ - "Text(0.5, 0, 'noise')" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" + "
" ] }, "metadata": { @@ -426,13 +416,10 @@ } ], "source": [ - "from matplotlib import pyplot as plt\n", + "import arviz as az\n", "\n", - "fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12,5))\n", - "ax1.hist(position[1000:, 0], density=True)\n", - "ax1.set_xlabel(\"weights\")\n", - "ax2.hist(position[1000:, 1], density=True)\n", - "ax2.set_xlabel(\"noise\")" + "aehmc_trace = az.from_dict(posterior={\"W\": position[BURNIN:, 0], \"N\": np.exp(position[BURNIN:, 1])})\n", + "az.plot_trace(aehmc_trace);" ] }, { @@ -447,904 +434,49 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "c02b9965", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - ":3: FutureWarning: In v4.0, pm.sample will return an `arviz.InferenceData` object instead of a `MultiTrace` by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.\n", - "Sequential sampling (1 chains in 1 job)\n", - "HamiltonianMC: [noise, weights]\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "
\n", - " \n", - " \n", - " 9.60% [384/4000 03:45<35:23 Sampling chain 0, 0 divergences]\n", - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sampling 1 chain for 0 tune and 384 draw iterations (0 + 384 draws total) took 226 seconds.\n", - "The acceptance probability does not match the target. It is 0.9999081203429263, but should be close to 0.65. Try to increase the number of tuning steps.\n", - "Only one chain was sampled, this makes it impossible to run some convergence checks\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 3min 46s, sys: 290 ms, total: 3min 46s\n", - "Wall time: 3min 47s\n" - ] - } - ], - "source": [ - "%%time\n", - "with pymc3_model:\n", - " step = pm.step_methods.hmc.hmc.HamiltonianMC([weights, noise], path_length=NUM_INTEGRATION_STEPS, step_scale=STEP_SIZE)\n", - " posterior = pm.sample(tune=0, draws=NUM_SAMPLES, step=[step], chains=1)" - ] - }, - { - "cell_type": "markdown", - "id": "0ceb7c22", - "metadata": {}, - "source": [ - "`aehmc`'s kernel is orders of magnitude faster than PyMC3's." 
- ] - }, - { - "cell_type": "markdown", - "id": "85de7daa", - "metadata": {}, - "source": [ - "## Profiling `aehmc`'s generated trajectory" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "0ea2e2f6", - "metadata": {}, "outputs": [], "source": [ - "aesara.config.profile = True\n", - "aesara.config.profile_optimizer = True" + "import time \n", + "\n", + "with pymc3_model:\n", + " step = pm.step_methods.hmc.hmc.HamiltonianMC(\n", + " [weights, noise],\n", + " path_length=PATH_LENGTH,\n", + " step_scale=STEP_SIZE,\n", + " adapt_step_size=False,\n", + " )\n", + " \n", + " # Set initial values\n", + " weights.tag.test_value = initial_position[0]\n", + " noise.tag.test_value = initial_position[1]\n", + " \n", + "with pymc3_model:\n", + " start = time.perf_counter()\n", + " posterior = pm.sample(\n", + " tune=0,\n", + " draws=NUM_SAMPLES,\n", + " step=[step],\n", + " chains=1,\n", + " cores=1,\n", + " return_inferencedata=False,\n", + " compute_convergence_checks=False\n", + " )\n", + " end = time.perf_counter()\n", + " print(end - start)" ] }, { "cell_type": "code", - "execution_count": 18, - "id": "c541cfc2", + "execution_count": null, + "id": "ec971367", "metadata": {}, "outputs": [], "source": [ - "trajectory_generator = build_trajectory_generator(\n", - " srng, hmc.kernel, potential, num_samples=NUM_SAMPLES\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "5be43ffe", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Function profiling\n", - "==================\n", - " Message: :24\n", - " Time in 0 calls to Function.__call__: 0.000000e+00s\n", - " Total compile time: 7.750485e-01s\n", - " Number of Apply nodes: 44\n", - " Aesara Optimizer time: 2.904701e-01s\n", - " Aesara validate time: 2.713919e-03s\n", - " Aesara Linker time (includes C, CUDA code generation/compiling): 0.4756960868835449s\n", - " Import time 0.000000e+00s\n", - " Node make_thunk time 
4.746168e-01s\n", - " Node forall_inplace,cpu,scan_fn}(TensorConstant{4000}, IncSubtensor{Set;:int64:}.0, IncSubtensor{InplaceSet;:int64:}.0, IncSubtensor{InplaceSet;:int64:}.0, RandomStateSharedVariable(), RandomStateSharedVariable(), inverse_mass_matrix, num_integration_steps, step_size, Elemwise{Composite{sqrt(reciprocal(i0))}}.0, InplaceDimShuffle{x}.0) time 4.592261e-01s\n", - " Node Elemwise{Composite{(i0 - (i1 * i2))}}(TensorConstant{[-3.669986...70107078]}, TensorConstant{[-0.730944...05940817]}, InplaceDimShuffle{x}.0) time 1.163006e-03s\n", - " Node Shape_i{0}(inverse_mass_matrix) time 8.831024e-04s\n", - " Node Elemwise{mul,no_inplace}(Elemwise{true_div,no_inplace}.0, TensorConstant{[-0.730944...05940817]}) time 6.392002e-04s\n", - " Node Elemwise{Composite{(i0 + (i1 * sqr(i2)) + Switch(i3, (Switch(i4, i5, i6) - i7), i8) + i9)}}[(0, 6)](TensorConstant{(1,) of -0..5332046727}, TensorConstant{(1,) of -0.5}, InplaceDimShuffle{x}.0, InplaceDimShuffle{x}.0, InplaceDimShuffle{x}.0, TensorConstant{(1,) of -inf}, InplaceDimShuffle{x}.0, InplaceDimShuffle{x}.0, TensorConstant{(1,) of -inf}, InplaceDimShuffle{x}.0) time 6.022453e-04s\n", - "\n", - "Time in all call to aesara.grad() 1.597838e-01s\n", - "Time since aesara import 251.709s\n", - "Optimizer Profile\n", - "-----------------\n", - " SeqOptimizer OPT_FAST_RUN time 0.290s for 162/44 nodes before/after optimization\n", - " 0.050s for callback\n", - " 0.003s for fgraph.validate()\n", - " time - (name, class, index, nodes before, nodes after) - validate time\n", - " 0.073859s - ('canonicalize', 'EquilibriumOptimizer', 7, 146, 69) - 0.001s\n", - " EquilibriumOptimizer canonicalize\n", - " time 0.074s for 4 passes\n", - " nb nodes (start, end, max) 146 69 146\n", - " time io_toposort 0.000s\n", - " time in local optimizers 0.047s\n", - " time in global optimizers 0.000s\n", - " time in final optimizers 0.021s\n", - " time in cleanup optimizers 0.003s\n", - " 0 - 0.052s 101 (0.018s in global opts, 0.000s 
io_toposort) - 146 nodes - ('MergeOptimizer', 29) ('local_dimshuffle_lift', 16) ('local_neg_to_mul', 10) ('local_upcast_elemwise_constant_inputs', 9) ('local_mul_canonizer', 7) ...\n", - " 1 - 0.013s 31 (0.003s in global opts, 0.000s io_toposort) - 88 nodes - ('MergeOptimizer', 7) ('local_mul_canonizer', 6) ('local_subtensor_make_vector', 3) ('local_add_canonizer', 3) ('local_div_switch_sink', 2) ...\n", - " 2 - 0.004s 3 (0.000s in global opts, 0.000s io_toposort) - 70 nodes - ('local_sum_prod_all_to_none', 1) ('local_sum_prod_div_dimshuffle', 1) ('local_mul_canonizer', 1)\n", - " 3 - 0.004s 0 (0.000s in global opts, 0.000s io_toposort) - 69 nodes - \n", - " times - times applied - nb node created - name:\n", - " 0.021s - 2 - 0 - topo_constant_folding\n", - " 0.007s - 14 - 15 - local_mul_canonizer\n", - " 0.006s - 10 - 18 - local_add_canonizer\n", - " 0.006s - 17 - 22 - local_dimshuffle_lift\n", - " 0.005s - 5 - 20 - local_greedy_distributor\n", - " 0.005s - 10 - 30 - local_upcast_elemwise_constant_inputs\n", - " 0.003s - 10 - 16 - local_neg_to_mul\n", - " 0.003s - 36 - 0 - MergeOptimizer\n", - " 0.002s - 4 - 6 - local_fill_sink\n", - " 0.001s - 2 - 4 - local_div_switch_sink\n", - " 0.001s - 3 - 6 - local_sum_prod_div_dimshuffle\n", - " 0.001s - 6 - 6 - local_sum_prod_all_to_none\n", - " 0.001s - 4 - 6 - local_shape_to_shape_i\n", - " 0.001s - 1 - 2 - local_mul_switch_sink\n", - " 0.000s - 6 - 0 - local_useless_fill\n", - " 0.000s - 3 - 0 - local_subtensor_make_vector\n", - " 0.000s - 2 - 0 - local_pow_canonicalize\n", - " 0.008s - in 73 optimization that were not used (display only those with a runtime > 0)\n", - " 0.001s - local_func_inv\n", - " 0.001s - local_mul_zero\n", - " 0.001s - local_useless_elemwise\n", - " 0.001s - local_useless_elemwise_comparison\n", - " 0.001s - local_merge_switch_same_cond\n", - " 0.000s - local_one_minus_erf2\n", - " 0.000s - local_track_shape_i\n", - " 0.000s - local_view_op\n", - " 0.000s - local_one_minus_erf\n", - " 0.000s - 
local_useless_switch\n", - " 0.000s - local_exp_log\n", - " 0.000s - local_useless_composite\n", - " 0.000s - local_cast_cast\n", - " 0.000s - local_expm1\n", - " 0.000s - local_useless_subtensor\n", - " 0.000s - local_IncSubtensor_serialize\n", - " 0.000s - local_useless_rebroadcast\n", - " 0.000s - local_useless_slice\n", - " 0.000s - local_useless_inc_subtensor\n", - " 0.000s - local_zero_div\n", - " 0.000s - local_setsubtensor_of_constants\n", - " 0.000s - local_subtensor_remove_broadcastable_index\n", - " 0.000s - local_incsubtensor_of_zeros\n", - " 0.000s - local_subtensor_lift\n", - " 0.000s - local_lift_transpose_through_dot\n", - " 0.000s - local_op_of_op\n", - " 0.000s - local_subtensor_inc_subtensor\n", - " 0.000s - local_useless_inc_subtensor_alloc\n", - " 0.000s - local_sumsqr2dot\n", - " 0.000s - local_reduce_broadcastable\n", - " 0.000s - local_subtensor_merge\n", - " 0.000s - local_subtensor_of_alloc\n", - " 0.000s - local_reduce_join\n", - " 0.000s - local_subtensor_of_dot\n", - " 0.000s - local_useless_reduce\n", - " 0.000s - local_rebroadcast_lift\n", - " 0.000s - local_incsubtensor_of_zeros_to_setsubtensor\n", - " 0.000s - local_scalar_tensor_scalar\n", - "\n", - " Global, final and clean up optimizers\n", - " Iter 0\n", - " TopoOptimizer topo_constant_folding\n", - " nb_node (start, end, changed) (130, 90, 40)\n", - " init io_toposort 0.00015473365783691406\n", - " loop time 0.017719030380249023\n", - " callback_time 0.002961397171020508\n", - " MergeOptimizer\n", - " nb fail= 0 merged= 55 constant= 34\n", - " time replace=0.00 validate=0.00 callback=0.00\n", - " Iter 1\n", - " TopoOptimizer topo_constant_folding\n", - " nb_node (start, end, changed) (75, 70, 5)\n", - " init io_toposort 9.5367431640625e-05\n", - " loop time 0.0024902820587158203\n", - " callback_time 0.00038123130798339844\n", - " MergeOptimizer\n", - " nb fail= 0 merged= 11 constant= 10\n", - " time replace=0.00 validate=0.00 callback=0.00\n", - " Iter 2\n", - " TopoOptimizer 
topo_constant_folding\n", - " nb_node (start, end, changed) (69, 69, 0)\n", - " init io_toposort 8.654594421386719e-05\n", - " loop time 4.9114227294921875e-05\n", - " callback_time 0.0\n", - " MergeOptimizer\n", - " nb fail= 0 merged= 0 constant= 0\n", - " time replace=0.00 validate=0.00 callback=0.00\n", - " Iter 3\n", - " TopoOptimizer topo_constant_folding\n", - " nb_node (start, end, changed) (69, 69, 0)\n", - " init io_toposort 8.726119995117188e-05\n", - " loop time 4.9591064453125e-05\n", - " callback_time 0.0\n", - " MergeOptimizer\n", - " nb fail= 0 merged= 0 constant= 0\n", - " time replace=0.00 validate=0.00 callback=0.00\n", - " 0.057521s - ('scan_eqopt1', 'EquilibriumOptimizer', 2, 153, 159) - 0.000s\n", - " EquilibriumOptimizer scan_eqopt1\n", - " time 0.058s for 3 passes\n", - " nb nodes (start, end, max) 153 159 165\n", - " time io_toposort 0.001s\n", - " time in local optimizers 0.000s\n", - " time in global optimizers 0.056s\n", - " time in final optimizers 0.000s\n", - " time in cleanup optimizers 0.000s\n", - " 0 - 0.023s 1 (0.022s in global opts, 0.000s io_toposort) - 165 nodes - ('SeqOpt([, , , , ])', 1)\n", - " 1 - 0.020s 1 (0.020s in global opts, 0.000s io_toposort) - 159 nodes - ('SeqOpt([, , , , ])', 1)\n", - " 2 - 0.014s 0 (0.014s in global opts, 0.000s io_toposort) - 159 nodes - \n", - " times - times applied - nb node created - name:\n", - " 0.056s - 2 - 14 - SeqOpt([, , , , ])\n", - " 0.000s - in 0 optimization that were not used (display only those with a runtime > 0)\n", - "\n", - " Global, final and clean up optimizers\n", - " Iter 0\n", - " SeqOptimizer all_pushout_opt time 0.022s for 153/165 nodes before/after optimization\n", - " 0.000s for callback\n", - " 0.000s for fgraph.validate()\n", - " 0.012066s - ('scanOp_pushout_nonseqs_ops', 'PushOutNonSeqScan', 1, 153, 165) - 0.000s\n", - " 0.005079s - ('scanOp_pushout_output', 'PushOutScanOutput', 4, 165, 165) - 0.000s\n", - " 0.004303s - ('scanOp_pushout_seqs_ops', 
'PushOutSeqScan', 2, 165, 165) - 0.000s\n", - " 0.000423s - ('remove_constants_and_unused_inputs_scan', 'TopoOptimizer', 0, 153, 153) - 0.000s\n", - " TopoOptimizer scanOp_remove_constants_and_unused_inputs0\n", - " nb_node (start, end, changed) (153, 153, 0)\n", - " init io_toposort 0.00017571449279785156\n", - " loop time 0.00024318695068359375\n", - " callback_time 0.0\n", - " 0.000234s - ('scan_pushout_dot1', 'PushOutDot1', 3, 165, 165) - 0.000s\n", - "\n", - " Iter 1\n", - " SeqOptimizer all_pushout_opt time 0.020s for 165/159 nodes before/after optimization\n", - " 0.000s for callback\n", - " 0.000s for fgraph.validate()\n", - " 0.006255s - ('remove_constants_and_unused_inputs_scan', 'TopoOptimizer', 0, 165, 159) - 0.000s\n", - " TopoOptimizer scanOp_remove_constants_and_unused_inputs0\n", - " nb_node (start, end, changed) (165, 159, 1)\n", - " init io_toposort 0.0001823902130126953\n", - " loop time 0.006066799163818359\n", - " callback_time 7.271766662597656e-05\n", - " 0.004653s - ('scanOp_pushout_nonseqs_ops', 'PushOutNonSeqScan', 1, 159, 159) - 0.000s\n", - " 0.004620s - ('scanOp_pushout_output', 'PushOutScanOutput', 4, 159, 159) - 0.000s\n", - " 0.004264s - ('scanOp_pushout_seqs_ops', 'PushOutSeqScan', 2, 159, 159) - 0.000s\n", - " 0.000224s - ('scan_pushout_dot1', 'PushOutDot1', 3, 159, 159) - 0.000s\n", - "\n", - " Iter 2\n", - " SeqOptimizer all_pushout_opt time 0.014s for 159/159 nodes before/after optimization\n", - " 0.000s for callback\n", - " 0.000s for fgraph.validate()\n", - " 0.004682s - ('scanOp_pushout_nonseqs_ops', 'PushOutNonSeqScan', 1, 159, 159) - 0.000s\n", - " 0.004488s - ('scanOp_pushout_output', 'PushOutScanOutput', 4, 159, 159) - 0.000s\n", - " 0.004261s - ('scanOp_pushout_seqs_ops', 'PushOutSeqScan', 2, 159, 159) - 0.000s\n", - " 0.000419s - ('remove_constants_and_unused_inputs_scan', 'TopoOptimizer', 0, 159, 159) - 0.000s\n", - " TopoOptimizer scanOp_remove_constants_and_unused_inputs0\n", - " nb_node (start, end, changed) (159, 
159, 0)\n", - " init io_toposort 0.00017571449279785156\n", - " loop time 0.00023865699768066406\n", - " callback_time 0.0\n", - " 0.000225s - ('scan_pushout_dot1', 'PushOutDot1', 3, 159, 159) - 0.000s\n", - "\n", - " 0.048995s - ('specialize', 'EquilibriumOptimizer', 16, 141, 68) - 0.000s\n", - " EquilibriumOptimizer specialize\n", - " time 0.049s for 4 passes\n", - " nb nodes (start, end, max) 141 68 141\n", - " time io_toposort 0.000s\n", - " time in local optimizers 0.018s\n", - " time in global optimizers 0.001s\n", - " time in final optimizers 0.028s\n", - " time in cleanup optimizers 0.000s\n", - " 0 - 0.033s 20 (0.023s in global opts, 0.000s io_toposort) - 141 nodes - ('local_shape_to_shape_i', 5) ('local_add_specialize', 3) ('local_useless_slice', 3) ('local_sum_prod_mul_by_scalar', 2) ('local_pow_specialize', 2) ...\n", - " 1 - 0.011s 14 (0.004s in global opts, 0.000s io_toposort) - 83 nodes - ('local_subtensor_make_vector', 4) ('local_useless_subtensor', 4) ('local_mul_specialize', 2) ('local_dimshuffle_lift', 2) ('topo_constant_folding', 1) ...\n", - " 2 - 0.002s 2 (0.000s in global opts, 0.000s io_toposort) - 70 nodes - ('local_useless_subtensor', 2)\n", - " 3 - 0.002s 0 (0.000s in global opts, 0.000s io_toposort) - 68 nodes - \n", - " times - times applied - nb node created - name:\n", - " 0.028s - 2 - 0 - topo_constant_folding\n", - " 0.003s - 3 - 6 - local_add_specialize\n", - " 0.003s - 2 - 15 - local_dimshuffle_lift\n", - " 0.001s - 2 - 8 - local_sum_prod_mul_by_scalar\n", - " 0.001s - 5 - 5 - local_shape_to_shape_i\n", - " 0.001s - 3 - 3 - local_mul_specialize\n", - " 0.000s - 6 - 0 - local_useless_subtensor\n", - " 0.000s - 3 - 3 - local_useless_slice\n", - " 0.000s - 1 - 1 - local_div_to_reciprocal\n", - " 0.000s - 2 - 2 - local_pow_specialize\n", - " 0.000s - 4 - 0 - local_subtensor_make_vector\n", - " 0.000s - 1 - 2 - local_rebroadcast_lift\n", - " 0.000s - 2 - 0 - local_remove_useless_assert\n", - " 0.007s - in 65 optimization that were not 
used (display only those with a runtime > 0)\n", - " 0.001s - local_func_inv\n", - " 0.001s - local_one_minus_erf\n", - " 0.001s - local_one_minus_erf2\n", - " 0.001s - local_elemwise_alloc\n", - " 0.001s - local_useless_elemwise\n", - " 0.001s - crossentropy_to_crossentropy_with_softmax_with_bias\n", - " 0.000s - local_useless_elemwise_comparison\n", - " 0.000s - local_track_shape_i\n", - " 0.000s - local_useless_switch\n", - " 0.000s - local_exp_log\n", - " 0.000s - local_abs_merge\n", - " 0.000s - local_expm1\n", - " 0.000s - local_cast_cast\n", - " 0.000s - local_logsoftmax\n", - " 0.000s - local_elemwise_sub_zeros\n", - " 0.000s - local_alloc_unary\n", - " 0.000s - local_useless_rebroadcast\n", - " 0.000s - local_useless_inc_subtensor\n", - " 0.000s - local_mul_switch_sink\n", - " 0.000s - local_subtensor_remove_broadcastable_index\n", - " 0.000s - local_grad_log_erfc_neg\n", - " 0.000s - local_zero_div\n", - " 0.000s - local_mul_to_sqr\n", - " 0.000s - local_subtensor_inc_subtensor\n", - " 0.000s - local_sum_prod_div_dimshuffle\n", - " 0.000s - local_useless_inc_subtensor_alloc\n", - " 0.000s - local_reduce_broadcastable\n", - " 0.000s - local_subtensor_merge\n", - " 0.000s - local_subtensor_of_alloc\n", - " 0.000s - local_scalar_tensor_scalar\n", - " 0.000s - local_incsubtensor_of_zeros_to_setsubtensor\n", - " 0.000s - local_subtensor_of_dot\n", - " 0.000s - local_neg_neg\n", - " 0.000s - local_canonicalize_alloc\n", - " 0.000s - local_advanced_indexing_crossentropy_onehot\n", - " 0.000s - local_sumsqr2dot\n", - " 0.000s - local_opt_alloc\n", - " 0.000s - local_log1p\n", - " 0.000s - local_neg_div_neg\n", - " 0.000s - local_log_erfc\n", - " 0.000s - local_log_add\n", - " 0.000s - local_useless_alloc\n", - " 0.000s - local_merge_alloc\n", - "\n", - " Global, final and clean up optimizers\n", - " Iter 0\n", - " TopoOptimizer topo_constant_folding\n", - " nb_node (start, end, changed) (146, 83, 63)\n", - " init io_toposort 0.0001766681671142578\n", - " loop 
time 0.022988557815551758\n", - " callback_time 0.0035560131072998047\n", - " Iter 1\n", - " TopoOptimizer topo_constant_folding\n", - " nb_node (start, end, changed) (78, 70, 8)\n", - " init io_toposort 9.369850158691406e-05\n", - " loop time 0.004173994064331055\n", - " callback_time 0.0006353855133056641\n", - " Iter 2\n", - " TopoOptimizer topo_constant_folding\n", - " nb_node (start, end, changed) (68, 68, 0)\n", - " init io_toposort 8.392333984375e-05\n", - " loop time 4.863739013671875e-05\n", - " callback_time 0.0\n", - " Iter 3\n", - " TopoOptimizer topo_constant_folding\n", - " nb_node (start, end, changed) (68, 68, 0)\n", - " init io_toposort 8.416175842285156e-05\n", - " loop time 4.887580871582031e-05\n", - " callback_time 0.0\n", - " 0.023798s - ('scan_eqopt2', 'EquilibriumOptimizer', 13, 67, 67) - 0.000s\n", - " EquilibriumOptimizer scan_eqopt2\n", - " time 0.024s for 2 passes\n", - " nb nodes (start, end, max) 67 67 67\n", - " time io_toposort 0.000s\n", - " time in local optimizers 0.000s\n", - " time in global optimizers 0.023s\n", - " time in final optimizers 0.000s\n", - " time in cleanup optimizers 0.000s\n", - " 0 - 0.019s 1 (0.018s in global opts, 0.000s io_toposort) - 67 nodes - ('remove_constants_and_unused_inputs_scan', 1)\n", - " 1 - 0.005s 0 (0.005s in global opts, 0.000s io_toposort) - 67 nodes - \n", - " times - times applied - nb node created - name:\n", - " 0.014s - 1 - 1 - remove_constants_and_unused_inputs_scan\n", - " 0.009s - in 5 optimization that were not used (display only those with a runtime > 0)\n", - " 0.008s - scan_merge_inouts\n", - " 0.001s - remove_constants_and_unused_inputs_scan\n", - " 0.000s - remove_constants_and_unused_inputs_scan\n", - " 0.000s - constant_folding\n", - " 0.000s - \n", - "\n", - " Global, final and clean up optimizers\n", - " Iter 0\n", - " TopoOptimizer constant_folding_for_scan2\n", - " nb_node (start, end, changed) (67, 67, 0)\n", - " init io_toposort 8.058547973632812e-05\n", - " loop time 
5.14984130859375e-05\n", - " callback_time 0.0\n", - " TopoOptimizer scanOp_remove_constants_and_unused_inputs1\n", - " nb_node (start, end, changed) (67, 67, 1)\n", - " init io_toposort 7.748603820800781e-05\n", - " loop time 0.013595104217529297\n", - " callback_time 0.00722956657409668\n", - " TopoOptimizer scanop_remove_constants_and_unused_inputs2\n", - " nb_node (start, end, changed) (67, 67, 0)\n", - " init io_toposort 8.0108642578125e-05\n", - " loop time 0.00016832351684570312\n", - " callback_time 0.0\n", - " TopoOptimizer scanOp_merge_inouts\n", - " nb_node (start, end, changed) (67, 67, 0)\n", - " init io_toposort 8.249282836914062e-05\n", - " loop time 0.003879547119140625\n", - " callback_time 0.0\n", - " TopoOptimizer scanOp_remove_constants_and_unused_inputs3\n", - " nb_node (start, end, changed) (67, 67, 0)\n", - " init io_toposort 8.702278137207031e-05\n", - " loop time 0.00017213821411132812\n", - " callback_time 0.0\n", - " Iter 1\n", - " TopoOptimizer constant_folding_for_scan2\n", - " nb_node (start, end, changed) (67, 67, 0)\n", - " init io_toposort 8.20159912109375e-05\n", - " loop time 4.792213439941406e-05\n", - " callback_time 0.0\n", - " TopoOptimizer scanOp_remove_constants_and_unused_inputs1\n", - " nb_node (start, end, changed) (67, 67, 0)\n", - " init io_toposort 7.891654968261719e-05\n", - " loop time 0.0001666545867919922\n", - " callback_time 0.0\n", - " TopoOptimizer scanop_remove_constants_and_unused_inputs2\n", - " nb_node (start, end, changed) (67, 67, 0)\n", - " init io_toposort 7.605552673339844e-05\n", - " loop time 0.00016427040100097656\n", - " callback_time 0.0\n", - " TopoOptimizer scanOp_merge_inouts\n", - " nb_node (start, end, changed) (67, 67, 0)\n", - " init io_toposort 7.939338684082031e-05\n", - " loop time 0.003902912139892578\n", - " callback_time 0.0\n", - " TopoOptimizer scanOp_remove_constants_and_unused_inputs3\n", - " nb_node (start, end, changed) (67, 67, 0)\n", - " init io_toposort 
8.630752563476562e-05\n", - " loop time 0.0001811981201171875\n", - " callback_time 0.0\n", - " 0.022581s - ('scanOp_save_mem', 'ScanSaveMem', 14, 67, 141) - 0.000s\n", - " 0.010699s - ('scanOp_make_inplace', 'ScanInplaceOptimizer', 49, 44, 44) - 0.000s\n", - " 0.010460s - ('ShapeOpt', 'ShapeOptimizer', 3, 159, 159) - 0.000s\n", - " 0.009874s - ('BlasOpt', 'SeqOptimizer', 15, 141, 141) - 0.000s\n", - " SeqOptimizer BlasOpt time 0.010s for 141/141 nodes before/after optimization\n", - " 0.000s for callback\n", - " 0.000s for fgraph.validate()\n", - " 0.006044s - ('use_c_blas', 'TopoOptimizer', 4, 141, 141) - 0.000s\n", - " TopoOptimizer use_c_blas\n", - " nb_node (start, end, changed) (141, 141, 0)\n", - " init io_toposort 0.00017404556274414062\n", - " loop time 0.005860805511474609\n", - " callback_time 0.0\n", - " 0.002404s - ('gemm_optimizer', 'GemmOptimizer', 1, 141, 141) - 0.000s\n", - " GemmOptimizer\n", - " nb_iter 1\n", - " nb_replacement 0\n", - " nb_replacement_didn_t_remove 0\n", - " nb_inconsistency_make 0\n", - " nb_inconsistency_replace 0\n", - " time_canonicalize 0.001386880874633789\n", - " time_factor_can 0\n", - " time_factor_list 0\n", - " time_toposort 0.00017309188842773438\n", - " validate_time 0.0\n", - " callback_time 0.0\n", - " 0.000430s - ('local_dot22_to_dot22scalar', 'TopoOptimizer', 2, 141, 141) - 0.000s\n", - " TopoOptimizer local_dot22_to_dot22scalar\n", - " nb_node (start, end, changed) (141, 141, 0)\n", - " init io_toposort 0.00017309188842773438\n", - " loop time 0.0002491474151611328\n", - " callback_time 0.0\n", - " 0.000421s - ('local_gemm_to_gemv', 'EquilibriumOptimizer', 3, 141, 141) - 0.000s\n", - " EquilibriumOptimizer local_gemm_to_gemv\n", - " time 0.000s for 1 passes\n", - " nb nodes (start, end, max) 141 141 141\n", - " time io_toposort 0.000s\n", - " time in local optimizers 0.000s\n", - " time in global optimizers 0.000s\n", - " time in final optimizers 0.000s\n", - " time in cleanup optimizers 0.000s\n", - " 0 - 
0.000s 0 (0.000s in global opts, 0.000s io_toposort) - 141 nodes - \n", - " 0.000282s - ('local_dot_to_dot22', 'TopoOptimizer', 0, 141, 141) - 0.000s\n", - " TopoOptimizer local_dot_to_dot22\n", - " nb_node (start, end, changed) (141, 141, 0)\n", - " init io_toposort 0.0001773834228515625\n", - " loop time 9.584426879882812e-05\n", - " callback_time 0.0\n", - " 0.000279s - ('use_scipy_ger', 'TopoOptimizer', 5, 141, 141) - 0.000s\n", - " TopoOptimizer scipy_blas\n", - " nb_node (start, end, changed) (141, 141, 0)\n", - " init io_toposort 0.00018358230590820312\n", - " loop time 8.749961853027344e-05\n", - " callback_time 0.0\n", - "\n", - " 0.009009s - ('elemwise_fusion', 'SeqOptimizer', 21, 68, 47) - 0.000s\n", - " SeqOptimizer elemwise_fusion time 0.009s for 68/47 nodes before/after optimization\n", - " 0.001s for callback\n", - " 0.000s for fgraph.validate()\n", - " 0.008479s - ('composite_elemwise_fusion', 'FusionOptimizer', 1, 67, 47) - 0.000s\n", - " FusionOptimizer\n", - " nb_iter 2\n", - " nb_replacement 6\n", - " nb_inconsistency_replace 0\n", - " validate_time 2.4080276489257812e-05\n", - " callback_time 0.00044274330139160156\n", - " time_toposort 0.00014829635620117188\n", - " 0.000519s - ('local_add_mul_fusion', 'FusionOptimizer', 0, 68, 67) - 0.000s\n", - " FusionOptimizer\n", - " nb_iter 2\n", - " nb_replacement 1\n", - " nb_inconsistency_replace 0\n", - " validate_time 4.0531158447265625e-06\n", - " callback_time 0.0001246929168701172\n", - " time_toposort 0.00016832351684570312\n", - "\n", - " 0.006206s - ('inplace_elemwise_opt', 'InplaceElemwiseOptimizer', 48, 44, 44) - 0.001s\n", - " InplaceElemwiseOptimizer \n", - " node_before 44\n", - " nb_call_replace 10\n", - " nb_call_validate 10\n", - " nb_inconsistent 5\n", - " ndim nb\n", - " 0 5\n", - " 1 5\n", - " 0.004699s - ('stabilize', 'EquilibriumOptimizer', 9, 69, 67) - 0.000s\n", - " EquilibriumOptimizer stabilize\n", - " time 0.005s for 2 passes\n", - " nb nodes (start, end, max) 69 67 69\n", - 
" time io_toposort 0.000s\n", - " time in local optimizers 0.002s\n", - " time in global optimizers 0.000s\n", - " time in final optimizers 0.002s\n", - " time in cleanup optimizers 0.000s\n", - " 0 - 0.003s 3 (0.002s in global opts, 0.000s io_toposort) - 69 nodes - ('local_fill_to_alloc', 2) ('topo_constant_folding', 1)\n", - " 1 - 0.001s 0 (0.000s in global opts, 0.000s io_toposort) - 67 nodes - \n", - " times - times applied - nb node created - name:\n", - " 0.002s - 1 - 0 - topo_constant_folding\n", - " 0.000s - 2 - 2 - local_fill_to_alloc\n", - " 0.002s - in 40 optimization that were not used (display only those with a runtime > 0)\n", - " 0.001s - local_greedy_distributor\n", - " 0.000s - local_one_minus_erf2\n", - " 0.000s - local_sigm_times_exp\n", - " 0.000s - crossentropy_to_crossentropy_with_softmax_with_bias\n", - " 0.000s - local_useless_elemwise_comparison\n", - " 0.000s - local_one_minus_erf\n", - " 0.000s - local_expm1\n", - " 0.000s - local_exp_over_1_plus_exp\n", - " 0.000s - local_setsubtensor_of_constants\n", - " 0.000s - local_grad_log_erfc_neg\n", - " 0.000s - local_incsubtensor_of_zeros\n", - " 0.000s - Elemwise{log,no_inplace}(Elemwise{sigmoid,no_inplace}(x)) -> Elemwise{neg,no_inplace}(Elemwise{softplus,no_inplace}(Elemwise{neg,no_inplace}(x)))\n", - " 0.000s - local_useless_inc_subtensor_alloc\n", - " 0.000s - Elemwise{log,no_inplace}(Elemwise{sub,no_inplace}(y subject to , Elemwise{sigmoid,no_inplace}(x))) -> Elemwise{neg,no_inplace}(Elemwise{softplus,no_inplace}(x))\n", - " 0.000s - local_subtensor_of_dot\n", - " 0.000s - local_log1p\n", - " 0.000s - local_log_add\n", - " 0.000s - local_log_erfc\n", - " 0.000s - local_canonicalize_alloc\n", - " 0.000s - local_useless_alloc\n", - " 0.000s - local_merge_alloc\n", - "\n", - " Global, final and clean up optimizers\n", - " Iter 0\n", - " TopoOptimizer topo_constant_folding\n", - " nb_node (start, end, changed) (69, 67, 2)\n", - " init io_toposort 8.678436279296875e-05\n", - " loop time 
0.0013842582702636719\n", - " callback_time 0.00018548965454101562\n", - " Iter 1\n", - " TopoOptimizer topo_constant_folding\n", - " nb_node (start, end, changed) (67, 67, 0)\n", - " init io_toposort 8.487701416015625e-05\n", - " loop time 4.792213439941406e-05\n", - " callback_time 0.0\n", - " 0.002686s - ('useless', 'TopoOptimizer', 4, 159, 146) - 0.000s\n", - " TopoOptimizer useless\n", - " nb_node (start, end, changed) (159, 146, 13)\n", - " init io_toposort 0.00017905235290527344\n", - " loop time 0.0024917125701904297\n", - " callback_time 0.0003857612609863281\n", - " LocalOptGroup\n", - " ---------------------\n", - " time taken - times applied - times tried - name - node_created:\n", - " -0.000s - 0 - 5 - local_useless_inc_subtensor_alloc - 0\n", - " -0.000s - 0 - 11 - local_subtensor_of_alloc - 0\n", - " -0.000s - 0 - 11 - local_subtensor_make_vector - 0\n", - " -0.000s - 6 - 10 - local_useless_reduce - 0\n", - " -0.000s - 4 - 6 - local_useless_fill - 0\n", - " -0.000s - 0 - 5 - local_useless_inc_subtensor - 0\n", - " -0.000s - 0 - 3 - local_useless_rebroadcast - 0\n", - " -0.000s - 0 - 11 - local_useless_slice - 0\n", - " -0.000s - 0 - 94 - local_useless_switch - 0\n", - " -0.000s - 0 - 157 - local_view_op - 0\n", - " -0.000s - 0 - 94 - local_useless_elemwise_comparison - 0\n", - " -0.000s - 3 - 97 - local_useless_elemwise - 0\n", - " 0.000s - in 9 optimization that were not used (display those with runtime greater than 0)\n", - "\n", - " 0.002225s - ('merge1', 'MergeOptimizer', 1, 162, 153) - 0.000s\n", - " MergeOptimizer\n", - " nb fail= 0 merged= 42 constant= 33\n", - " time replace=0.00 validate=0.00 callback=0.00\n", - " 0.001299s - ('local_inplace_setsubtensor', 'TopoOptimizer', 37, 44, 44) - 0.000s\n", - " TopoOptimizer local_inplace_setsubtensor\n", - " nb_node (start, end, changed) (44, 44, 4)\n", - " init io_toposort 5.125999450683594e-05\n", - " loop time 0.001239776611328125\n", - " callback_time 0.0007669925689697266\n", - " 0.000679s - 
('local_IncSubtensor_serialize', 'TopoOptimizer', 6, 146, 146) - 0.000s\n", - " TopoOptimizer pre_local_IncSubtensor_serialize\n", - " nb_node (start, end, changed) (146, 146, 1)\n", - " init io_toposort 0.00016379356384277344\n", - " loop time 0.0005068778991699219\n", - " callback_time 7.82012939453125e-05\n", - " 0.000599s - ('blas_opt_inplace', 'TopoOptimizer', 42, 44, 44) - 0.000s\n", - " TopoOptimizer InplaceBlasOpt\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.054473876953125e-05\n", - " loop time 0.0005350112915039062\n", - " callback_time 0.0\n", - " 0.000420s - ('uncanonicalize', 'EquilibriumOptimizer', 18, 68, 68) - 0.000s\n", - " EquilibriumOptimizer uncanonicalize\n", - " time 0.000s for 1 passes\n", - " nb nodes (start, end, max) 68 68 68\n", - " time io_toposort 0.000s\n", - " time in local optimizers 0.000s\n", - " time in global optimizers 0.000s\n", - " time in final optimizers 0.000s\n", - " time in cleanup optimizers 0.000s\n", - " 0 - 0.000s 0 (0.000s in global opts, 0.000s io_toposort) - 68 nodes - \n", - " Global, final and clean up optimizers\n", - " Iter 0\n", - " TopoOptimizer topo_constant_folding\n", - " nb_node (start, end, changed) (68, 68, 0)\n", - " init io_toposort 7.963180541992188e-05\n", - " loop time 4.887580871582031e-05\n", - " callback_time 0.0\n", - " 0.000415s - ('merge2', 'MergeOptimizer', 23, 47, 44) - 0.000s\n", - " MergeOptimizer\n", - " nb fail= 0 merged= 10 constant= 7\n", - " time replace=0.00 validate=0.00 callback=0.00\n", - " 0.000296s - ('inline_ofg_expansion', 'TopoOptimizer', 0, 162, 162) - 0.000s\n", - " TopoOptimizer inline_ofg_expansion\n", - " nb_node (start, end, changed) (162, 162, 0)\n", - " init io_toposort 0.00018310546875\n", - " loop time 0.00010323524475097656\n", - " callback_time 0.0\n", - " 0.000284s - ('c_blas_destructive', 'TopoOptimizer', 44, 44, 44) - 0.000s\n", - " TopoOptimizer c_blas_destructive\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init 
io_toposort 5.53131103515625e-05\n", - " loop time 0.00022101402282714844\n", - " callback_time 0.0\n", - " 0.000249s - ('gpuablas_opt_inplace', 'TopoOptimizer', 43, 44, 44) - 0.000s\n", - " TopoOptimizer InplaceGpuaBlasOpt\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.888938903808594e-05\n", - " loop time 0.0001709461212158203\n", - " callback_time 0.0\n", - " 0.000241s - ('local_dnna_conv_inplace', 'TopoOptimizer', 45, 44, 44) - 0.000s\n", - " TopoOptimizer local_dnna_conv_inplace\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.7697296142578125e-05\n", - " loop time 0.00016760826110839844\n", - " callback_time 0.0\n", - " 0.000212s - ('specialize_device', 'EquilibriumOptimizer', 19, 68, 68) - 0.000s\n", - " EquilibriumOptimizer specialize_device\n", - " time 0.000s for 1 passes\n", - " nb nodes (start, end, max) 68 68 68\n", - " time io_toposort 0.000s\n", - " time in local optimizers 0.000s\n", - " time in global optimizers 0.000s\n", - " time in final optimizers 0.000s\n", - " time in cleanup optimizers 0.000s\n", - " 0 - 0.000s 0 (0.000s in global opts, 0.000s io_toposort) - 68 nodes - \n", - " 0.000201s - ('local_elemwise_alloc', 'TopoOptimizer', 11, 67, 67) - 0.000s\n", - " TopoOptimizer local_elemwise_alloc\n", - " nb_node (start, end, changed) (67, 67, 0)\n", - " init io_toposort 8.153915405273438e-05\n", - " loop time 0.00011348724365234375\n", - " callback_time 0.0\n", - " 0.000200s - ('add_destroy_handler', 'AddDestroyHandler', 24, 44, 44) - 0.000s\n", - " 0.000200s - ('AbstractConvCheck', 'TopoOptimizer', 20, 68, 68) - 0.000s\n", - " TopoOptimizer AbstractConvCheck\n", - " nb_node (start, end, changed) (68, 68, 0)\n", - " init io_toposort 8.106231689453125e-05\n", - " loop time 0.00011086463928222656\n", - " callback_time 0.0\n", - " 0.000167s - ('local_log_sum_exp', 'TopoOptimizer', 12, 67, 67) - 0.000s\n", - " TopoOptimizer local_log_sum_exp\n", - " nb_node (start, end, changed) (67, 67, 
0)\n", - " init io_toposort 7.939338684082031e-05\n", - " loop time 8.416175842285156e-05\n", - " callback_time 0.0\n", - " 0.000165s - ('local_fill_to_alloc', 'TopoOptimizer', 10, 67, 67) - 0.000s\n", - " TopoOptimizer local_fill_to_alloc\n", - " nb_node (start, end, changed) (67, 67, 0)\n", - " init io_toposort 8.082389831542969e-05\n", - " loop time 7.700920104980469e-05\n", - " callback_time 0.0\n", - " 0.000108s - ('local_advincsub1_gpua_inplace', 'TopoOptimizer', 25, 44, 44) - 0.000s\n", - " TopoOptimizer local_advincsub1_gpua_inplace\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.698204040527344e-05\n", - " loop time 4.2438507080078125e-05\n", - " callback_time 0.0\n", - " 0.000103s - ('local_inplace_sparse_block_gemv', 'TopoOptimizer', 38, 44, 44) - 0.000s\n", - " TopoOptimizer local_inplace_sparse_block_gemv\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 6.031990051269531e-05\n", - " loop time 3.170967102050781e-05\n", - " callback_time 0.0\n", - " 0.000099s - ('crossentropy_to_crossentropy_with_softmax', 'FromFunctionOptimizer', 17, 68, 68) - 0.000s\n", - " 0.000098s - ('make_ger_destructive', 'TopoOptimizer', 46, 44, 44) - 0.000s\n", - " TopoOptimizer make_scipy_blas_destructive\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 6.198883056640625e-05\n", - " loop time 3.0040740966796875e-05\n", - " callback_time 0.0\n", - " 0.000097s - ('cond_make_inplace', 'TopoOptimizer', 50, 44, 44) - 0.000s\n", - " TopoOptimizer cond_make_inplace\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.888938903808594e-05\n", - " loop time 3.314018249511719e-05\n", - " callback_time 0.0\n", - " 0.000096s - ('local_inplace_gpu_magma_cholesky', 'TopoOptimizer', 34, 44, 44) - 0.000s\n", - " TopoOptimizer local_inplace_gpu_magma_cholesky\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.650520324707031e-05\n", - " loop time 
3.170967102050781e-05\n", - " callback_time 0.0\n", - " 0.000095s - ('local_batch_norm_inference_inplace', 'TopoOptimizer', 26, 44, 44) - 0.000s\n", - " TopoOptimizer local_batch_norm_inference_inplace\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.2928924560546875e-05\n", - " loop time 3.170967102050781e-05\n", - " callback_time 0.0\n", - " 0.000095s - ('local_inplace_DiagonalSubtensor', 'TopoOptimizer', 32, 44, 44) - 0.000s\n", - " TopoOptimizer local_inplace_DiagonalSubtensor\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.030632019042969e-05\n", - " loop time 4.00543212890625e-05\n", - " callback_time 0.0\n", - " 0.000093s - ('local_inplace_sparseblockouter', 'TopoOptimizer', 41, 44, 44) - 0.000s\n", - " TopoOptimizer local_inplace_sparseblockouter\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.459785461425781e-05\n", - " loop time 3.123283386230469e-05\n", - " callback_time 0.0\n", - " 0.000093s - ('local_batch_norm_inplace_output', 'TopoOptimizer', 27, 44, 44) - 0.000s\n", - " TopoOptimizer local_batch_norm_inplace_output\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.1975250244140625e-05\n", - " loop time 3.170967102050781e-05\n", - " callback_time 0.0\n", - " 0.000092s - ('local_inplace_sparseblockgemv', 'TopoOptimizer', 40, 44, 44) - 0.000s\n", - " TopoOptimizer local_inplace_sparseblockgemv\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.221366882324219e-05\n", - " loop time 3.0994415283203125e-05\n", - " callback_time 0.0\n", - " 0.000091s - ('local_inplace_sparse_block_outer', 'TopoOptimizer', 39, 44, 44) - 0.000s\n", - " TopoOptimizer local_inplace_sparse_block_outer\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.340576171875e-05\n", - " loop time 3.0040740966796875e-05\n", - " callback_time 0.0\n", - " 0.000091s - ('local_inplace_gpu_cholesky', 'TopoOptimizer', 33, 44, 44) - 
0.000s\n", - " TopoOptimizer local_inplace_gpu_cholesky\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.054473876953125e-05\n", - " loop time 3.123283386230469e-05\n", - " callback_time 0.0\n", - " 0.000090s - ('local_inplace_gpu_solve', 'TopoOptimizer', 36, 44, 44) - 0.000s\n", - " TopoOptimizer local_inplace_gpu_solve\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.078315734863281e-05\n", - " loop time 3.170967102050781e-05\n", - " callback_time 0.0\n", - " 0.000090s - ('random_make_inplace', 'TopoOptimizer', 51, 44, 44) - 0.000s\n", - " TopoOptimizer random_make_inplace\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.3882598876953125e-05\n", - " loop time 3.1948089599609375e-05\n", - " callback_time 0.0\n", - " 0.000089s - ('local_inplace_AdvancedIncSubtensor1', 'TopoOptimizer', 31, 44, 44) - 0.000s\n", - " TopoOptimizer local_inplace_AdvancedIncSubtensor1\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.030632019042969e-05\n", - " loop time 3.147125244140625e-05\n", - " callback_time 0.0\n", - " 0.000089s - ('local_inplace_AdvancedIncSubtensor', 'TopoOptimizer', 30, 44, 44) - 0.000s\n", - " TopoOptimizer local_inplace_AdvancedIncSubtensor\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.078315734863281e-05\n", - " loop time 3.0994415283203125e-05\n", - " callback_time 0.0\n", - " 0.000089s - ('local_batch_norm_inplace_running_mean', 'TopoOptimizer', 28, 44, 44) - 0.000s\n", - " TopoOptimizer local_batch_norm_inplace_running_mean\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.14984130859375e-05\n", - " loop time 2.9087066650390625e-05\n", - " callback_time 0.0\n", - " 0.000087s - ('local_inplace_gpu_magma_matrix_inverse', 'TopoOptimizer', 35, 44, 44) - 0.000s\n", - " TopoOptimizer local_inplace_gpu_magma_matrix_inverse\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 
5.125999450683594e-05\n", - " loop time 3.123283386230469e-05\n", - " callback_time 0.0\n", - " 0.000087s - ('mrg_random_make_inplace', 'TopoOptimizer', 52, 44, 44) - 0.000s\n", - " TopoOptimizer random_make_inplace_mrg\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.1975250244140625e-05\n", - " loop time 3.0994415283203125e-05\n", - " callback_time 0.0\n", - " 0.000087s - ('local_batch_norm_inplace_running_var', 'TopoOptimizer', 29, 44, 44) - 0.000s\n", - " TopoOptimizer local_batch_norm_inplace_running_var\n", - " nb_node (start, end, changed) (44, 44, 0)\n", - " init io_toposort 5.030632019042969e-05\n", - " loop time 2.8848648071289062e-05\n", - " callback_time 0.0\n", - " 0.000079s - ('gpua_elemwise_fusion', 'FusionOptimizer', 22, 47, 47) - 0.000s\n", - " FusionOptimizer\n", - " nb_iter 1\n", - " nb_replacement 0\n", - " nb_inconsistency_replace 0\n", - " validate_time 0.0\n", - " callback_time 0.0\n", - " time_toposort 6.0558319091796875e-05\n", - " 0.000072s - ('gpua_inplace_opt', 'InplaceElemwiseOptimizer', 47, 44, 44) - 0.000s\n", - " InplaceElemwiseOptimizer \n", - " node_before 44\n", - " nb_call_replace 0\n", - " nb_call_validate 0\n", - " nb_inconsistent 0\n", - " 0.000036s - ('merge1.1', 'MergeOptimizer', 5, 146, 146) - 0.000s\n", - " MergeOptimizer\n", - " nb fail= 0 merged= 1 constant= 1\n", - " time replace=0.00 validate=0.00 callback=0.00\n", - " 0.000009s - ('merge3', 'MergeOptimizer', 53, 44, 44) - 0.000s\n", - " MergeOptimizer\n", - " nb fail= 0 merged= 0 constant= 0\n", - " time replace=0.00 validate=0.00 callback=0.00\n", - " 0.000003s - ('merge1.2', 'MergeOptimizer', 8, 69, 69) - 0.000s\n", - " MergeOptimizer\n", - " nb fail= 0 merged= 0 constant= 0\n", - " time replace=0.00 validate=0.00 callback=0.00\n", - "\n", - "Here are tips to potentially make your code run faster\n", - " (if you think of new ones, suggest them on the mailing list).\n", - " Test them first, as they are not guaranteed to always provide a 
speedup.\n", - " - Try the Aesara flag floatX=float32\n" - ] - } - ], - "source": [ - "trajectory_generator.profile.summary()" + "posterior_trace = az.from_pymc3(posterior[BURNIN:], model=pymc3_model)\n", + "az.plot_trace(posterior_trace);" ] } ], @@ -1364,7 +496,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.9.6" } }, "nbformat": 4,